# Feature Selection


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, chi2, RFE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Read the processed CSV and separate the 'target' column (label) from the
# remaining columns (predictors).
df = pd.read_csv("data/processed/heart_processed.csv")
y = df['target']
X = df.drop('target', axis=1)

# Hold out 20% of the rows for testing; stratify on the label and fix the seed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit a random forest on the training split and rank the columns by the
# forest's `feature_importances_`, highest first.
rf = RandomForestClassifier(random_state=42, n_estimators=100)
rf.fit(X_train, y_train)
importances = rf.feature_importances_


def _by_score(pair):
    """Sort key: the importance half of a (column, importance) pair."""
    _, score = pair
    return score


feat_importance = list(zip(X.columns, importances))
feat_importance.sort(key=_by_score, reverse=True)

# Show at most the 15 top-ranked (column, importance) pairs.
print(feat_importance[:15])

# Univariate chi-squared selection of (at most) 10 features.
#
# chi2 only accepts non-negative inputs. Taking the absolute value satisfies
# that check but maps -v and +v to the same number, which destroys the ordering
# of any feature that has negative values and makes its chi2 score meaningless.
# Rescaling each feature to [0, 1] is non-negative *and* order-preserving.
# The scaler is fit on the training split only, so nothing leaks from the
# test split into the selection.
chi2_scaler = MinMaxScaler()
X_train_chi2 = chi2_scaler.fit_transform(X_train)

kbest = SelectKBest(score_func=chi2, k=min(10, X.shape[1]))
kbest.fit(X_train_chi2, y_train)

# get_support() is a boolean mask over the input columns, in column order.
kbest_feats = X.columns[kbest.get_support()]
print("Chi2 selected:", list(kbest_feats))

# Recursive feature elimination wrapped around a random forest, keeping at
# most 10 columns. fit() returns the fitted selector, so construction and
# fitting are chained.
rfe = RFE(
    estimator=RandomForestClassifier(random_state=42, n_estimators=100),
    n_features_to_select=min(X.shape[1], 10),
).fit(X_train, y_train)

# Boolean mask of the retained columns, applied to the column index.
rfe_feats = X.columns[rfe.get_support()]
print("RFE selected:", list(rfe_feats))

