In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE, SelectKBest, chi2, mutual_info_classif
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Load the heart-disease CSV and separate the label column ('target')
# from the remaining columns, which are used as the feature matrix.
df = pd.read_csv('../data/heart_disease_clean.csv')
y = df['target']
X = df.drop('target', axis=1)

# Fit a seeded random forest on every feature, then rank the features by
# the importance scores the fitted model exposes (largest first) and draw
# them as a bar chart.
rf = RandomForestClassifier(random_state=42).fit(X, y)
importances = pd.Series(rf.feature_importances_, index=X.columns)
importances = importances.sort_values(ascending=False)
importances.plot(
    kind='bar',
    figsize=(10, 4),
    title='Feature Importances (Random Forest)',
)

# Recursive feature elimination (RFE) with logistic regression: the model is
# refitted repeatedly and the weakest feature is dropped each round until
# 8 features remain.
#
# RFE ranks features by the size of the fitted coefficients, and coefficient
# sizes are only comparable between features that share a scale. The raw
# columns are therefore rescaled to [0, 1] before fitting; this also helps
# the solver converge within max_iter. Column names are still taken from X,
# since scaling preserves column order.
X_scaled = MinMaxScaler().fit_transform(X)
lr = LogisticRegression(max_iter=1000)
rfe = RFE(lr, n_features_to_select=8)
rfe.fit(X_scaled, y)
selected_rfe = X.columns[rfe.support_].tolist()
print('RFE selected:', selected_rfe)

# The chi-squared score function only accepts non-negative inputs, so every
# feature is rescaled to [0, 1] first; the 8 best-scoring features are kept.
X_nonneg = MinMaxScaler().fit_transform(X)
chi2_selector = SelectKBest(score_func=chi2, k=8).fit(X_nonneg, y)
# Map the boolean support mask back onto the original column names.
chi2_selected = X.columns[chi2_selector.get_support()]
print('Chi2 selected:', list(chi2_selected))

def _seeded_mutual_info(features, labels):
    """Return mutual-information scores computed with a fixed random seed.

    mutual_info_classif is randomized (it perturbs continuous features with
    small noise), so without a seed the scores, and hence the selected
    features, can differ from run to run.
    """
    return mutual_info_classif(features, labels, random_state=42)


# Keep the 8 features with the highest mutual information with the target.
# The seeded scorer makes the selection reproducible across runs.
mi = SelectKBest(score_func=_seeded_mutual_info, k=8)
mi.fit(X, y)
mi_selected = X.columns[mi.get_support()]
print('Mutual Information selected:', list(mi_selected))
