In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE, chi2, SelectKBest

In [None]:
# Load the cleaned heart-disease dataset from the working directory.
try:
    df = pd.read_csv('cleaned_heart_disease.csv')
except FileNotFoundError:
    print("Error: 'cleaned_heart_disease.csv' not found.")
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit() asks
    # the kernel to shut down (discarding all state) rather than failing this
    # cell cleanly. Propagating the exception stops execution here with a
    # normal traceback and keeps the session alive.
    raise
print("Cleaned dataset loaded successfully.")

# Split the frame into the feature matrix (every column except 'target')
# and the label vector (the 'target' column).
X = df.drop('target', axis=1)
y = df['target']

In [None]:
print("\nStep 1: Calculating Feature Importance using Random Forest...")

# Fit a 100-tree forest with a fixed seed; fit() returns the estimator itself.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# Pair every importance score with its column name, highest score first.
importances = (
    pd.Series(rf.feature_importances_, index=X.columns)
    .sort_values(ascending=False)
)

# Horizontal bar chart: one bar per feature, ordered by importance.
plt.figure(figsize=(12, 8))
ax = sns.barplot(x=importances, y=importances.index)
ax.set_title('Feature Importance from Random Forest')
ax.set_xlabel('Importance Score')
ax.set_ylabel('Features')
plt.show()

In [None]:
print("\nStep 2: Applying Recursive Feature Elimination (RFE)...")

# Recursively eliminate features, ranked by a seeded random forest, until 10 remain.
rfe = RFE(
    estimator=RandomForestClassifier(random_state=42),
    n_features_to_select=10,
).fit(X, y)

# rfe.support_ is a boolean mask over the columns of X marking the kept features.
rfe_features = X.columns[rfe.support_]
print("Selected features via RFE:", list(rfe_features))


In [None]:
print("\nStep 3: Applying Chi-Square Test...")

from sklearn.preprocessing import MinMaxScaler

# Rescale every feature with MinMaxScaler before scoring (scikit-learn's chi2
# expects non-negative feature values). The result is a plain array, so the
# column names are taken from X further down.
X_chi = MinMaxScaler().fit_transform(df.drop(columns='target'))

# Score each feature against the target; k=10 only affects which columns
# the selector would keep, not the scores reported below.
chi2_selector = SelectKBest(score_func=chi2, k=10).fit(X_chi, y)

chi2_scores = pd.DataFrame(
    {
        'Feature': X.columns,
        'Chi2_Score': chi2_selector.scores_,
    }
)
print("\nChi-Square Scores for Features:")
print(chi2_scores.sort_values(by='Chi2_Score', ascending=False))

In [None]:
print("\nSelecting final features based on Random Forest importance...")

# `importances` is already sorted in descending order, so the first 12
# index labels are the 12 highest-ranked features.
final_features = importances.head(12).index.tolist()
print("Final selected features:", final_features)

# Keep only the selected features plus the label column, then persist the result.
df_reduced = df[[*final_features, 'target']]
df_reduced.to_csv('feature_selected_dataset.csv', index=False)
print("\nReduced dataset saved to 'feature_selected_dataset.csv'.")
print("Shape of reduced dataset:", df_reduced.shape)