In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

# Load the dataset
file_path = 'C:/Users/ashwa/OneDrive/Desktop/cleaned_standardized_heart_data.csv'  # Adjust the path as needed
heart_data = pd.read_csv(file_path)

# Select features for the model
selected_features = ['physicalhealthdays', 'weightinkilograms', 'bmi',
                     'heightinmeters', 'sleephours', 'generalhealth',
                     'hadangina', 'agecategory']
X = heart_data[selected_features].copy()
y = heart_data['hadheartattack']

# Handle missing values (imputation)
X = X.fillna(X.median(numeric_only=True))

# Convert categorical features to minimal encodings (if necessary)
X = pd.get_dummies(X, drop_first=True)

# Address class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split the resampled dataset into training and testing sets
X_train_smote, X_test_smote, y_train_smote, y_test_smote = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42
)

# Train a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_smote, y_train_smote)
y_pred_rf = rf_model.predict(X_test_smote)

# Evaluate the Random Forest model
accuracy_rf = accuracy_score(y_test_smote, y_pred_rf)
report_rf = classification_report(y_test_smote, y_pred_rf)

print("=== Random Forest Results ===")
print(f"Accuracy: {accuracy_rf * 100:.2f}%")
print("\nClassification Report:\n", report_rf)

=== Random Forest Results ===
Accuracy: 94.79%

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.95      0.95     84126
           1       0.95      0.95      0.95     83884

    accuracy                           0.95    168010
   macro avg       0.95      0.95      0.95    168010
weighted avg       0.95      0.95      0.95    168010



In [None]:
# Train a Gradient Boosting model
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_model.fit(X_train_smote, y_train_smote)
y_pred_gb = gb_model.predict(X_test_smote)

# Evaluate the Gradient Boosting model
accuracy_gb = accuracy_score(y_test_smote, y_pred_gb)
report_gb = classification_report(y_test_smote, y_pred_gb)

print("=== Gradient Boosting Results ===")
print(f"Accuracy: {accuracy_gb * 100:.2f}%")
print("\nClassification Report:\n", report_gb)

=== Gradient Boosting Results ===
Accuracy: 88.37%

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.93      0.89     84126
           1       0.92      0.83      0.88     83884

    accuracy                           0.88    168010
   macro avg       0.89      0.88      0.88    168010
weighted avg       0.89      0.88      0.88    168010

