In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.inspection import permutation_importance
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset; 'heart.csv' is resolved relative to the current working directory.
# Later cells read a 'target' column from this frame and treat every other column as a feature.
data = pd.read_csv('heart.csv')


In [3]:
# Features and target
# Label column on one side, every remaining column as a predictor on the other
y = data['target']
X = data.drop(columns='target')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Standardise the features: the scaler is fitted on the training split only and
# then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# RBF-kernel SVM (probability estimates enabled, fixed seed)
svm_model = SVC(kernel='rbf', probability=True, random_state=42)
svm_model.fit(X_train_scaled, y_train)

# Predict on the held-out split and print the per-class metrics
y_pred = svm_model.predict(X_test_scaled)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.83      0.88       102
           1       0.85      0.94      0.89       103

    accuracy                           0.89       205
   macro avg       0.89      0.89      0.89       205
weighted avg       0.89      0.89      0.89       205



In [5]:
# Confusion matrix heatmap: drawn on an explicit Axes, saved to
# 'confusion_matrix.png', then the figure is closed
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
fig.tight_layout()
fig.savefig('confusion_matrix.png')
plt.close(fig)

In [6]:
# Permutation importance of each feature, measured on the scaled test split
perm_importance = permutation_importance(
    svm_model,
    X_test_scaled,
    y_test,
    n_repeats=10,
    random_state=42,
)

# One row per feature (mean and standard deviation over the repeats),
# ordered from most to least important
feature_importance_df = pd.DataFrame(
    {
        'feature': X.columns,
        'importance': perm_importance.importances_mean,
        'std': perm_importance.importances_std,
    }
).sort_values('importance', ascending=False)

In [7]:
# Bar chart of mean permutation importance with error bars (std over repeats),
# saved to 'feature_importance.png'; the figure is closed after saving
fig, ax = plt.subplots(figsize=(10, 6))

features = feature_importance_df['feature']
means = feature_importance_df['importance']
spreads = feature_importance_df['std']

ax.bar(features, means)
# Categorical bars sit at x = 0..n-1, so integer positions line the markers up with them
ax.errorbar(range(len(feature_importance_df)), means, yerr=spreads,
            fmt='o', color='red', capsize=5)

ax.set_title('Feature Importance (Permutation Importance)')
ax.set_xlabel('Features')
ax.set_ylabel('Mean Importance')
plt.xticks(rotation=45, ha='right')
fig.tight_layout()
fig.savefig('feature_importance.png')
plt.close(fig)

print("\nFeature Importance:")
print(feature_importance_df)


Feature Importance:
     feature  importance       std
12      thal    0.053171  0.011839
1        sex    0.046829  0.015610
11        ca    0.044390  0.015799
2         cp    0.042439  0.013279
5        fbs    0.033659  0.006341
6    restecg    0.023902  0.007697
7    thalach    0.022439  0.014503
0        age    0.014634  0.006899
10     slope    0.014146  0.008001
9    oldpeak    0.011220  0.014804
3   trestbps    0.009756  0.011544
8      exang    0.007317  0.004497
4       chol    0.004390  0.011005


In [8]:
# Preventive Care Strategy Recommendations
def generate_preventive_care_recommendations(importance_df):
    """Build preventive-care recommendation strings from feature importances.

    Parameters
    ----------
    importance_df : pandas.DataFrame
        Must provide a 'feature' column (feature names) and an 'importance'
        column (numeric scores). When a feature appears more than once, the
        first matching row is used.

    Returns
    -------
    list of str
        One message per factor from the fixed list below that is present in
        ``importance_df``. Messages follow the order of that list, not the
        order or ranking of the rows in ``importance_df``. Factors that are
        absent are skipped instead of raising ``IndexError``.

    Notes
    -----
    The wording of each message is fixed and does not adapt to the score:
    the cholesterol text, for example, always says "critical factor"
    whatever the importance value is.
    NOTE(review): the factors are hard-coded as "modifiable"; whether each
    one (e.g. 'ca') really is should be confirmed with a domain expert.
    """
    # Message templates keyed by factor name; built once rather than per
    # loop iteration. Dict order defines the output order.
    templates = {
        'chol': "Cholesterol Management: This is a critical factor with importance {importance:.4f}. "
                "Consider dietary interventions, regular exercise, and potential medication.",

        'trestbps': "Blood Pressure Control: With an importance of {importance:.4f}, "
                    "focus on regular blood pressure monitoring, stress reduction techniques, "
                    "and potentially antihypertensive medications.",

        'thalach': "Heart Rate Management: The maximum heart rate achieved (importance: {importance:.4f}) "
                   "suggests the need for cardiovascular fitness assessments and tailored exercise programs.",

        'oldpeak': "ST Depression Monitoring: With importance {importance:.4f}, "
                   "recommend regular cardiac stress tests and close monitoring of exercise-induced cardiac stress.",

        'ca': "Cardiovascular Assessment: The number of major vessels (importance: {importance:.4f}) "
              "indicates the need for comprehensive cardiovascular diagnostic procedures.",
    }

    recommendations = []
    for factor, template in templates.items():
        matches = importance_df.loc[importance_df['feature'] == factor, 'importance']
        if matches.empty:
            # The dataset may not contain every factor; skip rather than crash.
            continue
        recommendations.append(template.format(importance=matches.iloc[0]))

    return recommendations

In [9]:
# Generate the recommendation messages from the importance table and list them as bullets
preventive_care_recommendations = generate_preventive_care_recommendations(
    feature_importance_df
)

print("\nPreventive Care Recommendations:")
for rec in preventive_care_recommendations:
    print("- " + rec)


Preventive Care Recommendations:
- Cholesterol Management: This is a critical factor with importance 0.0044. Consider dietary interventions, regular exercise, and potential medication.
- Blood Pressure Control: With an importance of 0.0098, focus on regular blood pressure monitoring, stress reduction techniques, and potentially antihypertensive medications.
- Heart Rate Management: The maximum heart rate achieved (importance: 0.0224) suggests the need for cardiovascular fitness assessments and tailored exercise programs.
- ST Depression Monitoring: With importance 0.0112, recommend regular cardiac stress tests and close monitoring of exercise-induced cardiac stress.
- Cardiovascular Assessment: The number of major vessels (importance: 0.0444) indicates the need for comprehensive cardiovascular diagnostic procedures.
