In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib


In [2]:
# 1. Load Data
df = pd.read_csv('/content/heart_failure_clinical_records_dataset (1).csv')

In [3]:
# 2. Preprocessing
# Select features (Correlation analysis usually shows these are most important,
# but we will use all for now to be safe, or you can select specific ones)
X = df.drop(['DEATH_EVENT', 'time'], axis=1) # Drop 'time' as it causes data leakage (explained below)
y = df['DEATH_EVENT']

# ‚ö†Ô∏è CRITICAL FIX: Scaling
# SVMs essentially crash without scaling. We fit on Train, transform on Test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# IMPROVED TRAINING CODE (Run this to update your model)
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# 1. We keep the same X and y from before (Time is dropped)

# 2. Key Change: Add 'class_weight="balanced"'
# This forces the SVM to pay more attention to the minority class (Deaths)
svm = SVC(probability=True, class_weight='balanced', random_state=42)

param_grid = {
    'C': [0.1, 1, 10, 50],
    'gamma': [0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}

grid = GridSearchCV(svm, param_grid, refit=True, verbose=1, cv=5)
grid.fit(X_train_scaled, y_train)

print(f"‚úÖ Improved Best Params: {grid.best_params_}")



Fitting 5 folds for each of 24 candidates, totalling 120 fits
‚úÖ Improved Best Params: {'C': 10, 'gamma': 0.1, 'kernel': 'linear'}


In [5]:
# 3. New Evaluation
new_preds = grid.predict(X_test_scaled)
print("\n--- üìù Improved Classification Report ---")
print(classification_report(y_test, new_preds))




--- üìù Improved Classification Report ---
              precision    recall  f1-score   support

           0       0.77      0.73      0.75        41
           1       0.48      0.53      0.50        19

    accuracy                           0.67        60
   macro avg       0.62      0.63      0.62        60
weighted avg       0.68      0.67      0.67        60



In [6]:
# 4. Save the better model
import joblib
joblib.dump(grid.best_estimator_, 'heart_failure_model.pkl')
print("üíæ Updated Model Saved!")

üíæ Updated Model Saved!
