In [1]:
pip install imbalanced-learn streamlit

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from imblearn.under_sampling import TomekLinks

In [3]:
# Load Heart Disease dataset
# The path is relative, so it is resolved against the kernel's current working
# directory; the CSV must live in a sibling `datasets/` folder.
heart_df = pd.read_csv("../datasets/heart.csv")


In [4]:
# Quick look at the data: first five rows, then the class balance of `target`.
print("Heart Disease Dataset:", heart_df.head(), sep="\n")
print(
    "Original Target Value Counts:\n",
    heart_df['target'].value_counts(),
)

Heart Disease Dataset:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
Original Target Value Counts:
 target
1    165
0    138
Name: count, dtype: int64


In [5]:
# Separate the predictors (every column except `target`) from the label column.
X, y = heart_df.drop('target', axis=1), heart_df['target']

In [6]:
# Hold out 20% of the rows as a test set *before* any resampling or scaling so
# that nothing learned from the test rows can leak into training.
# `stratify=y` keeps the class proportions the same in both partitions and the
# fixed `random_state` makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=44,
    test_size=0.2,
)

In [7]:

# Under-sample the training partition only (the test set is left untouched) by
# removing Tomek links. 'auto' is the library default, written out for clarity.
# NOTE(review): the resampler sees the raw, unscaled features here because
# scaling happens in a later cell - confirm this ordering is intentional.
tomek = TomekLinks(sampling_strategy='auto')
X_train_resampled, y_train_resampled = tomek.fit_resample(
    X_train,
    y_train,
)

In [8]:
# Show the class balance of the training labels after Tomek-link removal.
print(
    "After Tomek Links Resampling (Train Set):",
    pd.Series(y_train_resampled).value_counts(),
    sep="\n",
)


After Tomek Links Resampling (Train Set):
target
0    110
1    108
Name: count, dtype: int64


In [9]:
# Standardise the features - SVMs are sensitive to feature scale.
# The scaler is fitted on the resampled training data only; the test set is
# transformed with those same training statistics (never re-fitted).
scaler = StandardScaler()
# The tuple is evaluated left to right, so the fit happens before the test transform.
X_train_scaled, X_test_scaled = (
    scaler.fit_transform(X_train_resampled),
    scaler.transform(X_test),
)

In [10]:
# Fit a linear-kernel support vector classifier on the scaled, resampled
# training data.
model = svm.SVC(
    random_state=44,
    kernel='linear',
)
model.fit(X_train_scaled, y_train_resampled)

In [11]:
# Make predictions
# Predict labels for the held-out test rows, using the test features that were
# transformed with the scaler fitted on the training data.
y_pred = model.predict(X_test_scaled)

In [12]:
# Report test-set performance: three scalar metrics followed by the confusion matrix.
print("\nHeart Disease Prediction Metrics:")
for _label, _metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    # Every metric is called as metric(true_labels, predicted_labels).
    print(_label, _metric_fn(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Heart Disease Prediction Metrics:
Accuracy: 0.8032786885245902
Precision: 0.8181818181818182
Recall: 0.8181818181818182
Confusion Matrix:
 [[22  6]
 [ 6 27]]


In [13]:
# Save the model and scaler
# Both objects are pickled together as one (model, scaler) tuple so that whoever
# loads the file gets the classifier and the exact scaler it was trained with.
# The `with` block guarantees the file handle is flushed and closed, even if
# pickling raises part-way through.
with open('heart_disease_model.sav', 'wb') as model_file:
    pickle.dump((model, scaler), model_file)