In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Load the raw dataset from the current working directory.
df = pd.read_csv("heart_attack_prediction_dataset.csv")

# 'Blood Pressure' is a string column split on '/': the first piece becomes
# 'Systolic BP', the second 'Diastolic BP'. Pieces that cannot be parsed as
# numbers turn into NaN (errors='coerce'), so those rows are removed by the
# dropna() at the end of this section.
bp_split = df['Blood Pressure'].str.split('/', expand=True)
for bp_part, bp_column in enumerate(['Systolic BP', 'Diastolic BP']):
    df[bp_column] = pd.to_numeric(bp_split[bp_part], errors='coerce')

# The original string column and the patient identifier are not kept.
df = df.drop(columns=['Blood Pressure', 'Patient ID'])

# One-hot encode the categorical columns. drop_first=True omits the first
# level of each column, so only the remaining levels appear as dummy columns.
categorical_cols = ['Sex', 'Country', 'Continent', 'Hemisphere', 'Diet']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Discard every row that still contains a missing value in any column.
df = df.dropna()

# Columns used as model inputs. The order here is the column order of X.
# NOTE(review): presumably produced by an earlier feature-ranking step that is
# not visible in this cell -- confirm it was computed on training data only.
top_features = [
    # numeric / ordinal measurements
    'Age',
    # one-hot dummies created by pd.get_dummies above
    'Diet_Healthy',
    'Country_France',
    'Country_South Africa',
    'Country_United Kingdom',
    'Exercise Hours Per Week',
    'Country_India',
    'BMI',
    'Continent_Australia',
    'Alcohol Consumption',
]

# Target vector and feature matrix.
y = df['Heart Attack Risk']
X = df[top_features]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
# NOTE(review): the split is not stratified on y; consider stratify=y if the
# classes are imbalanced -- left unchanged so metrics stay comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 1: standardise every feature (the scaler is fitted on the training
# data only, because it lives inside the pipeline).
scaler_step = ('scaler', StandardScaler())

# Step 2: RBF-kernel SVM. class_weight='balanced' reweights classes inversely
# to their frequency; probability=True is required for predict_proba().
svm_step = (
    'svm',
    SVC(
        kernel='rbf',
        probability=True,
        class_weight='balanced',
        random_state=42,
    ),
)

svm_rbf_balanced_pipeline = Pipeline(steps=[scaler_step, svm_step])

# Fit scaler and classifier together on the training portion.
svm_rbf_balanced_pipeline.fit(X_train, y_train)

# Hard class predictions and the predicted probability of the positive class
# (second column of predict_proba) on the held-out rows.
# NOTE(review): scikit-learn documents that SVC.predict_proba may be
# inconsistent with SVC.predict; decision_function is an alternative score
# source for ROC AUC.
y_pred = svm_rbf_balanced_pipeline.predict(X_test)
y_pred_proba = svm_rbf_balanced_pipeline.predict_proba(X_test)[:, 1]

# ROC AUC is computed from the probability scores, not from the hard labels.
print("ROC AUC:", roc_auc_score(y_test, y_pred_proba))

# The remaining metrics all compare hard predictions against the true labels.
label_metrics = (
    ("Accuracy:", accuracy_score),
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
)
for metric_label, metric_fn in label_metrics:
    print(metric_label, metric_fn(y_test, y_pred))


ROC AUC: 0.503170559094126
Accuracy: 0.5008556759840274
F1 Score: 0.4277305428384565
Precision: 0.36293007769145397
Recall: 0.5207006369426752
