In [29]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder, StandardScaler
# from sklearn.svm import SVC
# from sklearn.metrics import classification_report, accuracy_score

# # Load data
# data = pd.read_csv(r'D:\PYTHON\Edunet2.0\day-7\heart.csv')

# # Encode categorical columns
# categorical_cols = ['Sex', 'ChestPain', 'RestECG', 'Slope', 'Thal', 'AHD']
# label_encoders = {}
# for col in categorical_cols:
#     le = LabelEncoder()
#     data[col] = le.fit_transform(data[col])
#     label_encoders[col] = le

# # Features and target
# X = data.drop('AHD', axis=1)
# y = data['AHD']

# # Scale features
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

# # Train/test split
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# # Train SVM
# svm = SVC(C=1.0,kernel='rbf', random_state=42)
# svm.fit(X_train, y_train)

# # Predict and evaluate
# y_pred = svm.predict(X_test)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print(classification_report(y_test, y_pred))

In [30]:
# Improved SVM pipeline for higher accuracy
# 1. Encode all categorical columns (including AHD)
# 2. Scale features
# 3. Use GridSearchCV for hyperparameter tuning
# 4. Stratified train/test split

In [31]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt

In [32]:
df = pd.read_csv(r'D:\PYTHON\Edunet2.0\day-7\heart.csv')
df = df.copy()
df.head()

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
0,63,1,typical,145,233,1,2,150,0,2.3,3,0,fixed,No
1,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3,normal,Yes
2,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2,reversable,Yes
3,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0,normal,No
4,41,0,nontypical,130,204,0,2,172,0,1.4,1,0,normal,No


In [33]:
# Encode categorical columns
categorical_cols = ['Sex', 'ChestPain', 'RestECG', 'Slope', 'Thal', 'AHD']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [34]:
# Features and target
X = df.drop('AHD', axis=1)
y = df['AHD']

In [35]:
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Stratified train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

In [36]:
# Hyperparameter tuning using GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 0.01, 0.1, 1, 10],
    'kernel': ['rbf', 'linear', 'poly']
}
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(svm.SVC(), param_grid, refit=True, cv=cv, n_jobs=-1, scoring='accuracy')
grid.fit(X_train, y_train)
print("Best parameters:", grid.best_params_)
print("Best cross-validation accuracy:", grid.best_score_)

Best parameters: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}
Best cross-validation accuracy: 0.8222789115646257


In [37]:
# Evaluate the best model
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Test Accuracy: 0.8688524590163934
              precision    recall  f1-score   support

           0       0.90      0.85      0.88        33
           1       0.83      0.89      0.86        28

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61

