# **Importing libraries**

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.ensemble import RandomForestClassifier

#Loading and exploring the dataset

In [5]:
df = pd.read_csv('/content/Dataset.csv')

In [6]:
print(df.head())
print(df.info())
print(df.describe())

   age     sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0   63    Male  3.0     145.0  233.0  1.0      0.0    150.0    0.0      2.3   
1   37  Female  2.0     130.0  250.0  0.0      1.0    187.0    0.0      3.5   
2   41  Female  1.0     130.0  204.0  0.0      0.0    172.0    0.0      1.4   
3   56  Female  1.0     120.0  236.0  0.0      1.0    178.0    0.0      0.8   
4   57  Female  0.0     120.0  354.0  0.0      1.0    163.0    1.0      0.6   

   slope   ca  thal  target  
0    0.0  0.0   1.0    True  
1    0.0  0.0   2.0    True  
2    2.0  0.0   2.0    True  
3    2.0  0.0   2.0    True  
4    2.0  0.0   2.0    True  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       300 non-null    object 
 2   cp        302 non-null    float64
 3   trestbps  298 non-null    float64
 4   c

##Checking for missing values

In [7]:
df.isnull().sum()

Unnamed: 0,0
age,0
sex,3
cp,1
trestbps,5
chol,3
fbs,2
restecg,2
thalach,6
exang,1
oldpeak,2


In [8]:
# Handle missing values by filling them with the column mean
df = df.fillna(df.mean(numeric_only=True))

# Data Preprocessing

In [9]:
X = df.drop('target', axis=1)
y = df['target']

In [10]:
# Identify categorical columns
categorical_cols = X.select_dtypes(include=['object']).columns
print("Categorical columns:", categorical_cols)

Categorical columns: Index(['sex'], dtype='object')


In [11]:
# One-hot encode categorical columns
X_encoded = pd.get_dummies(X, drop_first=True)

In [12]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

In [13]:
# Standardize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Training

### K-Nearest Neighbors (KNN)

In [14]:
params_knn = {'n_neighbors': [3, 5, 7], 'weights': ['uniform', 'distance']}
grid_knn = GridSearchCV(KNeighborsClassifier(), params_knn, cv=5)
grid_knn.fit(X_train, y_train)

## Logistic Regression

In [15]:
lr = {'C': [0.01, 0.1, 1, 10], 'solver': ['liblinear', 'lbfgs']}
grid_lr = GridSearchCV(LogisticRegression(max_iter=1000), lr, cv=5)
grid_lr.fit(X_train, y_train)

## Decision Tree

In [16]:
dt = {'max_depth': [None, 5, 10], 'min_samples_split': [2, 5, 10]}

grid_dt = GridSearchCV(DecisionTreeClassifier(), dt, cv=5)
grid_dt.fit(X_train, y_train)

## Linear SVC

In [17]:
lsvc = {'C': [0.01, 0.1, 1, 10]}
grid_lsvc = GridSearchCV(LinearSVC(max_iter=10000), lsvc, cv=5)
grid_lsvc.fit(X_train, y_train)

## Kernelized SVC

In [18]:
ksvc = {'C': [0.1, 1, 10], 'kernel': ['rbf', 'poly'], 'gamma': ['scale', 'auto']}
grid_ksvc = GridSearchCV(SVC(), ksvc, cv=5)
grid_ksvc.fit(X_train, y_train)



##Random Forest

In [19]:
rf = {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20]}
grid_rf = GridSearchCV(RandomForestClassifier(), rf, cv=5)
grid_rf.fit(X_train, y_train)



# Making Predictions from Models

## K-Nearest Neighbors

In [20]:
y_pred_knn = grid_knn.predict(X_test)
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("KNN Recall:", recall_score(y_test, y_pred_knn))
print("KNN Precision:", precision_score(y_test, y_pred_knn))
print("KNN F1 Score:", f1_score(y_test, y_pred_knn))


KNN Accuracy: 0.9016393442622951
KNN Recall: 0.9375
KNN Precision: 0.8823529411764706
KNN F1 Score: 0.9090909090909091


## Logistic Regression

In [21]:
y_pred_lr = grid_lr.predict(X_test)
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
print("Logistic Regression Recall:", recall_score(y_test, y_pred_lr))
print("Logistic Regression Precision:", precision_score(y_test, y_pred_lr))
print("Logistic Regression F1 Score:", f1_score(y_test, y_pred_lr))


Logistic Regression Accuracy: 0.8524590163934426
Logistic Regression Recall: 0.875
Logistic Regression Precision: 0.8484848484848485
Logistic Regression F1 Score: 0.8615384615384616


## Decision Tree

In [22]:
y_pred_dt = grid_dt.predict(X_test)
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print("Decision Tree Recall:", recall_score(y_test, y_pred_dt))
print("Decision Tree Precision:", precision_score(y_test, y_pred_dt))
print("Decision Tree F1 Score:", f1_score(y_test, y_pred_dt))


Decision Tree Accuracy: 0.8524590163934426
Decision Tree Recall: 0.8125
Decision Tree Precision: 0.896551724137931
Decision Tree F1 Score: 0.8524590163934426


## Linear SVC

In [23]:
y_pred_lsvc = grid_lsvc.predict(X_test)
print("Linear SVC Accuracy:", accuracy_score(y_test, y_pred_lsvc))
print("Linear SVC Recall:", recall_score(y_test, y_pred_lsvc))
print("Linear SVC Precision:", precision_score(y_test, y_pred_lsvc))
print("Linear SVC F1 Score:", f1_score(y_test, y_pred_lsvc))


Linear SVC Accuracy: 0.8524590163934426
Linear SVC Recall: 0.84375
Linear SVC Precision: 0.8709677419354839
Linear SVC F1 Score: 0.8571428571428571


## Kernel SVC

In [24]:
y_pred_ksvc = grid_ksvc.predict(X_test)
print("Kernel SVC Accuracy:", accuracy_score(y_test, y_pred_ksvc))
print("Kernel SVC Recall:", recall_score(y_test, y_pred_ksvc))
print("Kernel SVC Precision:", precision_score(y_test, y_pred_ksvc))
print("Kernel SVC F1 Score:", f1_score(y_test, y_pred_ksvc))


Kernel SVC Accuracy: 0.8688524590163934
Kernel SVC Recall: 0.90625
Kernel SVC Precision: 0.8529411764705882
Kernel SVC F1 Score: 0.8787878787878788


## Random Forest

In [25]:
y_pred_rf = grid_rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Random Forest Recall:", recall_score(y_test, y_pred_rf))
print("Random Forest Precision:", precision_score(y_test, y_pred_rf))
print("Random Forest F1 Score:", f1_score(y_test, y_pred_rf))


Random Forest Accuracy: 0.8524590163934426
Random Forest Recall: 0.875
Random Forest Precision: 0.8484848484848485
Random Forest F1 Score: 0.8615384615384616


# Evaluating the Models

In [26]:
def evaluate_all_metrics(y_true, y_pred, model_name):
    print(f"\n {model_name} Evaluation")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Recall:", recall_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred))
    print("F1 Score:", f1_score(y_true, y_pred))


## K-Nearest Neighbors

In [27]:
evaluate_all_metrics(y_test, y_pred_knn, "KNN")



 KNN Evaluation
Accuracy: 0.9016393442622951
Recall: 0.9375
Precision: 0.8823529411764706
F1 Score: 0.9090909090909091


## Logistic Regression

In [28]:
evaluate_all_metrics(y_test, y_pred_lr, "Logistic Regression")


 Logistic Regression Evaluation
Accuracy: 0.8524590163934426
Recall: 0.875
Precision: 0.8484848484848485
F1 Score: 0.8615384615384616


## Decision Tree

In [29]:
evaluate_all_metrics(y_test, y_pred_dt, "Decision Tree")


 Decision Tree Evaluation
Accuracy: 0.8524590163934426
Recall: 0.8125
Precision: 0.896551724137931
F1 Score: 0.8524590163934426


## Linear SVC

In [30]:
evaluate_all_metrics(y_test, y_pred_lsvc, "Linear SVC")


 Linear SVC Evaluation
Accuracy: 0.8524590163934426
Recall: 0.84375
Precision: 0.8709677419354839
F1 Score: 0.8571428571428571


## Kernel SVC

In [31]:
evaluate_all_metrics(y_test, y_pred_ksvc, "Kernel SVC")


 Kernel SVC Evaluation
Accuracy: 0.8688524590163934
Recall: 0.90625
Precision: 0.8529411764705882
F1 Score: 0.8787878787878788


## Random Forest

In [32]:
evaluate_all_metrics(y_test, y_pred_rf, "Random Forest")


 Random Forest Evaluation
Accuracy: 0.8524590163934426
Recall: 0.875
Precision: 0.8484848484848485
F1 Score: 0.8615384615384616


# Table of Comparison

In [33]:
def get_metrics(y_true, y_pred, model_name):
    return {
        'Model': model_name,
        'Accuracy': round(accuracy_score(y_true, y_pred), 4),
        'Recall': round(recall_score(y_true, y_pred), 4),
        'Precision': round(precision_score(y_true, y_pred), 4),
        'F1 Score': round(f1_score(y_true, y_pred), 4)
    }

In [34]:
results = [
    get_metrics(y_test, y_pred_knn, "KNN"),
    get_metrics(y_test, y_pred_lr, "Logistic Regression"),
    get_metrics(y_test, y_pred_dt, "Decision Tree"),
    get_metrics(y_test, y_pred_lsvc, "Linear SVC"),
    get_metrics(y_test, y_pred_ksvc, "Kernel SVC"),
    get_metrics(y_test, y_pred_rf, "Random Forest")
]


In [35]:
results_df = pd.DataFrame(results).sort_values(by='Accuracy', ascending=False)


In [36]:
print(results_df)

                 Model  Accuracy  Recall  Precision  F1 Score
0                  KNN    0.9016  0.9375     0.8824    0.9091
4           Kernel SVC    0.8689  0.9062     0.8529    0.8788
1  Logistic Regression    0.8525  0.8750     0.8485    0.8615
2        Decision Tree    0.8525  0.8125     0.8966    0.8525
3           Linear SVC    0.8525  0.8438     0.8710    0.8571
5        Random Forest    0.8525  0.8750     0.8485    0.8615
