In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, roc_auc_score

# a. Create a data frame for "lung_disease.csv"
data = pd.read_csv("heart.csv")

# Print the shape and first 5 records
print("Shape of the dataset:", data.shape)
print("First 5 records:\n", data.head())

Shape of the dataset: (303, 14)
First 5 records:
    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope   
0   63    1   3       145   233    1        0      150      0      2.3      0  \
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  


In [4]:
# b. Create input features X, target Y, classifier object, train-test-split using 80-20% split
X = data.drop("target", axis=1)  # Input features
y = data["target"]  # Target variable

# Train-test split using 80-20% split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [5]:
# c. Train a kNN classifier using GridSearchCV for hyperparameter tuning
param_grid = {
    "leaf_size": list(range(1, 16)),
    "n_neighbors": list(range(1, 11)),
    "p": [1, 2]
}


In [6]:
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5)


In [7]:
grid_search.fit(X_train, y_train)

In [8]:

# Find the best values of the parameters
best_params = grid_search.best_params_
print("Best parameters:", best_params)



Best parameters: {'leaf_size': 2, 'n_neighbors': 3, 'p': 1}


In [9]:
# Print the classification report and AUC score on the validation set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred))
auc_score = roc_auc_score(y_test, y_pred)
print("AUC Score:", auc_score)

Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.72      0.69        29
           1       0.72      0.66      0.69        32

    accuracy                           0.69        61
   macro avg       0.69      0.69      0.69        61
weighted avg       0.69      0.69      0.69        61

AUC Score: 0.6901939655172414
