In [None]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import MinMaxScaler



In [None]:
# Load the raw heart-disease table; every later cell reads from `df`.
csv_path = '/content/heart_cleveland_upload.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to check that the file parsed into the expected columns.
df.head(n=5)



In [None]:
# Print the column names, non-null counts and dtypes of the raw frame.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Columns to draw, one panel each (14 panels in a 4x4 grid).
# NOTE(review): despite the name, this list is every column of the frame, not
# only categorical ones; count plots of many-valued columns (e.g. age, chol)
# will produce one bar per distinct value.
cat_col = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "condition",
]

fig = plt.figure(figsize=(16, 15))

for idx, col in enumerate(cat_col):
    # Subplot positions are 1-based, hence idx + 1.
    ax = plt.subplot(4, 4, idx + 1)
    sns.countplot(x=df[col], ax=ax)
    # Write each bar's count inside the bar.
    for container in ax.containers:
        ax.bar_label(container, label_type="center")

# Re-flow the grid so tick labels and axis titles of neighbouring panels do not
# overlap (matches the hue-split version of this figure).
plt.tight_layout()


In [None]:
# Same grid of count plots as before, with every bar split by the `condition` label.
fig = plt.figure(figsize=(16, 15))

for position, column in enumerate(cat_col, start=1):
    ax = fig.add_subplot(4, 4, position)
    sns.countplot(data=df, x=column, hue="condition", ax=ax)
    # One container per hue level; annotate each bar with its count.
    for bars in ax.containers:
        ax.bar_label(bars, label_type="center")

fig.tight_layout()

In [None]:
# Class balance of the label column.
df.loc[:, 'condition'].value_counts()

In [None]:
# Audit: how many rows record `trestbps` as exactly 0?
zero_trestbps_rows = df.loc[df['trestbps'].eq(0)]
zero_trestbps_rows.info()

In [None]:
# Audit: how many rows record `chol` as exactly 0?
zero_chol_rows = df.loc[df['chol'].eq(0)]
zero_chol_rows.info()

In [None]:
# Cleaning the Data
#
# 1. Drop rows whose `trestbps` is 0.
# 2. Replace `chol` values of 0 with the median `chol` of the row's own
#    `condition` class.
#
# Filtering first and copying afterwards gives df_clean its own data, so the
# .loc assignments below neither raise SettingWithCopyWarning nor modify `df`.
df_clean = df.loc[df['trestbps'] != 0].copy()

# The class median may be fractional; store chol as float so writing it back
# is not an incompatible-dtype assignment into an integer column.
df_clean["chol"] = df_clean["chol"].astype(float)


def _zeros_to_median(values):
    """Return `values` with 0 entries replaced by the median of the non-zero entries.

    The zeros are excluded from the median because they are the placeholders
    being replaced. If every entry is 0 the median is NaN, so the zeros become
    NaN rather than silently staying 0.
    """
    return values.replace(0, values[values != 0].median())


# NOTE: despite its name, this mask is True for rows with condition == 0
# (presumably "no heart disease" - confirm against the dataset description).
heartdisease_mask = df_clean['condition'] == 0

chol_without_heartdisease = df_clean.loc[heartdisease_mask, "chol"]
# Must be the complement of the mask: assigning a Series built from the same
# rows as above to the `~mask` rows aligns on the index, finds no common
# labels, and fills those rows with NaN.
chol_with_heartdisease = df_clean.loc[~heartdisease_mask, "chol"]

df_clean.loc[heartdisease_mask, "chol"] = _zeros_to_median(chol_without_heartdisease)
df_clean.loc[~heartdisease_mask, "chol"] = _zeros_to_median(chol_with_heartdisease)


In [None]:
# Check the cleaned cholesterol values next to the label column.
df_clean.loc[:, ['chol', 'condition']].describe()

In [None]:
# One-hot encode only the multi-level nominal columns.
#
# Numeric columns (age, trestbps, chol, thalach, oldpeak, ...) are left alone:
# encoding them would create one dummy per distinct value and discard their
# ordering. The `condition` label is also left alone so it remains a single
# column that can be selected as the target.
# NOTE(review): cp / restecg / slope / thal are presumed to be integer-coded
# categories with more than two levels - confirm against the data dictionary.
nominal_cols = ["cp", "restecg", "slope", "thal"]

df_clean = pd.get_dummies(
    df_clean,
    # Skip columns already encoded so that re-running this cell is harmless.
    columns=[name for name in nominal_cols if name in df_clean.columns],
    # 0/1 integers regardless of the pandas version's default dummy dtype.
    dtype=int,
)
df_clean.head()

In [None]:
# Look at the last five rows of the encoded frame.
df_clean.tail(n=5)

In [None]:
# Magnitude of the pairwise correlations between all columns of df_clean;
# the sign is dropped so that strong negative relationships stand out as well.
correlations = df_clean.corr().abs()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 10))
sns.heatmap(correlations, annot=True, ax=heatmap_ax)
plt.show()

In [None]:
# Single-feature KNN baselines: fit one 3-neighbour classifier per candidate
# feature and print its accuracy on the held-out validation split.
target_col = "condition"

# Read the label from the raw frame, aligned on the rows that survived
# cleaning, so `y` is always one column no matter how df_clean's columns were
# encoded. Anything derived from the label is removed from the predictors.
y = df.loc[df_clean.index, target_col]
X = df_clean.drop(
    columns=[
        name
        for name in df_clean.columns
        if name == target_col or name.startswith(f"{target_col}_")
    ]
)

# train_test_split returns (X_train, X_test, y_train, y_test) - in that order.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.15, random_state=417
)

# Candidate predictors, named as the columns of this dataset are named.
# A list (not a set) keeps the printed order stable between runs.
# NOTE(review): these were chosen as the counterparts of the names previously
# listed here (max heart rate, ST depression, sex, exercise angina, ST slope),
# none of which exist in this frame - confirm the mapping.
features = ["thalach", "oldpeak", "sex", "exang", "slope"]

for feature in features:
    # A feature is either a raw column or the group of dummy columns derived
    # from it ("<feature>_<level>").
    feature_cols = [
        name
        for name in X.columns
        if name == feature or name.startswith(f"{feature}_")
    ]
    if not feature_cols:
        print(f"{feature}: skipped (no matching column)")
        continue

    knn = KNeighborsClassifier(n_neighbors=3)
    # Selecting with a list keeps the input 2-D, which scikit-learn requires
    # even for a single column.
    knn.fit(X_train[feature_cols], y_train)
    accuracy = knn.score(X_val[feature_cols], y_val)
    print(f"{feature}: {accuracy}")