In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the heart-disease CSV from the Kaggle input directory into a DataFrame.
dataset = pd.read_csv('/kaggle/input/heart-disease/heart.csv')

In [None]:
# (rows, columns) of the loaded frame.
dataset.shape

In [None]:
# Preview the first rows to see the column names and typical values.
dataset.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
dataset.describe()

In [None]:
# Number of missing values in each column.
dataset.isna().sum()

In [None]:
# Class balance: how many rows there are for each value of `target`.
sns.countplot(data=dataset, x='target')

In [None]:
# Pairwise correlation between the columns, drawn as an annotated heatmap
# on a 15x15 figure so the cell annotations stay legible.
corr_mat = dataset.corr()
plt.figure(figsize=(15, 15))
sns.heatmap(data=corr_mat, annot=True)

In [None]:
# Draw one histogram per column on a single 12x12 figure.
dataset.hist(figsize=(12, 12))
plt.show()

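**Before modelling we will need to convert some categorical variables into dummy variables (done further below). First, a closer look at how age and the categorical features are distributed.**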

In [None]:
# The ten most frequent ages, most common first.
top_ages = dataset["age"].value_counts().head(10)

# One bar per age, ordered by frequency, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=dataset, x="age", order=top_ages.index, ax=ax)
ax.set_title("Top 10 Most Common Ages")
ax.set_xlabel("Age")
ax.set_ylabel("Count")
ax.tick_params(axis="x", labelrotation=45)
plt.show()

In [None]:
# Youngest, oldest and average patient age.
# Uses the Series reductions rather than the Python built-ins min()/max():
# they are vectorised and skip NaN, whereas the built-ins iterate element by
# element and give order-dependent results when NaN is present.
minAge = dataset.age.min()
maxAge = dataset.age.max()
meanAge = dataset.age.mean()
print('Min Age :', minAge)
print('Max Age :', maxAge)
print('Mean Age :', meanAge)

In [None]:
# Split the patients into three age bands that cover every row with a known age:
#   Young  : age < 40
#   Middle : 40 <= age < 55
#   Elder  : age >= 55
# The bands share their boundaries (>= on the lower edge, < on the upper edge),
# so a patient aged exactly 55 is counted as Elder instead of falling between
# Middle (< 55) and Elder (> 55). Young has no lower bound, so patients younger
# than the minimum seen in this particular file are not silently dropped.
Young = dataset[dataset.age < 40]
Middle = dataset[(dataset.age >= 40) & (dataset.age < 55)]
Elder = dataset[dataset.age >= 55]

In [None]:
# Pie chart of how many patients fall in each age band; the last slice
# (elderly) is offset from the centre via `explode`.
colors = ['green', 'blue', 'red']
explode = [0, 0, 0.1]
group_sizes = [len(group) for group in (Young, Middle, Elder)]
group_labels = ['young ages', 'middle ages', 'elderly ages']

plt.figure(figsize=(10, 10))
sns.set_context('notebook', font_scale=1.2)
plt.pie(
    group_sizes,
    labels=group_labels,
    explode=explode,
    colors=colors,
    autopct='%1.1f%%',
)
plt.tight_layout()

In [None]:
# Number of patients per value of `sex`.
# The column is selected with `x=` and the frame passed as `data=`: since
# seaborn 0.12 the only positional argument of countplot is `data`, so a
# Series passed positionally is no longer interpreted as the x variable.
sns.countplot(data=dataset, x='sex')
plt.tight_layout()

In [None]:
# Map values for better labels
sex_mapping = {1: "Male", 0: "Female"}
target_mapping = {1: "Heart Disease", 0: "No Heart Disease"}
# The columns are overwritten in `dataset` itself, so later plotting cells see
# the text labels too. Values that are not keys of the mapping (e.g. labels
# written by a previous run of this cell) are passed through unchanged; a plain
# Series.map(dict) would turn them into NaN and wipe both columns on re-run.
dataset["sex"] = dataset["sex"].map(lambda code: sex_mapping.get(code, code))
dataset["target"] = dataset["target"].map(lambda code: target_mapping.get(code, code))

# Create a count plot using seaborn
plt.figure(figsize=(8, 6))
sns.countplot(data=dataset, x="sex", hue="target", palette="Set1")
plt.title("Distribution of Heart Disease by Sex")
plt.xlabel("Sex")
plt.ylabel("Count")
plt.legend(title="Target")
plt.show()








In [None]:
# Patients per `sex`, with each bar split by `slope`.
# Both variables are given by column name against `data=dataset`: since
# seaborn 0.12 the first positional argument of countplot is `data`, so the
# positional Series was not used as x, and `hue` needs x or y to be set.
sns.countplot(data=dataset, x='sex', hue='slope')
plt.tight_layout()

In [None]:
# Number of patients per value of `cp`.
# Column selected via `x=` with the frame as `data=`; since seaborn 0.12 a
# positional first argument is treated as `data`, not as the x variable.
sns.countplot(data=dataset, x='cp')
plt.tight_layout()

In [None]:
# Count of rows per `cp` value, split by the current contents of `target`,
# drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=dataset, x="cp", hue="target", palette="Set1", ax=ax)
ax.set_title("Distribution of Heart Disease by Chest Pain Type")
ax.set_xlabel("Chest Pain Type")
ax.set_ylabel("Count")
ax.tick_params(axis="x", labelrotation=15)
ax.legend(title="Target")
plt.show()

In [None]:
# Number of patients per value of `thal`.
# Column selected via `x=` with the frame as `data=`; since seaborn 0.12 a
# positional first argument is treated as `data`, not as the x variable.
sns.countplot(data=dataset, x='thal')
plt.tight_layout()

In [None]:
# Data type of each column as it currently stands in `dataset`.
dataset.dtypes

In [None]:
# Re-read the CSV, rebinding `dataset` to a fresh copy of the file contents.
# An earlier plotting cell overwrote `sex` and `target` with text labels.
# NOTE(review): presumably this reload is here to restore the original numeric
# codes before feature engineering — confirm.
dataset = pd.read_csv('/kaggle/input/heart-disease/heart.csv')

In [None]:
# One-hot encode the categorical columns; every other column is carried over
# into `dataset2` unchanged.
categorical_cols = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
dataset2 = pd.get_dummies(dataset, columns=categorical_cols)

In [None]:
# Preview the encoded frame to check the new indicator columns.
dataset2.head()


In [None]:
# (rows, columns) after one-hot encoding.
dataset2.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the continuous columns (zero mean, unit variance) and keep the
# fitted scaler in `sc`, which is pickled at the end of the notebook.
sc = StandardScaler()
col_to_scale = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
# Fit on the raw values in `dataset`, not on `dataset2`, which this line
# overwrites. The first run gives the same result; on a re-run the scaler is
# no longer refit on already-standardised data, which would leave `sc` as an
# identity transform that is useless for raw inputs.
# NOTE(review): the scaler is fit on every row, i.e. before the train/test
# split done later in the notebook, so held-out rows influence the mean/std
# (data leakage). Consider splitting first, fitting on the training rows only,
# and applying sc.transform to the test rows.
dataset2[col_to_scale] = sc.fit_transform(dataset[col_to_scale])
dataset2.head(6)

In [None]:
# Column names of the encoded and scaled frame.
dataset2.columns

In [None]:
# Separate the label column from the feature matrix.
y = dataset2['target']
X = dataset2.drop(columns='target')



In [None]:
# Feature names that the model will be trained on (`target` removed).
X.columns

In [None]:
# Number of labels; should match the number of rows in X.
y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# (rows, features) in the training split.
x_train.shape

In [None]:
# (rows, features) in the held-out test split.
x_test.shape

In [None]:
# Number of training labels; should match the row count of x_train.
y_train.shape

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with scikit-learn's default settings,
# trained on the training split.
classifier = KNeighborsClassifier()
classifier.fit(X=x_train, y=y_train)

In [None]:
# Predict a label for every row of the held-out test split.
y_pred = classifier.predict(x_test)

In [None]:
# Preview the test features; showing only the first rows keeps the notebook
# output short instead of dumping the whole frame.
x_test.head()

In [None]:
# Predicted labels for the test rows, in the same order as x_test.
y_pred

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test predictions. scikit-learn puts the true labels
# on the rows and the predicted labels on the columns, so the heatmap's y-axis
# is the truth and its x-axis the prediction.
cm = confusion_matrix(y_test, y_pred)
# fmt='d' prints the counts as plain integers; the default '.2g' format would
# show any count of 100 or more in scientific notation.
ax = sns.heatmap(cm, annot=True, fmt='d')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion matrix')
plt.show()

In [None]:
from sklearn.metrics import accuracy_score
# Fraction of test rows whose predicted label equals the true label.
accuracy_score(y_test , y_pred)

In [None]:
import pickle

# Persist the trained classifier and the fitted scaler so they can be reloaded
# for inference. The files are opened in `with` blocks so each handle is
# flushed and closed as soon as its dump finishes, instead of being left open
# until garbage collection.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(classifier, model_file)
with open('sc.pkl', 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)