In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# Read diabetes.csv (path is relative to the working directory) into a
# DataFrame; the bare name on the last line renders it as the cell output.
dataset=pd.read_csv("diabetes.csv")  
dataset 

In [None]:
# Print the frame's structural summary (columns, dtypes, non-null counts).
dataset.info()

In [None]:
# Count NaN entries per column.
dataset.isnull().sum()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
dataset.describe()

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Custom three-stop gradient used to colour the correlation matrix.
colors = ["yellow", "green", "blue"]
cmap = LinearSegmentedColormap.from_list("yellowgreenblue", colors)

# Annotated heatmap of the pairwise column correlations, values shown to
# three decimals.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    dataset.corr(),
    annot=True,
    fmt=".3f",
    cmap=cmap,
    ax=ax,
)
ax.set_title("Correlation Heatmap")

In [None]:
# Density of "Pregnancies" for each outcome class, overlaid on one axes.
plt.figure(figsize=(10, 8))

# Label each curve when it is drawn so the legend is built from the plotted
# artists. Calling legend(["Positive"], ["Negative"]) hands the first list to
# matplotlib as *handles* and the second as *labels*, which does not yield the
# intended two-entry legend.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# switch the keyword when the environment is upgraded.
kde = sns.kdeplot(
    dataset.loc[dataset["Outcome"] == 1, "Pregnancies"],
    color="Red",
    shade=True,
    label="Positive",
)
kde = sns.kdeplot(
    dataset.loc[dataset["Outcome"] == 0, "Pregnancies"],
    color="Blue",
    shade=True,
    label="Negative",
)
kde.set_xlabel("Pregnancies")
kde.set_ylabel("Density")
kde.legend()

In [None]:
# Violin plot of "Glucose" grouped by "Outcome", drawn on an explicit axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.violinplot(
    x="Outcome",
    y="Glucose",
    data=dataset,
    inner="quart",
    split=True,
    linewidth=2,
    ax=ax,
)

In [None]:
# Density of "Glucose" for each outcome class, overlaid on one axes.
plt.figure(figsize=(10, 8))

# Label each curve when it is drawn so the legend is built from the plotted
# artists. Calling legend(["Positive"], ["Negative"]) hands the first list to
# matplotlib as *handles* and the second as *labels*, which does not yield the
# intended two-entry legend.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# switch the keyword when the environment is upgraded.
kde = sns.kdeplot(
    dataset.loc[dataset["Outcome"] == 1, "Glucose"],
    color="Red",
    shade=True,
    label="Positive",
)
kde = sns.kdeplot(
    dataset.loc[dataset["Outcome"] == 0, "Glucose"],
    color="Blue",
    shade=True,
    label="Negative",
)
kde.set_xlabel("Glucose")
kde.set_ylabel("Density")
kde.legend()

In [None]:
# Zeros in these measurement columns are treated as missing-value placeholders
# and replaced by a per-column median.
#
# The median is taken over the *non-zero* readings only: a median over the
# whole column would include the placeholder zeros themselves and be pulled
# toward 0, most noticeably in columns where zeros are frequent.
# NOTE(review): the fill value is computed on the full dataset, i.e. before the
# train/test split further down — confirm that is acceptable for this analysis.
zero_as_missing_columns = ["Glucose", "BloodPressure", "BMI", "SkinThickness", "Insulin"]
for column in zero_as_missing_columns:
    observed = dataset.loc[dataset[column] != 0, column]
    dataset[column] = dataset[column].replace(0, observed.median())
dataset

In [None]:
# Target vector: the "Outcome" column on its own.
Y = dataset["Outcome"]
# Feature matrix: every remaining column.
X = dataset.drop(columns=["Outcome"])
X

In [None]:
# Display the target Series ("Outcome") as the cell output.
Y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
split_parts = train_test_split(X, Y, random_state=42, test_size=0.33)
X_train, X_test, Y_train, Y_test = split_parts
X_train

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Sweep k = 1..10 and record accuracy on the training and held-out splits so
# the two curves can be compared when choosing k.
training_accuracy = []
test_accuracy = []
for n_neighbours in range(1, 11):
    # Use the loop value itself as k. Squaring it would fit k = 1, 4, ..., 100
    # while the results are plotted against 1..10.
    knn = KNeighborsClassifier(n_neighbors=n_neighbours)
    knn.fit(X_train, Y_train)
    training_accuracy.append(knn.score(X_train, Y_train))
    # Score on the held-out split; scoring X_train here would just duplicate
    # the training curve.
    test_accuracy.append(knn.score(X_test, Y_test))

In [None]:
# Training vs. test accuracy curves over the ten recorded sweep positions.
sweep_positions = range(1, 11)
fig, ax = plt.subplots()
ax.plot(sweep_positions, training_accuracy, label="training_accuracy")
ax.plot(sweep_positions, test_accuracy, label="test_accuracy")
ax.set_ylabel("accuracy")
ax.set_xlabel("n_neighbours")
ax.legend()

In [None]:
# k-NN classifier with k fixed at 9; fit() returns the estimator itself, so
# construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=9).fit(X_train, Y_train)

# Mean accuracy on each split.
knn_train_score = knn.score(X_train, Y_train)
knn_test_score = knn.score(X_test, Y_test)
print(knn_train_score, ": Training Accuracy")
print(knn_test_score, ": Testing Accuracy")

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with no depth limit set; the seed fixes its random choices.
dt = DecisionTreeClassifier(random_state=0).fit(X_train, Y_train)

# Report mean accuracy on each split, training first.
for split_features, split_labels, caption in (
    (X_train, Y_train, ": Training Accuracy"),
    (X_test, Y_test, ": Testing Accuracy"),
):
    print(dt.score(split_features, split_labels), caption)

In [None]:
# Same tree model, but capped at depth 3.
dt1 = DecisionTreeClassifier(max_depth=3, random_state=0)
dt1.fit(X_train, Y_train)

dt1_train_score = dt1.score(X_train, Y_train)
dt1_test_score = dt1.score(X_test, Y_test)
print(dt1_train_score, ": Training Accuracy")
print(dt1_test_score, ": Testing Accuracy")

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron trained on the raw (unscaled) features.
# The estimator must be MLPClassifier: instantiating DecisionTreeClassifier
# here would only re-run the tree model under the name `mlp`.
mlp = MLPClassifier(random_state=42)
mlp.fit(X_train, Y_train)
print(mlp.score(X_train, Y_train), ": Training Accuracy")
print(mlp.score(X_test, Y_test), ": Testing Accuracy")

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler learns its statistics from the training
# split only; the test split is transformed with those same statistics.
# Refitting on X_test would scale the two splits differently and let test-set
# information leak into preprocessing.
sc = StandardScaler()
X_train_scaler = sc.fit_transform(X_train)
X_test_scaler = sc.transform(X_test)

In [None]:
# Multi-layer perceptron trained on the standardised features.
mlp1 = MLPClassifier(random_state=42)
mlp1.fit(X_train_scaler, Y_train)
print(mlp1.score(X_train_scaler, Y_train), ": Training Accuracy")
# Evaluate on the *scaled* test features: the model was fitted on scaled
# inputs, so scoring the raw X_test would feed it values on a different scale.
print(mlp1.score(X_test_scaler, Y_test), ": Testing Accuracy")