In [2]:
import pandas as pd
import numpy as np
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [3]:
data = pd.read_csv("diabetes.csv")

In [4]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
data.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [6]:
#there are no nullvalues but use this method to apply mean to ALL NaN values
lists = ['Glucose','BloodPressure','SkinThickness','BMI','Insulin']

In [7]:
for i in lists:
    data[i] = data[i].replace(0, np.NaN)
    mean = int(data[i].mean(skipna=True))
    data[i] = data[i].replace(np.NaN, mean)

In [8]:
#Split data for test and training
X = data.iloc[:,0:8]
y = data.iloc[:,8]
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0, test_size=0.2)

In [9]:
#Feature Scalling
#We cannot plot a value from 5 to 300 so we are going to normalize the value from 0 to 1
ss = StandardScaler()

In [10]:
X_train = ss.fit_transform(X_train)
X_test = ss.fit_transform(X_test)

In [11]:
#now we are going to fit the model with the KNN
model = KNeighborsClassifier(n_neighbors=11, p=2, metric='euclidean')

In [12]:
model.fit(X_train,y_train)

KNeighborsClassifier(metric='euclidean', n_neighbors=11)

In [13]:
ypred = model.predict(X_test)

In [14]:
ypred

array([1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=int64)

In [15]:
#evaluate the model
cm = confusion_matrix(y_test, ypred)

In [16]:
print(cm)

[[95 12]
 [18 29]]


In [17]:
#accuracy and f1 score
print(f1_score(y_test, ypred))
print(accuracy_score(y_test, ypred))

0.6590909090909092
0.8051948051948052


In [18]:
print(X_test)

[[-0.89295432  2.48519445  0.36928267 ...  1.75900682  2.78935129
  -0.93064283]
 [-0.56553774 -0.47166102  0.2077215  ...  0.29022792 -0.27698825
  -0.83598035]
 [ 0.08929543 -1.46799276 -0.76164551 ...  0.35340121 -0.31725331
  -0.64665539]
 ...
 [ 0.08929543  0.65322965  1.17708851 ...  1.93273336  0.4694641
  -0.93064283]
 [-0.23812115 -0.18240342  0.2077215  ... -0.86268455 -1.19689011
  -0.74131787]
 [-0.89295432 -0.47166102  0.04616033 ... -0.15198508  1.01459113
  -0.74131787]]


In [19]:
b = list(map(float, input().strip().split()))[:8]
a =[b]

if model.predict(a)==[0]:
    print("No Diabetes")
elif model.predict(a)==[1]:
    print("Diabetes")


    #change the a value for prediction

Diabetes
