In [99]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import log_loss
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [4]:
df=pd.read_csv('./csv_files/diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [47]:
df.shape

(768, 9)

In [98]:
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [51]:
X = df.iloc[:, 0:8]
y = df.iloc[:, 8]
x_train, x_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [52]:
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

In [73]:
clf_model = KNeighborsClassifier(n_neighbors=3)

In [74]:
clf_model.fit(x_train,y_train)

In [75]:
model_score=clf.score(x_train, y_train)
print(model_score)
if model_score>0.5:
    print("Our Trained model is Perfect.")
elif model_score<0.5:
    print("Our Trained model is not Perfect.")
else:
    print("Model is not Trained.")

0.8615635179153095
Our Trained model is Perfect.


In [76]:
predict = clf.predict(x_test)

In [77]:
predict

array([0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
      dtype=int64)

In [79]:
conf = confusion_matrix(y_test, predict)
print(f'The confusion matrix of this Logistic Regression is: \n{conf}')

The confusion matrix of this Logistic Regression is: 
[[75 24]
 [34 21]]


In [96]:
tn, fp, fn, tp = conf.ravel()
print("True Negatives:", tn)
print("False Positives:", fp)
print("False Negatives:", fn)
print("True Positives:", tp)

True Negatives: 75
False Positives: 24
False Negatives: 34
True Positives: 21


In [97]:
specificity = tn / (tn + fp)
print("Specificity:", specificity)

Specificity: 0.7575757575757576


In [92]:
print ("The accuracy of Logistic Regression is : ", accuracy_score(y_test, predict))

The accuracy of Logistic Regression is :  0.6233766233766234


In [93]:
print("F1 score for logistic regression is :",f1_score(y_test, predict,))

F1 score for logistic regression is : 0.42


In [87]:
precision = precision_score(y_test, predict)
print("Precision Score of logistic Regression is:", precision)

Precision Score of logistic Regression is: 0.4666666666666667


In [89]:
recall = recall_score(y_test, predict)
print("Recall Score of logistic Regression is:", recall)

Recall Score of logistic Regression is: 0.38181818181818183


In [90]:
log_loss_value = log_loss(y_test, predict)
print("Log Loss for this logistics Regression is:", log_loss_value)

Log Loss for this logistics Regression is: 13.574882445251918
