DIABETES PREDICTION SYSTEM


In [137]:
# importing libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

In [138]:
# loading the diabetes dataset to a pandas DataFrame
diabetes_dataset = pd.read_csv('diabetes.csv')

Checking and Analysing the Dataset

In [139]:
# printing the first 5 rows of the dataset
diabetes_dataset.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [140]:
# number of rows and Columns in this dataset
diabetes_dataset.shape

(768, 9)

In [141]:
# getting the statistical measures of the data
diabetes_dataset.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [142]:
diabetes_dataset['Outcome'].value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [143]:
diabetes_dataset.groupby('Outcome').mean()

Unnamed: 0_level_0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
Outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,3.298,109.98,68.184,19.664,68.792,30.3042,0.429734,31.19
1,4.865672,141.257463,70.824627,22.164179,100.335821,35.142537,0.5505,37.067164


Seperating data and labels

In [144]:
X = diabetes_dataset.drop(columns = 'Outcome', axis=1)
Y = diabetes_dataset['Outcome']
print(X)

     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0              6      148             72             35        0  33.6   
1              1       85             66             29        0  26.6   
2              8      183             64              0        0  23.3   
3              1       89             66             23       94  28.1   
4              0      137             40             35      168  43.1   
..           ...      ...            ...            ...      ...   ...   
763           10      101             76             48      180  32.9   
764            2      122             70             27        0  36.8   
765            5      121             72             23      112  26.2   
766            1      126             60              0        0  30.1   
767            1       93             70             31        0  30.4   

     DiabetesPedigreeFunction  Age  
0                       0.627   50  
1                       0.351   31  


In [145]:
print(Y)

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64


Data Standardization

In [146]:
scaler = StandardScaler()
scaler.fit(X)
standardized_data = scaler.transform(X)
print(standardized_data)

[[ 0.63994726  0.84832379  0.14964075 ...  0.20401277  0.46849198
   1.4259954 ]
 [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078
  -0.19067191]
 [ 1.23388019  1.94372388 -0.26394125 ... -1.10325546  0.60439732
  -0.10558415]
 ...
 [ 0.3429808   0.00330087  0.14964075 ... -0.73518964 -0.68519336
  -0.27575966]
 [-0.84488505  0.1597866  -0.47073225 ... -0.24020459 -0.37110101
   1.17073215]
 [-0.84488505 -0.8730192   0.04624525 ... -0.20212881 -0.47378505
  -0.87137393]]


In [147]:
X = standardized_data 
print(X)
print(Y)

[[ 0.63994726  0.84832379  0.14964075 ...  0.20401277  0.46849198
   1.4259954 ]
 [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078
  -0.19067191]
 [ 1.23388019  1.94372388 -0.26394125 ... -1.10325546  0.60439732
  -0.10558415]
 ...
 [ 0.3429808   0.00330087  0.14964075 ... -0.73518964 -0.68519336
  -0.27575966]
 [-0.84488505  0.1597866  -0.47073225 ... -0.24020459 -0.37110101
   1.17073215]
 [-0.84488505 -0.8730192   0.04624525 ... -0.20212881 -0.47378505
  -0.87137393]]
0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64


Spilitting the data into training set and testing set

In [148]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2, stratify=Y, random_state=2)
print(X.shape, X_train.shape, X_test.shape)

(768, 8) (614, 8) (154, 8)


Initialising and Training the model

In [149]:
classifier = svm.SVC(kernel='linear')

In [150]:
#training the support vector Machine Classifier
classifier.fit(X_train, Y_train)

SVC(kernel='linear')

Checking Accuracy

In [151]:
# accuracy score on the training data
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)
print('Accuracy score of the training data : ', training_data_accuracy)

Accuracy score of the training data :  0.7866449511400652


In [152]:
# accuracy score on the test data
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
print('Accuracy score of the test data : ', test_data_accuracy)

Accuracy score of the test data :  0.7727272727272727


SAVING THE MODEL


In [153]:
import joblib
joblib.dump(classifier,'model_diabetes_pre')

['model_diabetes_pre']

LOADING THE MODEL

In [154]:
model = joblib.load('model_diabetes_pre')

Graphical User Interface (GUI) Code

In [155]:
from tkinter import *
import joblib
import numpy as np
from sklearn import *


def show_entry_fields():
    p1=float(e1.get())
    p2=float(e2.get())
    p3=float(e3.get())
    p4=float(e4.get())
    p5=float(e5.get())
    p6=float(e6.get())
    p7=float(e7.get())
    p8=float(e8.get())

    input_data = (p1,p2,p3,p4,p5,p6,p7,p8)
    input_data = np.asarray(input_data)
    input_data= input_data.reshape(1,-1)
    input_data = scaler.transform(input_data)

    result=model.predict(input_data)
    if result == 0:
        label.config(text="Non-Diabetic")
    else:
        label.config(text="Diabetic")



def clear_fields():
   label.config(text="")
   for widget in gui.winfo_children():
        if isinstance(widget, Entry):   
            widget.delete(0,'end') 
            
#-----------------------------------------------------------------------------------------------------------------------------------------------------#
#Responsible for GUI WINDOW      
gui = Tk()
gui.geometry("560x540")
gui.title("Diabetes Prediction Using Machine Learning")
gui['bg']="black"


Label(gui, text="Fill the details to Predict", font=("Cooper Black", 20),bg='black',fg='white').grid(row=0,columnspan=2,sticky='N')
Label(gui, text="Pregnancies", font=("Arial", 20),bg='black',fg='white').grid(row=1,sticky='W')
Label(gui, text="Glucose", font=("Arial", 20),bg='black',fg='white').grid(row=2,sticky='W')
Label(gui, text="BloodPressure", font=("Arial", 20),bg='black',fg='white').grid(row=3,sticky='W')
Label(gui, text="SkinThickness", font=("Arial", 20),bg='black',fg='white').grid(row=4,sticky='W')
Label(gui, text="Insulin", font=("Arial", 20),bg='black',fg='white').grid(row=5,sticky='W')
Label(gui, text="BMI", font=("Arial", 20),bg='black',fg='white').grid(row=6,sticky='W')
Label(gui, text="DiabetesPedigreeFunction",font=("Arial", 20),bg='black',fg='white').grid(row=7)
Label(gui, text="Age", font=("Arial", 20),bg='black',fg='white').grid(row=8,sticky='W')
label=Label(gui, text="",font=("Cooper Black", 25),bg='black',fg='red')         
label.grid(row=11,pady=3)


#FOR INPUT
e1 = Entry(gui)
e2 = Entry(gui)
e3 = Entry(gui)
e4 = Entry(gui)
e5 = Entry(gui)
e6 = Entry(gui)
e7 = Entry(gui)
e8 = Entry(gui)


e1.grid(row=1, column=1,ipadx=30)
e2.grid(row=2, column=1,ipadx=30)
e3.grid(row=3, column=1,ipadx=30)
e4.grid(row=4, column=1,ipadx=30)
e5.grid(row=5, column=1,ipadx=30)
e6.grid(row=6, column=1,ipadx=30)
e7.grid(row=7, column=1,ipadx=30)
e8.grid(row=8, column=1,ipadx=30)


Button(gui, text='PREDICT', font=('Times',20) ,bg='lightgreen',command=show_entry_fields).grid(row=10,column=0,pady=25)
Button(gui, text='CLEAR', font=('Times',20) ,bg='lightblue',command=clear_fields).grid(row=10,column=1,pady=25)

mainloop()      #to call it infinite times.

  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
