# Importing the required Dependencies

In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as mpt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
import joblib

## Reading and showing sample of the csv file.

In [2]:
# Load the dataset from 'Data.csv' (path is relative to the working directory).
df = pd.read_csv('Data.csv') 
# Display 15 randomly sampled rows; no random_state is passed, so the rows shown differ between runs.
df.sample(15)

Unnamed: 0,Age,Gender,Polyuria,Polydipsia,sudden weight loss,weakness,Polyphagia,Genital thrush,visual blurring,Itching,Irritability,delayed healing,partial paresis,muscle stiffness,Alopecia,Obesity,class
420,47,Female,No,No,Yes,Yes,Yes,No,No,No,No,No,No,Yes,No,No,Positive
47,60,Female,Yes,Yes,Yes,Yes,Yes,No,Yes,Yes,No,No,Yes,Yes,No,No,Positive
177,50,Female,Yes,Yes,Yes,No,Yes,No,No,No,No,Yes,Yes,No,No,No,Positive
0,40,Male,No,Yes,No,Yes,No,No,No,Yes,No,Yes,No,Yes,Yes,Yes,Positive
64,45,Female,No,No,No,No,No,No,Yes,Yes,No,No,Yes,No,No,No,Positive
367,40,Female,Yes,Yes,Yes,Yes,No,No,Yes,No,No,Yes,Yes,Yes,No,No,Positive
514,54,Female,Yes,Yes,Yes,Yes,Yes,No,No,No,No,No,Yes,No,No,No,Positive
20,62,Male,Yes,Yes,No,Yes,Yes,No,Yes,No,Yes,No,Yes,Yes,No,No,Positive
509,54,Male,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Negative
315,33,Female,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Negative


## Checking if data has a null value

In [3]:
# Print each column's dtype and non-null count; a count below the row total would indicate missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 520 entries, 0 to 519
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Age                 520 non-null    int64 
 1   Gender              520 non-null    object
 2   Polyuria            520 non-null    object
 3   Polydipsia          520 non-null    object
 4   sudden weight loss  520 non-null    object
 5   weakness            520 non-null    object
 6   Polyphagia          520 non-null    object
 7   Genital thrush      520 non-null    object
 8   visual blurring     520 non-null    object
 9   Itching             520 non-null    object
 10  Irritability        520 non-null    object
 11  delayed healing     520 non-null    object
 12  partial paresis     520 non-null    object
 13  muscle stiffness    520 non-null    object
 14  Alopecia            520 non-null    object
 15  Obesity             520 non-null    object
 16  class               520 no

## Printing shape of dataset as rows and columns

In [4]:
# (number of rows, number of columns) of the loaded frame.
df.shape


(520, 17)

## Separating the data and target

In [5]:
# Label column on its own, and every remaining column as the feature frame.
target = df['class']
inputs = df.drop(columns='class')
print(inputs)
print(target)

     Age  Gender Polyuria Polydipsia sudden weight loss weakness Polyphagia  \
0     40    Male       No        Yes                 No      Yes         No   
1     58    Male       No         No                 No      Yes         No   
2     41    Male      Yes         No                 No      Yes        Yes   
3     45    Male       No         No                Yes      Yes        Yes   
4     60    Male      Yes        Yes                Yes      Yes        Yes   
..   ...     ...      ...        ...                ...      ...        ...   
515   39  Female      Yes        Yes                Yes       No        Yes   
516   48  Female      Yes        Yes                Yes      Yes        Yes   
517   58  Female      Yes        Yes                Yes      Yes        Yes   
518   32  Female       No         No                 No      Yes         No   
519   42    Male       No         No                 No       No         No   

    Genital thrush visual blurring Itching Irritabi

## Checking outcomes

In [6]:
# Distinct label values present in the target column.
target.unique()

array(['Positive', 'Negative'], dtype=object)

## Counting the outcomes (number of "Positive" and "Negative" rows)

In [7]:
# Number of rows for each outcome label, to check class balance.
df['class'].value_counts()

Positive    320
Negative    200
Name: class, dtype: int64

In [8]:
# Peek at the first two rows of the raw frame.
df.head(2)

Unnamed: 0,Age,Gender,Polyuria,Polydipsia,sudden weight loss,weakness,Polyphagia,Genital thrush,visual blurring,Itching,Irritability,delayed healing,partial paresis,muscle stiffness,Alopecia,Obesity,class
0,40,Male,No,Yes,No,Yes,No,No,No,Yes,No,Yes,No,Yes,Yes,Yes,Positive
1,58,Male,No,No,No,Yes,No,No,Yes,No,No,No,Yes,No,Yes,No,Positive


# Converting labels into numeric form

In [9]:
# One LabelEncoder per column, each kept under its own name. `new_age` is
# created but not fitted in this cell ('Age' is already an int64 column).
(
    new_age, new_gender, new_polyuria, new_polydipsia, new_swl, new_weakness,
    new_polyphagia, new_genitalThrush, new_visualBlurring, new_itching,
    new_irritability, new_delayedHealing, new_partialParesis,
    new_muscleStiffness, new_alopecia, new_obesity,
) = (LabelEncoder() for _ in range(16))

# (source column, encoded column to add, encoder to fit) -- the order here is
# the order in which the encoded columns are appended to `inputs`.
encoding_plan = [
    ('Gender', 'new_gender', new_gender),
    ('Polyuria', 'new_polyuria', new_polyuria),
    ('Polydipsia', 'new_polydipsia', new_polydipsia),
    ('sudden weight loss', 'new_swl', new_swl),
    ('weakness', 'new_weakness', new_weakness),
    ('Polyphagia', 'new_polyphagia', new_polyphagia),
    ('Genital thrush', 'new_genitalThrush', new_genitalThrush),
    ('visual blurring', 'new_visualBlurring', new_visualBlurring),
    ('Itching', 'new_itching', new_itching),
    ('Irritability', 'new_irritability', new_irritability),
    ('delayed healing', 'new_delayedHealing', new_delayedHealing),
    ('partial paresis', 'new_partialParesis', new_partialParesis),
    ('muscle stiffness', 'new_muscleStiffness', new_muscleStiffness),
    ('Alopecia', 'new_alopecia', new_alopecia),
    ('Obesity', 'new_obesity', new_obesity),
]

# Fit each encoder on its text column and store the integer codes alongside it.
for source_column, encoded_column, encoder in encoding_plan:
    inputs[encoded_column] = encoder.fit_transform(inputs[source_column])

In [10]:
# Show the feature frame, which now carries both the original text columns and the encoded 'new_*' columns.
print(inputs)

     Age  Gender Polyuria Polydipsia sudden weight loss weakness Polyphagia  \
0     40    Male       No        Yes                 No      Yes         No   
1     58    Male       No         No                 No      Yes         No   
2     41    Male      Yes         No                 No      Yes        Yes   
3     45    Male       No         No                Yes      Yes        Yes   
4     60    Male      Yes        Yes                Yes      Yes        Yes   
..   ...     ...      ...        ...                ...      ...        ...   
515   39  Female      Yes        Yes                Yes       No        Yes   
516   48  Female      Yes        Yes                Yes      Yes        Yes   
517   58  Female      Yes        Yes                Yes      Yes        Yes   
518   32  Female       No         No                 No      Yes         No   
519   42    Male       No         No                 No       No         No   

    Genital thrush visual blurring Itching  ... new

In [11]:
# Text-valued source columns that have encoded 'new_*' counterparts.
raw_text_columns = [
    'Gender',
    'Polyuria',
    'Polydipsia',
    'sudden weight loss',
    'weakness',
    'Polyphagia',
    'Genital thrush',
    'visual blurring',
    'Itching',
    'Irritability',
    'delayed healing',
    'partial paresis',
    'muscle stiffness',
    'Alopecia',
    'Obesity',
]
# Numeric-only feature frame: whatever remains once the text columns are removed.
new_inputs = inputs.drop(columns=raw_text_columns)

In [12]:
# Show the feature frame after the text columns were dropped.
print(new_inputs)

     Age  new_gender  new_polyuria  new_polydipsia  new_swl  new_weakness  \
0     40           1             0               1        0             1   
1     58           1             0               0        0             1   
2     41           1             1               0        0             1   
3     45           1             0               0        1             1   
4     60           1             1               1        1             1   
..   ...         ...           ...             ...      ...           ...   
515   39           0             1               1        1             0   
516   48           0             1               1        1             1   
517   58           0             1               1        1             1   
518   32           0             0               0        0             1   
519   42           1             0               0        0             0   

     new_polyphagia  new_genitalThrush  new_visualBlurring  new_itching  \


In [13]:
# X is the numeric feature frame, Y the label column.
X, Y = new_inputs, target
print(X)
print(Y)


     Age  new_gender  new_polyuria  new_polydipsia  new_swl  new_weakness  \
0     40           1             0               1        0             1   
1     58           1             0               0        0             1   
2     41           1             1               0        0             1   
3     45           1             0               0        1             1   
4     60           1             1               1        1             1   
..   ...         ...           ...             ...      ...           ...   
515   39           0             1               1        1             0   
516   48           0             1               1        1             1   
517   58           0             1               1        1             1   
518   32           0             0               0        0             1   
519   42           1             0               0        0             0   

     new_polyphagia  new_genitalThrush  new_visualBlurring  new_itching  \


# Standardization of the data

In [14]:
# Despite its name, `scaled_data` is the StandardScaler object itself, not the scaled values.
scaled_data = StandardScaler()
# NOTE(review): the scaler is fitted on every row of X, and this happens before the
# train/test split performed later in the notebook, so test rows contribute to the
# fitted statistics -- confirm whether that is acceptable.
standardized_data = scaled_data.fit_transform(X)
print(standardized_data)

[[-0.6613669   0.76509206 -0.99233705 ...  1.29099445  1.38022749
   2.21564684]
 [ 0.82136224  0.76509206 -0.99233705 ... -0.77459667  1.38022749
  -0.45133547]
 [-0.57899306  0.76509206  1.00772212 ...  1.29099445  1.38022749
  -0.45133547]
 ...
 [ 0.82136224 -1.30703226  1.00772212 ...  1.29099445 -0.72451824
   2.21564684]
 [-1.32035762 -1.30703226 -0.99233705 ... -0.77459667  1.38022749
  -0.45133547]
 [-0.49661921  0.76509206 -0.99233705 ... -0.77459667 -0.72451824
  -0.45133547]]


In [15]:
# Rebind X to the standardized values; from here on X no longer refers to `new_inputs`.
X = standardized_data

# Splitting the data into train and test sets

In [16]:
# Hold out 20% of the rows for testing, stratified on Y, with a fixed seed
# so the same split is produced on every run.
split_parts = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=3,
)
X_train, X_test, Y_train, Y_test = split_parts
print(X.shape, X_test.shape, X_train.shape)

(520, 16) (104, 16) (416, 16)


## Assigning the model: a decision tree classifier that uses the “entropy” criterion (information gain) to measure the quality of a split.

In [17]:
# Decision tree that scores candidate splits with the "entropy" criterion.
# random_state is fixed (same seed as the train/test split) so that refitting on
# the same data gives the same tree; without it, features are permuted randomly
# at each split and tied splits can be resolved differently between runs.
model = DecisionTreeClassifier(criterion='entropy', random_state=3)
model.fit(X_train, Y_train)
# Persist the fitted classifier.
# NOTE(review): only the model is saved here; the StandardScaler (`scaled_data`)
# that produced the training features is not written to disk alongside it.
joblib.dump(model, "file.pkl")

['file.pkl']

# Evaluation

In [23]:
# Score the model on the rows it was trained on.
predict_train_data = model.predict(X_train)
train_accuracy = metrics.accuracy_score(Y_train, predict_train_data)
print(f"Accuracy of train set = {train_accuracy:.3f}")

Accuracy of train set = 1.000


In [24]:
# Score the model on the held-out rows.
predict_test_data = model.predict(X_test)
test_accuracy = metrics.accuracy_score(Y_test, predict_test_data)
print(f"Accuracy of test set = {test_accuracy:.3f}")

Accuracy of test set = 0.990


# Functions

## Function to check whether the age entered is a number.

In [None]:
def isNumber(ans):
    """Return a validated age string, re-prompting until one is entered.

    Parameters
    ----------
    ans : str
        The user's first answer.

    Returns
    -------
    str
        The first answer (the original one or a later re-entry) that consists
        only of decimal digits, with surrounding whitespace removed.
    """
    # Loop instead of recursing so repeated bad input cannot exhaust the call stack.
    while True:
        cleaned = ans.strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which cannot be converted to a number.
        if cleaned.isdecimal():
            return cleaned
        ans = input('''
            |--------------------------------|
            |    Your input is wrong !       |
            |     enter Your Age in Number   |
            |                                |
            |--------------------------------|
            ''')
        

## Function to validate Gender and return numeric value.

In [None]:
def convertGender(ans):
    """Map a gender answer to its numeric code, re-prompting until it is valid.

    Answers are matched after trimming surrounding whitespace and ignoring
    letter case, so "Male", "male", "m", "M" (and e.g. " MALE ") all count as
    male, and likewise for the female spellings.

    Parameters
    ----------
    ans : str
        The user's first answer.

    Returns
    -------
    int
        1 for a male answer, 0 for a female answer.
    """
    male_answers = {"male", "m"}
    female_answers = {"female", "f"}
    # Loop instead of recursing so repeated bad input cannot exhaust the call stack.
    while True:
        normalized = ans.strip().lower()
        if normalized in male_answers:
            print(ans)
            return 1
        if normalized in female_answers:
            print(ans)
            return 0
        ans = input('''
            |--------------------------------|
            |    Your input is wrong !       |
            |    enter Your Gender Male as:  |
            |    "Male","male","m","M" and   |
            |    Female as:                  |
            |    "Female","female"           |
            |    ,"f","F"!                   |
            |                                |
            |                                |
            |--------------------------------|
            ''')
  
    

## Function to validate a Yes/No answer and return its numeric value.

In [None]:
def convert(ans):
    """Map a yes/no answer to its numeric code, re-prompting until it is valid.

    Answers are matched after trimming surrounding whitespace and ignoring
    letter case, so "Yes", "yes", "y", "Y" (and e.g. " YES ") all count as yes,
    and likewise for the no spellings.

    Parameters
    ----------
    ans : str
        The user's first answer.

    Returns
    -------
    int
        1 for a yes answer, 0 for a no answer.
    """
    yes_answers = {"yes", "y"}
    no_answers = {"no", "n"}
    # Loop instead of recursing so repeated bad input cannot exhaust the call stack.
    while True:
        normalized = ans.strip().lower()
        if normalized in yes_answers:
            print(ans)
            return 1
        if normalized in no_answers:
            print(ans)
            return 0
        ans = input('''
            |--------------------------------|
            |    Your input is wrong !       |
            |     answer in either Yes as:   |
            |    "Yes","Y","yes","y" or      |
            |     No as:                     |
            |    "No","n","N","no"           |
            |                                |
            |--------------------------------|
            ''')
       
        

In [None]:
# Show the last encoded row as a reference for the column order of the feature frame.
new_inputs.tail(1)

# Program

In [None]:

# Interactive screening loop: collect one person's answers, predict with the
# trained model, print the verdict, then ask whether to go again.
repeat = True
while repeat:
    # Each answer is validated (and re-prompted if necessary) by the helper
    # functions defined above before it is used.
    Age=input("Enter Your Age: ")
    age=isNumber(Age)
    Gender=input("Enter Your Gender Type-'Male' or 'Female' or 'm' or 'f': ")
    gen=convertGender(Gender)
    print("Reply 'y' for Yes and 'n' for No only! ")
    Polyuria=input("Are you noticing accessive secretion of urine? ")
    ria=convert(Polyuria)
    Polydipsia=input("Are you having unusual thirst?")
    sia=convert(Polydipsia)
    Swl=input("Are you having sudden weight loss? ")
    swl=convert(Swl)
    Weak=input("Are you feeling weakness? ")
    weak=convert(Weak)
    Polyphagia=input("Are you having excessive appetite or eating? ")
    gia=convert(Polyphagia)
    GeniThru=input("Do you have infection or symptoms of infection near genital parts? ")
    thru=convert(GeniThru)
    visualblurring=input("Are You having visual issues recently i.e Lack of sharpness of vision? ")
    vsl=convert(visualblurring)
    Itch=input ("Do you have Itching? ")
    itch=convert(Itch)
    Irrr=input("Do you easily annoyed or irritated? ")
    irr=convert(Irrr)
    # This answer feeds the 'delayed healing' feature, so the question asks
    # about healing (the previous wording asked about hearing).
    delahe=input("Do you have delayed healing? ")
    hear=convert(delahe)
    Para=input ("Do you have mild paralysis or the weakening of a muscle? ")
    para=convert(Para)
    MS=input("Are you facing muscle stiffness? ")
    ms=convert(MS)
    Alp=input("Are your hair usually falls out in small patches on the scalp? ")
    alp=convert(Alp)
    Obesity=input("Are you overweight? ")
    ob=convert(Obesity)
    # Same order as the columns of `new_inputs`, i.e. the order the model was trained on.
    a=[age,gen,ria,sia,swl,weak,gia,thru,vsl,itch,irr,hear,para,ms,alp,ob]
    try:
        # The model was fitted on standardized features, so the raw answers
        # must pass through the same fitted scaler before prediction. Using
        # the training column names keeps the scaler's feature-name check
        # satisfied; astype(float) also converts the age string to a number.
        features = pd.DataFrame([a], columns=new_inputs.columns).astype(float)
        output = model.predict(scaled_data.transform(features))

        # predict() returns an array with one label for the single row.
        if output[0] == "Negative":
            print('''
            |--------------------------------|
            |         Non Diabetic           |
            |--------------------------------|
                    ''')
        else:
            print('''
            |--------------------------------|
            |           Diabetic             |
            |--------------------------------|      

                    ''')
    except Exception as err:
        # Catch Exception rather than everything, so Ctrl-C still interrupts
        # the loop, and show the cause instead of hiding it.
        print('''
            |--------------------------------|
            |    Program is restarted again! |
            |     Due to invalid input       |
            |                                |
            |--------------------------------|
            ''')
        print("Details:", err)
    forloop=input('Do you want to re-run the program?')
    forlooprep=convert(forloop)
    repeat = (forlooprep == 1)
        
        
        
  



