# Diabetes Prediction Using k Nearest Neighbors

In [1]:
import pandas as pd
import numpy as np

In [2]:
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

In [3]:
df = read_csv("diabetes.csv")

In [4]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
df.shape

(768, 9)

In [6]:
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [7]:
df1=df.copy()

In [8]:
# Zero not acceppted features - replace zeros with mean of the respective columns
z_n_a = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI']
for column in z_n_a:
    df1[column] = df1[column].replace(0,np.NaN)
    mean = int(df1[column].mean(skipna=True))
    df1[column] = df1[column].replace(np.NaN,mean)
    

In [9]:
array = df.values
X = array[:,0:8]
y = array[:,8]

In [10]:
def func1():
    k_range = range(1, 10)
    n_range = range(2,10)
    kscores = []
    nsplits = []
    f_results = 0
    f_k = 0
    f_ns = 0
    result_l = []
    for n in n_range:
        for k in k_range:
            kfold = KFold(n_splits=n, random_state=2, shuffle=True)
            knn = KNeighborsClassifier(n_neighbors=k)
            results = cross_val_score(knn, X, y, cv = kfold)
            if(results.mean() > f_results):
                f_k = k
                f_ns = n
                f_results = results.mean()
                result_l.append(results.mean())
                nsplits.append(n)
                kscores.append(k)
    print('k = ',f_k,' n_splits = ',f_ns,' result = ',f_results)
    print()

In [11]:
func1()

k =  9  n_splits =  8  result =  0.74609375



Import Library

In [12]:
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV

import warnings
warnings.filterwarnings('ignore')

In [13]:
df_model = df.copy()

features = list(df_model.columns)
features = [features[:-1]]
print(features)

[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']]


Scaling with MinMaxScaler

In [14]:
scaler = MinMaxScaler()
for feature in features:
    df_model[feature] = scaler.fit_transform(df_model[feature])

#Create KNN Object
knn = KNeighborsClassifier()

#Create X and y variable
X = df_model.drop(columns=['Outcome'])
y = df_model['Outcome']

func1()

k =  7  n_splits =  8  result =  0.7447916666666666



Scaling with Robust Scaler

In [15]:
df_model = df.copy()

features = list(df_model.columns)
features = [features[:-1]]
#print(features)

scaler = RobustScaler()
for feature in features:
    df_model[feature] = scaler.fit_transform(df_model[feature])

#Create KNN Object
knn = KNeighborsClassifier()

#Create X and y variable
X = df_model.drop(columns=['Outcome'])
y = df_model['Outcome']

func1()

k =  5  n_splits =  3  result =  0.7630208333333334



Modeling with Standard Scaler

In [16]:
df_model = df.copy()

#list(df_model.columns.values.tolist())
#list(df_model.columns.values)
features = list(df_model.columns)
#print(features)
features = [features[:-1]]

#for feature in features:
#    print(feature)

scaler = StandardScaler()

for feature in features:
    df_model[feature] = scaler.fit_transform(df_model[feature])

#Create KNN Object
knn = KNeighborsClassifier()

#Create X and y variable
X = df_model.drop(columns=['Outcome'])
y = df_model['Outcome']

func1()

k =  5  n_splits =  3  result =  0.7526041666666666



# Feature Selection and Feature Scaling

In [17]:
l = []
for i in range(1,9):
    for j in range(1,9):
        l.append([i,j])

# Modeling with RobustScaler

In [18]:
#RobustScaler
# Removing 1 Feature and Selecting 7 other features
features = list(df1.columns)
features = [features[:-1]]

scaler = RobustScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])


#Create X and y variable
for feature in features[0]: 
    print(feature)
    X = df1.drop(columns=['Outcome',feature])
    y = df1['Outcome']
    func1()

Pregnancies
k =  7  n_splits =  3  result =  0.7565104166666666

Glucose
k =  9  n_splits =  9  result =  0.6823985408116735

BloodPressure
k =  9  n_splits =  6  result =  0.7526041666666666

SkinThickness
k =  8  n_splits =  6  result =  0.7526041666666666

Insulin
k =  9  n_splits =  3  result =  0.74609375

BMI
k =  7  n_splits =  3  result =  0.7369791666666666

DiabetesPedigreeFunction
k =  7  n_splits =  3  result =  0.7708333333333334

Age
k =  9  n_splits =  3  result =  0.7369791666666666



In [19]:
#RobustScaler
# Removal of 2 features selecting 6 other 
features = list(df1.columns)
features = [features[:-1]]

scaler = RobustScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        l.append([i,j])


#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]]])
    y = df1['Outcome']
    func1()
    

Pregnancies Glucose
k =  9  n_splits =  9  result =  0.7122663018695851

Pregnancies BloodPressure
k =  7  n_splits =  2  result =  0.7552083333333333

Pregnancies SkinThickness
k =  9  n_splits =  2  result =  0.76171875

Pregnancies Insulin
k =  9  n_splits =  3  result =  0.7473958333333334

Pregnancies BMI
k =  7  n_splits =  3  result =  0.73046875

Pregnancies DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.76953125

Pregnancies Age
k =  9  n_splits =  3  result =  0.75

Glucose BloodPressure
k =  9  n_splits =  8  result =  0.6940104166666666

Glucose SkinThickness
k =  9  n_splits =  3  result =  0.69921875

Glucose Insulin
k =  9  n_splits =  2  result =  0.6666666666666666

Glucose BMI
k =  9  n_splits =  3  result =  0.67578125

Glucose DiabetesPedigreeFunction
k =  7  n_splits =  6  result =  0.68359375

Glucose Age
k =  9  n_splits =  9  result =  0.6940112479100168

BloodPressure SkinThickness
k =  9  n_splits =  4  result =  0.7552083333333334

BloodPressure 

In [20]:
#RobustScaler
# Removal of 3 features 
features = list(df1.columns)
features = [features[:-1]]

scaler = RobustScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            l.append([i,j,k])


    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]]])
    y = df1['Outcome']
    func1()
    

Pregnancies Glucose BloodPressure
k =  7  n_splits =  5  result =  0.6992700110347169

Pregnancies Glucose SkinThickness
k =  9  n_splits =  9  result =  0.7005319957440341

Pregnancies Glucose Insulin
k =  9  n_splits =  5  result =  0.6849673202614379

Pregnancies Glucose BMI
k =  9  n_splits =  8  result =  0.6822916666666666

Pregnancies Glucose DiabetesPedigreeFunction
k =  9  n_splits =  6  result =  0.6809895833333334

Pregnancies Glucose Age
k =  9  n_splits =  6  result =  0.7122395833333334

Pregnancies BloodPressure SkinThickness
k =  7  n_splits =  2  result =  0.77734375

Pregnancies BloodPressure Insulin
k =  8  n_splits =  2  result =  0.7578125

Pregnancies BloodPressure BMI
k =  7  n_splits =  6  result =  0.7395833333333334

Pregnancies BloodPressure DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.75390625

Pregnancies BloodPressure Age
k =  9  n_splits =  3  result =  0.7434895833333334

Pregnancies SkinThickness Insulin
k =  9  n_splits =  2  result =  0

In [21]:
#RobustScaler
# Removal of 4 features
features = list(df1.columns)
features = [features[:-1]]

scaler = RobustScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                l.append([i,j,k,m])


    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness
k =  8  n_splits =  4  result =  0.7161458333333333

Pregnancies Glucose BloodPressure Insulin
k =  7  n_splits =  5  result =  0.6862405568287923

Pregnancies Glucose BloodPressure BMI
k =  9  n_splits =  8  result =  0.6940104166666667

Pregnancies Glucose BloodPressure DiabetesPedigreeFunction
k =  8  n_splits =  9  result =  0.7082839337285302

Pregnancies Glucose BloodPressure Age
k =  9  n_splits =  3  result =  0.7057291666666666

Pregnancies Glucose SkinThickness Insulin
k =  8  n_splits =  9  result =  0.7044079647362821

Pregnancies Glucose SkinThickness BMI
k =  8  n_splits =  6  result =  0.6848958333333334

Pregnancies Glucose SkinThickness DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.6822916666666666

Pregnancies Glucose SkinThickness Age
k =  9  n_splits =  5  result =  0.6939733469145234

Pregnancies Glucose Insulin BMI
k =  8  n_splits =  9  result =  0.6730810153518771

Pregnancies Glucose Insulin Diabetes

In [24]:
#RobustScaler
# Removal of 5 features
features = list(df1.columns)
features = [features[:-1]]

scaler = RobustScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                for o in range(m+1,8):
                    l.append([i,j,k,m,o])


                    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness Insulin
k =  9  n_splits =  5  result =  0.712282488753077

Pregnancies Glucose BloodPressure SkinThickness BMI
k =  5  n_splits =  6  result =  0.6927083333333334

Pregnancies Glucose BloodPressure SkinThickness DiabetesPedigreeFunction
k =  8  n_splits =  7  result =  0.7042654593113309

Pregnancies Glucose BloodPressure SkinThickness Age
k =  8  n_splits =  2  result =  0.7109375

Pregnancies Glucose BloodPressure Insulin BMI
k =  6  n_splits =  2  result =  0.671875

Pregnancies Glucose BloodPressure Insulin DiabetesPedigreeFunction
k =  9  n_splits =  9  result =  0.6795561635506916

Pregnancies Glucose BloodPressure Insulin Age
k =  9  n_splits =  3  result =  0.671875

Pregnancies Glucose BloodPressure BMI DiabetesPedigreeFunction
k =  7  n_splits =  5  result =  0.68882098293863

Pregnancies Glucose BloodPressure BMI Age
k =  7  n_splits =  8  result =  0.69140625

Pregnancies Glucose BloodPressure DiabetesPedigreeFunction Age
k =

In [25]:
#RobustScaler
# Removal of 6 features
features = list(df1.columns)
features = [features[:-1]]

scaler = RobustScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                for o in range(m+1,8):
                    for p in range(o+1,8):
                        l.append([i,j,k,m,o,p])


                    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]],features[0][i[5]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]],features[0][i[5]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness Insulin BMI
k =  7  n_splits =  2  result =  0.6770833333333333

Pregnancies Glucose BloodPressure SkinThickness Insulin DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.6888020833333334

Pregnancies Glucose BloodPressure SkinThickness Insulin Age
k =  8  n_splits =  9  result =  0.6938896488828089

Pregnancies Glucose BloodPressure SkinThickness BMI DiabetesPedigreeFunction
k =  9  n_splits =  4  result =  0.703125

Pregnancies Glucose BloodPressure SkinThickness BMI Age
k =  7  n_splits =  6  result =  0.66796875

Pregnancies Glucose BloodPressure SkinThickness DiabetesPedigreeFunction Age
k =  3  n_splits =  3  result =  0.6875

Pregnancies Glucose BloodPressure Insulin BMI DiabetesPedigreeFunction
k =  9  n_splits =  2  result =  0.65234375

Pregnancies Glucose BloodPressure Insulin BMI Age
k =  8  n_splits =  8  result =  0.6575520833333334

Pregnancies Glucose BloodPressure Insulin DiabetesPedigreeFunction Age
k =  2  n_s

# Modeling with StandardScaler

In [26]:
#StandardScaler
# Removing 1 Feature and Selecting 7 other features
features = list(df1.columns)
features = [features[:-1]]

scaler = StandardScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])


#Create X and y variable
for feature in features[0]: 
    print(feature)
    X = df1.drop(columns=['Outcome',feature])
    y = df1['Outcome']
    func1()

Pregnancies
k =  9  n_splits =  3  result =  0.7760416666666666

Glucose
k =  9  n_splits =  3  result =  0.6927083333333334

BloodPressure
k =  7  n_splits =  3  result =  0.76171875

SkinThickness
k =  7  n_splits =  3  result =  0.7591145833333334

Insulin
k =  9  n_splits =  3  result =  0.7552083333333334

BMI
k =  5  n_splits =  7  result =  0.7370427737400214

DiabetesPedigreeFunction
k =  7  n_splits =  3  result =  0.7591145833333334

Age
k =  9  n_splits =  3  result =  0.7513020833333334



In [27]:
#StandardScaler
# Removal of 2 features selecting 6 other 
features = list(df1.columns)
features = [features[:-1]]

scaler = StandardScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        l.append([i,j])


#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose
k =  9  n_splits =  9  result =  0.7044383644930841

Pregnancies BloodPressure
k =  9  n_splits =  4  result =  0.7786458333333333

Pregnancies SkinThickness
k =  9  n_splits =  3  result =  0.78125

Pregnancies Insulin
k =  9  n_splits =  3  result =  0.7669270833333334

Pregnancies BMI
k =  9  n_splits =  9  result =  0.7460860313117496

Pregnancies DiabetesPedigreeFunction
k =  9  n_splits =  9  result =  0.7643714850281198

Pregnancies Age
k =  9  n_splits =  5  result =  0.746150581444699

Glucose BloodPressure
k =  5  n_splits =  7  result =  0.7044560943643513

Glucose SkinThickness
k =  8  n_splits =  3  result =  0.6875

Glucose Insulin
k =  9  n_splits =  2  result =  0.6744791666666667

Glucose BMI
k =  9  n_splits =  3  result =  0.67578125

Glucose DiabetesPedigreeFunction
k =  9  n_splits =  2  result =  0.671875

Glucose Age
k =  9  n_splits =  9  result =  0.6707250341997263

BloodPressure SkinThickness
k =  9  n_splits =  4  result =  0.763020833333

In [28]:
#StandardScaler
# Removal of 3 features 
features = list(df1.columns)
features = [features[:-1]]

scaler = StandardScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            l.append([i,j,k])


    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure
k =  7  n_splits =  3  result =  0.7109375

Pregnancies Glucose SkinThickness
k =  4  n_splits =  9  result =  0.7070071439428485

Pregnancies Glucose Insulin
k =  7  n_splits =  5  result =  0.6823189882013413

Pregnancies Glucose BMI
k =  8  n_splits =  9  result =  0.6822617419060648

Pregnancies Glucose DiabetesPedigreeFunction
k =  6  n_splits =  9  result =  0.6639762881896945

Pregnancies Glucose Age
k =  9  n_splits =  9  result =  0.693859249126007

Pregnancies BloodPressure SkinThickness
k =  7  n_splits =  2  result =  0.77734375

Pregnancies BloodPressure Insulin
k =  9  n_splits =  2  result =  0.76171875

Pregnancies BloodPressure BMI
k =  9  n_splits =  2  result =  0.7447916666666667

Pregnancies BloodPressure DiabetesPedigreeFunction
k =  9  n_splits =  4  result =  0.7643229166666666

Pregnancies BloodPressure Age
k =  9  n_splits =  3  result =  0.7513020833333334

Pregnancies SkinThickness Insulin
k =  9  n_splits =  2  result =  0.

In [29]:
#StandardScaler
# Removal of 4 features
features = list(df1.columns)
features = [features[:-1]]

scaler = StandardScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                l.append([i,j,k,m])


    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness
k =  8  n_splits =  3  result =  0.7174479166666666

Pregnancies Glucose BloodPressure Insulin
k =  9  n_splits =  2  result =  0.7018229166666667

Pregnancies Glucose BloodPressure BMI
k =  9  n_splits =  7  result =  0.6810437269152866

Pregnancies Glucose BloodPressure DiabetesPedigreeFunction
k =  9  n_splits =  9  result =  0.6978112175102598

Pregnancies Glucose BloodPressure Age
k =  9  n_splits =  2  result =  0.6953125

Pregnancies Glucose SkinThickness Insulin
k =  8  n_splits =  9  result =  0.7056999544003648

Pregnancies Glucose SkinThickness BMI
k =  8  n_splits =  3  result =  0.6966145833333334

Pregnancies Glucose SkinThickness DiabetesPedigreeFunction
k =  6  n_splits =  9  result =  0.6835385316917465

Pregnancies Glucose SkinThickness Age
k =  6  n_splits =  6  result =  0.6822916666666666

Pregnancies Glucose Insulin BMI
k =  8  n_splits =  9  result =  0.6665602675178599

Pregnancies Glucose Insulin DiabetesPedigreeF

In [30]:
#StandardScaler
# Removal of 5 features
features = list(df1.columns)
features = [features[:-1]]

scaler = RobustScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                for o in range(m+1,8):
                    l.append([i,j,k,m,o])


                    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness Insulin
k =  9  n_splits =  5  result =  0.712282488753077

Pregnancies Glucose BloodPressure SkinThickness BMI
k =  5  n_splits =  6  result =  0.6927083333333334

Pregnancies Glucose BloodPressure SkinThickness DiabetesPedigreeFunction
k =  8  n_splits =  7  result =  0.7055641606100321

Pregnancies Glucose BloodPressure SkinThickness Age
k =  8  n_splits =  2  result =  0.7109375

Pregnancies Glucose BloodPressure Insulin BMI
k =  6  n_splits =  2  result =  0.671875

Pregnancies Glucose BloodPressure Insulin DiabetesPedigreeFunction
k =  9  n_splits =  9  result =  0.6795561635506916

Pregnancies Glucose BloodPressure Insulin Age
k =  9  n_splits =  3  result =  0.671875

Pregnancies Glucose BloodPressure BMI DiabetesPedigreeFunction
k =  7  n_splits =  5  result =  0.6927085985909516

Pregnancies Glucose BloodPressure BMI Age
k =  7  n_splits =  8  result =  0.6927083333333334

Pregnancies Glucose BloodPressure DiabetesPedigreeFuncti

In [31]:
#StandardScaler
# Removal of 6 features
features = list(df1.columns)
features = [features[:-1]]

scaler = StandardScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                for o in range(m+1,8):
                    for p in range(o+1,8):
                        l.append([i,j,k,m,o,p])


                    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]],features[0][i[5]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]],features[0][i[5]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness Insulin BMI
k =  9  n_splits =  3  result =  0.6809895833333334

Pregnancies Glucose BloodPressure SkinThickness Insulin DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.69921875

Pregnancies Glucose BloodPressure SkinThickness Insulin Age
k =  8  n_splits =  9  result =  0.6965192278461774

Pregnancies Glucose BloodPressure SkinThickness BMI DiabetesPedigreeFunction
k =  9  n_splits =  2  result =  0.69140625

Pregnancies Glucose BloodPressure SkinThickness BMI Age
k =  9  n_splits =  4  result =  0.6653645833333333

Pregnancies Glucose BloodPressure SkinThickness DiabetesPedigreeFunction Age
k =  4  n_splits =  9  result =  0.709515123879009

Pregnancies Glucose BloodPressure Insulin BMI DiabetesPedigreeFunction
k =  6  n_splits =  3  result =  0.6432291666666666

Pregnancies Glucose BloodPressure Insulin BMI Age
k =  2  n_splits =  5  result =  0.6562431033019268

Pregnancies Glucose BloodPressure Insulin DiabetesPedigreeFun

# Modeling with MinMaxScaler

In [32]:
# MinMaxScaler
# Removing 1 Feature and Selecting 7 other features
features = list(df1.columns)
features = [features[:-1]]

scaler = MinMaxScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])


#Create X and y variable
for feature in features[0]: 
    print(feature)
    X = df1.drop(columns=['Outcome',feature])
    y = df1['Outcome']
    func1()

Pregnancies
k =  7  n_splits =  3  result =  0.7747395833333334

Glucose
k =  7  n_splits =  2  result =  0.6848958333333334

BloodPressure
k =  4  n_splits =  4  result =  0.7591145833333334

SkinThickness
k =  6  n_splits =  3  result =  0.7591145833333334

Insulin
k =  5  n_splits =  2  result =  0.7604166666666666

BMI
k =  5  n_splits =  6  result =  0.73828125

DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.7565104166666666

Age
k =  8  n_splits =  5  result =  0.7383498854087089



In [33]:
# MinMaxScaler
# Removal of 2 features selecting 6 other 
features = list(df1.columns)
features = [features[:-1]]

scaler = MinMaxScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        l.append([i,j])


#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose
k =  3  n_splits =  3  result =  0.7005208333333334

Pregnancies BloodPressure
k =  9  n_splits =  3  result =  0.7864583333333334

Pregnancies SkinThickness
k =  9  n_splits =  3  result =  0.7903645833333334

Pregnancies Insulin
k =  5  n_splits =  3  result =  0.7643229166666666

Pregnancies BMI
k =  9  n_splits =  8  result =  0.7578125

Pregnancies DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.7721354166666666

Pregnancies Age
k =  9  n_splits =  3  result =  0.7604166666666666

Glucose BloodPressure
k =  7  n_splits =  4  result =  0.6953125000000001

Glucose SkinThickness
k =  9  n_splits =  9  result =  0.6862897096823226

Glucose Insulin
k =  7  n_splits =  9  result =  0.6718346253229974

Glucose BMI
k =  9  n_splits =  3  result =  0.6848958333333334

Glucose DiabetesPedigreeFunction
k =  9  n_splits =  6  result =  0.6770833333333334

Glucose Age
k =  8  n_splits =  2  result =  0.6627604166666667

BloodPressure SkinThickness
k =  9  n_spli

In [34]:
# MinMaxScaler
# Removal of 3 features 
features = list(df1.columns)
features = [features[:-1]]

scaler = MinMaxScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            l.append([i,j,k])


    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure
k =  6  n_splits =  3  result =  0.7096354166666666

Pregnancies Glucose SkinThickness
k =  6  n_splits =  9  result =  0.7070375436996504

Pregnancies Glucose Insulin
k =  8  n_splits =  2  result =  0.6901041666666667

Pregnancies Glucose BMI
k =  9  n_splits =  3  result =  0.6927083333333334

Pregnancies Glucose DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.6875

Pregnancies Glucose Age
k =  8  n_splits =  9  result =  0.6822161422708618

Pregnancies BloodPressure SkinThickness
k =  6  n_splits =  2  result =  0.7916666666666667

Pregnancies BloodPressure Insulin
k =  9  n_splits =  2  result =  0.7734375

Pregnancies BloodPressure BMI
k =  5  n_splits =  2  result =  0.7604166666666666

Pregnancies BloodPressure DiabetesPedigreeFunction
k =  9  n_splits =  7  result =  0.7734540688669129

Pregnancies BloodPressure Age
k =  7  n_splits =  3  result =  0.7526041666666666

Pregnancies SkinThickness Insulin
k =  9  n_splits =  4  result 

In [35]:
# MinMaxScaler
# Removal of 4 features
features = list(df1.columns)
features = [features[:-1]]

scaler = MinMaxScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                l.append([i,j,k,m])


    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness
k =  8  n_splits =  3  result =  0.72265625

Pregnancies Glucose BloodPressure Insulin
k =  8  n_splits =  2  result =  0.7096354166666666

Pregnancies Glucose BloodPressure BMI
k =  9  n_splits =  3  result =  0.6861979166666666

Pregnancies Glucose BloodPressure DiabetesPedigreeFunction
k =  9  n_splits =  7  result =  0.7029667580126296

Pregnancies Glucose BloodPressure Age
k =  7  n_splits =  2  result =  0.7044270833333334

Pregnancies Glucose SkinThickness Insulin
k =  7  n_splits =  4  result =  0.7031250000000001

Pregnancies Glucose SkinThickness BMI
k =  8  n_splits =  3  result =  0.6901041666666666

Pregnancies Glucose SkinThickness DiabetesPedigreeFunction
k =  7  n_splits =  9  result =  0.6913208694330445

Pregnancies Glucose SkinThickness Age
k =  9  n_splits =  5  result =  0.6822510822510822

Pregnancies Glucose Insulin BMI
k =  8  n_splits =  9  result =  0.6730658154734762

Pregnancies Glucose Insulin DiabetesPedigree

In [36]:
# MinMaxScaler
# Removal of 5 features
features = list(df1.columns)
features = [features[:-1]]

scaler = MinMaxScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                for o in range(m+1,8):
                    l.append([i,j,k,m,o])


                    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness Insulin
k =  4  n_splits =  2  result =  0.7174479166666666

Pregnancies Glucose BloodPressure SkinThickness BMI
k =  9  n_splits =  3  result =  0.7135416666666666

Pregnancies Glucose BloodPressure SkinThickness DiabetesPedigreeFunction
k =  9  n_splits =  5  result =  0.7134368899074781

Pregnancies Glucose BloodPressure SkinThickness Age
k =  8  n_splits =  2  result =  0.7083333333333334

Pregnancies Glucose BloodPressure Insulin BMI
k =  9  n_splits =  2  result =  0.67578125

Pregnancies Glucose BloodPressure Insulin DiabetesPedigreeFunction
k =  8  n_splits =  3  result =  0.68359375

Pregnancies Glucose BloodPressure Insulin Age
k =  9  n_splits =  3  result =  0.6796875

Pregnancies Glucose BloodPressure BMI DiabetesPedigreeFunction
k =  7  n_splits =  5  result =  0.6744758509464391

Pregnancies Glucose BloodPressure BMI Age
k =  5  n_splits =  8  result =  0.671875

Pregnancies Glucose BloodPressure DiabetesPedigreeFunction Ag

In [37]:
# MinMaxScaler
# Removal of 6 features
features = list(df1.columns)
features = [features[:-1]]

scaler = MinMaxScaler()
for feature in features:
    df1[feature] = scaler.fit_transform(df1[feature])

l = []
for i in range(0,8):
    for j in range(i+1,8):
        for k in range(j+1,8):
            for m in range(k+1,8):
                for o in range(m+1,8):
                    for p in range(o+1,8):
                        l.append([i,j,k,m,o,p])


                    
#Create X and y variable
for i in l: 
    print(features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]],features[0][i[5]])
    X = df1.drop(columns=['Outcome', features[0][i[0]],features[0][i[1]],features[0][i[2]],features[0][i[3]],features[0][i[4]],features[0][i[5]]])
    y = df1['Outcome']
    func1()

Pregnancies Glucose BloodPressure SkinThickness Insulin BMI
k =  9  n_splits =  3  result =  0.67578125

Pregnancies Glucose BloodPressure SkinThickness Insulin DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.6979166666666666

Pregnancies Glucose BloodPressure SkinThickness Insulin Age
k =  8  n_splits =  9  result =  0.6952120383036937

Pregnancies Glucose BloodPressure SkinThickness BMI DiabetesPedigreeFunction
k =  9  n_splits =  5  result =  0.6914353620235973

Pregnancies Glucose BloodPressure SkinThickness BMI Age
k =  9  n_splits =  4  result =  0.6705729166666666

Pregnancies Glucose BloodPressure SkinThickness DiabetesPedigreeFunction Age
k =  4  n_splits =  6  result =  0.7018229166666666

Pregnancies Glucose BloodPressure Insulin BMI DiabetesPedigreeFunction
k =  9  n_splits =  3  result =  0.6484375

Pregnancies Glucose BloodPressure Insulin BMI Age
k =  2  n_splits =  5  result =  0.6562176385705797

Pregnancies Glucose BloodPressure Insulin DiabetesPedigreeFun