# Akimel O’odham diabetes
## Setup

In [201]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from tensorflow.python.keras.wrappers.scikit_learn import KerasClassifier

## Let's have a sneak peek at the data

In [202]:
# Read the dataset from the CSV file in the working directory, then show it
# (a bare expression at the end of a notebook cell renders the frame).
df = pd.read_csv(filepath_or_buffer="diabetes.csv")
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72.0,35,169.5,33.6,0.627,50,1
1,1,85,66.0,29,102.5,26.6,0.351,31,0
2,8,183,64.0,32,169.5,23.3,0.672,32,1
3,1,89,66.0,23,94.0,28.1,0.167,21,0
4,0,137,40.0,35,168.0,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76.0,48,180.0,32.9,0.171,63,0
764,2,122,70.0,27,102.5,36.8,0.340,27,0
765,5,121,72.0,23,112.0,26.2,0.245,30,0
766,1,126,60.0,32,169.5,30.1,0.349,47,1


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72.0,35,169.5,33.6,0.627,50,1
1,1,85,66.0,29,102.5,26.6,0.351,31,0
2,8,183,64.0,32,169.5,23.3,0.672,32,1
3,1,89,66.0,23,94.0,28.1,0.167,21,0
4,0,137,40.0,35,168.0,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76.0,48,180.0,32.9,0.171,63,0
764,2,122,70.0,27,102.5,36.8,0.340,27,0
765,5,121,72.0,23,112.0,26.2,0.245,30,0
766,1,126,60.0,32,169.5,30.1,0.349,47,1


Now let's add features that record which range each measurement falls into. Here are the normal ranges from [Kaggle](https://www.kaggle.com/jamaltariqcheema/pima-indians-diabetes-dataset)


> Glucose: Glucose (< 140) = Normal, Glucose (140-200) = Pre-Diabetic, Glucose (> 200) = Diabetic
> BloodPressure: B.P (< 60) = Below Normal, B.P (60-80) = Normal, B.P (80-90) = Stage 1 Hypertension, B.P (90-120) = Stage 2 Hypertension, B.P (> 120) = Hypertensive Crisis
> SkinThickness: SkinThickness (< 10) = Below Normal, SkinThickness (10-30) = Normal, SkinThickness (> 30) = Above Normal
> Insulin: Insulin (< 200) = Normal, Insulin (> 200) = Above Normal
> BMI: BMI (< 18.5) = Underweight, BMI (18.5-25) = Normal, BMI (25-30) = Overweight, BMI (> 30) = Obese


In [203]:
# Bucket each raw measurement into an ordinal category code (0, 1, 2, ...)
# using the reference ranges quoted above.
# pd.cut uses right-closed intervals by default, so a value equal to a bin
# edge lands in the lower bucket (e.g. BloodPressure 60.0 -> code 0), and a
# value of exactly 0 falls outside every bin and becomes NaN.
_range_specs = [
    # (new column, source column, bin edges)
    ("GlucoseRange", "Glucose", [0, 140, 200, 999]),
    ("BloodPressureRange", "BloodPressure", [0, 60, 80, 90, 120, 999]),
    ("SkinThicknessRange", "SkinThickness", [0, 10, 30, 999]),
    ("InsulinRange", "Insulin", [0, 200, 999]),
    ("BMIRange", "BMI", [0, 18.5, 25, 30, 999]),
]
for _target, _source, _edges in _range_specs:
    # One label per interval: N edges delimit N - 1 buckets.
    df[_target] = pd.cut(
        df[_source],
        bins=_edges,
        labels=list(range(len(_edges) - 1)),
    )
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,GlucoseRange,BloodPressureRange,SkinThicknessRange,InsulinRange,BMIRange
0,6,148,72.0,35,169.5,33.6,0.627,50,1,1,1,2,0,3
1,1,85,66.0,29,102.5,26.6,0.351,31,0,0,1,1,0,2
2,8,183,64.0,32,169.5,23.3,0.672,32,1,1,1,2,0,1
3,1,89,66.0,23,94.0,28.1,0.167,21,0,0,1,1,0,2
4,0,137,40.0,35,168.0,43.1,2.288,33,1,0,0,2,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
763,10,101,76.0,48,180.0,32.9,0.171,63,0,0,1,2,0,3
764,2,122,70.0,27,102.5,36.8,0.340,27,0,0,1,1,0,3
765,5,121,72.0,23,112.0,26.2,0.245,30,0,0,1,1,0,2
766,1,126,60.0,32,169.5,30.1,0.349,47,1,0,0,2,0,3


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,GlucoseRange,BloodPressureRange,SkinThicknessRange,InsulinRange,BMIRange
0,6,148,72.0,35,169.5,33.6,0.627,50,1,1,1,2,0,3
1,1,85,66.0,29,102.5,26.6,0.351,31,0,0,1,1,0,2
2,8,183,64.0,32,169.5,23.3,0.672,32,1,1,1,2,0,1
3,1,89,66.0,23,94.0,28.1,0.167,21,0,0,1,1,0,2
4,0,137,40.0,35,168.0,43.1,2.288,33,1,0,0,2,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
763,10,101,76.0,48,180.0,32.9,0.171,63,0,0,1,2,0,3
764,2,122,70.0,27,102.5,36.8,0.340,27,0,0,1,1,0,3
765,5,121,72.0,23,112.0,26.2,0.245,30,0,0,1,1,0,2
766,1,126,60.0,32,169.5,30.1,0.349,47,1,0,0,2,0,3


Now we pull the labels (`y`) and the features (`x`) out of the DataFrame as NumPy arrays.

In [204]:
# Split the frame into the feature matrix (every column except "Outcome")
# and the label vector (the "Outcome" column), both as NumPy arrays.
x = df.drop(columns="Outcome").to_numpy()
y = df["Outcome"].to_numpy()
x

array([[  6., 148.,  72., ...,   2.,   0.,   3.],
       [  1.,  85.,  66., ...,   1.,   0.,   2.],
       [  8., 183.,  64., ...,   2.,   0.,   1.],
       ...,
       [  5., 121.,  72., ...,   1.,   0.,   2.],
       [  1., 126.,  60., ...,   2.,   0.,   3.],
       [  1.,  93.,  70., ...,   2.,   0.,   3.]])

array([[  6., 148.,  72., ...,   2.,   0.,   3.],
       [  1.,  85.,  66., ...,   1.,   0.,   2.],
       [  8., 183.,  64., ...,   2.,   0.,   1.],
       ...,
       [  5., 121.,  72., ...,   1.,   0.,   2.],
       [  1., 126.,  60., ...,   2.,   0.,   3.],
       [  1.,  93.,  70., ...,   2.,   0.,   3.]])

In [205]:
# Display the label vector taken from the "Outcome" column.
y

array([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,

array([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,

## Now the Keras stuff

In [206]:
def create_model():
    """Build and compile the binary classifier network.

    The network is a stack of fully connected layers: two ReLU hidden
    layers, each as wide as the input, followed by a single sigmoid output
    unit. The input width is read from the module-level ``df`` at call time
    as ``len(df.columns) - 1`` (all columns but one; presumably the one
    left out is the ``Outcome`` label -- confirm against how ``x`` is built).

    Returns:
        A compiled ``tf.keras.Sequential`` model using binary cross-entropy
        loss, the Adam optimizer, and a binary-accuracy metric reported
        under the name ``"accuracy"``.
    """
    # Compute the width once so the input and both hidden layers agree.
    n_features = len(df.columns) - 1
    initializer = tf.keras.initializers.random_uniform

    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(n_features,)))

    # Two identical hidden layers.
    for _ in range(2):
        model.add(
            tf.keras.layers.Dense(
                n_features,
                kernel_initializer=initializer,
                activation=tf.keras.activations.relu,
            )
        )

    # A single sigmoid unit whose output lies in (0, 1).
    model.add(
        tf.keras.layers.Dense(
            1,
            kernel_initializer=initializer,
            activation=tf.keras.activations.sigmoid,
        )
    )

    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=[
            # Fixed typo: the class is BinaryAccuracy; "BinaryAccuract"
            # does not exist and raised AttributeError at compile time.
            tf.keras.metrics.BinaryAccuracy(name="accuracy"),
        ],
    )
    return model

## Fitting

We are going to use a stratified K-fold strategy to evaluate the model: it is fitted and scored on different splits of the dataset, which gives a more reliable estimate of its accuracy than a single train/test split.

In [199]:
# Stratified 10-fold splitter with shuffling; no random_state is given, so
# the fold assignment differs from run to run.
kfold = StratifiedKFold(shuffle=True, n_splits=10)

# scikit-learn compatible wrapper that builds a fresh model via create_model.
classifier = KerasClassifier(
    build_fn=create_model,
    batch_size=10,
    epochs=150,
)

# One score per fold.
results = cross_val_score(estimator=classifier, X=x, y=y, cv=kfold)

Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/

## Now we see how well it worked

In [200]:
# Average the per-fold scores into a single figure.
np.mean(results)

0.8529391705989837

0.8619959056377411