In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from pickle import dump

In [2]:
data = pd.read_csv('diabetes.csv')
data , test_data = train_test_split(data, test_size=0.2) 
print(data.dtypes)

Pregnancies                   int64
Glucose                       int64
BloodPressure                 int64
SkinThickness                 int64
Insulin                       int64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                           int64
Outcome                       int64
dtype: object


In [3]:
def normalize(data:pd.DataFrame):
    scaler = preprocessing.MinMaxScaler()
    data[['Pregnancies' , 'Glucose', 'BloodPressure' , 'SkinThickness' , 'Insulin', 'BMI','DiabetesPedigreeFunction','Age']] = scaler.fit_transform(data[['Pregnancies' , 'Glucose', 'BloodPressure' , 'SkinThickness' , 'Insulin', 'BMI','DiabetesPedigreeFunction','Age']].to_numpy())
    dump(scaler , open("MinMaxScaler.pkl" , "wb"))
    return data


In [4]:
normalize(data)
data[:5].head

<bound method NDFrame.head of      Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
335     0.000000  0.829146       0.622951       0.434343  0.301418  0.713860   
161     0.411765  0.512563       0.606557       0.404040  0.124113  0.554396   
405     0.117647  0.618090       0.393443       0.323232  0.195035  0.627422   
724     0.058824  0.557789       0.770492       0.000000  0.000000  0.488823   
15      0.411765  0.502513       0.000000       0.000000  0.000000  0.447094   

     DiabetesPedigreeFunction       Age  Outcome  
335                  0.077284  0.098039        0  
161                  0.053800  0.470588        0  
405                  0.188728  0.098039        0  
724                  0.079846  0.470588        0  
15                   0.173356  0.215686        1  >

In [5]:
target = data['Outcome']
data.pop('Outcome')

335    0
161    0
405    0
724    0
15     1
      ..
223    0
644    0
725    0
525    0
345    0
Name: Outcome, Length: 614, dtype: int64

In [6]:
def get_compiled_model():
    model = tf.keras.Sequential([
    tf.keras.layers.Dense(8 ,activation='relu'),
    tf.keras.layers.Dense(4, activation='relu'),
    tf.keras.layers.Dense(1,activation='tanh')
  ])

    model.compile(optimizer='adam',
                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                metrics=['accuracy'],)
    return model


In [7]:
train_dataset = tf.data.Dataset.from_tensor_slices((data.values , target.values))
train_dataset = train_dataset.shuffle(len(data)).batch(1)
for feat , targ in train_dataset.take(5):
    print("features: {0} targets: {1}".format(feat , targ))

features: [[0.35294118 0.95477387 0.75409836 0.         0.         0.5290611
  0.0853971  0.88235294]] targets: [1]
features: [[0.05882353 0.65326633 0.57377049 0.13131313 0.12411348 0.38599106
  0.16823228 0.01960784]] targets: [0]
features: [[0.17647059 0.69849246 0.44262295 0.         0.         0.38152012
  0.1383433  0.01960784]] targets: [1]
features: [[0.17647059 0.44221106 0.47540984 0.11111111 0.06382979 0.36959762
  0.08070026 0.01960784]] targets: [0]
features: [[0.         0.52261307 0.52459016 0.23232323 0.13711584 0.414307
  0.16054654 0.03921569]] targets: [0]


In [8]:
model = get_compiled_model()
model.fit(train_dataset , epochs=20)

Epoch 1/20


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f86c94cb9a0>

In [9]:
test_data = normalize(test_data)
test_data.head

<bound method NDFrame.head of      Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
479     0.333333  0.539007       0.811321       0.574074  0.000000  0.535373   
647     0.000000  0.872340       0.471698       0.666667  0.213710  0.722753   
662     0.666667  0.787234       1.000000       0.851852  0.310484  0.718929   
381     0.000000  0.347518       0.641509       0.407407  0.000000  0.382409   
570     0.250000  0.156028       0.660377       0.000000  0.000000  0.621415   
..           ...       ...            ...            ...       ...       ...   
38      0.166667  0.241135       0.641509       0.777778  0.000000  0.730402   
333     1.000000  0.354610       0.754717       0.000000  0.000000  0.451243   
618     0.750000  0.397163       0.773585       0.444444  0.000000  0.539197   
536     0.000000  0.347518       0.849057       0.000000  0.000000  0.565966   
459     0.750000  0.553191       0.698113       0.611111  0.080645  0.495220   

     Diab

In [10]:
new_target = test_data.pop('Outcome')
test_dataset = tf.data.Dataset.from_tensor_slices((test_data.values , new_target.values))
test_dataset = test_dataset.shuffle(len(data)).batch(1)



In [11]:
results = model.evaluate(test_dataset)
print("test loss, test acc:", results)

test loss, test acc: [0.5728100538253784, 0.7532467246055603]


In [12]:
model.save('\models')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: \models/assets


In [13]:
model.predict( [[0.06666666666666667, 0.4358974358974359, 0.6, 0.537037037037037, 0.0, 0.569593147751606, 0.127867252318204, 0.20833333333333326]] , verbose=1)



array([[-0.8543557]], dtype=float32)