# Importing Modules

In [15]:
import tensorflow as tf
import pandas as pd

# Getting the Data

In [16]:
#Download the data set from Kaggle and load it using pandas
diabetes = pd.read_csv('PIMA.csv')
#.head() method allows us to look at the the first 5 rows of the dataset
diabetes.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


# Keeping the Data clean

### The predictor variables are defined as follows

- Pregnancies: Number of times pregnant
- Glucose: Plasma glucose concentration a 2 hours in an oral glucose tolerance test
- Blood Pressure: Diastolic blood pressure (mm Hg)
- Skin Thickness: Triceps skin fold thickness (mm)
- Insulin: 2-Hour serum insulin (mu U/ml)
- BMI: Body mass index (weight in kg/(height in m)^2)
- DPF: Diabetes pedigree function
- Age

## Feature Scaling

In [17]:
#Now we'll list the column names and normalize them between 0 & 1 only for the continuous features
cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age']
#here (x.max() - x.min()) is considered as standard deviation
diabetes[cols] = diabetes[cols].apply(lambda x: (x - x.min())/(x.max() - x.min()))
diabetes.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333,1
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667,0
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333,1
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0,0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2,1


## Preparing the training and test set from the available dataset

In [18]:
x_data = diabetes.drop('Outcome', axis=1)
print(x_data.head())
y_data = diabetes['Outcome']
print(y_data.head())
print(diabetes.shape)

   Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
0     0.352941  0.743719       0.590164       0.353535  0.000000  0.500745   
1     0.058824  0.427136       0.540984       0.292929  0.000000  0.396423   
2     0.470588  0.919598       0.524590       0.000000  0.000000  0.347243   
3     0.058824  0.447236       0.540984       0.232323  0.111111  0.418778   
4     0.000000  0.688442       0.327869       0.353535  0.198582  0.642325   

   DiabetesPedigreeFunction       Age  
0                  0.234415  0.483333  
1                  0.116567  0.166667  
2                  0.253629  0.183333  
3                  0.038002  0.000000  
4                  0.943638  0.200000  
0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64
(768, 9)


In [19]:
#converting the above dataframe into numPy array
x_data = x_data[cols].values
for i in range(len(x_data)):
    for j in range(8):
        #converting into float values to use them effectively for calculation
        float(x_data[i,j])
x_data=x_data.tolist()
x_test=x_data[568:769]

In [20]:
y_data=y_data.values
for i in range(len(y_data)):
    float(y_data[i])
y_data=y_data.tolist()
y_test=y_data[568:769]

# Designing a model

In [21]:
#using the tf.keras.Sequential model for stacking layers. 
#Choosing an optimizer and loss function for training.
model = tf.keras.models.Sequential([
  tf.keras.layers.Dense(100, activation='relu'),
  tf.keras.layers.Dense(250, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1,activation='sigmoid')
])

# Training the Model

In [22]:
#Compile simply defines the loss function, the optimizer and the metrics.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [23]:
#here we use the complete data for training the model to get better accuracy for the test dataset
#The Model.fit method adjusts the model parameters to minimize the loss
model.fit(x_data,y_data, epochs=150, batch_size=10,verbose=0)

<tensorflow.python.keras.callbacks.History at 0x13edd2690>

# Evaluating the Model

In [24]:
#The Model.evaluate method checks the models performance on the"Test-set"
model.evaluate(x_test,  y_test, verbose=2)

7/7 - 0s - loss: 0.2453 - accuracy: 0.9050


[0.2453005462884903, 0.9049999713897705]

In [25]:
# Generate predictions (probabilities -- the output of the last layer) on test data using `predict`
predictions = model.predict(x_test)
rounded = [round(x[0]) for x in predictions]
print(rounded)
model.evaluate(x_test,  y_test, verbose=2)

[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]

[0.2453005462884903, 0.9049999713897705]