

# Imports & Data loading


In [18]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset
from torch.utils.data import dataloader

In [2]:
# Load Dataset using Pandas
data = pd.read_csv("diabetes.csv")

In [3]:
data

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,63,negative
764,2,122,70,27,0,36.8,27,negative
765,5,121,72,23,112,26.2,30,negative
766,1,126,60,0,0,30.1,47,positive


In [4]:
# Extract the train & test data
x = data.iloc[: , 0:-1].values
y_string = list(data.iloc[:,-1])
y_string[:10]

['positive',
 'negative',
 'positive',
 'negative',
 'positive',
 'negative',
 'positive',
 'negative',
 'positive',
 'positive']

In [5]:
# convert the test data to integers
y_int = []
for s in y_string:
  if s == 'positive':
    y_int.append(1)
  else:
    y_int.append(0)

In [6]:
y_int[:10]

[1, 0, 1, 0, 1, 0, 1, 0, 1, 1]

In [7]:
# convert to a numpy array
y = np.array(y_int, dtype = 'float64')

# Normalize X

In [8]:
x[:5]

array([[  6. , 148. ,  72. ,  35. ,   0. ,  33.6,  50. ],
       [  1. ,  85. ,  66. ,  29. ,   0. ,  26.6,  31. ],
       [  8. , 183. ,  64. ,   0. ,   0. ,  23.3,  32. ],
       [  1. ,  89. ,  66. ,  23. ,  94. ,  28.1,  21. ],
       [  0. , 137. ,  40. ,  35. , 168. ,  43.1,  33. ]])

In [9]:
# Normalize every feature in the X dataset to (-1, 1)
sc = StandardScaler()
x = sc.fit_transform(x)   # calc mean 

In [10]:
x[:5]

array([[ 0.63994726,  0.84832379,  0.14964075,  0.90726993, -0.69289057,
         0.20401277,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575,  0.53090156, -0.69289057,
        -0.68442195, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, -1.28821221, -0.69289057,
        -1.10325546, -0.10558415],
       [-0.84488505, -0.99820778, -0.16054575,  0.15453319,  0.12330164,
        -0.49404308, -1.04154944],
       [-1.14185152,  0.5040552 , -1.50468724,  0.90726993,  0.76583594,
         1.4097456 , -0.0204964 ]])

# Converting to Torch

In [11]:
x = torch.tensor(x)
y = torch.tensor(y).unsqueeze(1)

In [14]:
print(x.shape)
print(y.shape)

torch.Size([768, 7])
torch.Size([768, 1])


# define Dateset

In [15]:
class Dataset(Dataset):

  def __init__(self, x, y):
    self.x = x
    self.y = y
  
  def __getitem__(self, index):
      return self.x[index], self.y[index]
    
  def __len__(self):
    return len(self.x)
    

In [17]:
dataset = Dataset(x,y)
len(dataset)

768

In [20]:
# load data using dataloader
train_loader = torch.utils.data.DataLoader(dataset = dataset, batch_size= 32, shuffle= True)

In [23]:
# a glance on our data loader
print("There is {} batches in the dataset".format(len(train_loader)))
for (x,y) in train_loader:
  print("For one iteration (batch) there is: ")
  print("Data:    {}".format(x.shape))
  print("Labels:  {}".format(y.shape))
  break

There is 24 batches in the dataset
For one iteration (batch) there is: 
Data:    torch.Size([32, 7])
Labels:  torch.Size([32, 1])


# Building the Neural Network

we build a 7 input features NN with one output layer. <br>
The output will go throught a Sigmoid function in order to get the distrbutuion of out network and to range the output (0-1)

In [25]:
class NN(nn.Module):

  def __init__(self, input_featurs, output_featurs):

    super(NN, self).__init__()
    #layers
    self.fc1 = nn.Linear(input_featurs,5)
    self.fc2 = nn.Linear(5,4)
    self.fc3 = nn.Linear(4,3)
    self.fc4 = nn.Linear(3,output_featurs)
    #activation functions
    self.sigmoid = nn.Sigmoid()
    self.tanh = nn.Tanh()

  def forward(self, x):
    
    out = self.fc1(x)
    out = self.tanh(out)
    out = self.fc2(out)
    out = self.tanh(out)
    out = self.fc3(out)
    out = self.tanh(out)
    out = self.fc4(out)
    out = self.sigmoid(out)
    return out


In [26]:
# create a neural network
net = NN(7,1)

In [27]:
# Binary Cross Entrepy
criterion = torch.nn.BCELoss(size_average= True)



In [28]:
# use SGD optimizer
optimizer = torch.optim.SGD(net.parameters(), lr = 0.1, momentum= 0.9)

# Train the Neural Network

In [31]:
# define hyperparameters
epochs = 200
# train loop
for epoch in range(epochs):
  for inputs, labels in train_loader:
    inputs = inputs.float()
    labels = labels.float()
    # forward Prop
    outputs = net(inputs)   # same as net.forward(inputs)
    # Loss Calculation
    loss = criterion(outputs, labels)
    # Clear the gradiant buffer (w <-- w - Lr*gradiant)
    optimizer.zero_grad()
    # Back Prop
    loss.backward()
    # Update Weigths
    optimizer.step()

  # Accuracy calculation 
  output = (outputs>0.5).float()   # if the output is bigger than 0.5 then change to 1 , else 0
  # compare the results with the labels
  accuracy = (output == labels).float().mean()
  # print statistics
  print("Epoch {}/{}, Loss: {:.3f}, Accuracy{:.3f}".format(epoch+1,epochs, loss, accuracy))


Epoch 1/200, Loss: 0.422, Accuracy0.844
Epoch 2/200, Loss: 0.320, Accuracy0.844
Epoch 3/200, Loss: 0.345, Accuracy0.812
Epoch 4/200, Loss: 0.486, Accuracy0.719
Epoch 5/200, Loss: 0.323, Accuracy0.875
Epoch 6/200, Loss: 0.415, Accuracy0.844
Epoch 7/200, Loss: 0.392, Accuracy0.812
Epoch 8/200, Loss: 0.426, Accuracy0.750
Epoch 9/200, Loss: 0.474, Accuracy0.781
Epoch 10/200, Loss: 0.368, Accuracy0.781
Epoch 11/200, Loss: 0.338, Accuracy0.812
Epoch 12/200, Loss: 0.377, Accuracy0.875
Epoch 13/200, Loss: 0.256, Accuracy0.938
Epoch 14/200, Loss: 0.336, Accuracy0.875
Epoch 15/200, Loss: 0.563, Accuracy0.656
Epoch 16/200, Loss: 0.370, Accuracy0.844
Epoch 17/200, Loss: 0.398, Accuracy0.812
Epoch 18/200, Loss: 0.568, Accuracy0.594
Epoch 19/200, Loss: 0.387, Accuracy0.812
Epoch 20/200, Loss: 0.565, Accuracy0.719
Epoch 21/200, Loss: 0.354, Accuracy0.844
Epoch 22/200, Loss: 0.395, Accuracy0.812
Epoch 23/200, Loss: 0.196, Accuracy0.938
Epoch 24/200, Loss: 0.330, Accuracy0.875
Epoch 25/200, Loss: 0.432