# Import Libraries and Packages

In [0]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import Dataset
# from torch.utils.data import DataLoader
import torch.nn as nn

## Load the Dataset using Pandas

In [0]:
data = pd.read_csv('/content/diabetes.csv')

In [0]:
# For x: Extract out the dataset from all the rows (all samples and all columns except last column(all features))
# For y: Extract out the last column(which is the label)
# Convert both to numpy using the .values method

x = data.iloc[:,0:-1].values
y_string = list(data.iloc[:,-1])
#y_string = data.iloc[:,-1]

In [0]:
# Our neural network only understand numbers! so convert the strings to label
y_int = []
for s in y_string:
  if s == "positive":
    y_int.append(1)
  else:
    y_int.append(0)


In [0]:
# Now convert to an array
y = np.array(y_int, dtype= 'float64')

In [0]:
# Feature normalization, All features should have the same range of values(-1, 1)
sc = StandardScaler()
x = sc.fit_transform(x)

In [53]:
x

array([[ 0.63994726,  0.84832379,  0.14964075, ..., -0.69289057,
         0.20401277,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.69289057,
        -0.68442195, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -0.69289057,
        -1.10325546, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ...,  0.27959377,
        -0.73518964, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.69289057,
        -0.24020459,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.69289057,
        -0.20212881, -0.87137393]])

In [0]:
x = torch.tensor(x)
y = torch.tensor(y)
y = y.unsqueeze(1)

In [55]:
print(x.shape)
print(y.shape)

torch.Size([768, 7])
torch.Size([768, 1])


In [0]:
class Dataset(Dataset):

  def __init__(self,x,y):
    self.x = x
    self.y = y

  def __getitem__(self, index):
    return self.x[index], self.y[index]

  def __len__(self):
    return len(self.x)

In [0]:
dataset = Dataset(x,y)

In [58]:
len(dataset)

768

In [0]:
# Load the data to Dataloader for batch processing and shuffling
train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [60]:
# Let's have a look at the dataloader
print("There is {} batch in the dataset".format(len(train_loader)))
for (x,y) in train_loader:
  print("For one iteration or batch there is:")
  print("Data {}".format(x.shape))
  print("Labels {}".format(y.shape))
  break

There is 24 batch in the dataset
For one iteration or batch there is:
Data torch.Size([32, 7])
Labels torch.Size([32, 1])


In [0]:
# Let's create our model
class Model(nn.Module):
  def __init__(self,input_features, output_features):
    super(Model, self).__init__()
    self.fc1 = nn.Linear(input_features, 5)
    self.fc2 = nn.Linear(5, 4)
    self.fc3 = nn.Linear(4, 3)
    self.fc4 = nn.Linear(3, output_features)
    self.sigmoid = nn.Sigmoid()
    self.tanh = nn.Tanh()

  def forward(self, x):
    out = self.fc1(x)
    out = self.tanh(out)
    out = self.fc2(out)
    out = self.tanh(out)
    out = self.fc3(out)
    out = self.tanh(out)
    out = self.fc4(out)
    out = self.sigmoid(out)
    return out


In [62]:
# Let's create network (an object of the Model class)
net = Model(7,1)
#In Binary Cross Entropy: the input and output should have the same shape
#size_average = True ----> the losses are averaged over observations for each minibatch

creterion = torch.nn.BCELoss(size_average=True)
# We wil use SGD with momentum with a learning rate of 0.1
optimizer =  torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)




In [63]:
# Training Network

epochs = 200
for epoch in range(epochs):
  for inputs, labels in train_loader:
    inputs = inputs.float()
    labels = labels.float()
    #FORWARD PROP
    outputs = net(inputs)
    ## outputs = net.forward(inputs, labels)
    
    #CALCULATE LOSS
    loss = creterion(outputs, labels)
    
    #CLEAR THE GRADIENT BUFFER (w= w -lr*gradient)
    optimizer.zero_grad()

    #BACKWARD PROP
    loss.backward()

    #Update weights 
    optimizer.step()
  
  # Accuracy Calculation
  output = (outputs>0.5).float()
  accuracy = (output == labels).float().mean()
  #output = (output == labels).float().sum()/ output.shape[0]
  # Print Statistics
  print("Epoch: {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1, epochs, loss, accuracy))




Epoch: 1/200, Loss: 0.576, Accuracy: 0.719
Epoch: 2/200, Loss: 0.558, Accuracy: 0.688
Epoch: 3/200, Loss: 0.557, Accuracy: 0.719
Epoch: 4/200, Loss: 0.517, Accuracy: 0.688
Epoch: 5/200, Loss: 0.556, Accuracy: 0.719
Epoch: 6/200, Loss: 0.418, Accuracy: 0.781
Epoch: 7/200, Loss: 0.486, Accuracy: 0.812
Epoch: 8/200, Loss: 0.477, Accuracy: 0.812
Epoch: 9/200, Loss: 0.499, Accuracy: 0.719
Epoch: 10/200, Loss: 0.468, Accuracy: 0.781
Epoch: 11/200, Loss: 0.421, Accuracy: 0.812
Epoch: 12/200, Loss: 0.406, Accuracy: 0.750
Epoch: 13/200, Loss: 0.341, Accuracy: 0.844
Epoch: 14/200, Loss: 0.496, Accuracy: 0.750
Epoch: 15/200, Loss: 0.626, Accuracy: 0.656
Epoch: 16/200, Loss: 0.311, Accuracy: 0.906
Epoch: 17/200, Loss: 0.422, Accuracy: 0.844
Epoch: 18/200, Loss: 0.595, Accuracy: 0.719
Epoch: 19/200, Loss: 0.356, Accuracy: 0.844
Epoch: 20/200, Loss: 0.482, Accuracy: 0.750
Epoch: 21/200, Loss: 0.320, Accuracy: 0.875
Epoch: 22/200, Loss: 0.399, Accuracy: 0.844
Epoch: 23/200, Loss: 0.495, Accuracy: 0.7