In [41]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [42]:
data = pd.read_csv("../src/diabetes.csv")

In [43]:
X = data.iloc[:,:-1].values
y_str = data.iloc[:,-1].values

In [44]:
y = []
for s in y_str:
    if s == "positive":
        y.append(1)
    else:
        y.append(0)

In [45]:
X

array([[  6. , 148. ,  72. , ...,   0. ,  33.6,  50. ],
       [  1. ,  85. ,  66. , ...,   0. ,  26.6,  31. ],
       [  8. , 183. ,  64. , ...,   0. ,  23.3,  32. ],
       ...,
       [  5. , 121. ,  72. , ..., 112. ,  26.2,  30. ],
       [  1. , 126. ,  60. , ...,   0. ,  30.1,  47. ],
       [  1. ,  93. ,  70. , ...,   0. ,  30.4,  23. ]])

Feature Normalization. All features will have standard distribution

In [46]:
sc = StandardScaler()
X = sc.fit_transform(X) # Standard Scale Normalization

In [47]:
X

array([[ 0.63994726,  0.84832379,  0.14964075, ..., -0.69289057,
         0.20401277,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.69289057,
        -0.68442195, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -0.69289057,
        -1.10325546, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ...,  0.27959377,
        -0.73518964, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.69289057,
        -0.24020459,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.69289057,
        -0.20212881, -0.87137393]])

In [48]:
# Now we convert the arrays to PyTorch tensors
X = torch.tensor(X)
y = torch.tensor(y)

In [49]:
X.shape

torch.Size([768, 7])

In [50]:
y.shape

torch.Size([768])

Currently the problem is they are different dimensionally. So we need to add another dimension by using **unsqueeze()**

In [51]:
y = y.unsqueeze(1)

In [52]:
y.shape

torch.Size([768, 1])

We will build our own Dataset class

In [53]:
class Dataset(Dataset):
    
    def __init__(self, x, y):
        self.x = x
        self.y = y
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

In [54]:
dataset = Dataset(X,y)

In [55]:
len(dataset)

768

Load the data to your DataLoader for batch processing and shuffling

In [56]:
train_loader = DataLoader(dataset=dataset,
          batch_size=32,
          shuffle=True)

In [57]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x247139f3248>

Let us look at the data loader

In [58]:
print("There is {} batches in the dataset".format(len(train_loader)))

There is 24 batches in the dataset


In [60]:
for (x,y) in train_loader:
    print("For one iteration (batch), there is : ")
    print("Data   :    {}".format(x.shape))
    print("Labels :    {}".format(y.shape))
    break

For one iteration (batch), there is : 
Data   :    torch.Size([32, 7])
Labels :    torch.Size([32, 1])


# Building the Network
<img src="../img/Neural Network Image.png"/>

We will build the above model<br>
We will build fully connected layer (Linear Layer/ Multi-layer perceptron)

**nn.Linear(no_of_input, no_of_output)**

In [64]:
class Model(nn.Module): # Class Inheritance
    def __init__(self, no_of_input_features, no_of_output_features):
        # We want to take all of the features of the Module class
        super(Model, self).__init__()
        # our attributes = Layers
        # our functionalities = Forward Propagation
        self.fc1 = nn.Linear(no_of_input_features, 5) # FC (Fully Connected Layer)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)
        self.fc4 = nn.Linear(3,no_of_output_features)
        # Define activation Function
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

<img src="../img/equation.svg" style="float:left"/>

cost = -(Y torch.log(hypothesis) + (1 - Y) torch.log(1 - hypothesis)).mean()

Creating a network (An object of Net class)

In [65]:
net = Model(7,1)

In Binary Cross Entropy, the input and output should have the same shape

`size_average=True` The losses are averaged over observation for minibatch

In [68]:
criterion = torch.nn.BCELoss(reduction='mean')

We will use SGD with momentum with a learning rate of 0.1<br>
torch.optim. provides bunch of optimezers<br><br>
**SGD()**<br><br>
Accessing the weights<br>
**net.parameters()**

In [69]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum = 0.9)

## Trainning the network

In [72]:
epochs = 200
for epoch in range(epochs):
    for inputs,labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        # Feeding our data to the neural network (Forward Propagation)
        outputs = net(inputs) # Pytorch will automatically feed to forward function
        # So we could also just use net.forward().
        
        # Calculate loss
        loss = criterion(outputs, labels)
        
        # Begin forward propagation (3 Steps)
        # 1. Clear the gradient Buffer 
        optimizer.zero_grad()
        # 2. Calculate all the gradient (back propagation)
        loss.backward()
        # 3. Update Weights ( w <-- w - lr*gradient)
        optimizer.step()
        
    # After this is done, we want to calculate the training accuracy
    # Accuracy Calculation
    output = (outputs > 0.5).float() # returns [1.,0.,0.,1.,...]
    accuracy = (output == labels).float().mean() # Check if they are equal and returns 1 if equal, 0 not equal
    # Another way of calculating is 
    #   (output == labels).sum() / output.shape[0]
    
    # Print Statistics
    print("Epoch {}/{}, Loss : {:.3f}, Accuracy: {:.3f}".format(epoch+1, epochs, loss, accuracy))

Epoch 1/200, Loss : 0.518, Accuracy: 0.719
Epoch 2/200, Loss : 0.329, Accuracy: 0.875
Epoch 3/200, Loss : 0.630, Accuracy: 0.688
Epoch 4/200, Loss : 0.441, Accuracy: 0.719
Epoch 5/200, Loss : 0.568, Accuracy: 0.750
Epoch 6/200, Loss : 0.579, Accuracy: 0.688
Epoch 7/200, Loss : 0.458, Accuracy: 0.781
Epoch 8/200, Loss : 0.474, Accuracy: 0.781
Epoch 9/200, Loss : 0.403, Accuracy: 0.844
Epoch 10/200, Loss : 0.620, Accuracy: 0.625
Epoch 11/200, Loss : 0.437, Accuracy: 0.781
Epoch 12/200, Loss : 0.533, Accuracy: 0.750
Epoch 13/200, Loss : 0.513, Accuracy: 0.781
Epoch 14/200, Loss : 0.556, Accuracy: 0.688
Epoch 15/200, Loss : 0.358, Accuracy: 0.875
Epoch 16/200, Loss : 0.300, Accuracy: 0.906
Epoch 17/200, Loss : 0.486, Accuracy: 0.781
Epoch 18/200, Loss : 0.436, Accuracy: 0.781
Epoch 19/200, Loss : 0.436, Accuracy: 0.844
Epoch 20/200, Loss : 0.382, Accuracy: 0.844
Epoch 21/200, Loss : 0.404, Accuracy: 0.781
Epoch 22/200, Loss : 0.658, Accuracy: 0.688
Epoch 23/200, Loss : 0.503, Accuracy: 0.8

Epoch 185/200, Loss : 0.433, Accuracy: 0.750
Epoch 186/200, Loss : 0.321, Accuracy: 0.875
Epoch 187/200, Loss : 0.407, Accuracy: 0.812
Epoch 188/200, Loss : 0.395, Accuracy: 0.812
Epoch 189/200, Loss : 0.330, Accuracy: 0.844
Epoch 190/200, Loss : 0.418, Accuracy: 0.844
Epoch 191/200, Loss : 0.449, Accuracy: 0.750
Epoch 192/200, Loss : 0.248, Accuracy: 0.875
Epoch 193/200, Loss : 0.345, Accuracy: 0.812
Epoch 194/200, Loss : 0.253, Accuracy: 0.906
Epoch 195/200, Loss : 0.653, Accuracy: 0.625
Epoch 196/200, Loss : 0.302, Accuracy: 0.875
Epoch 197/200, Loss : 0.401, Accuracy: 0.812
Epoch 198/200, Loss : 0.353, Accuracy: 0.812
Epoch 199/200, Loss : 0.695, Accuracy: 0.656
Epoch 200/200, Loss : 0.449, Accuracy: 0.781
