In [21]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [4]:
# Load the dataset using Pandas
data = pd.read_csv('diabetes.csv')
data

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,63,negative
764,2,122,70,27,0,36.8,27,negative
765,5,121,72,23,112,26.2,30,negative
766,1,126,60,0,0,30.1,47,positive


In [5]:
# For x: Extract out the dataset from all the rows (all samples) and all columns except last column (all features). 
# For y: Extract out the last column (which is the label)
# Convert both to numpy using the .values method
x = data.iloc[:,0:-1].values
y_string= list(data.iloc[:,-1])

In [6]:
# Lets have a look some samples from our data
print(x[:3])
print(y_string[:3])

[[  6.  148.   72.   35.    0.   33.6  50. ]
 [  1.   85.   66.   29.    0.   26.6  31. ]
 [  8.  183.   64.    0.    0.   23.3  32. ]]
['positive', 'negative', 'positive']


In [7]:
# Our neural network only understand numbers! So convert the string to labels
y_int = []
for string in y_string:
    if string == 'positive':
        y_int.append(1)
    else:
        y_int.append(0)

In [8]:
# Now convert to an array
y = np.array(y_int, dtype = 'float64')

In [9]:
# Feature Normalization. All features should have the same range of values (-1,1)
sc = StandardScaler()
x = sc.fit_transform(x)

In [10]:
# Now we convert the arrays to PyTorch tensors
x = torch.tensor(x)
# We add an extra dimension to convert this array to 2D
y = torch.tensor(y).unsqueeze(1)

In [11]:
#lets watch shape of it
print(x.shape)
print(y.shape)

torch.Size([768, 7])
torch.Size([768, 1])


In [12]:
class Dataset(Dataset):

    def __init__(self,x,y):
        self.x = x
        self.y = y
        
    def __getitem__(self,index):
        # Get one item from the dataset
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

In [13]:
dataset = Dataset(x,y)

In [14]:
len(dataset)

768

In [23]:
# Load the data to your dataloader for batch processing and shuffling
train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=32,
                                           shuffle=True)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x202d2b09d90>

In [16]:
# Let's have a look at the data loader
print("There is {} batches in the dataset".format(len(train_loader)))
for (x,y) in train_loader:
    print("For one iteration (batch), there is:")
    print("Data:    {}".format(x.shape))
    print("Labels:  {}".format(y.shape))
    break

There is 24 batches in the dataset
For one iteration (batch), there is:
Data:    torch.Size([32, 7])
Labels:  torch.Size([32, 1])


In [26]:
# Now let's build the above network
class Model(nn.Module):
    def __init__(self, input_features , output_features):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        self.fc4 = nn.Linear(3, 1)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

In [29]:
# Create the network (an object of the Net class)
net = Model(7,1)
#In Binary Cross Entropy: the input and output should have the same shape 
#size_average = True --> the losses are averaged over observations for each minibatch
criterion = torch.nn.BCELoss(size_average=True)   
# We will use SGD with momentum with a learning rate of 0.1
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

In [32]:
# Train the network 
num_epochs = 200
for epoch in range(num_epochs):
    for inputs,labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        # Feed Forward
        output = net(inputs)
        # Loss Calculation
        loss = criterion(output, labels)
        # Clear the gradient buffer (we don't want to accumulate gradients)
        optimizer.zero_grad()
        # Backpropagation 
        loss.backward()
        # Weight Update: w <-- w - lr * gradient
        optimizer.step()
        
    #Accuracy
    # Since we are using a sigmoid, we will need to perform some thresholding
    output = (output>0.5).float()
    # Accuracy: (output == labels).float().sum() / output.shape[0]
    accuracy = (output == labels).float().mean()
    # Print statistics 
    print("Epoch {}/{}, Loss: {:.5f}, Accuracy: {:.5f}".format(epoch+1,num_epochs, loss, accuracy))

Epoch 1/200, Loss: 0.47433, Accuracy: 0.68750
Epoch 2/200, Loss: 0.47199, Accuracy: 0.65625
Epoch 3/200, Loss: 0.41792, Accuracy: 0.81250
Epoch 4/200, Loss: 0.26280, Accuracy: 0.84375
Epoch 5/200, Loss: 0.42737, Accuracy: 0.71875
Epoch 6/200, Loss: 0.28289, Accuracy: 0.87500
Epoch 7/200, Loss: 0.27447, Accuracy: 0.84375
Epoch 8/200, Loss: 0.45021, Accuracy: 0.78125
Epoch 9/200, Loss: 0.52810, Accuracy: 0.65625
Epoch 10/200, Loss: 0.35457, Accuracy: 0.81250
Epoch 11/200, Loss: 0.33479, Accuracy: 0.87500
Epoch 12/200, Loss: 0.30650, Accuracy: 0.87500
Epoch 13/200, Loss: 0.47745, Accuracy: 0.75000
Epoch 14/200, Loss: 0.34004, Accuracy: 0.87500
Epoch 15/200, Loss: 0.37817, Accuracy: 0.84375
Epoch 16/200, Loss: 0.39060, Accuracy: 0.78125
Epoch 17/200, Loss: 0.26434, Accuracy: 0.87500
Epoch 18/200, Loss: 0.57882, Accuracy: 0.75000
Epoch 19/200, Loss: 0.36213, Accuracy: 0.81250
Epoch 20/200, Loss: 0.39697, Accuracy: 0.78125
Epoch 21/200, Loss: 0.52269, Accuracy: 0.68750
Epoch 22/200, Loss: 0.

Epoch 175/200, Loss: 0.33519, Accuracy: 0.84375
Epoch 176/200, Loss: 0.47846, Accuracy: 0.75000
Epoch 177/200, Loss: 0.40866, Accuracy: 0.84375
Epoch 178/200, Loss: 0.48812, Accuracy: 0.78125
Epoch 179/200, Loss: 0.34045, Accuracy: 0.81250
Epoch 180/200, Loss: 0.35518, Accuracy: 0.87500
Epoch 181/200, Loss: 0.42491, Accuracy: 0.84375
Epoch 182/200, Loss: 0.44241, Accuracy: 0.84375
Epoch 183/200, Loss: 0.38357, Accuracy: 0.81250
Epoch 184/200, Loss: 0.57944, Accuracy: 0.75000
Epoch 185/200, Loss: 0.38926, Accuracy: 0.81250
Epoch 186/200, Loss: 0.40576, Accuracy: 0.81250
Epoch 187/200, Loss: 0.30477, Accuracy: 0.87500
Epoch 188/200, Loss: 0.45690, Accuracy: 0.87500
Epoch 189/200, Loss: 0.20368, Accuracy: 0.84375
Epoch 190/200, Loss: 0.34483, Accuracy: 0.81250
Epoch 191/200, Loss: 0.49055, Accuracy: 0.84375
Epoch 192/200, Loss: 0.39685, Accuracy: 0.68750
Epoch 193/200, Loss: 0.26727, Accuracy: 0.90625
Epoch 194/200, Loss: 0.25368, Accuracy: 0.90625
Epoch 195/200, Loss: 0.35184, Accuracy: 