In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

In [2]:
# Load the dataset using Pandas
data = pd.read_csv('diabetes.csv')

In [4]:
# For x: Extract out the dataset from all the rows (all samples) and all columns except last column (all features). 
# For y: Extract out the last column (which is the label)
# Convert x to numpy using the .values method
x = data.iloc[:,0:-1].values
y_string= list(data.iloc[:,-1])

In [5]:
print(x[:3]) # Display the first three rows of the feature variables stored in 'x'.
print(y_string[:3]) # Display the first three elements of the target variable array 'y_string'.

[[  6.  148.   72.   35.    0.   33.6  50. ]
 [  1.   85.   66.   29.    0.   26.6  31. ]
 [  8.  183.   64.    0.    0.   23.3  32. ]]
['positive', 'negative', 'positive']


In [6]:
# Convert the string labels in 'y_string' to integer labels and store them in 'y_int'.
y_int = []
for string in y_string:
    if string == 'positive':
        y_int.append(1) # Append 1 to 'y_int' for each 'positive' label in 'y_string'.
    else:
        y_int.append(0) # Append 0 to 'y_int' for each label in 'y_string' that is not 'positive'.

In [7]:
# Convert the list 'y_int' to a NumPy array 'y' with the data type 'float64'.
y = np.array(y_int, dtype = 'float64')

### $x^{\prime}=\frac{x-\mu}{\sigma}$

In [8]:
# Apply Feature Normalization to x through StandardScaler(): all features should have the same range of values (-1,1)
sc = StandardScaler()
x = sc.fit_transform(x)

In [9]:
# Convert the NumPy array 'x' into a PyTorch tensor for use in neural network training.
x = torch.tensor(x)
# Convert the NumPy array 'y' into a PyTorch tensor and add an extra dimension.
# An extra dimension is added to convert this array to 2D.
y = torch.tensor(y).unsqueeze(1)

In [10]:
print(x.shape)
print(y.shape)

torch.Size([768, 7])
torch.Size([768, 1])


In [11]:
# Define a custom Dataset class for handling the diabetes dataset in PyTorch.

# __init__: Initialize the Dataset object with feature tensor 'x' and label tensor 'y'.

# __getitem__: Enable retrieval of a single data point (features and label) from the dataset using its index.

# __len__: Return the total number of samples in the dataset, which is useful for iterating over the dataset.

class Dataset(Dataset):

    def __init__(self,x,y):
        self.x = x
        self.y = y
        
    def __getitem__(self,index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

In [12]:
dataset = Dataset(x,y)

In [13]:
len(dataset)

768

In [14]:
# Initialize a DataLoader for the dataset, for batch processing, with a batch size of 32 and enabled shuffling.
train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=32,
                                           shuffle=True)

In [16]:
# Display the number of batches in the train_loader and the shape of data and labels for one batch.
print("There is {} batches in the dataset".format(len(train_loader)))
for (x,y) in train_loader:
    print("For one iteration (batch), there are:")
    print("Data:    {}".format(x.shape))
    print("Labels:  {}".format(y.shape))
    break

There is 24 batches in the dataset
For one iteration (batch), there are:
Data:    torch.Size([32, 7])
Labels:  torch.Size([32, 1])


![demo](https://user-images.githubusercontent.com/30661597/60379583-246e5e80-9a68-11e9-8b7f-a4294234c201.png)

In [17]:
# Define a neural network model class 'Model' with four fully connected layers and activation functions.

# __init__: Initialize the Model with layers and activation functions.
# - Four fully connected (linear) layers with varying input/output features.
# - Tanh activation function for non-linearity.
# - Sigmoid activation function for classification.

# forward: Define the forward pass of the neural network.
# - Sequentially pass the input 'x' through the fully connected layers.
# - Apply Tanh activation after each of the first three layers.
# - Apply Sigmoid activation after the final layer to output a probability.
class Model(nn.Module):
    def __init__(self, input_features):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        self.fc4 = nn.Linear(3, 1)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

$H_{p}(q)=-\frac{1}{N} \sum_{i=1}^{N} y_{i} \cdot \log \left(p\left(y_{i}\right)\right)+\left(1-y_{i}\right) \cdot \log \left(1-p\left(y_{i}\right)\right)$


cost = -(Y * torch.log(hypothesis) + (1 - Y) * torch.log(1 - hypothesis)).mean()

In [19]:
# Initialize the neural network 'net' with the number of input features from 'x'.
net = Model(x.shape[1])
# In Binary Cross Entropy: the input and output should have the same shape.
# size_average = True --> the losses are averaged over observations for each minibatch.
# Note: size_average has been depreciated. reduction='mean' is the new form.
# BCELoss is used to measure the discrepancy between predicted probabilities and actual labels, essential for effective model training.
# Initialize the Binary Cross-Entropy (BCE) loss function with size averaging:
criterion = torch.nn.BCELoss(reduction='mean')   
# SGD with momentum with a learning rate of 0.1 is used.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

## Training the network

In [20]:
num_epochs = 200 # the network will be trained for 200 epochs

for epoch in range(num_epochs): # Loop over each epoch.
    for inputs,labels in train_loader:
        # Convert inputs and labels to float type for compatibility with model.
        inputs = inputs.float()
        labels = labels.float()
        # Feed Forward: Pass the data through the network to get output.
        output = net(inputs)
        # Loss Calculation: Compute the loss between the network's output and actual labels.
        loss = criterion(output, labels)
        # Clear the gradient buffer to prevent accumulation of gradients from previous iterations.
        optimizer.zero_grad()
        # Backpropagation: Compute the gradient of the loss with respect to the network's weights.
        loss.backward()
        # Weight Update: Adjust the weights of the network based on the calculated gradients.
        # <<Weight Update: w <-- w - lr * gradient>>
        optimizer.step()
        

    # Since the sigmoid activation function is used for classification, thresholding needs to be performed.
    # Convert the output probabilities to binary values (0 or 1) based on a threshold of 0.5:
    output = (output>0.5).float()
    # Accuracy: (output == labels).float().sum() / output.shape[0]
    accuracy = (output == labels).float().mean()
    # Statistics for tracking network training process: 
    # Print the current epoch number, total epochs, loss, and accuracy for each epoch during training.
    print("Epoch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1,num_epochs, loss, accuracy))

Epoch 1/200, Loss: 0.617, Accuracy: 0.625
Epoch 2/200, Loss: 0.445, Accuracy: 0.750
Epoch 3/200, Loss: 0.530, Accuracy: 0.750
Epoch 4/200, Loss: 0.458, Accuracy: 0.781
Epoch 5/200, Loss: 0.514, Accuracy: 0.688
Epoch 6/200, Loss: 0.482, Accuracy: 0.812
Epoch 7/200, Loss: 0.421, Accuracy: 0.844
Epoch 8/200, Loss: 0.385, Accuracy: 0.844
Epoch 9/200, Loss: 0.476, Accuracy: 0.719
Epoch 10/200, Loss: 0.453, Accuracy: 0.750
Epoch 11/200, Loss: 0.422, Accuracy: 0.812
Epoch 12/200, Loss: 0.453, Accuracy: 0.750
Epoch 13/200, Loss: 0.582, Accuracy: 0.688
Epoch 14/200, Loss: 0.423, Accuracy: 0.781
Epoch 15/200, Loss: 0.425, Accuracy: 0.812
Epoch 16/200, Loss: 0.369, Accuracy: 0.844
Epoch 17/200, Loss: 0.519, Accuracy: 0.688
Epoch 18/200, Loss: 0.426, Accuracy: 0.781
Epoch 19/200, Loss: 0.476, Accuracy: 0.875
Epoch 20/200, Loss: 0.350, Accuracy: 0.844
Epoch 21/200, Loss: 0.406, Accuracy: 0.781
Epoch 22/200, Loss: 0.286, Accuracy: 0.906
Epoch 23/200, Loss: 0.409, Accuracy: 0.750
Epoch 24/200, Loss: 

Epoch 192/200, Loss: 0.371, Accuracy: 0.688
Epoch 193/200, Loss: 0.562, Accuracy: 0.594
Epoch 194/200, Loss: 0.351, Accuracy: 0.844
Epoch 195/200, Loss: 0.460, Accuracy: 0.750
Epoch 196/200, Loss: 0.469, Accuracy: 0.781
Epoch 197/200, Loss: 0.408, Accuracy: 0.781
Epoch 198/200, Loss: 0.219, Accuracy: 0.906
Epoch 199/200, Loss: 0.464, Accuracy: 0.781
Epoch 200/200, Loss: 0.471, Accuracy: 0.719
