In [35]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('/content/drive/Shareddrives/Project/NN course/diabetes.csv')
df.shape

(768, 8)

In [3]:
df.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive


In [5]:
df.isnull().sum()

Number of times pregnant        0
Plasma glucose concentration    0
Diastolic blood pressure        0
Triceps skin fold thickness     0
2-Hour serum insulin            0
Body mass index                 0
Age                             0
Class                           0
dtype: int64

In [6]:
df.Class.value_counts()

negative    500
positive    268
Name: Class, dtype: int64

In [13]:
x = df.iloc[:, :-1].values # cols, rows
y_str = list(df.iloc[:, -1])

In [15]:
x.shape, len(y_str)

((768, 7), 768)

In [19]:
y_int = [1 if i == 'positive' else 0 for i in y_str]

In [22]:
y_int.count(0), y_int.count(1)

(500, 268)

In [24]:
y = np.array(y_int, dtype='float64')

In [25]:
y

array([1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1.,
       1., 0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0.,
       0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0.,
       0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1.,
       1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1.,
       1., 0., 0., 0., 0.

In [26]:
sc = StandardScaler()
x = sc.fit_transform(x)


In [30]:
x = torch.tensor(x)
# Change y into 2 dim for BCE

y = torch.tensor(y).unsqueeze(1)

  x = torch.tensor(x)
  y = torch.tensor(y).unsqueeze(1)


In [31]:
x.shape, y.shape

(torch.Size([768, 7]), torch.Size([768, 1]))

In [32]:
# Custom dataset pytorch

class Dataset(Dataset):

  def __init__(self, x, y):
    self.x = x
    self.y = y


  def __getitem__(self, index):
    return self.x[index], self.y[index]

  def __len__(self):
    return len(self.x)



In [33]:
dataset = Dataset(x, y)

In [34]:
len(dataset)

768

In [37]:
# Load data using dataloader
train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [38]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7f0958d9fb80>

In [39]:
print(f'There are {len(train_loader)} batches in the dataset')

for (x, y) in train_loader:
  print('For one iteration (batch), there are: ')
  print(f'Data: {x.shape}')
  print(f'Labels: {y.shape}')
  break

There are 24 batches in the dataset
For one iteration (batch), there are: 
Data: torch.Size([32, 7])
Labels: torch.Size([32, 1])


In [42]:
# 7->5->4->3->1 layered NN
# Hidden tanh, output sigmoid
class Model(nn.Module):

  def __init__(self, input_features, output_features):
    super(Model, self).__init__()
    self.fc1 = nn.Linear(input_features, 5)
    self.fc2 = nn.Linear(5, 4)
    self.fc3 = nn.Linear(4, 3)
    self.fc4 = nn.Linear(3, output_features)
    self.sigmoid = nn.Sigmoid()
    self.tanh = nn.Tanh()

  def forward(self, x):
    out = self.fc1(x)
    out = self.tanh(out)
    out = self.fc2(out)
    out = self.tanh(out)
    out = self.fc3(out)
    out = self.tanh(out)
    out = self.fc4(out)
    out = self.sigmoid(out)
    return out

In [44]:
# NN
net = Model(7, 1)
# Loss
criterion = nn.BCELoss(size_average=True)
# Optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)


In [45]:
# Train network
epochs = 200
for epoch in range(epochs):
  for inputs, labels in train_loader:
    inputs = inputs.float()
    labels = labels.float()
    # forward prop
    outputs = net(inputs) # it is same as net.forward(inputs)
    # loss calc
    loss = criterion(outputs, labels)
    # clear the gradient buffer (other frameworks do this automatically)
    optimizer.zero_grad()
    # back prop
    loss.backward()
    # update weights
    optimizer.step()
  # accuracy calculation
  output = (outputs > 0.5).float()
  accuracy = (output == labels).float().mean()
  # print statistics
  print('Epoch {} / {}, Loss: {:.3f}, Accuracy: {:.3f}'.format(epoch + 1, epochs, loss, accuracy))



Epoch 1 / 200, Loss: 0.507, Accuracy: 0.719
Epoch 2 / 200, Loss: 0.547, Accuracy: 0.719
Epoch 3 / 200, Loss: 0.467, Accuracy: 0.750
Epoch 4 / 200, Loss: 0.498, Accuracy: 0.750
Epoch 5 / 200, Loss: 0.589, Accuracy: 0.781
Epoch 6 / 200, Loss: 0.634, Accuracy: 0.688
Epoch 7 / 200, Loss: 0.373, Accuracy: 0.812
Epoch 8 / 200, Loss: 0.470, Accuracy: 0.781
Epoch 9 / 200, Loss: 0.393, Accuracy: 0.844
Epoch 10 / 200, Loss: 0.382, Accuracy: 0.875
Epoch 11 / 200, Loss: 0.525, Accuracy: 0.719
Epoch 12 / 200, Loss: 0.395, Accuracy: 0.844
Epoch 13 / 200, Loss: 0.470, Accuracy: 0.719
Epoch 14 / 200, Loss: 0.393, Accuracy: 0.875
Epoch 15 / 200, Loss: 0.286, Accuracy: 0.938
Epoch 16 / 200, Loss: 0.477, Accuracy: 0.750
Epoch 17 / 200, Loss: 0.501, Accuracy: 0.688
Epoch 18 / 200, Loss: 0.446, Accuracy: 0.719
Epoch 19 / 200, Loss: 0.364, Accuracy: 0.875
Epoch 20 / 200, Loss: 0.410, Accuracy: 0.750
Epoch 21 / 200, Loss: 0.647, Accuracy: 0.750
Epoch 22 / 200, Loss: 0.407, Accuracy: 0.750
Epoch 23 / 200, Los