# Diabetes

In [1]:
import torch
import torch.nn as nn
import numpy as np

from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

device = torch.device("cpu")

In [2]:
from sklearn.datasets import load_diabetes

data = load_diabetes()
print (data.feature_names)

X,y = data.data, data.target

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


## 1. Specifying input and targets

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42 )

In [4]:
train_input  = torch.tensor(X_train , dtype = torch.float32)
test_input   = torch.tensor(X_test, dtype = torch.float32)
train_target = torch.tensor(y_train, dtype = torch.float32)
test_target  = torch.tensor(y_test, dtype = torch.float32)

train_input.shape, test_input.shape, train_target.shape, test_target.shape

(torch.Size([296, 10]),
 torch.Size([146, 10]),
 torch.Size([296]),
 torch.Size([146]))

## 2. Dataloaders

In [5]:
train_ds = TensorDataset (train_input, train_target)
train_ds[0]

(tensor([ 0.0090, -0.0446, -0.0310,  0.0219,  0.0081,  0.0087,  0.0045, -0.0026,
          0.0094,  0.0113]),
 tensor(154.))

In [6]:
test_ds = TensorDataset (test_input, test_target)

In [7]:
# check the size of train data to get the batch size
len(train_ds)

# 309 / 32 = 10 round

296

In [8]:
batch_size = 32 # binary - fits nicely to CPU
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers = 4)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=True, num_workers = 4)

In [9]:
# test
for batch_x, batch_y, in train_dl:
    print ("One batch of X: ", batch_x.shape)
    print ("One batch of y: ", batch_y.shape)
    break
    

One batch of X:  torch.Size([32, 10])
One batch of y:  torch.Size([32])


## 3. Layers

In [10]:
model = nn.Sequential (
    nn.Linear (10, 24),
    nn.ReLU(), # allow us to learn non-linear funcitons- activation function
    nn.Linear (24, 12),
    nn.ReLU(),
    nn.Linear (12, 6),
    nn.ReLU(),
    nn.Linear (6,1)
)

# any number of matrix multiplication can be approximated into one matrix multiplication.
# nn.Linear (10, 24),
# nn.Linear (24, 12),
# nn.Linear (12, 6),
# nn.Linear (6,1)
# same as nn.Linear(10, 1)

# if you use Sigmoid(), gradient will too small.

# we have tanh, relu, leakly, swinrelu, etc. etc... 
# these activation function make sure gradient > 1

In [11]:
# for p in model.parameters():
    # print (p)
    
# for ix, p in enumerate (model.parameters()):
#     # print (p.shape)
#     print (p.numel())
total_num_of_params = 0
for ix, p in enumerate (model.parameters()):
    total_num_of_params += p.numel()
print (total_num_of_params)

649


In [12]:
# test before training
yhat = model(train_input)
assert yhat.shape[1] ==1
print(yhat.shape)
# print (yhat)

torch.Size([296, 1])


## 4. Loss Function

In [13]:
criterion = nn.MSELoss()

## 5. Optimizer

In [14]:
optimizer = torch.optim.SGD (model.parameters(), lr = 0.0001, momentum= 0.9)

## 6. Training

In [15]:
num_epoch = 5

for epoch in range (num_epoch):
    
    for batch_x, batch_y in train_dl:
        
        batch_x.to(device)
        batch_y.to(device)
        batch_y = batch_y.reshape(-1, 1) #(m,) ==> (m,1)
        
        yhat = model (batch_x)
        
        loss = criterion(yhat, batch_y)
        
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()
        
    print(f"Epoch: {epoch} | Loss: {loss:.2f}")

Epoch: 0 | Loss: 17434.09
Epoch: 1 | Loss: 26013.16
Epoch: 2 | Loss: 3225.57
Epoch: 3 | Loss: 9387.80
Epoch: 4 | Loss: 4205.36


## save the model

In [16]:
# save your model
filename = 'model/diabetes.pth'
torch.save(model.state_dict(), filename)

In [17]:

# model = torch.load(filename)

## 7. Testing

In [18]:
type(model)

torch.nn.modules.container.Sequential

In [25]:
model.eval () # change the model to eval mode - it will not keep the gradients

total_avg_mse = 0
with torch.no_grad():
    
    for batch_x, batch_y in test_dl:
        
        yhat = model(batch_x)
        batch_y = batch_y.reshape (-1,1)
        
        mse = criterion(yhat, batch_y)
        
        total_avg_mse += mse.item() / len(test_dl)

print ("total Average MSE: ", total_avg_mse)

total Average MSE:  6225.57470703125
