## Diabetes

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

import numpy as np

device = torch.device('cpu')

In [2]:
from sklearn.datasets import load_diabetes

data = load_diabetes()
print (data.feature_names)

X, y = data.data, data.target

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


# 1. Specifying input and targets

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split (X, y, test_size = 0.33, random_state = 42)

In [4]:
# change numpy to torch
train_input  = torch.tensor (X_train, dtype = torch.float32)
test_input   = torch.tensor (X_test, dtype = torch.float32)
train_output = torch.tensor (y_train, dtype = torch.float32)
test_output  = torch.tensor (y_test, dtype = torch.float32)

In [5]:
train_input.shape, test_input.shape, train_output.shape, test_output.shape

(torch.Size([296, 10]),
 torch.Size([146, 10]),
 torch.Size([296]),
 torch.Size([146]))

In [6]:
train_input.dtype, test_input.dtype, train_output.dtype, test_output.dtype

(torch.float32, torch.float32, torch.float32, torch.float32)

# 2. Dataloaders

In [7]:
print (data.feature_names)

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


In [8]:
train_ds = TensorDataset (train_input, train_output)
test_ds  = TensorDataset (test_input, test_output)
# train_ds[0]

In [9]:
print(f"",len(train_ds))
296 / 32
'''

296 / 32 = 10 rounds
'''


 296


'\n\n296 / 32 = 10 rounds\n'

In [10]:
batch_size = 32 # binary 
train_dl = DataLoader (train_ds, batch_size = batch_size, shuffle = True,  num_workers = 4)
test_dl  = DataLoader (test_ds,  batch_size = batch_size, shuffle = True, num_workers = 4)

In [11]:
# # test
# for batch_x, batch_y in train_dl:
#     print ("One batch of X:", batch_x.shape)
#     print ("One batch of y:", batch_y.shape)
#     break

# 3. Layers

In [12]:
model = nn.Sequential(
    nn.Linear (10, 24), # 10 features in the dataset
    nn.ReLU(), # gradient = 1
    # nn.Sigmoid () >> allow us to learn non-linear funciton - activation function
    nn.Linear (24, 12),
    nn.ReLU(), # you can use this instead of Sigmoid: tanh, relu, leakly, swinrelu, etc.
    nn.Linear (12, 6),
    nn.ReLU(),
    nn.Linear (6, 1)
)

# any number of matrix mautiplication can be approximate into one matrix multiplication

In [13]:
total_num_of_params = 0
for ix, p in enumerate(model.parameters()):
    # print (p.shape)
    print (p.numel()) # total number in layer: [10, 24] = 240 | bias = 24
    total_num_of_params += p.numel()
print (total_num_of_params)

240
24
288
12
72
6
6
1
649


In [14]:
# train_input.dtype

In [15]:
# always good to test your neural network before training
yhat = model(train_input)
yhat.shape
assert yhat.shape[1] == 1

# 4. Loss function

In [16]:
criterion = nn.MSELoss()

# 5. Optimizer


In [17]:
optimizer = torch.optim.SGD (model.parameters(), lr = 0.0001, momentum = 0.9)

# 6. Training

In [18]:
# ## Putting them together - actually learning!

# basically same as linear / logistic regression
num_epochs = 5

for epoch in range (num_epochs):
    
    for batch_x, batch_y in train_dl:
        
        batch_x.to (device)
        batch_y.to (device)
        batch_y = batch_y.reshape (-1,1) #(m,1) ==> 
        
        # 2.1 Predict
        yhat = model (batch_x)
        
        # 2.2 Calculate loss
        loss = criterion (yhat, batch_y)
        
        # 2.3 Calculate gradients
        optimizer .zero_grad()
        loss.backward()

    print (f"Epoch: {epoch} | Loss: {loss:.2f}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 0 | Loss: 39314.57
Epoch: 1 | Loss: 29296.86
Epoch: 2 | Loss: 36494.19
Epoch: 3 | Loss: 29096.76
Epoch: 4 | Loss: 27859.09


In [23]:
# save your model
filename = 'model/diabetes.pth'
torch.save (model.state_dict(), filename)

In [None]:
# model = torch.load(filename)

# 7. Testing

In [25]:
len(test_dl)

5

In [27]:
model.eval() # change the model to eval mode 0 it will not keep the gradients, it will skip dropout, batch 

total_avg_mse = 0
with torch.no_grad():
    
    for batch_x, batch_y in test_dl:
        
        yhat = model(batch_x)
        batch_y = batch_y.reshape((-1, 1))
        mse  = criterion (yhat, batch_y)
        
        print (mse)
        total_avg_mse += mse.item() / len (test_dl)
        
        
    print ("Trotal Average MSE = ", total_avg_mse)

tensor(24884.0039)
tensor(29758.6016)
tensor(27268.7578)
tensor(33819.4492)
tensor(28912.0859)
Trotal Average MSE =  28928.579687499998
