**Table of contents**<a id='toc0_'></a>    
- [example data](#toc1_)    
- [basic components](#toc2_)    
  - [Dataloader](#toc2_1_)    
  - [define layer](#toc2_2_)    
  - [define loss](#toc2_3_)    
  - [define optimizer == gradient descent](#toc2_4_)    
- [learning](#toc3_)    
- [try real dataset](#toc4_)    
  - [Dataloader](#toc4_1_)    
  - [model](#toc4_2_)    
  - [training](#toc4_3_)    
  - [Testing](#toc4_4_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

In [1]:
import torch

In [2]:
torch.__version__

'1.13.1'

In [3]:
torch.cuda.is_available()

True

In [4]:
device = torch.device("cuda") # cpu or cuda
device

device(type='cuda')

# <a id='toc1_'></a>[example data](#toc0_)

m,n -> 5,3   
y -> (1,2)

In [5]:
import numpy as np
# Input (temp, rainfall, humidity)
X_train = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], 
                   [102, 43, 37], [69, 96, 70], [73, 67, 43], 
                   [91, 88, 64], [87, 134, 58], [102, 43, 37], 
                   [69, 96, 70], [73, 67, 43], [91, 88, 64], 
                   [87, 134, 58], [102, 43, 37], [69, 96, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
Y_train = np.array([[56, 70], [81, 101], [119, 133], 
                    [22, 37], [103, 119], [56, 70], 
                    [81, 101], [119, 133], [22, 37], 
                    [103, 119], [56, 70], [81, 101], 
                    [119, 133], [22, 37], [103, 119]], 
                   dtype='float32')


In [6]:
inputs =torch.from_numpy(X_train)
targets = torch.from_numpy(Y_train)

type(inputs), type(targets) # torch.tensor

(torch.Tensor, torch.Tensor)

# <a id='toc2_'></a>[basic components](#toc0_)

## <a id='toc2_1_'></a>[Dataloader](#toc0_)

In [7]:
from torch.utils.data import TensorDataset

train_dataset = TensorDataset(inputs,targets)

train_dataset[0]

(tensor([73., 67., 43.]), tensor([56., 70.]))

In [8]:
from torch.utils.data import DataLoader

batch_size = 3
train_dl = DataLoader(train_dataset,batch_size,shuffle = False,num_workers=4) # shuffle should be True commonly you would want randomness


In [9]:
for X,y in train_dl:
    print("="*10)
    print(X)
    print(y)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.]])
tensor([[102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 73.,  67.,  43.]])
tensor([[ 22.,  37.],
        [103., 119.],
        [ 56.,  70.]])
tensor([[ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.]])
tensor([[ 81., 101.],
        [119., 133.],
        [ 22.,  37.]])
tensor([[69., 96., 70.],
        [73., 67., 43.],
        [91., 88., 64.]])
tensor([[103., 119.],
        [ 56.,  70.],
        [ 81., 101.]])
tensor([[ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[119., 133.],
        [ 22.,  37.],
        [103., 119.]])


## <a id='toc2_2_'></a>[define layer](#toc0_)

In [10]:
import torch.nn as nn

In [11]:
layer = nn.Linear(3,2)

In [12]:
print(layer.weight)
print(layer.weight.shape)
print(layer.bias)
print(layer.bias.shape)

Parameter containing:
tensor([[-0.5331,  0.0341, -0.3684],
        [ 0.2660,  0.2188, -0.1042]], requires_grad=True)
torch.Size([2, 3])
Parameter containing:
tensor([-0.3903,  0.2751], requires_grad=True)
torch.Size([2])


In [13]:
outputs = layer(inputs)
# (15,3) @ (3,2) => (15,2)
outputs.shape

torch.Size([15, 2])

In [14]:
total = 0
for param in layer.parameters():
    print(param)
    total += param.numel()

print(f"total param = {total}")

Parameter containing:
tensor([[-0.5331,  0.0341, -0.3684],
        [ 0.2660,  0.2188, -0.1042]], requires_grad=True)
Parameter containing:
tensor([-0.3903,  0.2751], requires_grad=True)
total param = 8


## <a id='toc2_3_'></a>[define loss](#toc0_)

In [15]:
criterion = nn.MSELoss()

In [16]:
mse = criterion(targets,outputs)
mse,mse.item()

(tensor(12288.3438, grad_fn=<MseLossBackward0>), 12288.34375)

## <a id='toc2_4_'></a>[define optimizer == gradient descent](#toc0_)

In [17]:
optimizer = torch.optim.SGD(layer.parameters(),lr = 0.0001,momentum = 0.9)

In [18]:
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)

# <a id='toc3_'></a>[learning](#toc0_)

1. for each epoch loop each batch  
2. calculate output each batch  
3. compare use loss function  
4. cal gradient or step  
5. update  




In [19]:
layer = nn.Linear(3,2)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(layer.parameters(),lr = 0.0001,momentum = 0.9)

# define hyper param
num_epochs = 5
display_every = 1


# loop epoch
for epoch in range(num_epochs):

    # loop mini batch
    for batch_x,batch_y in train_dl:

        # put data to gpu if you using gpu
        batch_x.to(device)
        batch_y.to(device)

        # predict 
        yhat = layer(batch_x)

        # cal loss
        loss = criterion(yhat,batch_y)

        # cal grad
        optimizer.zero_grad()
        loss.backward()
    
        print(layer.weight.grad)

        # update
        optimizer.step()

    if epoch%display_every == 0:
        print(f"epoch {epoch+1}: {loss:.2f}")



tensor([[ -8987.1553, -10930.8652,  -5963.0078],
        [ -5426.0518,  -6749.8057,  -3609.3916]])
tensor([[7982.0811, 6565.8457, 4813.4258],
        [5886.3369, 4282.3770, 3184.8538]])
tensor([[13520.0605, 13558.4980,  7899.5293],
        [ 8004.2021,  7331.7725,  4446.8135]])
tensor([[-8258.9102, -9007.4883, -6392.8276],
        [-5804.2354, -6414.4336, -4564.1934]])
tensor([[-12928.3389, -14509.6533,  -8479.1504],
        [ -6600.6025,  -7894.0820,  -4566.8721]])
epoch 1: 14974.38
tensor([[ 9230.2529, 11177.5762,  6120.7539],
        [ 6188.8853,  7475.1304,  4098.7227]])
tensor([[7239.4448, 6968.9258, 5024.8555],
        [3838.1812, 3580.1328, 2587.7407]])
tensor([[-2834.3872, -1781.9666, -1339.0400],
        [-2774.8772, -2144.1343, -1453.2002]])
tensor([[-7273.5254, -7759.3281, -5486.3457],
        [-3980.1348, -4232.3906, -2992.8855]])
tensor([[-1099.9762,   272.0613,  -111.5534],
        [  735.5317,  2000.4954,   922.2926]])
epoch 2: 930.21
tensor([[7634.3970, 9400.5059, 5069.

# <a id='toc4_'></a>[try real dataset](#toc0_)

🦆

In [1]:
import torch
from torch.utils.data import TensorDataset,DataLoader
import torch.nn as nn
import numpy as np

device = torch.device("cuda")


In [2]:
from sklearn.datasets import load_diabetes

data = load_diabetes()
print(data.feature_names)

X,y = data.data, data.target

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


In [3]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.25,random_state=96)

In [4]:
# change data to float 32 because model weight is float32
train_input= torch.tensor(X_train,dtype=torch.float32)
train_output = torch.tensor(y_train,dtype=torch.float32)
test_input = torch.tensor(X_test,dtype=torch.float32)
test_output = torch.tensor(y_test,dtype=torch.float32)

train_input.shape,train_output.shape,test_input.shape,test_output.shape

(torch.Size([331, 10]),
 torch.Size([331]),
 torch.Size([111, 10]),
 torch.Size([111]))

## <a id='toc4_1_'></a>[Dataloader](#toc0_)

In [5]:
train_ds = TensorDataset(train_input,train_output)
test_ds = TensorDataset(test_input,test_output)

In [6]:
batch_size = 32

train_dl = DataLoader(train_ds,batch_size=batch_size,shuffle=True,num_workers=4)
test_dl = DataLoader(test_ds,batch_size=batch_size,shuffle=True,num_workers=4)

In [7]:
# for batch_x,batch_y in train_dl:
#     print("batch_x: ",batch_x.shape)
#     print("batch_y: ",batch_y.shape)
#     break

## <a id='toc4_2_'></a>[model](#toc0_)

sequential => stack of layer but it can't use only linear because actually it not different from use 1 linear
layer => basically dot product
activation => sigmoid, tanh, relu, leaky relu 


In [8]:
model = nn.Sequential(
    nn.Linear(10,24),
    nn.ReLU(),
    nn.Linear(24,12),
    nn.ReLU(),
    nn.Linear(12,6),
    nn.ReLU(),
    nn.Linear(6,1)
)


In [9]:
total = 0

for param in model.parameters():
    total += param.numel()

print(f"total param: {total}")

total param: 649


In [10]:
#always good to test your neural network before training
yhat = model(train_input)
assert yhat.shape[1] == 1

## <a id='toc4_3_'></a>[training](#toc0_)

In [None]:
# define save path
import os
from pathlib import Path

file_path = "models/ann"

if not os.path.exists(file_path):
    os.makedirs(file_path,exist_ok=True)

save_path = Path(file_path,"diabetes.pth")
check_path = Path(file_path,"diabetes_best.pth")

In [12]:


lr = 0.01
display_every = 10
num_epochs = 100
lowest_loss = np.inf

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr = lr)


for epoch in range(num_epochs):

    for batch_x,batch_y in train_dl:

        batch_x.to(device)
        batch_y.to(device)

        yhat = model(batch_x)

        loss = criterion(yhat,batch_y)

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

    if lowest_loss > loss.item():
        lowest_loss = loss.item()
        torch.save(model.state_dict(),check_path)

    if epoch%display_every == display_every-1:
        print(f"epoch {epoch+1} | {loss:.2f}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


epoch 1 | 22849.40
epoch 11 | 47688.19
epoch 21 | 31080.00
epoch 31 | 13929.93
epoch 41 | 5119.35
epoch 51 | 6010.41
epoch 61 | 6263.44
epoch 71 | 4858.86
epoch 81 | 4205.63
epoch 91 | 9103.70


In [None]:
num_epochs = 5
best_val = np.inf

for i in range(num_epochs):

    train_loss = 0.0
    train_correct = 0
    train_samples = 0

    for images, labels in train_dl:
        images = images.to(device)
        labels = labels.to(device)
        y_hat = model(images)
        loss = criterion(y_hat, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_correct += (torch.max(y_hat, 1)[1] == labels).sum().item()
        train_samples += len(images)

    epoch_train_loss = train_loss / len(train_dl)
    epoch_train_acc = (train_correct / train_samples) * 100

    print(f"Epoch: {i} | Train acc: {epoch_train_acc:3.2f} | Train Loss: {epoch_train_loss:3.4f}")


manual save

In [15]:
torch.save(model.state_dict(),save_path)

## <a id='toc4_4_'></a>[Testing](#toc0_)

In [None]:
model.eval()  #change the model to eval mode - it will skip dropout, batch norm
total_avg_mse = 0
with torch.no_grad():
    
    for batch_x, batch_y in test_dl:
        
        yhat = model(batch_x)
        batch_y = batch_y.reshape((-1, 1))
        mse  = criterion(yhat, batch_y)
        
        total_avg_mse += mse.item() / len(test_dl)
        
print("Total Average MSE: ", total_avg_mse)