# Chapter 2 

In [1]:
# Import the libraries
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn

In [2]:
# Define the device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Define the data
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [[3], [7], [11], [15]]
X = torch.tensor(x).float().to(device)
Y = torch.tensor(y).float().to(device)

In [3]:
# Define the dataset class
class Dataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x).float()
        self.y = torch.tensor(y).float()
    
    
    def __len__(self):
        return len(self.x)


    def __getitem__(self, ix):
        return self.x[ix], self.y[ix]

In [4]:
# Instantiating the dataset and creating a dataloader
ds = Dataset(X, Y)
dl = DataLoader(ds, batch_size=2, shuffle=True)

  self.x = torch.tensor(x).float()
  self.y = torch.tensor(y).float()


In [5]:
# Inspect the dataset
for x, y in dl:
    print(x)
    print(y)

tensor([[1., 2.],
        [3., 4.]])
tensor([[3.],
        [7.]])
tensor([[7., 8.],
        [5., 6.]])
tensor([[15.],
        [11.]])


In [6]:
# Create a neural network inheriting from nn.Module
class Neural(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.input_to_hidden_layer = nn.Linear(2, 8)
        self.activation_hidden_layer = nn.ReLU()
        self.output_layer = nn.Linear(8, 1)
    
    
    def forward(self, x):
        x = self.input_to_hidden_layer(x)
        x = self.activation_hidden_layer(x)
        x = self.output_layer(x)
        return x

In [7]:
from torch.optim import SGD

# Instantiate the network
mynet = Neural().to(device)

# MSE for the loss function
loss_fn = nn.MSELoss()

# SGD for optimizer and set it to update the model parameters
optim = SGD(mynet.parameters(), lr = 10e-4)


In [8]:
import time

# Train the network
loss_history = []
start = time.time()

for _ in range(20):
    for x, y in dl:
        
        # First zero the gradient
        optim.zero_grad()

        # Calculate the loss function
        loss_value = loss_fn(mynet(x), y)

        # Do the backward propagation
        loss_value.backward()

        # Step the optimizer
        optim.step()

        # Save the loss
        loss_history.append(loss_value)

end = time.time()
print(end - start)

0.02000570297241211


In [9]:
# Predicting with the model
val_x = [[10, 11]]
val_x = torch.tensor(val_x).float().to(device)

# Pass the value through the model
mynet(val_x)


tensor([[20.7034]], grad_fn=<AddmmBackward0>)

# Custom loss function

In [10]:
# Use the same model/data that we used with the previous loss fn
model = Neural().to(device)

# Define the custom loss fn
def my_MSE(_y, y): # _y predictions and y is the y true
    loss = (_y-y)**2
    loss = loss.mean()
    return loss


loss_value = my_MSE(model(X), Y)
print(loss_value)

tensor(121.3294, grad_fn=<MeanBackward0>)


# Checking the intermediate values of the model (values of a hidden layer)

In [11]:
class neuralNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_to_hidden_layer = nn.Linear(2, 8)
        self.activation_hidden_layer = nn.ReLU()
        self.output_layer = nn.Linear(8, 1)


    def forward(self, x):
        hidden1 = self.input_to_hidden_layer(x)
        hidden2 = self.activation_hidden_layer(hidden1)
        output = self.output_layer(hidden2)
        return output, hidden2

In [12]:
mynet = neuralNet().to(device)
print(mynet(X)[0])

tensor([[ 0.0391],
        [-0.0334],
        [-0.1058],
        [-0.1782]], grad_fn=<AddmmBackward0>)


# Defining a model using the sequential method

In [13]:
# Let's remember ourselves the dataset
ds = Dataset(x, y)
dl = DataLoader(ds, batch_size=2, shuffle=True)

  self.x = torch.tensor(x).float()
  self.y = torch.tensor(y).float()


In [14]:
# Defining the model using the sequential method
model = nn.Sequential(
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
).to(device)

In [17]:
# Import and use the torch.summary
from torchsummary import summary

summary(model, torch.zeros(1, 2))

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [19]:
from torch.optim import SGD
import time

# Training the model
loss_func = nn.MSELoss()
optim = SGD(model.parameters(), lr=10e-4)
start = time.time()
loss_history = []
for _ in range(50):
    for ix, iy, in dl:
        
        # Zero grad for the optim
        optim.zero_grad()

        # Calculate the loss
        loss = loss_func(model(ix), iy)
        
        # Do the backward propagation
        loss.backward()

        # Step the optimizer
        optim.step()

        # Save the loss
        loss_history.append(loss_value)
end = time.time()
print(end - start)

0.019675731658935547


In [22]:
# Define a validation dataset
val = [[8, 9], [10, 11], [1.5, 2.5]]
model(torch.tensor(val).float().to(device))

tensor([[16.5398],
        [20.3068],
        [ 4.2971]], grad_fn=<AddmmBackward0>)

# Saving the model

Always remember that when saving a model, is good practice to send it to cpu (if it is on GPU), because if the model is saved on GPU it'll save the tensors as CUDA tensors, and if the machine where the model is further loaded isn't compatible with CUDA you'll run into an error, so it's better to save it as a CPU tensor.

In [27]:
# Saving the model after sending it to CPU
# !mkdir ./savedmodel/
torch.save(model.to('cpu').state_dict(), './savedmodel/mymodel.pth')

# Loading a model

In [30]:
# To load a model we need to have a model with the same structure
# because we're only loading the model's weights
model = nn.Sequential(
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
)

# Load the model
state_dict = torch.load("./savedmodel/mymodel.pth")

# Now we load the state_dict into our model
print(model.load_state_dict(state_dict=state_dict))

# Send it to device and make a prediction
model.to(device)
model(torch.tensor(val).float().to(device))

<All keys matched successfully>


tensor([[16.5398],
        [20.3068],
        [ 4.2971]], grad_fn=<AddmmBackward0>)