Creating a two-layer neural network using Pytorch
- Input layer 1*3
- Linear Layer - in_features = 3, out_features = 2
- Output 1*2

In [2]:
import torch
import torch.nn as nn
#Input layer
input_layer = torch.tensor([0.414, 0.173, 0.223])

#Linear layer -> takes the first input layer values and produces a linear function
linear_layer = nn.Linear(in_features = 3, out_features = 2)
output = linear_layer(input_layer) #output = W0 @ input + b0 -> randomly assigned weights and biases
print(output)

tensor([-0.4492, -0.1195], grad_fn=<ViewBackward0>)


More linear layers

In [3]:
model = nn.Sequential(
    nn.Linear(3,6),
    nn.Linear(6,8),
    nn.Linear(8,2)
)

In [4]:
input_tensor = torch.rand(1,3)

In [5]:
output_tensor = model(input_tensor)
output_tensor

tensor([[-0.0311, -0.1743]], grad_fn=<AddmmBackward0>)

Non-Linear Layers using Activation Function for complex relationships

In [6]:
#Sigmoid function for binary classification tasks, used as the last layer
import torch
import torch.nn as nn

input_tensor = torch.tensor([[9.0]])
sigmoid = nn.Sigmoid()
output = sigmoid(input_tensor)

In [7]:
output

tensor([[0.9999]])

In [8]:
#Softmax for multiclass classification and outputs a probability distribution

input_tensor = torch.tensor([[0.6, 0.7, 0.8]])
probabilities = nn.Softmax(dim = -1) #Applied along last dimension
output_tensor = probabilities(input_tensor)

output_tensor

tensor([[0.3006, 0.3322, 0.3672]])

In [9]:
#Create a neural network and get the result. Just forward pass, i.e no backpropagation to update the weights and biases
input_tensor = torch.tensor([[0.6, 0.7, 0.8, 0.9]])

model = nn.Sequential(
    nn.Linear(4,6), 
    nn.Linear(6,3), 
    nn.Softmax(dim = -1))
output_tensor = model(input_tensor)

output_tensor

tensor([[0.3545, 0.3770, 0.2685]], grad_fn=<SoftmaxBackward0>)

In [32]:
#ReLu
input_tensor = torch.tensor([[0.6, 0.7, 0.8, 0.9]])

model = nn.Sequential(
    nn.Linear(4,6), 
    nn.Linear(6,3), 
    nn.ReLU())
output_tensor = model(input_tensor)

output_tensor

tensor([[0.0000, 0.1046, 0.2055]], grad_fn=<ReluBackward0>)

Loss function

In [11]:

#1. Cross Entropy Loss function

import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

input = torch.tensor([[0.6, 0.7, 0.8]])
one_hot_target = F.one_hot(torch.tensor([0]), input.shape[1]) #one hot encoding

criterion = CrossEntropyLoss()
output_tensor = criterion(input_tensor.double(), one_hot_target.double())

print(output_tensor)

RuntimeError: 0D or 1D target tensor expected, multi-target not supported

In [None]:
#2. L1 Loss

import torch.nn.functional as F
from torch.nn import L1Loss

input = torch.tensor([[0.6, 0.7, 0.8]])
one_hot_target = F.one_hot(torch.tensor([0]), input.shape[1]) #one hot encoding

criterion = L1Loss()
output_tensor = criterion(input_tensor.double(), one_hot_target.double())

print(output_tensor)


tensor(0.6333, dtype=torch.float64)


Minimize Loss using derivatives and backpropagating

In [12]:
#1. Forward pass
import torch
import torch.nn as nn


input_tensor = torch.tensor([[0.6, 0.7, 0.8]])
model = nn.Sequential(
    nn.Linear(3,4),
    nn.Linear(4,6),
    nn.Linear(6,2)
)

prediction = model(input_tensor)
prediction

tensor([[0.5910, 0.5889]], grad_fn=<AddmmBackward0>)

In [13]:
#2. Calculate loss
one_hot_target = F.one_hot(torch.tensor([0]), prediction.shape[1])

criterion = CrossEntropyLoss()
loss = criterion(prediction.double(), one_hot_target.double())
loss

tensor(0.6921, dtype=torch.float64, grad_fn=<DivBackward1>)

In [14]:
#Calculate gradients 
loss.backward()

In [16]:
model[0].weight.grad, model[0].bias.grad

(tensor([[ 0.0317,  0.0370,  0.0423],
         [ 0.0144,  0.0169,  0.0193],
         [-0.0699, -0.0815, -0.0932],
         [ 0.0434,  0.0506,  0.0578]]),
 tensor([ 0.0528,  0.0241, -0.1165,  0.0723]))

In [17]:
model[1].weight.grad, model[1].bias.grad

(tensor([[-0.0274, -0.0475,  0.0581, -0.0627],
         [ 0.0027,  0.0046, -0.0056,  0.0061],
         [ 0.0077,  0.0133, -0.0163,  0.0176],
         [ 0.0626,  0.1084, -0.1327,  0.1432],
         [-0.0037, -0.0065,  0.0079, -0.0085],
         [-0.0376, -0.0651,  0.0798, -0.0861]]),
 tensor([ 0.1064, -0.0103, -0.0299, -0.2430,  0.0145,  0.1461]))

In [18]:
model[2].weight.grad, model[2].bias.grad

(tensor([[-0.0836, -0.0664, -0.3102, -0.1577,  0.2364, -0.2583],
         [ 0.0836,  0.0664,  0.3102,  0.1577, -0.2364,  0.2583]]),
 tensor([-0.4995,  0.4995]))

In [23]:
#Update weights and biases

learning_rate = 0.001

weight = model[0].weight
weight_grad = model[0].weight.grad
weight = weight -  learning_rate * weight_grad

bias = model[0].bias
bias_grad = model[0].bias.grad
bias = bias - learning_rate * bias_grad

weight, bias

#Add so on for other layers

(tensor([[-0.3857, -0.2188, -0.5619],
         [-0.1874, -0.1888, -0.1104],
         [ 0.0100, -0.0538,  0.2804],
         [-0.3467, -0.0900, -0.2593]], grad_fn=<SubBackward0>),
 tensor([ 0.5764, -0.1131,  0.3537, -0.1109], grad_fn=<SubBackward0>))

In [26]:
#we can just use SGD(stochastic gradient descent)
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr = 0.001)
optimizer.step() #updates the model parameters after calculating local gradients

In [28]:
#MSE Loss

import numpy as np
y_hat = np.array(10)
y = np.array(1)

# Calculate the MSELoss using NumPy
mse_numpy = (y - y_hat) ** 2

# Create the MSELoss function
criterion = nn.MSELoss()

# Calculate the MSELoss using the created loss function
mse_pytorch = criterion(torch.from_numpy(y_hat).double(), torch.from_numpy(y).double())
print(mse_pytorch)

tensor(81., dtype=torch.float64)


In [None]:
Typical training loop

# Loop over the number of epochs and the dataloader
for i in range(num_epochs):
  for data in dataloader:
    # Set the gradients to zero
    optimizer.zero_grad()
    # Run a forward pass
    feature, target = data
    prediction = model(feature)    
    # Calculate the loss
    loss = criterion(prediction, target)    
    # Compute the gradients
    loss.backward()
    # Update the model's parameters
    optimizer.step()
show_results(model, dataloader)