# Deep Learning with PyTorch

## Introduction

### Tensors

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms

np.random.seed(0)
torch.manual_seed(0)

# The same 2x3 integer matrix, first as a NumPy array and then as a tensor.
a = np.array([[2, 3, 5], [1, 2, 9]])
print(a)

a = torch.tensor([[2, 3, 5], [1, 2, 9]])
# Use the GPU when one is present; otherwise the tensor stays where it is.
a = a.cuda() if torch.cuda.is_available() else a
print(a)

# A 3x5 matrix of normally distributed samples from each library.
a = np.random.randn(3, 5)
print(a, "\nshape:", a.shape)

a = torch.randn(3, 5)
a = a.cuda() if torch.cuda.is_available() else a
print(a, "\nshape:", a.shape)

[[2 3 5]
 [1 2 9]]
tensor([[2, 3, 5],
        [1, 2, 9]], device='cuda:0')
[[ 1.76405235  0.40015721  0.97873798  2.2408932   1.86755799]
 [-0.97727788  0.95008842 -0.15135721 -0.10321885  0.4105985 ]
 [ 0.14404357  1.45427351  0.76103773  0.12167502  0.44386323]] 
shape: (3, 5)
tensor([[ 1.5410, -0.2934, -2.1788,  0.5684, -1.0845],
        [-1.3986,  0.4033,  0.8380, -0.7193, -0.4033],
        [-0.5966,  0.1820, -0.8567,  1.1006, -1.0712]], device='cuda:0') 
shape: torch.Size([3, 5])


In [13]:
import numpy as np
import torch

np.random.seed(0)
torch.manual_seed(0)

# NumPy: two random 2x2 matrices.
a = np.random.rand(2, 2)
b = np.random.rand(2, 2)
# Matrix product
print(np.dot(a, b))
# Element-wise product
print(a * b)

# PyTorch: the same two operations on tensors.
a = torch.rand(2, 2)
b = torch.rand(2, 2)

if torch.cuda.is_available():
    a, b = a.cuda(), b.cuda()

# Matrix product
print(a @ b)
# Element-wise product
print(torch.mul(a, b))

[[0.5454652  0.99226198]
 [0.49379751 0.87523343]]
[[0.23250747 0.4619366 ]
 [0.26376154 0.48591211]]
tensor([[0.5291, 1.0033],
        [0.0919, 0.1745]], device='cuda:0')
tensor([[0.1526, 0.4871],
        [0.0434, 0.1184]], device='cuda:0')


In [8]:
import numpy as np
import torch

np.random.seed(0)
torch.manual_seed(0)

# All-zeros 2x2 matrix: NumPy, then PyTorch.
a = np.zeros((2, 2))
print(a)

a = torch.zeros(2, 2)
a = a.cuda() if torch.cuda.is_available() else a
print(a)

# All-ones 2x2 matrix: NumPy, then PyTorch.
a = np.ones((2, 2))
print(a)

a = torch.ones(2, 2)
a = a.cuda() if torch.cuda.is_available() else a
print(a)

# 2x2 identity matrix: NumPy, then PyTorch.
a = np.identity(2)
print(a)

a = torch.eye(2)
a = a.cuda() if torch.cuda.is_available() else a
print(a)

[[0. 0.]
 [0. 0.]]
tensor([[0., 0.],
        [0., 0.]], device='cuda:0')
[[1. 1.]
 [1. 1.]]
tensor([[1., 1.],
        [1., 1.]], device='cuda:0')
[[1. 0.]
 [0. 1.]]
tensor([[1., 0.],
        [0., 1.]], device='cuda:0')


In [10]:
import numpy as np
import torch

np.random.seed(0)
torch.manual_seed(0)

# Random 2x2 NumPy array converted to a tensor.
a = torch.from_numpy(np.random.rand(2, 2))

# Visit the GPU when one is available ...
if torch.cuda.is_available():
    a = a.cuda()

# ... and come back to the CPU before converting to NumPy again.
a = a.cpu()
print(a.numpy())

[[0.5488135  0.71518937]
 [0.60276338 0.54488318]]


In [11]:
# Import torch and fix the seed
import torch

torch.manual_seed(0)

# Random 3x3 tensor, moved to the GPU when one is available
a = torch.rand(3, 3)
a = a.cuda() if torch.cuda.is_available() else a

# Shape of the tensor
a_shape = a.shape

# Show the values, then the shape
print(a, a_shape, sep="\n")

tensor([[0.4963, 0.7682, 0.0885],
        [0.1320, 0.3074, 0.6341],
        [0.4901, 0.8964, 0.4556]], device='cuda:0')
torch.Size([3, 3])


In [14]:
import torch

torch.manual_seed(0)

# a: 3x3 matrix of ones; b: 3x3 identity matrix
a = torch.ones(3, 3)
b = torch.eye(3)

if torch.cuda.is_available():
    a, b = a.cuda(), b.cuda()

# Matrix product of a and b
c = a @ b
print(c)

# Element-wise product of a and b
c = torch.mul(a, b)
print(c)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], device='cuda:0')


### Forward propagation

In [15]:
import torch

torch.manual_seed(0)

# Inputs of the computational graph, as one-element float tensors.
a = torch.tensor([2.])
b = torch.tensor([-4.])
c = torch.tensor([-2.])
d = torch.tensor([2.])

if torch.cuda.is_available():
    a, b, c, d = a.cuda(), b.cuda(), c.cuda(), d.cuda()

# Forward pass: e = a + b, f = c * d, g = e * f
e = torch.add(a, b)
f = torch.mul(c, d)
g = torch.mul(e, f)
print(e, f, g)

tensor([-2.], device='cuda:0') tensor([-4.], device='cuda:0') tensor([8.], device='cuda:0')


In [16]:
import torch

torch.manual_seed(0)

# Three random 1000x1000 operands, drawn in the order x, y, z
x = torch.rand(1000, 1000)
y = torch.rand(1000, 1000)
z = torch.rand(1000, 1000)

if torch.cuda.is_available():
    x, y, z = x.cuda(), y.cuda(), z.cuda()

# q is the matrix product of x and y
q = x @ y
# f is the element-wise product of z and q, reduced to its mean
f = torch.mul(z, q)
mean_f = f.mean()
print(mean_f)

tensor(125.1406, device='cuda:0')


### Backpropagation

In [1]:
import torch

torch.manual_seed(0)

# Scalar leaves x = 4, y = -3, z = 5, all tracked by autograd
x = torch.tensor(4., requires_grad=True)
y = torch.tensor(-3., requires_grad=True)
z = torch.tensor(5., requires_grad=True)

# f = (x + y) * z
q = torch.add(x, y)
f = torch.mul(q, z)

# Backward pass fills the .grad of every leaf
f.backward()

# Show the gradients
print("Gradient of z is:", z.grad)
print("Gradient of y is:", y.grad)
print("Gradient of x is:", x.grad)

Gradient of z is: tensor(1.)
Gradient of y is: tensor(5.)
Gradient of x is: tensor(5.)


In [None]:
import torch
torch.manual_seed(0)

# Initialize tensors x, y and z
x = torch.rand(size=(1000, 1000))
y = torch.rand(size=(1000, 1000))
z = torch.rand(size=(1000, 1000))

if torch.cuda.is_available():
    x = x.cuda()
    y = y.cuda()
    z = z.cuda()

# Turn on gradient tracking only after the optional device move, so the
# tensors used in the computation are themselves the leaves that receive
# .grad. Without tracking, mean_f has no grad_fn and backward() raises.
x.requires_grad_()
y.requires_grad_()
z.requires_grad_()

# Multiply tensors x and y
q = torch.matmul(input=x, other=y)
# Element-wise multiply tensors z with q
f = z * q
mean_f = torch.mean(input=f)
# Calculate the gradients
mean_f.backward()

### Neural networks

In [1]:
import torch

torch.manual_seed(0)

# A 10-feature input and the weights of three layers: 10 -> 20 -> 20 -> 4
input_layer = torch.rand(10)
w1 = torch.rand(10, 20)
w2 = torch.rand(20, 20)
w3 = torch.rand(20, 4)

if torch.cuda.is_available():
    input_layer, w1, w2, w3 = input_layer.cuda(), w1.cuda(), w2.cuda(), w3.cuda()

# Each layer is a plain matrix product; no activation is applied
h1 = input_layer @ w1
h2 = h1 @ w2
output_layer = h2 @ w3
print(output_layer)

tensor([243.7668, 219.5830, 211.5230, 254.8253], device='cuda:0')


In [3]:
import torch

torch.manual_seed(0)

# A 784-feature input and the weights of two layers: 784 -> 200 -> 10
input_layer = torch.rand(784)
weight_1 = torch.rand(784, 200)
weight_2 = torch.rand(200, 10)

if torch.cuda.is_available():
    input_layer, weight_1, weight_2 = input_layer.cuda(), weight_1.cuda(), weight_2.cuda()

# Hidden layer: input times the first weight matrix
hidden_1 = input_layer @ weight_1
# Output layer: hidden layer times the second weight matrix
output_layer = hidden_1 @ weight_2
print(output_layer)

tensor([21105.2676, 19629.4141, 18506.6523, 19196.1504, 19759.8438, 19620.8242,
        19557.7090, 21682.9746, 19894.4688, 19432.3047], device='cuda:0')


In [2]:
import torch

torch.manual_seed(0)


class Net(torch.nn.Module):
    """Three stacked linear layers (10 -> 20 -> 20 -> 4) with no activation in between."""

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(10, 20)
        self.fc2 = torch.nn.Linear(20, 20)
        self.output = torch.nn.Linear(20, 4)

    def forward(self, x):
        """Feed `x` through fc1, fc2 and the output layer, in that order."""
        return self.output(self.fc2(self.fc1(x)))


# The input is drawn before the network is built, so the random stream is
# consumed in the same order on every run.
input_layer = torch.rand(10)
net = Net()

if torch.cuda.is_available():
    input_layer, net = input_layer.cuda(), net.cuda()

result = net(input_layer)
print(result)

tensor([-0.0571,  0.1801, -0.0272, -0.0133], device='cuda:0',
       grad_fn=<ViewBackward0>)


In [4]:
import torch

torch.manual_seed(0)


class Net(torch.nn.Module):
    """Two stacked linear layers (784 -> 200 -> 10) with no activation in between."""

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(784, 200)
        self.fc2 = torch.nn.Linear(200, 10)

    def forward(self, x):
        """Feed `x` through the instantiated layers and return the result."""
        return self.fc2(self.fc1(x))


# The input is drawn before the network is built, so the random stream is
# consumed in the same order on every run.
input_layer = torch.rand(784)
net = Net()

if torch.cuda.is_available():
    input_layer, net = input_layer.cuda(), net.cuda()

result = net(input_layer)
print(result)

tensor([-0.1113, -0.0151,  0.1005,  0.2801,  0.2317, -0.1189, -0.3775,  0.0440,
         0.3325, -0.1394], device='cuda:0', grad_fn=<ViewBackward0>)


## Artificial Neural Networks

### Activation functions

In [2]:
import torch

torch.manual_seed(0)

# A two-feature input and the weights of two stacked linear layers
input_layer = torch.tensor([2., 1.])
weight_1 = torch.tensor([[0.45, 0.32],
                         [-0.12, 0.29]])
weight_2 = torch.tensor([[0.48, -0.12],
                         [0.64, 0.91]])

if torch.cuda.is_available():
    input_layer, weight_1, weight_2 = input_layer.cuda(), weight_1.cuda(), weight_2.cuda()

# Apply the two layers one after the other
hidden_layer = input_layer @ weight_1
output_layer = hidden_layer @ weight_2
print("output layer:", output_layer)

# Multiply the two weight matrices first, then apply the product once
weight = weight_1 @ weight_2
print("weight:", weight)
output_layer = input_layer @ weight
print("output layer:", output_layer)

output layer: tensor([0.9696, 0.7527], device='cuda:0')
weight: tensor([[0.4208, 0.2372],
        [0.1280, 0.2783]], device='cuda:0')
output layer: tensor([0.9696, 0.7527], device='cuda:0')


In [5]:
import torch

torch.manual_seed(0)

# ReLU module plus a 1-D and a 2-D tensor containing negative entries
relu = torch.nn.ReLU()
tensor_1 = torch.tensor([2., -4.])
tensor_2 = torch.tensor([[2., -4.],
                         [1.2, 0.]])

if torch.cuda.is_available():
    relu, tensor_1, tensor_2 = relu.cuda(), tensor_1.cuda(), tensor_2.cuda()

# Apply the activation to each tensor
print("ReLU tensor_1", relu(tensor_1))
print("ReLU tensor_2", relu(tensor_2))

ReLU tensor_1 tensor([2., 0.], device='cuda:0')
ReLU tensor_2 tensor([[2.0000, 0.0000],
        [1.2000, 0.0000]], device='cuda:0')


In [None]:
import torch
torch.manual_seed(0)

# This cell defines its own inputs so it runs on a fresh kernel: no earlier
# cell defines `weight_3`.
# NOTE(review): the values below are seeded random stand-ins (one sample with
# 4 features, three 4x4 weight matrices) - substitute the intended data.
input_layer = torch.randn(size=(1, 4))
weight_1 = torch.randn(size=(4, 4))
weight_2 = torch.randn(size=(4, 4))
weight_3 = torch.randn(size=(4, 4))

# Calculate the first and second hidden layer
hidden_1 = torch.matmul(input=input_layer, other=weight_1)
hidden_2 = torch.matmul(input=hidden_1, other=weight_2)

# Calculate the output
output_layer = torch.matmul(input=hidden_2, other=weight_3)
print(output_layer)

# Calculate weight_composed_1 and weight
weight_composed_1 = torch.matmul(input=weight_1, other=weight_2)
weight = torch.matmul(input=weight_composed_1, other=weight_3)

# Multiply input_layer with weight; without activations this matches the
# layer-by-layer output up to floating-point rounding
output_layer_composed = torch.matmul(input_layer, weight)
print(output_layer_composed)

In [None]:
# This cell defines its own inputs so it runs on a fresh kernel: no earlier
# cell defines `weight_3`.
# NOTE(review): the values below are seeded random stand-ins (one sample with
# 4 features, three 4x4 weight matrices) - substitute the intended data.
# Normally distributed values are used so that some entries are negative and
# the ReLU actually changes them.
torch.manual_seed(0)
input_layer = torch.randn(size=(1, 4))
weight_1 = torch.randn(size=(4, 4))
weight_2 = torch.randn(size=(4, 4))
weight_3 = torch.randn(size=(4, 4))

# Instantiate non-linearity
relu = nn.ReLU()

# Apply non-linearity on hidden_1 and hidden_2
hidden_1_activated = relu(torch.matmul(input_layer, weight_1))
hidden_2_activated = relu(torch.matmul(hidden_1_activated, weight_2))
output_layer = torch.matmul(hidden_2_activated, weight_3)
print(output_layer)

# Apply non-linearity to the product of first two weights.
weight_composed_1_activated = relu(torch.matmul(weight_1, weight_2))

# Multiply `weight_composed_1_activated` with `weight_3`
weight = torch.matmul(weight_composed_1_activated, weight_3)

# Multiply input_layer with weight
output_layer_composed = torch.matmul(input_layer, weight)
print(output_layer_composed)

In [None]:
# Seed the generator so the random tensors below are reproducible
torch.manual_seed(0)

# Instantiate ReLU activation function as relu
relu = nn.ReLU()

# NOTE(review): no earlier cell defines an `input_layer` with 4 features, which
# the (4, 6) weight matrix below requires; this one-sample random tensor is a
# stand-in - substitute the intended data.
input_layer = torch.rand(1, 4)

# Initialize weight_1 and weight_2 with random numbers
weight_1 = torch.rand(4, 6)
weight_2 = torch.rand(6, 2)

# Multiply input_layer with weight_1
hidden_1 = torch.matmul(input_layer, weight_1)

# Apply ReLU activation function over hidden_1 and multiply with weight_2
hidden_1_activated = relu(hidden_1)
output_layer = torch.matmul(hidden_1_activated, weight_2)
print(output_layer)

In [None]:
# Scores of one sample over three classes, and the index of its true class
logits = torch.tensor([[-1.2, 0.12, 4.8]])
ground_truth = torch.tensor([2])

# Cross-entropy loss module
criterion = nn.CrossEntropyLoss()

# Evaluate the loss for the sample and show it
loss = criterion(input=logits, target=ground_truth)
print(loss)

In [None]:
# Import torch and its neural-network module
import torch
import torch.nn as nn

# Random scores of one sample over 1000 classes; class 111 is the true one
logits = torch.rand(size=(1, 1000))
ground_truth = torch.tensor([111])

# Cross-entropy loss module
criterion = nn.CrossEntropyLoss()

# Evaluate the loss for the sample and show it
loss = criterion(input=logits, target=ground_truth)
print(loss)

In [None]:
# Transform the data to torch tensors and normalize it.
# Mean and std are given as one-element tuples, one entry for the single
# image channel: a parenthesised float such as (0.1307) is still a plain float.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Prepare training set and testing set
trainset = torchvision.datasets.MNIST('mnist', train=True,
                                      download=True, transform=transform)
testset = torchvision.datasets.MNIST('mnist', train=False,
                                     download=True, transform=transform)

# Prepare training loader and testing loader
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False, num_workers=0)

# The training and testing loops further down spell these names with an
# underscore, so expose both spellings.
train_loader = trainloader
test_loader = testloader

In [None]:
# Compute the shape of the training set and testing set.
# `data` is the current name of the image tensor on the dataset;
# `train_data` / `test_data` are deprecated aliases of it.
trainset_shape = trainloader.dataset.data.shape
testset_shape = testloader.dataset.data.shape

# Print the computed shapes
print(trainset_shape, testset_shape)

# Compute the size of the minibatch for training set and testing set
trainset_batchsize = trainloader.batch_size
testset_batchsize = testloader.batch_size

# Print sizes of the minibatch
print(trainset_batchsize, testset_batchsize)

In [None]:
# Define the class Net
class Net(nn.Module):
    """Fully connected classifier: 784 inputs -> 200 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        # Define all the parameters of the net
        self.fc1 = nn.Linear(in_features=28 * 28 * 1, out_features=200)
        self.fc2 = nn.Linear(in_features=200, out_features=10)

    def forward(self, x):
        """Do the forward pass: fc1, ReLU, then fc2."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
# Instantiate the network, the Adam optimizer and Cross-Entropy loss function
model = Net()
optimizer = optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.CrossEntropyLoss()

# One pass over the training data. The loader built earlier in the notebook
# is named `trainloader` (no underscore).
for batch_idx, (data, target) in enumerate(trainloader):
    # Put each 28x28 image into a 784-element vector
    data = data.view(-1, 28 * 28)
    optimizer.zero_grad()

    # Complete a forward pass
    output = model(data)

    # Compute the loss, gradients and change the weights
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()

In [None]:
# Set the model in eval mode
model.eval()

# Running counts; they must start at zero before the loop accumulates into them
correct, total = 0, 0

# No gradients are needed while measuring accuracy
with torch.no_grad():
    # The loader built earlier in the notebook is named `testloader`
    for i, data in enumerate(testloader, 0):
        inputs, labels = data

        # Put each image into a vector
        inputs = inputs.view(-1, 28 * 28)

        # Do the forward pass and get the predictions
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))

In [None]:
# Batch of 10 random single-channel 28x28 images
images = torch.rand(size=(10, 1, 28, 28))

# Six 3x3 convolution filters with stride 1 and padding 1
conv_filters = torch.nn.Conv2d(1, 6, kernel_size=3, stride=1, padding=1)

# Run the images through the filters and show the resulting shape
output_feature = conv_filters(images)
print(output_feature.shape)

In [None]:
# Batch of 10 random single-channel 28x28 images
image = torch.rand(size=(10, 1, 28, 28))

# Six random 3x3 filters for a single input channel
filters = torch.rand(size=(6, 1, 3, 3))

# Functional convolution with stride 1 and padding 1; show the resulting shape
output_feature = F.conv2d(input=image, weight=filters, stride=1, padding=1)
print(output_feature.shape)

In [None]:
# NOTE(review): `im` is not defined by any earlier cell; this deterministic
# 6x6 single-channel image (values 0..35) is a stand-in - substitute the
# intended image.
im = torch.arange(36, dtype=torch.float32).reshape(1, 1, 6, 6)

# Build a pooling operator with size `2`.
max_pooling = torch.nn.MaxPool2d(2)

# Apply the pooling operator
output_feature = max_pooling(im)

# Use pooling operator in the image
output_feature_F = F.max_pool2d(im, 2)

# print the results of both cases
print(output_feature)
print(output_feature_F)

In [None]:
# NOTE(review): `im` is not defined by any earlier cell; this deterministic
# 6x6 single-channel image (values 0..35) is a stand-in - substitute the
# intended image.
im = torch.arange(36, dtype=torch.float32).reshape(1, 1, 6, 6)

# Build a pooling operator with size `2`.
avg_pooling = torch.nn.AvgPool2d(2)

# Apply the pooling operator
output_feature = avg_pooling(im)

# Use pooling operator in the image
output_feature_F = F.avg_pool2d(im, 2)

# print the results of both cases
print(output_feature)
print(output_feature_F)

In [None]:
class Net(nn.Module):
    """Building blocks of a small CNN; only the constructor is defined here."""

    def __init__(self):
        super().__init__()

        # Two 3x3 convolutions with padding 1: 1 -> 5 -> 10 channels
        self.conv1 = nn.Conv2d(1, 5, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(5, 10, kernel_size=3, padding=1)

        # ReLU nonlinearity
        self.relu = nn.ReLU()

        # 2x2 max pooling with stride 2
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected layer from 7 * 7 * 10 features to 10 outputs
        self.fc = nn.Linear(in_features=7 * 7 * 10, out_features=10)

In [None]:
class Net(nn.Module):
    """Small CNN: two conv + ReLU + max-pool stages followed by one linear layer.

    The linear layer takes 7 * 7 * 10 features, i.e. the 10 channels left
    after a 28x28 input has been halved twice by the 2x2 poolings.

    Args:
        num_classes: number of outputs of the final linear layer. Defaults
            to 10, the width that used to be hard-coded.
    """

    def __init__(self, num_classes=10):
        super(Net, self).__init__()

        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()

        # Instantiate two convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, padding=1)

        # Instantiate a max pooling layer
        self.pool = nn.MaxPool2d(2, 2)

        # Instantiate a fully connected layer sized by num_classes
        self.fc = nn.Linear(7 * 7 * 10, num_classes)

    def forward(self, x):
        """Return the class scores for a batch of images `x`."""
        # Apply conv followed by relu, then in next line pool
        x = self.relu(self.conv1(x))
        x = self.pool(x)

        # Apply conv followed by relu, then in next line pool
        x = self.relu(self.conv2(x))
        x = self.pool(x)

        # Prepare the image for the fully connected layer
        x = x.view(-1, 7 * 7 * 10)

        # Apply the fully connected layer and return the result
        return self.fc(x)

In [None]:
# Build the CNN together with a loss and an optimizer bound to ITS parameters.
# An optimizer created for a different model would leave `net` untouched.
# NOTE(review): 10 output classes and lr=3e-4 follow the other MNIST cells of
# this notebook - confirm these are the intended settings.
net = Net(10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=3e-4)

# One pass over the training data. The loader built earlier in the notebook
# is named `trainloader` (no underscore).
for i, data in enumerate(trainloader, 0):
    inputs, labels = data
    optimizer.zero_grad()

    # Compute the forward pass
    outputs = net(inputs)

    # Compute the loss function
    loss = criterion(outputs, labels)

    # Compute the gradients
    loss.backward()

    # Update the weights
    optimizer.step()

In [None]:
# Iterate over the data in the test loader built earlier in the notebook
# (named `testloader`). No gradients are needed for inference.
with torch.no_grad():
    for i, data in enumerate(testloader):

        # Get the images and labels from data
        image, label = data

        # Make a forward pass in the net with your images
        output = net(image)

        # Argmax the results of the net
        _, predicted = torch.max(output, 1)

        # A batch can hold several samples, and the truth value of a
        # multi-element comparison is ambiguous, so compare one sample at a
        # time. split(1) keeps each piece a one-element tensor.
        for pred, true in zip(predicted.split(1), label.split(1)):
            if pred == true:
                print("Yipes, your net made the right prediction " + str(pred))
            else:
                print("Your net prediction was " + str(pred) + ", but the correct label is: " + str(true))

In [None]:
class Net(nn.Module):
    """Feature extractor and classifier of a deeper CNN; only the constructor is defined here."""

    def __init__(self):
        super().__init__()

        # Declare all the layers for feature extraction
        feature_layers = [
            nn.Conv2d(1, 5, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(5, 10, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(inplace=True),
            nn.Conv2d(10, 20, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(20, 40, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*feature_layers)

        # Declare all the layers for classification
        classifier_layers = [
            nn.Linear(7 * 7 * 40, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, 10),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

In [None]:
class Net(nn.Module):
    """Deeper CNN: four convolutions with two 2x2 poolings, then a three-layer classifier."""

    def __init__(self):
        super().__init__()

        # Declare all the layers for feature extraction
        self.features = nn.Sequential(
            nn.Conv2d(1, 5, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(5, 10, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(inplace=True),
            nn.Conv2d(10, 20, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(20, 40, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(inplace=True),
        )

        # Declare all the layers for classification
        self.classifier = nn.Sequential(
            nn.Linear(7 * 7 * 40, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, 10),
        )

    def forward(self, x):
        """Extract features, flatten them to 7 * 7 * 40 per sample, and classify."""
        flattened = self.features(x).view(-1, 7 * 7 * 40)
        return self.classifier(flattened)

In [None]:
# Build the MNIST training set once; both loaders below draw from it
mnist_transform = transforms.Compose([transforms.ToTensor(),
                                      transforms.Normalize((0.1307,), (0.3081,))])
mnist_train = datasets.MNIST('mnist', download=True, train=True, transform=mnist_transform)

# Shuffle the indices (as many as the dataset has samples)
indices = np.arange(len(mnist_train))
np.random.shuffle(indices)

# Build the train loader from the first 55000 shuffled indices
train_loader = torch.utils.data.DataLoader(
    mnist_train, batch_size=64, shuffle=False,
    sampler=torch.utils.data.SubsetRandomSampler(indices[:55000]))

# Build the validation loader from the remaining shuffled indices
val_loader = torch.utils.data.DataLoader(
    mnist_train, batch_size=64, shuffle=False,
    sampler=torch.utils.data.SubsetRandomSampler(indices[55000:]))

In [None]:
# Network to be trained
model = Net()

# Cross-entropy loss
criterion = nn.CrossEntropyLoss()

# Adam optimizer over the network's parameters, with weight decay
optimizer = optim.Adam(params=model.parameters(), lr=0.0003, weight_decay=1e-3)

In [None]:
class Net(nn.Module):
    """Fully connected classifier 784 -> 200 -> 500 -> 10 with dropout after the first ReLU."""

    def __init__(self):
        # nn.Module must be initialised before sub-modules can be assigned;
        # without this call the assignment below raises AttributeError.
        super(Net, self).__init__()

        # Define all the parameters of the net
        self.classifier = nn.Sequential(
            nn.Linear(28*28, 200),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(200, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 10))

    def forward(self, x):
        """Do the forward pass through the classifier."""
        return self.classifier(x)

In [None]:
class Net(nn.Module):
    """CNN with batch normalization: two conv/pool/ReLU/batch-norm stages and one linear layer.

    The linear layer takes 7 * 7 * 20 features, i.e. the 20 channels left
    after a 28x28 input has been halved twice by the 2x2 poolings.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Implement the sequential module for feature extraction
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(2, 2), nn.ReLU(inplace=True), nn.BatchNorm2d(10),
            nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(2, 2), nn.ReLU(inplace=True), nn.BatchNorm2d(20))

        # Implement the fully connected layer for classification
        self.fc = nn.Linear(in_features=7*7*20, out_features=10)

    def forward(self, x):
        """Extract features, flatten them per sample, and classify.

        The class previously had no forward pass, so an instance could not
        be called on data.
        """
        x = self.features(x)
        # Keep the batch dimension and merge channels and spatial dimensions
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [None]:
# Create a model with the architecture the saved parameters belong to
model = Net()

# Load the parameters from the old model
model.load_state_dict(torch.load('my_net.pth'))

# Change the number of outputs to 26. The input width is taken from the layer
# being replaced so the new head always matches what the feature extractor
# produces (the previous literal 7 * 7 * 512 did not match a 7*7*20 head).
model.fc = nn.Linear(model.fc.in_features, 26)

# Build the optimizer after the head swap and from THIS model's parameters;
# an optimizer created for another model instance would not update it.
# NOTE(review): lr=3e-4 mirrors the other optimizers in this notebook - confirm.
optimizer = optim.Adam(model.parameters(), lr=3e-4)

# Train the model
model.train()
train_net(model, optimizer, criterion)

# Switch to evaluation mode. model.eval() returns the module itself, not a
# score, so it must not be printed as an accuracy.
model.eval()
# TODO(review): compute and print the accuracy with the project's evaluation
# routine; none is visible in this notebook.

In [None]:
# Import the module
import torchvision

# Download resnet18 with its pretrained weights
model = torchvision.models.resnet18(pretrained=True)

# Freeze every parameter of the downloaded network
model.requires_grad_(False)

# Replace the final layer with a new linear layer producing 7 outputs
model.fc = nn.Linear(in_features=512, out_features=7)