# Introduction to Deep Learning with PyTorch

## Introduction to PyTorch

### Tensors

In [2]:
import numpy as np
import torch

array = [[1, 2, 3], [4, 5, 6]]    
tensor = torch.tensor(array)
if torch.cuda.is_available():
    tensor = tensor.cuda()
    
print("Tensor shape:", tensor.shape)
print("Tensor dtype:", tensor.dtype)
print("Tensor device:", tensor.device)

np_array = np.array(array)
np_tensor = torch.from_numpy(np_array)
if torch.cuda.is_available():
    np_tensor = np_tensor.cuda()

print("Input tensor:", np_tensor)

Tensor shape: torch.Size([2, 3])
Tensor dtype: torch.int64
Tensor device: cuda:0
Input tensor: tensor([[1, 2, 3],
        [4, 5, 6]], device='cuda:0')


In [3]:
import torch

a = torch.tensor([[1, 1], [2, 2]])
b = torch.tensor([[2, 2], [3, 3]])
c = torch.tensor([[2, 2, 4], [3, 3, 5]])

if torch.cuda.is_available():
    a = a.cuda()
    b = b.cuda()
    c = c.cuda()

print(a + b)
print(a * b)
print(a + c)

tensor([[3, 3],
        [5, 5]], device='cuda:0')
tensor([[2, 2],
        [6, 6]], device='cuda:0')


RuntimeError: The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 1

In [4]:
# Import PyTorch
import torch

list_a = [1, 2, 3, 4]

# Create a tensor from list_a
tensor_a = torch.tensor(list_a)
if torch.cuda.is_available():
    tensor_a = tensor_a.cuda()
print(tensor_a)

# Display the tensor device
print("Device:", tensor_a.device)
# Display the tensor data type
print("Dtype:", tensor_a.dtype)

tensor([1, 2, 3, 4], device='cuda:0')
Device: cuda:0
Dtype: torch.int64


In [5]:
import numpy as np
import torch

# Create two tensors from the arrays
array_a = np.array([
    [1, 1, 1],
    [2, 3, 4],
    [4, 5, 6]
])
array_b = np.array([
    [7, 5, 4],
    [2, 2, 8],
    [6, 3, 8]
])
tensor_a = torch.from_numpy(array_a)
tensor_b = torch.from_numpy(array_b)
if torch.cuda.is_available():
    tensor_a = tensor_a.cuda()
    tensor_b = tensor_b.cuda()

# Subtract tensor_b from tensor_a 
tensor_c = tensor_a - tensor_b
# Multiply each element of tensor_a with each element of tensor_b
tensor_d = tensor_a * tensor_b
# Add tensor_c with tensor_d
tensor_e = tensor_c + tensor_d
print(tensor_e)

tensor([[ 1,  1,  1],
        [ 4,  7, 28],
        [22, 17, 46]], device='cuda:0')


### Neural network

In [6]:
import torch

# Create input_tensor with three features
input_tensor = torch.tensor([[0.3471, 0.4547, -0.2356]])

# Define first linear layer
linear_layer = torch.nn.Linear(in_features=3, out_features=2)
print("Weight:", linear_layer.weight)
print("Bias:", linear_layer.bias)

if torch.cuda.is_available():
    input_tensor = input_tensor.cuda()
    linear_layer = linear_layer.cuda()

# Pass input through linear layer
output = linear_layer(input_tensor)
print("Output:", output)

Weight: Parameter containing:
tensor([[ 0.3953, -0.1266, -0.5211],
        [ 0.5255, -0.4329,  0.2503]], requires_grad=True)
Bias: Parameter containing:
tensor([0.5610, 0.3169], requires_grad=True)
Output: tensor([[0.7634, 0.2436]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [7]:
import torch

input_tensor = torch.tensor([
    [-0.0014, 0.4038, 1.0305, 0.7521, 0.7489, -0.3968, 0.0113, -1.3844, 0.8705, -0.9743]
])

# Create network with three linear layers
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=10, out_features=18),
    torch.nn.Linear(in_features=18, out_features=20),
    torch.nn.Linear(in_features=20, out_features=5)
)

if torch.cuda.is_available():
    input_tensor = input_tensor.cuda()
    model = model.cuda()

# Pass input_tensor to model to obtain output
output_tensor = model(input_tensor)
print(output_tensor)

tensor([[ 0.0126,  0.0104, -0.0756, -0.1790,  0.0743]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


In [8]:
import torch

input_tensor = torch.tensor([[2, 3, 6, 7, 9, 3, 2, 1]], dtype=torch.float32)

# Implement a small neural network with exactly two linear layer
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=8, out_features=4),
    torch.nn.Linear(in_features=4, out_features=1)
)

if torch.cuda.is_available():
    input_tensor = input_tensor.cuda()
    model = model.cuda()

output = model(input_tensor)
print(output)

tensor([[0.1250]], device='cuda:0', grad_fn=<AddmmBackward0>)


### Activation functions

In [11]:
import torch

input = torch.tensor([[6.0]])
sigmoid = torch.nn.Sigmoid()

if torch.cuda.is_available():
    input = input.cuda()
    sigmoid = sigmoid.cuda()

output = sigmoid(input)
print("Output sigmoid:", output)

Output sigmoid: tensor([[0.9975]], device='cuda:0')


In [12]:
import torch

input = torch.rand(size=(6,))

model = torch.nn.Sequential(
    torch.nn.Linear(in_features=6, out_features=4), # First linear layer
    torch.nn.Linear(in_features=4, out_features=1), # Second linear layer
    torch.nn.Sigmoid() # Sigmoid activation function
)

if torch.cuda.is_available():
    input = input.cuda()
    model = model.cuda()

output = model(input)
print("Output sigmoid:", output)

Output sigmoid: tensor([0.4675], device='cuda:0', grad_fn=<SigmoidBackward0>)


In [13]:
import torch

# Create an input tensor
input = torch.tensor([[4.3, 6.1, 2.3]])
# Apply softmax along the last dimension
probabilities = torch.nn.Softmax(dim=-1)

if torch.cuda.is_available():
    input = input.cuda()
    probabilities = probabilities.cuda()

output = probabilities(input)
print("Output softmax:", output)

Output softmax: tensor([[0.1392, 0.8420, 0.0188]], device='cuda:0')


In [14]:
import torch

score = torch.tensor([[0.8]])
# Create a sigmoid function and apply it on the score tensor
sigmoid = torch.nn.Sigmoid()

if torch.cuda.is_available():
    score = score.cuda()
    sigmoid = sigmoid.cuda()

probability = sigmoid(score)
print("Probability sigmoid:", probability)

Probability sigmoid: tensor([[0.6900]], device='cuda:0')


In [15]:
import torch

scores = torch.tensor([[1.0, -6.0, 2.5, -0.3, 1.2, 0.8]])
# Create a softmax function and apply it on the score tensor
softmax = torch.nn.Softmax(dim=-1)

if torch.cuda.is_available():
    scores = scores.cuda()
    softmax = softmax.cuda()

probabilities = softmax(scores)
print("Probabilities softmax:", probabilities)

Probabilities softmax: tensor([[1.2828e-01, 1.1698e-04, 5.7492e-01, 3.4961e-02, 1.5669e-01, 1.0503e-01]],
       device='cuda:0')


## Training Neural Network

### Forward pass

In [1]:
import torch

# Create input data of shape 5x6
input = torch.tensor([
    [-0.4421, 1.5207, 2.0607, -0.3647, 0.4691, 0.0946],
    [-0.9155, -0.0475, -1.3645, 0.6336, -0.19520, -0.3398],
    [0.7406, 1.6763, -0.8511, 0.2432, 0.1123, -0.0633],
    [-1.6630, -0.0718, -0.1285, 0.5396, -0.0288, -0.8622],
    [-0.7413, 1.7920, -0.0883, -0.6685, 0.4745, -0.4245]
])

# Create binary classification model
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=6, out_features=4), # First linear layer
    torch.nn.Linear(in_features=4, out_features=1), # Second linear layer
    torch.nn.Sigmoid() # Sigmoid activation function
)

if torch.cuda.is_available():
    input = input.cuda()
    model = model.cuda()

# Pass input data through model
output = model(input)
print("Output shape:", output.shape)
print("Output:", output)

Output shape: torch.Size([5, 1])
Output: tensor([[0.5893],
        [0.6004],
        [0.6618],
        [0.5699],
        [0.5741]], device='cuda:0', grad_fn=<SigmoidBackward0>)


In [2]:
import torch

# Create input data of shape 5x6
input = torch.tensor([
    [-0.4421, 1.5207, 2.0607, -0.3647, 0.4691, 0.0946],
    [-0.9155, -0.0475, -1.3645, 0.6336, -0.19520, -0.3398],
    [0.7406, 1.6763, -0.8511, 0.2432, 0.1123, -0.0633],
    [-1.6630, -0.0718, -0.1285, 0.5396, -0.0288, -0.8622],
    [-0.7413, 1.7920, -0.0883, -0.6685, 0.4745, -0.4245]
])

# Specify model has three classes
n_classes = 3

# Create multiclass classification model
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=6, out_features=4), # First linear layer
    torch.nn.Linear(in_features=4, out_features=n_classes), # Second linear layer
    torch.nn.Softmax(dim=-1) # Softmax activation
)

if torch.cuda.is_available():
    input = input.cuda()
    model = model.cuda()

# Pass input data through model
output = model(input)
print("Output shape:", output.shape)
print("Output:", output)

Output shape: torch.Size([5, 3])
Output: tensor([[0.2114, 0.3633, 0.4253],
        [0.3579, 0.4361, 0.2060],
        [0.1713, 0.4706, 0.3581],
        [0.3933, 0.3662, 0.2406],
        [0.2584, 0.4002, 0.3413]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


In [3]:
import torch

# Create input data of shape 5x6
input = torch.tensor([
    [-0.4421, 1.5207, 2.0607, -0.3647, 0.4691, 0.0946],
    [-0.9155, -0.0475, -1.3645, 0.6336, -0.19520, -0.3398],
    [0.7406, 1.6763, -0.8511, 0.2432, 0.1123, -0.0633],
    [-1.6630, -0.0718, -0.1285, 0.5396, -0.0288, -0.8622],
    [-0.7413, 1.7920, -0.0883, -0.6685, 0.4745, -0.4245]
])

# Create regression model
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=6, out_features=4), # First linear layer
    torch.nn.Linear(in_features=4, out_features=1) # Second linear layer
)

if torch.cuda.is_available():
    input = input.cuda()
    model = model.cuda()

# Pass input data through model
output = model(input)
# Return output
print("Output shape:", output.shape)
print("Output:", output)

Output shape: torch.Size([5, 1])
Output: tensor([[ 0.5293],
        [-0.4981],
        [-0.0318],
        [-0.2674],
        [ 0.2802]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [4]:
import torch

input = torch.Tensor([[3, 4, 6, 2, 3, 6, 8, 9]])

# Implement a small neural network for binary classification
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=8, out_features=1),
    torch.nn.Sigmoid()
)

if torch.cuda.is_available():
    input = input.cuda()
    model = model.cuda()

output = model(input)
print("Output shape:", output.shape)
print("Output:", output)

Output shape: torch.Size([1, 1])
Output: tensor([[0.5632]], device='cuda:0', grad_fn=<SigmoidBackward0>)


In [6]:
import torch

input = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Implement a neural network with exactly four linear layers
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=11, out_features=20),
    torch.nn.Linear(in_features=20, out_features=12),
    torch.nn.Linear(in_features=12, out_features=6),
    torch.nn.Linear(in_features=6, out_features=4),
    torch.nn.Softmax(dim=-1)
)

if torch.cuda.is_available():
    input = input.cuda()
    model = model.cuda()

output = model(input)
print("Output shape:", output.shape)
print("Output:", output)

Output shape: torch.Size([1, 4])
Output: tensor([[0.0736, 0.4231, 0.1553, 0.3479]], device='cuda:0',
       grad_fn=<SoftmaxBackward0>)


### Loss functions

In [8]:
import torch

print("One hot encoding for 1st class:",
    torch.nn.functional.one_hot(input=torch.tensor(0), num_classes=3))
print("One hot encoding for 2nd class:",
    torch.nn.functional.one_hot(input=torch.tensor(1), num_classes=3))
print("One hot encoding for 3rd class:",
    torch.nn.functional.one_hot(input=torch.tensor(2), num_classes=3))

One hot encoding for 1st class: tensor([1, 0, 0])
One hot encoding for 2nd class: tensor([0, 1, 0])
One hot encoding for 3rd class: tensor([0, 0, 1])


In [5]:
import torch

scores = torch.tensor([[-0.1211, 0.1059]])
one_hot_target = torch.tensor([[1, 0]])

if torch.cuda.is_available():
    scores = scores.cuda()
    one_hot_target = one_hot_target.cuda()

criterion = torch.nn.CrossEntropyLoss()

loss = criterion(input=scores.double(), target=one_hot_target.double())
print("Loss:", loss)

Loss: tensor(0.8131, device='cuda:0', dtype=torch.float64)


In [12]:
import numpy as np
import torch

y = 1
num_classes = 3

# Create the one-hot encoded vector using NumPy
one_hot_numpy = np.array([0, 1, 0])
print("One-hot numpy:", one_hot_numpy)

# Create the one-hot encoded vector using PyTorch
one_hot_pytorch = torch.nn.functional.one_hot(
    input=torch.tensor(y), num_classes=num_classes)
print("One-hot pytorch:", one_hot_pytorch)

One-hot numpy: [0 1 0]
One-hot pytorch: tensor([0, 1, 0])


In [4]:
import torch

y = [2]
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# Create a one-hot encoded vector of the label y
one_hot_label = torch.nn.functional.one_hot(
    torch.tensor(y), num_classes=scores.shape[1])
print("One-hot label:", one_hot_label)

if torch.cuda.is_available():
    scores = scores.cuda()
    one_hot_label = scores.cuda()

# Create the cross entropy loss function
criterion = torch.nn.CrossEntropyLoss()

# Calculate the cross entropy loss
loss = criterion(input=scores.double(), target=one_hot_label.double())
print("Loss:", loss)

One-hot label: tensor([[0, 0, 1, 0]])
Loss: tensor(-5.9979, device='cuda:0', dtype=torch.float64)


### Update parameters

In [None]:
import torch

# Create the model and run a forward pass
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=16, out_features=8),
    torch.nn.Linear(in_features=8, out_features=4),
    torch.nn.Linear(in_features=4, out_features=2)
)

if torch.cuda.is_available():
    sample = sample.cuda()
    model = model.cuda()

pred = model(sample)

# Calculate the loss and compute the gradients
criterion = torch.nn.CrossEntropyLoss()
# Calculate the loss
loss = criterion(input=pred, target=target)
# Compute the gradients of the loss
loss.backward()
# Display gradients of the weight and bias tensors in order
print(weight.grad)
print(bias.grad)

# Access the weight of the first linear layer
weight_0 = model[0].weight
# Access the bias of the second linear layer
bias_1 = model[2].bias
# Access each layer's gradients
model[0].weight.grad, model[0].bias.grad
model[1].weight.grad, model[1].bias.grad
model[2].weight.grad, model[2].bias.grad

# Learning rate is typically small
lr = 0.001
# Update the weights
weight = model[0].weight
weight_grad = model[0].weight.grad
weight = weight - lr * weight_grad
# Update the biases
bias = model[0].bias
bias_grad = model[0].bias.grad
bias = bias - lr * bias_grad

# Create the optimizer
optimizer = torch.optim.SGD(
    params=model.parameters(), lr=0.001)
# Update the model's parameters using the optimizer
optimizer.step()

### Training loop

In [None]:
import torch

# Create the dataset and the dataloader
dataset = torch.utils.data.TensorDataset(
    torch.tensor(features).float(), torch.tensor(target).float())
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=4, shuffle=True)

# Create the model
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=4, out_features=2),
    torch.nn.Linear(in_features=2, out_features=1)
)

if torch.cuda.is_available():
    dataloader = dataloader.cuda()
    model = model.cuda()

# Create the loss and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

# Loop through the dataset multiple times
for epoch in range(num_epochs):
    for data in dataloader:
        # Set the gradients to zero
        optimizer.zero_grad()
        # Get feature and target from the data loader
        feature, target = data
        # Run a forward pass
        pred = model(feature)
        # Compute loss and gradients
        loss = criterion(input=pred, target=target)
        loss.backward()
        # Update the parameters
        optimizer.step()

In [6]:
import numpy as np
import torch

y = np.array(1)
y_hat = np.array(10)

# Calculate the MSELoss using Numpy
mse_numpy = np.mean((y_hat - y)**2)
print("MSE numpy:", mse_numpy)

if torch.cuda.is_available():
    y = torch.tensor(y).float().cuda()
    y_hat = torch.tensor(y_hat).float().cuda()

# Create the MSELoss function
criterion = torch.nn.MSELoss()

# Calculate the MSELoss using the created loss function
mse_pytorch = criterion(
    input=y_hat, target=y
)
print("MSE pytorch:", mse_pytorch)

MSE numpy: 81.0
MSE pytorch: tensor(81., device='cuda:0')


In [None]:
import torch

# Create the dataset and the dataloader
dataset = torch.utils.data.TensorDataset(
    torch.tensor(features).float(),
    torch.tensor(target).float()
)
dataloader = torch.utils.data.DataLoader(
    dataset=dataset, batch_size=4, shuffle=True
)
# Create the loss and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(
    params=model.parameters(), lr=0.001
)
# Loop over the number of epochs and then the dataloader
for i in range(num_epochs):
    for data in dataloader:
        # Set the gradients to zero
        optimizer.zero_grad()
        # Run a forwarad pass
        feature, target = data
        prediction = model(feature)
        # Calculate the loss
        loss = criterion(input=prediction, target=target)
        # Compute the gradients
        loss.backward()
        # Update the model's parameters
        optimizer.step()