**Running Forward Pass**

In [None]:
"""
Generating predictions from models is called "running a forward pass" through a network.

The purpose of the forward pass is to propagate input data through the network and produce predictions or outputs based on the model's learned parameters
(weights and biases)

"""

In [3]:
### Binary Classification : forward pass

import torch
import torch.nn as nn

# Five samples with six features each (a 5x6 tensor)
input_data = torch.tensor([
    [-0.4421,  1.5207,  2.0607, -0.3647,  0.4691,  0.0946],
    [-0.9155, -0.0475, -1.3645,  0.6336, -1.9520, -0.3398],
    [ 0.7406,  1.6763, -0.8511,  0.2432,  0.1123, -0.0633],
    [-1.6630, -0.0718, -0.1285,  0.5396, -0.0288, -0.8622],
    [-0.7413,  1.7920, -0.0883, -0.6685,  0.4745, -0.4245],
])

# Binary classifier: two stacked linear layers followed by a sigmoid,
# which squashes each score into the (0, 1) range
hidden_layer = nn.Linear(6, 4)
output_layer = nn.Linear(4, 1)
model = nn.Sequential(hidden_layer, output_layer, nn.Sigmoid())

# Forward pass: feed the inputs through the model
output = model(input_data)

# The layer weights and biases have not been updated by backpropagation yet,
# so the printed values are not meaningful at this point.
print(output)

tensor([[0.5536],
        [0.6728],
        [0.6307],
        [0.5683],
        [0.6266]], grad_fn=<SigmoidBackward0>)


In [4]:
##### Multi-class classification: forward pass

# Number of classes the model chooses between
n_classes = 3

# Multi-class classifier: the final linear layer emits one score per class and
# the softmax over the last dimension turns each row into probabilities
layers = [
    nn.Linear(6, 4),
    nn.Linear(4, n_classes),
    nn.Softmax(dim=-1),
]
model = nn.Sequential(*layers)

# Forward pass on input_data, then show the output's shape and values
output = model(input_data)
print(output.shape)
print(output)

torch.Size([5, 3])
tensor([[0.2810, 0.5063, 0.2127],
        [0.3713, 0.3129, 0.3158],
        [0.2479, 0.3024, 0.4497],
        [0.3295, 0.4224, 0.2481],
        [0.2228, 0.3656, 0.4116]], grad_fn=<SoftmaxBackward0>)


In [None]:
### Regression: forward pass

# Regression model: two linear layers and no final activation
first_linear = nn.Linear(6, 4)   # First linear layer
second_linear = nn.Linear(4, 1)  # Second linear layer
model = nn.Sequential(first_linear, second_linear)

# Forward pass on input_data, then show the result
output = model(input_data)
print(output)

In [None]:
"""

Create a neural network that takes a tensor of dimensions 1x8 as input, and returns an output of the correct shape for binary classification.
Pass the output of the linear layer to a sigmoid, which both takes in and returns a single float.

"""

import torch
import torch.nn as nn

# A single sample with eight features (shape 1x8). torch.tensor with an explicit
# dtype replaces the legacy torch.Tensor constructor; the values are the same
# float32 numbers, which is what nn.Linear expects.
input_tensor = torch.tensor([[3, 4, 6, 2, 3, 6, 8, 9]], dtype=torch.float32)

# One linear layer maps the eight features to a single score, and the sigmoid
# squashes that score into the (0, 1) range for binary classification.
# The layer class is nn.Sigmoid (capital S): torch.nn has no `sigmoid`
# attribute, so `nn.sigmoid()` raises AttributeError.
model = nn.Sequential(
    nn.Linear(8, 1),
    nn.Sigmoid()
)

output = model(input_tensor)
print(output)

In [None]:
"""
Create a 4-layer linear neural network compatible with input_tensor as the input, and a regression value as output.
"""

import torch
import torch.nn as nn

input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Exactly four linear layers: the widths shrink from the 11 input features down
# to a single regression value, and each layer's input size equals the previous
# layer's output size.
layer_widths = [11, 10, 8, 5, 1]
model = nn.Sequential(
    *(nn.Linear(n_in, n_out) for n_in, n_out in zip(layer_widths, layer_widths[1:]))
)

output = model(input_tensor)
print(output)


In [None]:
"""
Update the network provided to perform a multi-class classification with four outputs.
"""

import torch
import torch.nn as nn

input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Multi-class classifier with four labels: the last linear layer emits one score
# per label and the softmax over the last dimension turns them into probabilities
linear_stack = [
    nn.Linear(11, 20),
    nn.Linear(20, 12),
    nn.Linear(12, 6),
    nn.Linear(6, 4),
]
model = nn.Sequential(*linear_stack, nn.Softmax(dim=-1))

output = model(input_tensor)
print(output)

**Using loss functions to assess model predictions**

In [None]:
"""

The loss function tells us how good our model is at making predictions during training. It takes a model prediction, y-hat, and true label, or ground truth, y,
as inputs, and outputs a float.

"""


In [6]:
### Cross entropy loss in PyTorch


import torch
from torch.nn import CrossEntropyLoss

# Model scores for one sample over two classes
scores = torch.tensor([[-0.1211,  0.1059]])
# One-hot ground truth marking class 0 as the true class
one_hot_target = torch.tensor([[1, 0]])

criterion = CrossEntropyLoss()
# Both arguments are cast to float64 for the call; the value displayed by the
# cell is the loss.
criterion(scores.to(torch.float64), one_hot_target.to(torch.float64))

tensor(0.8131, dtype=torch.float64)

In [8]:
### Creating one-hot encoded labels

"""
One-hot encoding is a technique that turns a single integer label into a vector of N elements, where N is the number of classes in your dataset.
This vector only contains zeros and ones. In this exercise, you'll create the one-hot encoded vector of the label y provided.

You'll practice doing this manually, and then make your life easier by leveraging the help of PyTorch! Your dataset contains three classes,
and the class labels range from 0 to 2 (e.g., 0, 1, 2).

"""






"""
Manually create a one-hot encoded vector of the ground truth label y by filling in the NumPy array provided.
Create a one-hot encoded vector of the ground truth label y using PyTorch.
"""


import numpy as np
import torch
import torch.nn.functional as f

y = 1
num_classes = 3

# Create the one-hot encoded vector using NumPy (filled in by hand for y = 1)
one_hot_numpy = np.array([0, 1, 0])

# Create the one-hot encoded vector using PyTorch. It is derived from y and
# num_classes instead of repeating the literal label, so it follows the label
# if y changes.
one_hot_pytorch = f.one_hot(torch.tensor(y), num_classes=num_classes)

# Sanity check: the manual and the PyTorch encodings must agree
assert np.array_equal(one_hot_numpy, one_hot_pytorch.numpy())

In [11]:
"""
Create the one-hot encoded vector of the ground truth label y, with 4 elements (one for each class), and assign it to one_hot_label.

Create the cross entropy loss function and store it as criterion.

Calculate the cross entropy loss using the one_hot_label vector and the scores vector, by calling the criterion you created.

"""

import torch
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

y = [2]
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# One-hot encode the label y across the four classes
label_tensor = torch.tensor(y)
one_hot_label = F.one_hot(label_tensor, num_classes=4)

# Cross entropy loss function
criterion = CrossEntropyLoss()

# Evaluate the loss on float64 copies of the scores and the one-hot label
loss = criterion(scores.to(torch.float64), one_hot_label.to(torch.float64))
print(loss)

tensor(8.0619, dtype=torch.float64)


**Using derivatives to update model parameters**

In [None]:
### An analogy for derivatives

"""

Let's think of the loss function we are trying to minimize as a valley. Each horizontal step (along x) involves gaining or losing some height (y).
At steeper slopes, a single step means losing or gaining a lot of elevation. Mathematically, the derivative (or slope) is high.
Inversely, at gentler slopes, a single step involves losing or gaining less elevation, meaning a smaller derivative, or slope.
Finally, the bottom of the valley is flat, and elevation does not change at each step: the derivative is zero.

If the valley is our loss function, the function is at a minimum where the derivative is zero.

"""

In [None]:
### Backpropagation in PyTorch

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss

# Example input and label so the cell runs on a fresh kernel: one sample with
# 16 features whose true class is 0. Swap in real data as needed.
sample = torch.randn(1, 16)
target = torch.tensor([0])

# Create the model and run a forward pass.
# There is no Softmax layer at the end: CrossEntropyLoss expects raw,
# unnormalized scores (logits) and applies log-softmax internally, so a Softmax
# layer here would normalize the scores twice.
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Linear(8, 4),
    nn.Linear(4, 2)
)

prediction = model(sample)

# Calculate the loss and compute the gradients
criterion = CrossEntropyLoss()
loss = criterion(prediction, target)
loss.backward()  # populates .grad on every parameter of the model

# Access each layer's gradients as one (weight.grad, bias.grad) pair per layer.
# They are collected in a list so that all three layers are displayed, not only
# the last expression of the cell.
layer_grads = [(layer.weight.grad, layer.bias.grad) for layer in model]
layer_grads

In [None]:
### Updating model parameters

import torch

# Learning rate is typically small
lr = 0.001

# Gradients of the first layer, as computed by the last backward pass
weight_grad = model[0].weight.grad
bias_grad = model[0].bias.grad

# Writing `weight = weight - lr * weight_grad` would only build a new tensor and
# rebind a local name; the model itself would stay unchanged. Update the
# parameters in place instead, with autograd switched off so the update is not
# recorded in the computation graph.
with torch.no_grad():
    # Update the weights
    model[0].weight -= lr * weight_grad
    # Update the biases
    model[0].bias -= lr * bias_grad

# Keep the original variable names, now bound to the updated parameters
weight = model[0].weight
bias = model[0].bias

In [None]:
### Gradient Descent

"""
Loss functions used in deep learning are generally not convex, so they can have many local minima. To search for parameters with a low loss, we use an iterative mechanism called "gradient descent"; it follows the slope downhill and is not guaranteed to reach the global minimum.
PyTorch does this for us using "optimizers".

The most common optimizer is stochastic gradient descent (SGD). We use optim to instantiate SGD as shown.
.parameters() returns an iterable of all model parameters, which we pass to the optimizer. We use a standard learning rate, "lr", here, but this is tunable.
The gradients are calculated by loss.backward(); the optimizer then uses them to update the model parameters automatically when we call .step().

"""

from torch import optim

# Build an SGD optimizer over every parameter of the model
sgd_learning_rate = 0.001
optimizer = optim.SGD(model.parameters(), lr=sgd_learning_rate)

# Apply one update step with the gradients currently stored on the parameters
optimizer.step()


In [None]:
"""

Access the weight parameter of the first linear layer.
Access the bias parameter of the second linear layer.

"""

# Build the two layers first so each can be referred to by name
first_layer = nn.Linear(16, 8)
second_layer = nn.Linear(8, 2)
model = nn.Sequential(first_layer, second_layer)

# Weight of the first linear layer (the same object as model[0].weight)
weight_0 = first_layer.weight

# Bias of the second linear layer (the same object as model[1].bias)
bias_1 = second_layer.bias

In [None]:
"""

Create the gradient variables by accessing the local gradients of each weight tensor.

"""

import torch
import torch.nn as nn

# This exercise needs a model with three linear layers whose gradients have
# already been computed. The model defined in the previous cell has only two
# layers and has not been through a backward pass, so build a suitable model
# here and run one forward/backward pass on an example sample.
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Linear(8, 4),
    nn.Linear(4, 2)
)
sample = torch.randn(1, 16)   # example input: one sample with 16 features
target = torch.tensor([0])    # example label: class 0
loss = nn.CrossEntropyLoss()(model(sample), target)
loss.backward()               # populates .grad on every parameter

weight0 = model[0].weight
weight1 = model[1].weight
weight2 = model[2].weight

# Access the gradients of the weight of each linear layer
grads0 = weight0.grad
grads1 = weight1.grad
grads2 = weight2.grad

In [None]:
"""

Update the weights using the gradients scaled by the learning rate.

"""

import torch
import torch.nn as nn

# Set-up so the cell runs on its own: a model with three linear layers, one
# forward/backward pass on an example sample to populate the gradients, and the
# learning rate.
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Linear(8, 4),
    nn.Linear(4, 2)
)
sample = torch.randn(1, 16)   # example input: one sample with 16 features
target = torch.tensor([0])    # example label: class 0
loss = nn.CrossEntropyLoss()(model(sample), target)
loss.backward()
lr = 0.001

weight0 = model[0].weight
weight1 = model[1].weight
weight2 = model[2].weight

# Access the gradients of the weight of each linear layer
grads0 = weight0.grad
grads1 = weight1.grad
grads2 = weight2.grad

# Update the weights using the learning rate and the gradients.
# The update is done in place so the model's own parameters change (a plain
# `weight0 = weight0 - lr * grads0` would only create a new tensor), and under
# no_grad so the update is not recorded in the computation graph.
with torch.no_grad():
    weight0 -= lr * grads0
    weight1 -= lr * grads1
    weight2 -= lr * grads2

In [None]:
"""

Use optim to create an SGD optimizer with a learning rate of your choice (must be less than one) for the model provided.
Update the model's parameters using the optimizer.

"""

import torch
import torch.nn as nn
import torch.optim as optim

# Set-up so the cell runs on its own: a small model, one example input with its
# label, and the loss function.
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Linear(8, 2)
)
sample = torch.randn(1, 16)   # example input: one sample with 16 features
target = torch.tensor([0])    # example label: class 0
criterion = nn.CrossEntropyLoss()

# Create the optimizer (once; the learning rate must be less than one)
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Clear any gradients left over from earlier backward passes, then run the
# forward pass and compute the loss and its gradients
optimizer.zero_grad()
pred = model(sample)
loss = criterion(pred, target)
loss.backward()

# Update the model's parameters using the optimizer
optimizer.step()

**Training Loop**

In [None]:
### Before the training loop

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Example data so the cell runs on a fresh kernel: 12 samples with 4 features
# each and one regression target per sample. Replace with the real arrays.
rng = np.random.default_rng(42)
features = rng.random((12, 4))
target = rng.random((12, 1))  # shape (N, 1), the same layout as the model output

# Create the dataset and the dataloader
dataset = TensorDataset(torch.tensor(features).float(), torch.tensor(target).float())
# DataLoader() creates "batches" of data that are passed through the model in
# each forward/backward pass
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Create the model
model = nn.Sequential(
    nn.Linear(4, 2),
    nn.Linear(2, 1)
)

# Create the loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [None]:
# The training loop

# Number of full passes over the dataset; tune as needed
num_epochs = 10

# Loop through the dataset multiple times
for epoch in range(num_epochs):
    # Each item from the dataloader is a (features, target) batch. The batch
    # variables get their own names so they do not overwrite the dataset-level
    # `target` array.
    for batch_features, batch_target in dataloader:
        # Set the gradients to zero, because gradients from previous backward
        # passes are accumulated by default
        optimizer.zero_grad()
        # Run a forward pass
        pred = model(batch_features)
        # Compute loss and gradients. The target is reshaped to the prediction's
        # shape so that a 1-D target is not silently broadcast against an
        # (N, 1) prediction.
        loss = criterion(pred, batch_target.reshape(pred.shape))
        loss.backward()
        # Update the parameters
        optimizer.step()


In [12]:
"""

Calculate the MSE loss using NumPy.
Create a MSE loss function using PyTorch.
Convert y_pred and y to tensors and then float data types, and then use them to calculate MSELoss using PyTorch as mse_pytorch.

"""

# Prediction and ground truth as zero-dimensional NumPy arrays
y_pred = np.array(10)
y = np.array(1)

# MSE computed with NumPy: the mean of the squared difference
mse_numpy = np.square(y_pred - y).mean()

# MSE loss function from PyTorch
criterion = nn.MSELoss()

# Convert both arrays to float32 tensors and evaluate the loss on them
pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
true_tensor = torch.tensor(y, dtype=torch.float32)
mse_pytorch = criterion(pred_tensor, true_tensor)
print(mse_pytorch)

tensor(81.)
