<a href="https://colab.research.google.com/github/Alexmarco-gif/pytorch-deep-learning-professional-path/blob/main/Deep_Learning_Foundation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Creating Tensors

In [None]:
import torch

In [None]:
data = [[1,2], [2,6]]
x_data = torch.tensor(data)
print(f"tensor from the list of {x_data}")
print(x_data.shape)

tensor from the list of tensor([[1, 2],
        [2, 6]])
torch.Size([2, 2])


From Numpy Array

In [None]:
import numpy as np

In [None]:
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
print(f"numpy from list of {x_np}")

numpy from list of tensor([[1, 2],
        [2, 6]])


Initialize with random value

In [None]:
x_ones = torch.ones(2, 2) # A 2 x 2 Matrix of ones
x_rand = torch.rand (2, 2)# A 2x2 Matrix of random value uniform distributions

print(f"2x2 Matrices of Ones {x_ones}")
print(f"2x2 Matrices of random value {x_rand}")

2x2 Matrices of Ones tensor([[1., 1.],
        [1., 1.]])
2x2 Matrices of random value tensor([[0.3668, 0.8455],
        [0.9860, 0.1436]])


## Tensor Attributes

In [None]:
torch = torch.rand (3, 4)

print(f"Shape of tensor: {torch.shape}")
print(f"Datatype of tensor: {torch.dtype}")
print(f"Device tensor is stored on: {torch.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [None]:
import torch
if torch.cuda.is_available():
  device = 'cuda'
  print("NVIDIA GPU is avaliable")
else:
  device = 'cpu'
  print("NVIDIA GPU is not avaliable")

tensor_on_device = torch.ones (5,5)
tensor_on_device = tensor_on_device.to(device) # Corrected variable name

print(f"Tensor on device: {tensor_on_device.device}")

NVIDIA GPU is not avaliable
Tensor on device: cpu


## üìù Exercise 1: Tensor Manipulation and Device Check
Your task is to create two specific tensors and report their attributes.
1. Create a $4 \times 3$ tensor named A filled with zeros, using the data type torch.float32.
2. Create a $5 \times 5$ tensor named B filled with random integers between 1 and 10.
3. Move Tensor A to the GPU if one is available.
4. Print the shape, data type, and device for both Tensor A and Tensor B.

In [None]:
tensor_1 = torch.zeros(4, 3, dtype=torch.float32 device=device)
tensor_2 = torch.randint(1, 11, (5, 5))

if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

tensor_1 = tensor_1.to(device)


print(f"the shape of tensor 1 is {tensor_1.shape}")
print(f"the shape of tensor 2 is {tensor_2.shape}")
print(f"the data type of tensor 1 is {tensor_1.dtype}")
print(f"the data type of tensor 2 is {tensor_2.dtype}")
print(f"the device of tensor 1 is {tensor_1.device}")
print(f"the device of tensor 2 is {tensor_2.device}")

the shape of tensor 1 is torch.Size([4, 3])
the shape of tensor 2 is torch.Size([5, 5])
the data type of tensor 1 is torch.float32
the data type of tensor 2 is torch.int64
the device of tensor 1 is cpu
the device of tensor 2 is cpu


# Module 2: Autograd and Gradients

In [None]:
import torch

In [None]:
# Tracking Operations

x = torch.tensor(4.6, requires_grad=True)
print(f'requires grad{x.requires_grad}')


y = 2 * x
z = y + 5


print(f"z's operation function (grad_fn): {z.grad_fn}")

requires gradTrue
z's operation function (grad_fn): <AddBackward0 object at 0x7a095d3bfa00>


The Backpropagation Step <br>
Let's calculate the derivative of $z$ with respect to $x$.$$z = 2x + 5$$The analytical derivative (gradient) is:$$\frac{\partial z}{\partial x} = 2$$

In [None]:
x = torch.tensor(4.6, requires_grad=True)
y = x * 2
z = y + 5

# 1. Perform backpropagation
# Since z is a scalar (single number), we call .backward() with no arguments
z.backward()

# 2. Access the gradient: dz/dx
print(f"The gradient (dz/dx) is: {x.grad}")

The gradient (dz/dx) is: 2.0


In [None]:
# Preventing Gradient Tracking
x = torch.tensor(7.5, requires_grad=True)

with torch.no_grad(): # Saves memory and computational times
  y = x * 2
  z = y + 5

print(f"z's operation function (grad_fn): {z.grad_fn}")

z's operation function (grad_fn): None


## üìù Exercise 2: Implementing Autograd
Your task is to calculate the gradient of the function $L$ with respect to $w$ and $b$, given the fixed input $x$. <br>
The function is: $$L = (w \cdot x + b)^2$$ Given values: <br>
$x = 2.0$ <br>$w = 3.0$ <br>$b = 1.0$ <br>
1. Initialize $x$, $w$, and $b$ as scalar tensors. Ensure only $w$ and $b$ have requires_grad=True (as $x$ is input data, not a parameter we optimize).
2. Calculate the value of $L$.
3. Perform the backpropagation step.
4. Print the gradients $\frac{\partial L}{\partial w}$ (stored in $w.grad$) and $\frac{\partial L}{\partial b}$ (stored in $b.grad$).

In [None]:
w = torch.tensor(3.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)
x = torch.tensor(2.0)

y = w * x + b
L = y ** 2

L.backward()

print(f"dL/dw: {w.grad}")
print(f"dL/db: {b.grad}")

dL/dw: 28.0
dL/db: 14.0


# Module 3: Building Basic Networks `(torch.nn)`
`torch.Tensor` and `autograd` handle the math. <br>
`torch.nn` module provides the high-level building blocks for constructing neural networks.

The Linear Layer `(nn.Linear)`<br> This is the most fundamental layer, implementing a simple linear transformation:$$y = xA^T + b$$where $x$ is the input, $A$ is the weight matrix, and $b$ is the bias vector.

In [None]:
import torch
from torch import nn


class SimpleFNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size ):
    super(SimpleFNN, self).__init__()


    # Layer 1: Input to Hidden
    self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size)

    # Activation Function: Introduce non-linearity
    self.relu = nn.ReLU()

    # Layer 2: Hidden to Output
    self.out= nn.Linear (in_features = hidden_size, out_features=output_size)



  # Forward Pass: Define how data flows through the layers
  def forward(self, x):
    # x -> Linear Layer 1
    x = self.layer1(x)
    # -> ReLU Activation
    x = self.relu(x)
    # -> Linear Layer 2 (Final output)
    logit = self.out(x)
    return logit



# Instantiate the model
input_dim = 4
hidden_dim = 8
output_dim = 1
model = SimpleFNN(input_dim, hidden_dim, output_dim)

print("Model Structure:")
print(model)

# View wthe parameters
print("Model Parameters:")
for name, param in model.named_parameters():
  print(f"Name ==> {name}: Shape ==> {param.shape}")





Model Structure:
SimpleFNN(
  (layer1): Linear(in_features=4, out_features=8, bias=True)
  (relu): ReLU()
  (out): Linear(in_features=8, out_features=1, bias=True)
)
Model Parameters:
Name ==> layer1.weight: Shape ==> torch.Size([8, 4])
Name ==> layer1.bias: Shape ==> torch.Size([8])
Name ==> out.weight: Shape ==> torch.Size([1, 8])
Name ==> out.bias: Shape ==> torch.Size([1])


In [None]:
import torch
from torch import nn

class MNIST_Classifier (nn.Module):
  def __init__(self, input_size, hidden_layer_size_1, hidden_layer_size_2, output_size):
    super(MNIST_Classifier, self).__init__()

    # define the first layer
    self.layer1 = nn.Linear(input_size, hidden_layer_size_1)

    # Define ReLU activation function
    self.relu = nn.ReLU()

    # define second layer
    self.layer2 = nn.Linear(hidden_layer_size_1, hidden_layer_size_2) # Corrected input size for layer2

    # define the output layer
    self.layer_out = nn.Linear(hidden_layer_size_2, output_size)


  def forward(self, x):
    # pass the input to the first layer
    x = self.layer1(x) # Corrected layer call

    # apply the ReLU activation function
    x = self.relu(x)

    # pass the result to the second layer
    x = self.layer2(x)

    # apply the activation function
    x = self.relu(x)

    # pass the result to the output layer
    logit = self.layer_out(x)
    return logit

# initialize the model
Input = 784
hidden_layer_1 = 256
hidden_layer_2 = 64
output_size = 10

# instantiate the MNIST_Classifier model with the define sizes
model = MNIST_Classifier(Input, hidden_layer_1, hidden_layer_2, output_size)

# print the model structure

print("Model Parameters")
print(model)


# make prediction
input_try = torch.rand(32, Input)
output_pred = model(input_try)

print(output_pred.shape)

Model Parameters
MNIST_Classifier(
  (layer1): Linear(in_features=784, out_features=256, bias=True)
  (relu): ReLU()
  (layer2): Linear(in_features=256, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=10, bias=True)
)
torch.Size([32, 10])


## Module 4: Training Essentials
Now that we can define a model, we need the components that allow it to learn‚Äîthe loss function and the optimizer. This leads us to the Core Training Loop. <br> <br>
1. Loss Functions (nn.Loss) The Loss Function (or Criterion) quantifies how far the model's prediction (logits) is from the true target value (label). The goal of training is to minimize this loss.
* `MSELoss()`
* `nn.CrossEntropyLoss()`
* `nn.BCELoss()`
2.  Optimizers (torch.optim) The Optimizer dictates how the model's parameters (weights and biases) are updated based on the gradients calculated by Autograd.
* SGD	Stochastic Gradient Descent. 	`optim.SGD(params, lr)`
* Adam	Adaptive Moment Estimation. 	`optim.Ad`
3. The Core Training Loop (Four Steps) <br>
Training any PyTorch model involves repeating these four steps for every batch of data (one epoch):  <br>
1. Zero Gradients: Clear any previous gradients stored in the parameters using `optimizer.zero_grad()`.
2. Forward Pass: Calculate the model's output `(logits)` for the current batch: `logits = model(input)`.
3. Calculate Loss: Compute the error: `loss = criterion(logits, labels)`.
4. Backward Pass & Step:
  * `loss.backward()`: Calculate the gradients ($\frac{\partial Loss}{\partial \text{Weights}}$).
  * `optimizer.step()`: Update the parameters using the calculated gradients.


## üìù Exercise 4: Implementing the Training Core
Your task is to set up the necessary components and simulate one step of the training loop for the MNIST_Classifier you just created.

Instructions: <br>
1. Reuse your MNIST_Classifier class and instantiate the model.
2. Define the Loss Function (appropriate for 10-class classification) and the Optimizer (using Adam with a learning rate of 0.001).
3. Create a dummy input batch (input_data, size $32 \times 784$) and a dummy label tensor (labels, size $32$). The labels should be integers between 0 and 9.
4. Implement the Four Steps of the training loop:
    * Zero gradients.
    * Forward pass to get logits.
    * Calculate loss.
    * Backward pass and optimizer step.
5. Print the initial loss

In [None]:
model = MNIST_Classifier(Input, hidden_layer_1, hidden_layer_2, output_size)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

dummy_input_batch = torch.rand(32, 784)
dummy_label_tensor = torch.randint(0, 10, (32,))

optimizer.zero_grad()

logits = model(dummy_input_batch)
loss = criterion(logits, dummy_label_tensor)

loss.backward()

optimizer.step()

print(f'loss: {loss.item()}')

loss: 2.285759687423706


## Module 5: Data Handling (Dataset and DataLoader)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

# input data
# 100 samples, 4 features each
X_features = np.random.rand(100, 4).astype(np.float32)
y_label = np.random.randint(0, 2, 100)

# Define Custom Dataset
class CustomTensorDataset():
  def __init__(self, features, labels):
    # Convert NumPy arrays to PyTorch Tensors
    self.X = torch.tensor(features)
    self.Y = torch.tensor(labels, dtype=torch.long)

  def __len__(self):
    # The total size of the dataset
    return len(self.Y)

  def __getitem__ (self, idx):
    # Returns one (feature, label) pair at the given index
    return self.X[idx], self.Y[idx]

# Instantiate Dataset and DataLoader
dataset = CustomTensorDataset(X_features, y_label)

# DataLoader setup:
# batch_size=16: 16 samples per batch
# shuffle=True: data is re-shuffled after every epoch
# num_workers=2: load data using 2 parallel processes (recommended for real data)
train_loader = DataLoader(
 dataset,
 batch_size = 16,
 shuffle = True,
 num_workers = 0 # Set to 0 for simple Colab/Jupyter for stability, use >0 in production
)

# Demonstrate Iteration
print(f"Total samples: {len(dataset)}")
print(f"Number of batches (100 samples / 16 batch size): {len(train_loader)}")


# Iterate over the DataLoader (simulating one epoch of training)
for batch_idx, (features, labels) in enumerate(train_loader):
    print(f"\n--- Batch {batch_idx+1} ---")
    print(f"Features batch shape: {features.shape}")
    print(f"Labels batch shape: {labels.shape}")
    if batch_idx == 1:
        break # Stop after 2 batches for brevity



Total samples: 100
Number of batches (100 samples / 16 batch size): 7

--- Batch 1 ---
Features batch shape: torch.Size([16, 4])
Labels batch shape: torch.Size([16])

--- Batch 2 ---
Features batch shape: torch.Size([16, 4])
Labels batch shape: torch.Size([16])


## üìù Exercise 5: Preparing Data for MNIST
Your task is to set up the DataLoader for the MNIST training set. <br>
Instructions: <br>
1. Import the necessary classes (`datasets` and `transforms` from `torchvision`).
2. Define a transformation to convert the PIL image into a PyTorch Tensor.
3. Load the Training Data from `torchvision.datasets.MNIST`.
4. Define the DataLoader with the following settings: <br>
  * `batch_size`: 64
  * `shuffle`: True
  * `num_workers` = 0
5. Iterate through the first batch of the `DataLoader` and print the shape of the image batch and the shape of the label batch.

In [None]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Define your transform for coverting PIL image to tensor
transform = transforms.ToTensor()

# Load the Minist training data
minst_traindata = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# define the DataLoader
train_loader = torch.utils.data.DataLoader(minst_traindata, batch_size=64, shuffle=True, num_workers=0)

# iterate through the first batch
for img, labels in train_loader:
  print(f"image batch shape: {img.shape}")
  print(f"label batch shape: {labels.shape}")
  break


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9.91M/9.91M [00:00<00:00, 56.4MB/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 28.9k/28.9k [00:00<00:00, 1.63MB/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1.65M/1.65M [00:00<00:00, 14.4MB/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4.54k/4.54k [00:00<00:00, 9.40MB/s]

image batch shape: torch.Size([64, 1, 28, 28])
label batch shape: torch.Size([64])





# Phase 1 Capstone Project: Fashion-MNIST Classifier

#### Project Goal: Achieve at least 80% accuracy on the test set.
#### üìã Project Specifications
1. Data Setup (Using `torchvision`)
  * Load the Fashion-MNIST dataset for both training (`train=True`) and testing (`train=False`).
  * Apply the `transforms.ToTensor()` to both datasets.
  * Create a `DataLoader` for both the training set (use `batch_size=64`, `shuffle=True`) and the test set (use `batch_size=1000`, `shuffle=False`).

2. Model Definition (Reuse Your Network)
  * Reuse your `MNIST_Classifier` structure (`Input: 784, Hidden 1: 256, Hidden 2: 64, Output: 10`).
3. Training Setup:
  * Device: Check for GPU availability and move the model and data to the correct device (CPU or CUDA).
  * Loss: `nn.CrossEntropyLoss()`.
  * Optimizer: `torch.optim.Adam` with `lr=0.001`.
  * Epochs: Run for 5 epochs.
4. The Training and Testing Loops <br>
You need two functions/loops:
    1. `train_loop(dataloader, model, loss_fn, optimizer):`
        * Iterates over the training dataloader.
        * Performs the 4-step training process (zero grad, forward, backward, step).
        * Prints the average loss at the end of the epoch.
    2. `test_loop(dataloader, model, loss_fn)`:
        * Sets the model to evaluation mode (model.eval()) and disables gradient tracking (with torch.no_grad():).
        * Calculates the average loss and the overall accuracy (number of correct predictions / total samples).
        * CRITICAL STEP: Use torch.argmax(logits, dim=1) to convert the output logits into the predicted class indices.
        * Sets the model back to training mode (model.train()) afterward.
    

üíª **Task**
Write the complete, runnable Python script that accomplishes the entire training and testing process and reports the final accuracy.

Note: You must modify your existing MNIST_Classifier class to accept the device parameter and move layers to it in the` __init__ `method, or simply move the entire model to the device after instantiation `(model.to(device))`. The latter is simpler.

In [10]:
import torch
from torch import nn
import torch.optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [11]:
# Define Device
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

# Define Transformer
transform = transforms.ToTensor()

# Load MINIST Dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Create DataLoaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Define Model
class FashionMNIST_Classifier(nn.Module):
  def __init__(self, input_size, hidden_layer_size_1, hidden_layer_size_2, output_size):
    super(FashionMNIST_Classifier, self).__init__()
    self.layer1 = nn.Linear(input_size, hidden_layer_size_1)
    self.relu = nn.ReLU()
    self.layer2 = nn.Linear(hidden_layer_size_1, hidden_layer_size_2)
    self.layer_out = nn.Linear(hidden_layer_size_2, output_size)

  def forward(self, x):
    x = self.layer1(x)
    x = self.relu(x)
    x = self.layer2(x)
    x = self.relu(x)
    logit = self.layer_out(x)
    return logit

model = FashionMNIST_Classifier(784, 256, 64, 10).to(device)

# Define Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# Define Training Loop Function
def train_loop(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  model.train() # set model training
  running_loss = 0.0
  for batch, (X, y) in enumerate(dataloader):
    # Move data to the correct device
    X, y = X.to(device), y.to(device)

    optimizer.zero_grad()
    # Forward pass
    pred = model(X.view(X.size(0), -1))
    loss = loss_fn(pred, y)

    # backward pass
    loss.backward()

    # Optimize steps
    optimizer.step()

    running_loss += loss.item()
    if batch % 100 == 99:
      current = (batch + 1) * len(X)
      print(f"Loss: {running_loss / 100:.4f}  [{current}/{size}]")
      running_loss = 0.0


# Define Testing Loop Function
def test_loop (dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval() # Set model to evaluation mode

  test_loss = 0
  correct = 0

  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)


      pred = model(X.view(X.size(0), -1)) # Flatten the image
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()

  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
  model.train()


# Run Training and Testing
epochs = 5
for t in range(epochs):
  print(f"Epoch {t+1}\n-------------------------------")
  train_loop(train_loader, model, criterion, optimizer)
  test_loop(test_loader, model, criterion)
  print("Done !")

Epoch 1
-------------------------------
Loss: 1.0373  [6400/60000]
Loss: 0.5938  [12800/60000]
Loss: 0.5184  [19200/60000]
Loss: 0.4974  [25600/60000]
Loss: 0.4799  [32000/60000]
Loss: 0.4644  [38400/60000]
Loss: 0.4422  [44800/60000]
Loss: 0.4202  [51200/60000]
Loss: 0.4197  [57600/60000]
Test Error: 
 Accuracy: 84.8%, Avg loss: 0.424610 

Done !
Epoch 2
-------------------------------
Loss: 0.3982  [6400/60000]
Loss: 0.3976  [12800/60000]
Loss: 0.3939  [19200/60000]
Loss: 0.3820  [25600/60000]
Loss: 0.3596  [32000/60000]
Loss: 0.3706  [38400/60000]
Loss: 0.3726  [44800/60000]
Loss: 0.3601  [51200/60000]
Loss: 0.3445  [57600/60000]
Test Error: 
 Accuracy: 85.5%, Avg loss: 0.414324 

Done !
Epoch 3
-------------------------------
Loss: 0.3527  [6400/60000]
Loss: 0.3415  [12800/60000]
Loss: 0.3316  [19200/60000]
Loss: 0.3314  [25600/60000]
Loss: 0.3494  [32000/60000]
Loss: 0.3323  [38400/60000]
Loss: 0.3283  [44800/60000]
Loss: 0.3385  [51200/60000]
Loss: 0.3389  [57600/60000]
Test Erro