In [1]:
import torch
import torch.nn as nn # Neural network modules
import torch.optim as optim # Optimization algorithms

# 1. Defining a Simple Model

In [2]:
# PyTorch models are conventionally written as subclasses of nn.Module
class SimpleLinearModel(nn.Module):
    """A model made of a single fully connected (linear) layer."""

    def __init__(self, input_size, output_size):
        """Create the one linear layer this model consists of.

        Args:
            input_size: Number of features in each input sample.
            output_size: Number of values produced for each sample.
        """
        super().__init__()  # nn.Module must be initialised before layers are assigned
        # nn.Linear computes: output = input @ weight.T + bias
        self.linear_layer = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Run the forward pass: apply the linear layer to x and return the result."""
        return self.linear_layer(x)

## Instantiate the Model

In [3]:
# Problem dimensions: every sample has 10 input features and the model
# predicts a single value per sample (e.g., for regression).
input_features, output_classes = 10, 1

# Build the model; printing an nn.Module lists the layers it contains.
model = SimpleLinearModel(input_size=input_features, output_size=output_classes)
print(model)

SimpleLinearModel(
  (linear_layer): Linear(in_features=10, out_features=1, bias=True)
)


## Inspect Model Parameters

In [4]:
# nn.Module automatically tracks the parameters of the layers assigned to it,
# so they can be listed together with their names.
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue  # only trainable parameters are shown
    print(f"Parameter: {name}")
    print(f"Shape: {param.shape}")
    # detach() gives a view of the values outside the autograd graph; it is the
    # recommended replacement for the legacy `.data` attribute.
    print(f"Values: {param.detach()}")

# Note: These parameters are initialized randomly.
# The goal of training is to adjust these parameters.

Parameter: linear_layer.weight
Shape: torch.Size([1, 10])
Values: tensor([[ 0.0329, -0.1560, -0.2677, -0.1479,  0.0884, -0.2830, -0.2560, -0.2099,
          0.1561, -0.3042]])
Parameter: linear_layer.bias
Shape: torch.Size([1])
Values: tensor([-0.1320])


# 2. Defining a Loss Function

In [5]:
# A loss function scores how far the model output is from the target values.
# Mean Squared Error (MSE) is a common choice for regression tasks; "mean"
# (the default reduction) averages the squared errors over all elements.
loss_function = nn.MSELoss(reduction="mean")
print(loss_function)

MSELoss()


In [6]:
# Fabricate a small batch of predictions and matching targets to try the loss on.
batch_size = 4  # number of samples in the dummy batch
dummy_shape = (batch_size, output_classes)
dummy_output = torch.randn(dummy_shape)  # stands in for the model's predictions
dummy_target = torch.randn(dummy_shape)  # stands in for the true values of the same samples

In [7]:
# Evaluate the loss function on the dummy batch and show the inputs next to the result.
loss = loss_function(dummy_output, dummy_target)
print("Example Calculation:")
print(f"Dummy Model Output:\n{dummy_output}")
print(f"Dummy Target Values:\n{dummy_target}")
print(f"Calculated MSE Loss: {loss.item()}") # .item() extracts the value as a plain Python number

Example Calculation:
Dummy Model Output:
tensor([[ 1.6352],
        [-1.7905],
        [ 0.5569],
        [ 0.9428]])
Dummy Target Values:
tensor([[-0.1020],
        [-0.2389],
        [-0.3286],
        [-0.5212]])
Calculated MSE Loss: 2.0881645679473877


# 3. Defining an Optimizer

In [8]:
# An optimizer adjusts the model's parameters using the gradients that autograd computes.
# It needs two things: WHICH parameters to optimize, and a learning rate (step size).
learning_rate = 1e-2

In [9]:
# Plain Stochastic Gradient Descent (SGD), a common basic optimizer.
# It is given model.parameters() (the tensors it will update) and the step size.
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.01
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)


## How these pieces will interact

1. Get input data, pass through model: output = model(input_data)
2. Calculate loss: loss = loss_function(output, target_data)
3. Calculate gradients: loss.backward()  <-- Uses autograd!
4. Update parameters: optimizer.step()   <-- Uses gradients stored in param.grad
5. Zero gradients for next iteration: optimizer.zero_grad()

# 4. Generate Sample Data

In [10]:
# We need some data for the model to learn from.
# Let's create synthetic data where Y is linearly related to X plus some noise.
# Target relationship: Y = X @ w.T + b + noise, with every one of the 10 weights equal to 2 and b = 1

In [11]:
# The training data is created as plain tensors: only the model parameters
# need gradients, not the data itself.
num_samples = 100
X_train = 5 * torch.randn(num_samples, input_features)  # one row per sample, one column per feature

# Ground-truth parameters used to generate the targets.
true_weights = torch.tensor([[2.0] * input_features])  # a single row holding a weight of 2 per feature
true_bias = torch.tensor([[1.0]])

# Targets follow the linear relationship plus random noise.
noise = torch.randn(num_samples, output_classes)
Y_train = X_train @ true_weights.T + true_bias + noise

In [12]:
# Confirm the shapes of the generated inputs and targets.
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of Y_train: {Y_train.shape}")
# Note: Our model starts with random weights/bias. Training aims to bring every weight close to 2 and the bias close to 1.

Shape of X_train: torch.Size([100, 10])
Shape of Y_train: torch.Size([100, 1])


# 5. The Training Loop

In [13]:
# This is where the learning happens!
# Every epoch runs one forward/backward pass over the whole of X_train
# followed by a single parameter update.
num_epochs = 100 # Number of times we iterate through the entire dataset

for epoch in range(num_epochs):
    # === Core Training Loop ===

    # 1. Forward Pass: Pass the data through the model
    # This yields the model's current predictions for all of X_train.
    outputs = model(X_train)

    # 2. Calculate Loss: Compare model outputs to true values
    loss = loss_function(outputs, Y_train)

    # 3. Backward Pass: Calculate gradients of the loss w.r.t. model parameters.
    # This uses the autograd engine we learned about.
    loss.backward()

    # 4. Optimizer Step: Update model parameters
    # The optimizer uses the gradients computed in backward() and the learning rate
    optimizer.step()

    # 5. Zero Gradients: Clear the gradients that were just used.
    # If we don't zero, gradients accumulate across iterations.
    optimizer.zero_grad()

    # === End Core Training Steps ===

    # Print progress periodically to see if loss is decreasing.
    # The reported loss was computed before this epoch's parameter update.
    if (epoch+1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/100], Loss: 1.7432
Epoch [20/100], Loss: 1.1675
Epoch [30/100], Loss: 1.0902
Epoch [40/100], Loss: 1.0368
Epoch [50/100], Loss: 0.9995
Epoch [60/100], Loss: 0.9733
Epoch [70/100], Loss: 0.9550
Epoch [80/100], Loss: 0.9421
Epoch [90/100], Loss: 0.9331
Epoch [100/100], Loss: 0.9268


# 6. Check Results (Optional but Recommended)

In [14]:
# Let's see how well the model learned the parameters by printing them
# next to the values that were used to generate the data.
print("Parameters learned by the model:")
for name, param in model.named_parameters():
    if param.requires_grad:
        # detach() returns a tensor outside the autograd graph, which numpy()
        # requires. It replaces the legacy `.data` attribute, and no
        # torch.no_grad() block is needed because nothing is computed here.
        print(f"{name}: {param.detach().numpy()}")
print(f"(Target parameters were weights: {true_weights.numpy()}, bias: {true_bias.numpy()})")

Parameters learned by the model:
linear_layer.weight: [[1.9923012 1.9886528 2.031799  1.9995862 1.9901197 2.0149977 1.9897474
  1.985668  2.0027409 1.9881456]]
linear_layer.bias: [0.74791694]
(Target parameters were weights: [[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]], bias: [[1.]])
