## PyTorch for Deep Learning Fundamentals (Module 01)

In [None]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

### Tensors

In [None]:
# Explicit dtypes: 32-bit float (often PyTorch's default for floating-point data)
# and 64-bit integer
float_tensor = torch.tensor((1.0, 2.0, 3.0), dtype=torch.float)
int_tensor = torch.tensor((1, 2, 3), dtype=torch.long)

In [None]:
import numpy as np
# Wrap a NumPy array as a tensor; the tensor keeps the array's dtype (float64 here)
nump_array = np.array((1.0, 2.0, 3.0), dtype=np.float64)
tensor_from_numpy = torch.from_numpy(nump_array)

print(tensor_from_numpy)

tensor([1., 2., 3.], dtype=torch.float64)


In [None]:
# Factory functions: the argument is the shape of the tensor to create
zeros = torch.zeros((3, 3))   # 3x3, every entry 0
ones = torch.ones((2, 4))     # 2x4, every entry 1
random = torch.rand((5, 5))   # 5x5, values drawn uniformly from [0, 1)

In [None]:
# Integers 0 through 9 (the end value, 10, is excluded)
range_tensor = torch.arange(10)
print("A range:", range_tensor)

A range: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


In [None]:
# 2x3 integer matrix used by the reshape and transpose examples
x = torch.tensor([1, 2, 3, 4, 5, 6]).reshape(2, 3)

In [None]:
# Reshape: the same six values, taken row by row, laid out as 3 rows x 2 columns
reshaped = torch.reshape(x, (3, 2))
reshaped

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [None]:
# Transpose: swap dimension 0 (rows) with dimension 1 (columns)
transposed = torch.transpose(x, 0, 1)
transposed

tensor([[1, 4],
        [2, 5],
        [3, 6]])

In [None]:
# Six samples with one feature each: start from a flat vector, then add the feature axis
distances = torch.tensor([3.0, 7.0, 12.0, 18.0, 22.0, 28.0], dtype=torch.float32).unsqueeze(1)
print(distances.shape)  # (batch size, number of features)

torch.Size([6, 1])


In [None]:
# A 1-D tensor holding a single value (shape [1]) -- not a 0-D scalar
single_value = torch.tensor([25.0])

# Insert a leading batch dimension: shape [1] -> [1, 1]
with_batch = torch.unsqueeze(single_value, dim=0)

print(single_value.shape)
print(with_batch.shape)

torch.Size([1])
torch.Size([1, 1])


### Building a simple neural network

In [None]:
import torch                    # PyTorch Core functionality
import torch.nn as nn           # Components for building neural networks
import torch.optim as optim     # Tools for training

In [None]:
# Inputs: delivery distance in miles, one sample per row
distances = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32).reshape(-1, 1)

# Targets: delivery time in minutes for each distance above
times = torch.tensor([6.96, 12.11, 16.77, 22.21], dtype=torch.float32).reshape(-1, 1)

In [None]:
# Seed PyTorch's random number generator so the layer's randomly initialised
# weight and bias -- and therefore the losses printed during training -- are
# repeatable from one run to the next. The value 42 is arbitrary.
torch.manual_seed(42)

# Define the model: a single linear layer mapping 1 input feature to 1 output
model = nn.Sequential(nn.Linear(1, 1))

# Define the loss function (mean squared error) and the optimizer (plain SGD)
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [None]:
# Training loop: 500 passes over the whole dataset
for epoch in range(500):
    # Forward pass: predict a delivery time for every distance
    outputs = model(distances)
    # Measure how far the predictions are from the true times
    loss = loss_function(outputs, times)

    # Reset the gradients to 0 so they do not accumulate across epochs
    optimizer.zero_grad()
    # Backward pass: compute fresh gradients of the loss
    loss.backward()
    # Adjust the model's parameters using those gradients
    optimizer.step()

    # Report progress on every 50th epoch
    if (epoch + 1) % 50 == 0:
        print(f"Epoch {epoch + 1}: Loss = {loss.item()}")

Epoch 50: Loss = 0.025436047464609146
Epoch 100: Loss = 0.025418126955628395
Epoch 150: Loss = 0.025417400524020195
Epoch 200: Loss = 0.025417296215891838
Epoch 250: Loss = 0.025417478755116463
Epoch 300: Loss = 0.02541729249060154
Epoch 350: Loss = 0.025417357683181763
Epoch 400: Loss = 0.025417357683181763
Epoch 450: Loss = 0.025417357683181763
Epoch 500: Loss = 0.025417357683181763


In [None]:
# Inference only: no_grad() turns off gradient tracking for the forward pass
with torch.no_grad():
    test_distance = torch.tensor([[7.0]], dtype=torch.float32)
    predicted_time = model(test_distance)
    print(f"Predicted time for 7 miles: {predicted_time.item():.1f} minutes")

# Jobs predicted to take more than 30 minutes are turned down
decision = (
    "\nDecision: Do NOT take the job. You will likely be late."
    if predicted_time.item() > 30
    else "\nDecision: Take the job. You can make it!"
)
print(decision)

Predicted time for 7 miles: 37.2 minutes

Decision: Do NOT take the job. You will likely be late.


In [None]:
import numpy as np

# Access the first (and only) layer in the sequential model
layer = model[0]

# Read the learned parameters as NumPy arrays.
# detach() returns a tensor that is cut off from the autograd graph; it is the
# recommended alternative to the legacy `.data` attribute. cpu() is a no-op for
# a model already on the CPU and lets the conversion work if the model has been
# moved to an accelerator.
weights = layer.weight.detach().cpu().numpy()
bias = layer.bias.detach().cpu().numpy()

print(f"Weight: {weights}")
print(f"Bias: {bias}")

Weight: [[5.041001]]
Bias: [1.9099984]


In [None]:
# Combined dataset: bikes for short distances, cars for longer ones.
# Values are listed flat and then reshaped into a (samples, 1) column.
new_distances = torch.tensor([
    1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5,
    6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5,
    11.0, 11.5, 12.0, 12.5, 13.0, 13.5, 14.0, 14.5, 15.0, 15.5,
    16.0, 16.5, 17.0, 17.5, 18.0, 18.5, 19.0, 19.5, 20.0,
], dtype=torch.float32).reshape(-1, 1)

# Delivery time in minutes for each distance above, in the same order
new_times = torch.tensor([
    6.96, 9.67, 12.11, 14.56, 16.77, 21.7, 26.52, 32.47, 37.15, 42.35,
    46.1, 52.98, 57.76, 61.29, 66.15, 67.63, 69.45, 71.57, 72.8, 73.88,
    76.34, 76.38, 78.34, 80.07, 81.86, 84.45, 83.98, 86.55, 88.33, 86.83,
    89.24, 88.11, 88.16, 91.77, 92.27, 92.13, 90.73, 90.39, 92.98,
], dtype=torch.float32).reshape(-1, 1)

In [None]:
# Score the already-trained linear model on the combined dataset.
# Only the forward pass needs no_grad(); the loss is computed from its result.
with torch.no_grad():
    predictions = model(new_distances)
new_loss = loss_function(predictions, new_times)
print(f"Loss on new, combined data: {new_loss.item():.2f}")

Loss on new, combined data: 176.32


### Modeling Non-Linear Patterns with Activation Functions

In [None]:
# Combined dataset: bikes for short distances, cars for longer ones.
# The distances form a regular grid: 1.0 to 20.0 miles in 0.5-mile steps (39 samples).
distances = torch.arange(1.0, 20.5, 0.5, dtype=torch.float32).unsqueeze(1)

# Delivery time in minutes for each distance, in the same order
times = torch.tensor([
    6.96, 9.67, 12.11, 14.56, 16.77, 21.7, 26.52, 32.47, 37.15, 42.35,
    46.1, 52.98, 57.76, 61.29, 66.15, 67.63, 69.45, 71.57, 72.8, 73.88,
    76.34, 76.38, 78.34, 80.07, 81.86, 84.45, 83.98, 86.55, 88.33, 86.83,
    89.24, 88.11, 88.16, 91.77, 92.27, 92.13, 90.73, 90.39, 92.98,
], dtype=torch.float32).unsqueeze(1)

Apply normalization. This is a standard technique that makes the training process more stable and effective by adjusting the scale of the data. This adjustment helps prevent large distance values from dominating the learning process and keeps gradients stable during training.

* Calculate the mean and standard deviation of the `distances` and `times` tensors.
* Standardize each tensor with its own mean and standard deviation (subtract the mean, then divide by the standard deviation). This creates new normalized tensors named `distances_norm` and `times_norm`.


In [None]:
def _standardize(values):
    """Return ``(standardized, mean, std)`` for a tensor.

    ``standardized`` is ``(values - mean) / std``, where ``mean`` and ``std``
    are computed over all elements of ``values``.
    """
    mean = values.mean()
    std = values.std()
    return (values - mean) / std, mean, std


# Standardize the inputs; keep the statistics so new inputs can be scaled the same way
distances_norm, distances_mean, distances_std = _standardize(distances)

# Standardize the targets; keep the statistics so predictions can be converted back
times_norm, times_mean, times_std = _standardize(times)

Build a model with one hidden layer followed by a ReLU activation function. This structure is what gives your model the ability to learn non-linear relationships.

* nn.Linear(1, 3): This is your first hidden layer. It consists of three neurons, each receiving one input feature (the normalized distance). This layer transforms the single input value into three separate values.
* nn.ReLU() applies the ReLU activation function to the output of each of the three neurons from the hidden layer. This is the crucial non-linear step that allows your model to create "bends" and learn curves instead of just straight lines.
* nn.Linear(3, 1): This is your output layer. It takes the three activated values from the previous step as its input and combines them to produce a single final output, which is your predicted (normalized) delivery time.

This creates a neural network with 1 hidden layer containing 3 neurons.

In [None]:
# Seed PyTorch's random number generator so the random weight initialisation --
# and hence the training run that follows -- is repeatable. Without it, every
# run starts from different weights and can end at a different fit.
# The value 42 is arbitrary.
torch.manual_seed(42)

# 1 input feature -> 3 hidden neurons -> ReLU -> 1 output
model = nn.Sequential(
    nn.Linear(1, 3),  # hidden layer: one input expanded to three values
    nn.ReLU(),        # non-linearity applied to each hidden value
    nn.Linear(3, 1),  # output layer: three values combined into one prediction
)

In [None]:
# Plain SGD over all model parameters, minimising the mean squared error
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_function = nn.MSELoss()

In [None]:
# Training loop: 3000 passes over the normalized dataset
for epoch in range(3000):
    # Forward pass on the normalized distances
    outputs = model(distances_norm)
    # Compare the predictions with the normalized times
    loss = loss_function(outputs, times_norm)

    # Clear the gradients left over from the previous epoch
    optimizer.zero_grad()
    # Backward pass: compute gradients of the loss
    loss.backward()
    # Apply the parameter update
    optimizer.step()

    # Report progress on every 250th epoch
    if (epoch + 1) % 250 == 0:
        print(f"Epoch {epoch + 1}: Loss = {loss.item()}")

Epoch 250: Loss = 0.2275935262441635
Epoch 500: Loss = 0.08463654667139053
Epoch 750: Loss = 0.03244657814502716
Epoch 1000: Loss = 0.01894374191761017
Epoch 1250: Loss = 0.01589258201420307
Epoch 1500: Loss = 0.01382768340408802
Epoch 1750: Loss = 0.012833425775170326
Epoch 2000: Loss = 0.011636992916464806
Epoch 2250: Loss = 0.01087852381169796
Epoch 2500: Loss = 0.010381869971752167
Epoch 2750: Loss = 0.009647107683122158
Epoch 3000: Loss = 0.00894041359424591


In [None]:
# Delivery distance, in miles, to run through the trained model
distance_to_predict = 5.1

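* Normalize the new distance using the mean and standard deviation of the training distances, so it is on the same scale the model was trained on.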
* After the model provides its prediction, you must de-normalize the output. This converts the prediction from its normalized scale back into an understandable value in minutes.
* Finally, the code uses this actual predicted time to run the decision logic. For this prediction, assume your company now promises deliveries within 45 minutes (instead of 30 minutes from Lab 1) and wants to know which vehicle to use.

In [None]:
# Inference only: run the forward pass without gradient tracking
with torch.no_grad():
    # Put the raw distance on the same scale as the training inputs
    distance_tensor = torch.tensor([[distance_to_predict]], dtype=torch.float32)
    new_distance_norm = (distance_tensor - distances_mean) / distances_std

    # The model returns a time on the normalized scale
    predicted_time_norm = model(new_distance_norm)

    # Undo the standardization to get the time in minutes
    predicted_time_actual = (predicted_time_norm * times_std) + times_mean

# --- Decision Making Logic ---
print(f"Prediction for a {distance_to_predict}-mile delivery: {predicted_time_actual.item():.1f} minutes")

if predicted_time_actual.item() > 45:
    # The delivery does not fit in the 45-minute window
    print("\nDecision: Do NOT promise the delivery in under 45 minutes.")
elif distance_to_predict <= 3:
    # Feasible, and short enough for a bike
    print(f"\nDecision: Yes, delivery is possible. Since the distance is {distance_to_predict} miles (<= 3 miles), use a bike.")
else:
    # Feasible, but longer than 3 miles
    print(f"\nDecision: Yes, delivery is possible. Since the distance is {distance_to_predict} miles (> 3 miles), use a car.")

Prediction for a 5.1-mile delivery: 37.5 minutes

Decision: Yes, delivery is possible. Since the distance is 5.1 miles (> 3 miles), use a car.
