In [1]:
import torch
# Leaf tensor of random values; requires_grad=True makes autograd record
# every operation applied to it.
x = torch.randn((3, 3), requires_grad=True)

# Element-wise polynomial of x.
y = x**2 + 3*x + 5

# backward() needs a single-element tensor to start from, so collapse y with
# a sum first. The sum weights every element by 1, so x.grad ends up holding
# the element-wise derivative 2*x + 3.
total = y.sum()
total.backward()
print(x.grad)

tensor([[0.7228, 4.7963, 5.2345],
        [2.8704, 3.6979, 0.5559],
        [1.0244, 3.8209, 1.2268]])


In [2]:
import torch

# 1. Create a tensor 'x' with the value 2.0 and enable gradient tracking.
x = torch.tensor(2.0, requires_grad=True)

# 2. Define a simple quadratic function: y = x^2 + 2*x + 1
y = x**2 + 2*x + 1

# 3. Compute the gradient of 'y' with respect to 'x'.
#    Before doing this, what do you expect the gradient to be based on calculus?
#    (The derivative of x^2 + 2x + 1 is 2x + 2. At x=2, the gradient should be 2*(2) + 2 = 6)

# 4. Call the .backward() method on 'y' to compute the gradient.
y.backward()

# 5. Access the computed gradient of 'x' using the .grad attribute.
print("Gradient of x:", x.grad)

# 6. Create another tensor 'w' with a random value and enable gradient tracking.
#    Note that torch.randn(1) produces a 1-element tensor of shape (1,), not a 0-dim scalar.
w = torch.randn(1, requires_grad=True)

# 7. Create a tensor 'b' with the value -1.0 and enable gradient tracking.
b = torch.tensor(-1.0, requires_grad=True)

# 8. Define a simple linear model output: prediction = w * x + b
#    Broadcasting against 'w' gives 'prediction' the shape (1,).
prediction = w * x + b

# 9. Define a target value: target = 5.0
target = torch.tensor(5.0)

# 10. Calculate the mean squared error (MSE).
#     The squared error has the same shape as 'prediction', i.e. (1,). Reducing it with
#     .mean() makes 'loss' a true 0-dim scalar and keeps this line correct if 'w' ever
#     holds more than one element. With a single element the mean leaves the value and
#     the gradients unchanged.
loss = ((prediction - target)**2).mean()

# 11. Backpropagate from the loss.
#     .backward() without arguments requires a single-element output; 'loss' is a scalar
#     thanks to the reduction above.
#     NOTE: 'x' still has requires_grad=True and takes part in 'prediction', so this call
#     also accumulates d(loss)/dx into x.grad on top of the value printed in step 5.
loss.backward()

# 12. Access and print the gradients of 'w' and 'b'.
#     Expected values: d(loss)/dw = 2 * (prediction - target) * x and
#     d(loss)/db = 2 * (prediction - target).
print("Gradient of w:", w.grad)
print("Gradient of b:", b.grad)

# 13. Briefly explain in the comments what the .backward() method does and why it's crucial for training neural networks.
# Your explanation here:
# The .backward() method in PyTorch calculates the gradient of a tensor with respect to all the tensors that have requires_grad=True and were involved in the computation of that tensor.
# It traverses the computational graph backwards from the tensor on which it's called, applying the chain rule of calculus to compute these gradients.
# This is crucial for training neural networks because it allows us to determine how much each parameter (weight and bias) contributes to the loss.
# These gradients are then used by optimization algorithms (like gradient descent) to update the parameters in a direction that reduces the loss, thus improving the network's performance over iterations.

Gradient of x: tensor(6.)
Gradient of w: tensor([-22.5488])
Gradient of b: tensor(-11.2744)


## Building Efficient Data Pipelines with PyTorch

In [3]:
import torch
from torch.utils.data import Dataset, random_split

# 1. Sample inputs: a (100, 5) tensor of random values, i.e. 100 samples
#    with 5 features each.
X = torch.randn((100, 5))

# 2. Sample labels: a (100,) tensor of random values, one label per sample.
y = torch.randn((100,))

# 3. Define a custom Dataset class called 'MySampleDataset' that inherits from torch.utils.data.Dataset.
class MySampleDataset(Dataset):
    """Map-style dataset that pairs each feature entry with its label.

    Both containers are stored as given and indexed with the same key, so
    they must report the same length.
    """

    def __init__(self, features, labels):
        """Store the features and labels after checking they line up.

        Args:
            features: Indexable container of inputs, one entry per sample.
            labels: Indexable container of targets, one entry per sample.

        Raises:
            AssertionError: If the two containers differ in length.
        """
        super().__init__()
        # Fail fast on misaligned data instead of at some later index lookup.
        assert len(features) == len(labels), "Number of samples must match!"
        self.features = features
        self.labels = labels

    def __len__(self) -> int:
        """Return the number of samples, taken from the features container."""
        return len(self.features)

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

# 4. Wrap the sample tensors X and y in a MySampleDataset.
dataset = MySampleDataset(features=X, labels=y)

# 5. Report how many samples the dataset holds; len() dispatches to __len__().
print("Dataset size:", len(dataset))

# 6. Index the dataset to fetch the first (input, label) pair, then show the
#    shape of each part and the pair itself.
first_sample = dataset[0]
first_sample_input, first_sample_label = first_sample
print("First sample input shape:", first_sample_input.shape)
print("First sample label shape:", first_sample_label.shape)
print("First sample:", first_sample)

# 7. Randomly split the dataset 80/20 into training and testing subsets.
#    The test size is computed as the remainder so the two lengths always
#    add up to the full dataset, as random_split requires.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# 8. Report the size of each subset.
print("Training dataset size:", len(train_dataset))
print("Testing dataset size:", len(test_dataset))

Dataset size: 100
First sample input shape: torch.Size([5])
First sample label shape: torch.Size([])
First sample: (tensor([-0.6893, -0.9583,  0.4137,  1.5224, -0.4382]), tensor(1.3775))
Training dataset size: 80
Testing dataset size: 20
