In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

1. Implement SoftMax Function from Scratch & Compare with PyTorch

In [6]:
def softmax_numpy(x, axis=0):
    """Compute a numerically stable softmax of ``x`` along ``axis``.

    Args:
        x: Array-like of scores (anything ``np.asarray`` accepts).
        axis: Axis along which the outputs are normalized to sum to 1.
            Defaults to 0, the axis this function has always normalized over.

    Returns:
        np.ndarray with the same shape as ``x`` whose entries along ``axis``
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shift by the maximum along the normalization axis, not the global
    # maximum: with a global shift, a slice whose values are all far below the
    # global max underflows to zeros and the division becomes 0/0 = NaN.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    # keepdims lets the denominator broadcast back for any choice of axis.
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# Reference scores fed to both implementations
input_data = np.array([2.0, 1.0, 0.1])

# Hand-written NumPy implementation
numpy_result = softmax_numpy(input_data)

# PyTorch built-in, converted back to a NumPy array for the comparison
input_tensor = torch.tensor(input_data)
torch_result = torch.nn.functional.softmax(input_tensor, dim=0).numpy()

# Show both results and whether they agree within floating-point tolerance
results_match = np.allclose(numpy_result, torch_result)
print("NumPy SoftMax:", numpy_result)
print("PyTorch SoftMax:", torch_result)
print("Are they equal?", results_match)

NumPy SoftMax: [0.65900114 0.24243297 0.09856589]
PyTorch SoftMax: [0.65900114 0.24243297 0.09856589]
Are they equal? True


2. Understanding Cross Entropy Loss

In [None]:
def binary_cross_entropy(y_pred, y_true, epsilon=1e-12):
    """Element-wise binary cross-entropy between predictions and targets.

    Args:
        y_pred: Predicted probability (scalar or array-like) in [0, 1].
        y_true: Ground-truth label(s), same shape as / broadcastable to
            ``y_pred``.
        epsilon: Predictions are clipped to [epsilon, 1 - epsilon] so that
            neither logarithm is evaluated at 0.

    Returns:
        The per-element loss ``-(y*log(p) + (1-y)*log(1-p))``; no reduction
        (mean/sum) is applied.
    """
    # Coerce to float64 arrays: plain lists would fail on `1 - y_true`, and in
    # lower precision (e.g. float32) `1 - epsilon` rounds to 1.0, which would
    # defeat the clip and let log(0) through.
    y_pred = np.asarray(y_pred, dtype=float)
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.clip(y_pred, epsilon, 1. - epsilon)  # Avoid log(0)
    return -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

# Every (prediction, ground truth) pairing of the two binary labels
cases = [(pred, gt) for pred in (0, 1) for gt in (0, 1)]

# Print the loss for each pairing
for pred, gt in cases:
    loss = binary_cross_entropy(pred, gt)
    print(f"Pred: {pred}, GT: {gt}, Loss: {loss}")

Pred: 0, GT: 0, Loss: 9.999778782803785e-13
Pred: 0, GT: 1, Loss: 27.631021115928547
Pred: 1, GT: 0, Loss: 27.63104323789336
Pred: 1, GT: 1, Loss: 9.999778782803785e-13


3. Understanding Adam Optimizer

In [10]:
# Define a simple neural network model
class SimpleNN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_dim: Number of features per sample after flattening
            (default 28 * 28).
        hidden_dim: Width of the hidden layer (default 128).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, input_dim=28 * 28, hidden_dim=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Flatten each sample and return raw logits of shape (batch, num_classes)."""
        # flatten keeps the batch dimension and, unlike view, also works when
        # the input tensor is not contiguous in memory.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.relu(x)
        return self.fc2(x)

# One model instance; every optimizer below is built over its parameters
model = SimpleNN()

# Learning rates to try, from largest to smallest
learning_rates = [0.1, 0.01, 0.001, 0.0001]

# Map each learning rate to the Adam optimizer configured with it
optimizers = {}

for lr in learning_rates:
    optimizer = optimizers[lr] = optim.Adam(model.parameters(), lr=lr)
    print(f"Created Adam optimizer with learning rate: {lr}")

Created Adam optimizer with learning rate: 0.1
Created Adam optimizer with learning rate: 0.01
Created Adam optimizer with learning rate: 0.001
Created Adam optimizer with learning rate: 0.0001


4. Training an MNIST Classifier

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing pipeline: convert to a tensor, then normalize with mean 0.5 / std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST train and test splits, stored under ./data
mnist_kwargs = dict(root='./data', transform=transform, download=True)
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# Batch iterators: shuffle only the training data
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the neural network
class SimpleNN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_dim: Number of features per sample after flattening
            (default 28 * 28).
        hidden_dim: Width of the hidden layer (default 128).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, input_dim=28 * 28, hidden_dim=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Flatten each sample and return raw logits of shape (batch, num_classes)."""
        # flatten keeps the batch dimension and, unlike view, also works when
        # the input tensor is not contiguous in memory.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.relu(x)
        return self.fc2(x)

# Initialize model, loss, and optimizer
model = SimpleNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: one full pass over train_loader per epoch
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0  # sum of per-sample losses seen this epoch
    correct = 0       # number of correctly classified samples this epoch
    total = 0         # number of samples seen this epoch

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        n_samples = labels.size(0)
        # criterion returns the mean over the batch; weight it by the batch
        # size so a smaller final batch does not skew the epoch average.
        total_loss += loss.item() * n_samples
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += n_samples

    # Both metrics are per-sample averages over the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/total:.4f}, Accuracy: {correct/total:.4f}")

100%|██████████| 9.91M/9.91M [00:01<00:00, 5.21MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 134kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.32MB/s]
100%|██████████| 4.54k/4.54k [00:00<?, ?B/s]


Epoch [1/5], Loss: 0.3904, Accuracy: 0.8851
Epoch [2/5], Loss: 0.2010, Accuracy: 0.9407
Epoch [3/5], Loss: 0.1411, Accuracy: 0.9582
Epoch [4/5], Loss: 0.1118, Accuracy: 0.9665
Epoch [5/5], Loss: 0.0935, Accuracy: 0.9714


5. Evaluating the Model on Test Data

In [1]:
# Requires `model`, `criterion` and `test_loader` from the training section;
# run that cell first, otherwise this cell fails with a NameError.
model.eval()
test_loss = 0.0  # sum of per-sample losses over the test set
correct = 0
total = 0

# No gradients are needed for evaluation
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)

        n_samples = labels.size(0)
        # criterion returns the mean over the batch; weight it by the batch
        # size so a smaller final batch does not skew the overall average.
        test_loss += loss.item() * n_samples

        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += n_samples

print(f"Test Loss: {test_loss/total:.4f}, Test Accuracy: {correct/total:.4f}")

NameError: name 'model' is not defined