In [28]:
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Fix PyTorch's global seed so repeated runs behave the same
torch.manual_seed(42)

# Per-sample transform: turn each image into a tensor
trans = transforms.ToTensor()

# FashionMNIST train/test splits, fetched into ../data when not already present
mnist_train = torchvision.datasets.FashionMNIST(
    root="../data",
    train=True,
    transform=trans,
    download=True,
)
mnist_test = torchvision.datasets.FashionMNIST(
    root="../data",
    train=False,
    transform=trans,
    download=True,
)

# Mini-batch iterators: training batches are shuffled, test batches keep dataset order
train_iter = data.DataLoader(
    mnist_train,
    batch_size=32,
    shuffle=True,
)
test_iter = data.DataLoader(
    mnist_test,
    batch_size=32,
    shuffle=False,
)

class SoftmaxClassifier:
    """Softmax (multinomial logistic) regression with hand-managed parameters.

    The model owns a weight matrix ``W`` of shape ``(num_inputs, num_outputs)``
    and a bias vector ``b`` of shape ``(num_outputs,)``. Both are plain tensors
    with ``requires_grad=True``; the caller performs the optimisation step
    (read ``.grad``, update the tensor, zero the gradient).
    """

    def __init__(self, num_inputs, num_outputs):
        """Create small random weights and a zero bias.

        Args:
            num_inputs: Number of features per flattened sample.
            num_outputs: Number of classes.
        """
        # Scale first, enable autograd afterwards: this keeps W a leaf tensor,
        # so backward() populates W.grad.
        self.W = torch.randn(num_inputs, num_outputs) * 0.01
        self.b = torch.zeros(num_outputs)
        self.W.requires_grad_(True)
        self.b.requires_grad_(True)

    def softmax(self, X):
        """Return the row-wise softmax of a 2-D tensor of logits.

        Args:
            X: Tensor of shape ``(batch, classes)``.

        Returns:
            Tensor of the same shape whose rows are non-negative and sum to 1.
        """
        # Softmax is unchanged by subtracting a per-row constant. Shifting by
        # the row maximum makes the largest exponent exp(0) = 1, so exp()
        # cannot overflow to inf (which would turn the division into NaN).
        # The shift is detached because its derivative contribution is zero.
        row_max = X.max(dim=1, keepdim=True).values.detach()
        exp_X = torch.exp(X - row_max)
        return exp_X / torch.sum(exp_X, dim=1, keepdim=True)

    def cross_entropy_loss(self, y_pred, y_true):
        """Return the mean cross-entropy between probabilities and labels.

        Args:
            y_pred: Tensor of shape ``(batch, classes)`` holding class
                probabilities (e.g. the output of :meth:`forward`).
            y_true: Integer tensor of shape ``(batch,)`` with the index of the
                correct class for each row.

        Returns:
            Scalar tensor: the average of ``-log(p_true)`` over the batch.
        """
        m = y_pred.shape[0]

        # Pick the predicted probability of the correct class directly.
        # Multiplying a one-hot mask by log(y_pred) instead would produce
        # 0 * -inf = NaN whenever any *other* class has probability 0.
        rows = torch.arange(m, device=y_pred.device)
        true_class_probs = y_pred[rows, y_true]

        # Guard log(0): clamp to the smallest positive normal value of the
        # dtype so the loss stays finite.
        floor = torch.finfo(y_pred.dtype).tiny
        loss = -torch.sum(torch.log(true_class_probs.clamp_min(floor))) / m
        return loss

    def forward(self, X):
        """Compute class probabilities for a batch of inputs.

        Args:
            X: Tensor whose first dimension is the batch; the remaining
                dimensions are flattened and must total ``num_inputs``.

        Returns:
            Tensor of shape ``(batch, num_outputs)`` with softmax
            probabilities.
        """
        # reshape (unlike view) also accepts non-contiguous inputs
        X_flat = X.reshape(X.shape[0], -1)

        # Affine map to logits, then normalise to probabilities
        Z = torch.matmul(X_flat, self.W) + self.b
        return self.softmax(Z)

    def predict(self, X):
        """Return the most probable class index for each sample in ``X``."""
        probs = self.forward(X)
        return torch.argmax(probs, dim=1)

# Model dimensions and optimisation settings
num_inputs = 28 * 28  # 28x28 image flattened to 784 features
num_outputs = 10      # 10 classes
learning_rate = 0.1
epochs = 10

# Build the classifier with freshly initialised parameters
model = SoftmaxClassifier(num_inputs, num_outputs)

# Mini-batch gradient descent with a hand-written update step
for epoch in range(epochs):
    total_loss = 0.0
    for X, y in train_iter:
        # Predict class probabilities and score them against the labels
        y_pred = model.forward(X)
        loss = model.cross_entropy_loss(y_pred, y)
        total_loss += loss.item()

        # Backpropagate: autograd fills in model.W.grad and model.b.grad
        loss.backward()

        # Apply the update outside autograd tracking, then clear each
        # gradient so the next backward() starts from zero
        with torch.no_grad():
            for param in (model.W, model.b):
                param -= learning_rate * param.grad
                param.grad.zero_()

    # Report the loss averaged over the batches of this epoch
    print(f'Epoch {epoch+1}, Loss: {total_loss/len(train_iter):.4f}')

# Evaluation
def evaluate_accuracy(model, data_iter):
    """Return the fraction of samples in ``data_iter`` that ``model`` labels correctly.

    Args:
        model: Object exposing ``predict(X)`` that returns one predicted
            class index per sample.
        data_iter: Iterable yielding ``(X, y)`` batches, where ``y`` holds the
            true class indices.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    hits, seen = 0, 0

    # Scoring needs no gradients, so run the whole pass untracked
    with torch.no_grad():
        for features, labels in data_iter:
            matches = model.predict(features) == labels
            hits += matches.sum().item()
            seen += labels.size(0)

    return hits / seen

# Score the trained model on the held-out test batches and report the result
test_acc = evaluate_accuracy(model=model, data_iter=test_iter)
print(
    f'\nTest Accuracy: {test_acc:.4f}'
)

Epoch 1, Loss: 0.5886
Epoch 2, Loss: 0.4850
Epoch 3, Loss: 0.4629
Epoch 4, Loss: 0.4506
Epoch 5, Loss: 0.4461
Epoch 6, Loss: 0.4403
Epoch 7, Loss: 0.4340
Epoch 8, Loss: 0.4293
Epoch 9, Loss: 0.4277
Epoch 10, Loss: 0.4263

Test Accuracy: 0.8349
