<a href="https://colab.research.google.com/github/Sonica-B/Deep-Learning-CS541_F2024/blob/HomeWork4/homework4_Shreya_Boyane.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import FashionMNIST
from torch.utils.data import DataLoader, random_split


#Hyperparameters
input_size, hidden_sizes, num_classes = 28 * 28, [256, 128], 10
batch_size, num_epochs, lr = 64, 100, 0.001

#Data transformations with rotation for generalization
transform = transforms.Compose([
    transforms.RandomRotation(10),  #Rotate within ±10°
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

#Dataset
train_dataset = FashionMNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = FashionMNIST(root='./data', train=False, transform=transform)

train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_data, val_data = random_split(train_dataset, [train_size, val_size])

#DataLoaders
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

for X, y in test_loader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

#Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

#Fully Connected Neural Network
class FCNN(nn.Module):
    def __init__(self, input_size, hidden_sizes, num_classes):
        super().__init__()
        layers = []
        current_size = input_size
        for size in hidden_sizes:
            layers.append(nn.Linear(current_size, size))
            layers.append(nn.ReLU())
            current_size = size
        layers.append(nn.Linear(current_size, num_classes))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x.view(x.size(0), -1))

#Initialize model, loss function, and optimizer
model = FCNN(input_size, hidden_sizes, num_classes).to(device)
criterion, optimizer = nn.CrossEntropyLoss(), optim.Adam(model.parameters(), lr=lr)

#Training function
def train(model, loader):
    model.train()
    for epoch in range(num_epochs):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            criterion(model(images), labels).backward()
            optimizer.step()
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {criterion(model(images), labels).item():.4f}')

#Evaluation function
def evaluate(model, loader):
    model.eval()
    with torch.no_grad():
        correct = sum((model(images.to(device)).argmax(1) == labels.to(device)).sum().item()
                      for images, labels in loader)
    return correct / len(loader.dataset)

#Train and evaluate the model
train(model, train_loader)
print(f'Validation Accuracy: {evaluate(model, val_loader) * 100:.2f}%')
print(f'Test Accuracy: {evaluate(model, test_loader) * 100:.2f}%')


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:03<00:00, 7.79MB/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 135kB/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:01<00:00, 2.52MB/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 15.0MB/s]


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64
Using cpu device
Epoch [1/100], Loss: 0.4057
Epoch [2/100], Loss: 0.3526
Epoch [3/100], Loss: 0.2858
Epoch [4/100], Loss: 0.3003
Epoch [5/100], Loss: 0.2930
Epoch [6/100], Loss: 0.4740
Epoch [7/100], Loss: 0.4554
Epoch [8/100], Loss: 0.3852
Epoch [9/100], Loss: 0.3222
Epoch [10/100], Loss: 0.1557
Epoch [11/100], Loss: 0.3207
Epoch [12/100], Loss: 0.2955
Epoch [13/100], Loss: 0.3426
Epoch [14/100], Loss: 0.3127
Epoch [15/100], Loss: 0.2730
Epoch [16/100], Loss: 0.1054
Epoch [17/100], Loss: 0.2153
Epoch [18/100], Loss: 0.4065
Epoch [19/100], Loss: 0.0820
Epoch [20/100], Loss: 0.1992
Epoch [21/100], Loss: 0.2002
Epoch [22/100], Loss: 0.1396
Epoch [23/100], Loss: 0.1201
Epoch [24/100], Loss: 0.1987
Epoch [25/100], Loss: 0.1221
Epoch [26/100], Loss: 0.2304
Epoch [27/100], Loss: 0.1899
Epoch [28/100], Loss: 0.19