In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [3]:
# Deifine transformation for the dataset - converting images to tensor and normalizing them
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,),(0.5))   #Normaliztion with mean std  deviaton
])

In [4]:
# Load the Fashion MNIST training and test datasets

train_dataset = datasets.FashionMNIST(root='./data', train= True,download= True, transform= transform)
test_dataset = datasets.FashionMNIST(root='./data', train= False, download = True, transform= transform)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:06<00:00, 4.00MB/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 260kB/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:00<00:00, 4.42MB/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 17.5MB/s]


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw



In [5]:
# Create Dataloader objects for batching and shuffing the data
train_loader = DataLoader(dataset = train_dataset, batch_size = 64, shuffle= True)
test_loader = DataLoader(dataset=test_dataset, batch_size = 1000, shuffle= False)

In [6]:
test_dataset

Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=0.5)
           )

**2.Define the Neural Network model**




In [None]:
# Define the neural network architecture
# class FashionMNISTModel(nn.Module):
#     def __init__(self):
#         super(FashionMNISTModel, self).__init__()
#         self.fc1 = nn.Linear(28 * 28, 512)  # Flattened image input (28x28), output 512 nodes
#         self.fc2 = nn.Linear(512, 256)      # Hidden layer
#         self.fc3 = nn.Linear(256, 10)       # Output layer for 10 classes

#     def forward(self, x):
#         # Flatten the input
#         x = x.view(x.size(0), -1)  # Flatten to (batch_size, 784)
#         x = F.relu(self.fc1(x))    # Apply ReLU after fc1
#         x = F.relu(self.fc2(x))    # Apply ReLU after fc2
#         x = self.fc3(x)            # Output layer
#         return x

In [7]:
class FashionMNISTModel(nn.Module):
 def __init__(self):
     super(FashionMNISTModel, self).__init__()
     self.fc1 = nn.Linear(28 * 28, 512)
     self.fc2 = nn.Linear(512, 256)
     self.fc3 = nn.Linear(256, 128)
     self.fc4 = nn.Linear(128, 10)

 def forward(self, x):
     x = x.view(x.size(0), -1)  # Flatten the input
     x = F.relu(self.fc1(x))    # Apply ReLU after fc1
    #  x = F.dropout(x, p=0.2)    # Add dropout layer to prevent overfitting
     x = F.relu(self.fc2(x))    # Apply ReLU after fc2
    #  x = F.dropout(x, p=0.1)    # Add dropout
     x = F.relu(self.fc3(x))    # Apply ReLU after fc3
     x = self.fc4(x)            # Output layer
     return x

3. Model training configuration

In [8]:
# Initialize model, criterion, optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FashionMNISTModel().to(device)
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.01)
# scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [9]:
scheduler = optim.lr_scheduler.StepLR(optimizer,step_size = 7, gamma = 0.01)

In [10]:
reduce_lr = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,          # The optimizer whose learning rate will be adjusted
    mode='min',         # Mode to determine whether the monitored quantity is to be minimized or maximized
    factor=0.5,         # Factor by which the learning rate will be reduced
    patience=3,         # Number of epochs with no improvement after which learning rate will be reduced
    threshold=0.0001,   # Threshold for measuring the new optimum
    threshold_mode='rel', # 'rel' means threshold is relative to the best value
    cooldown=0,         # Number of epochs to wait before resuming normal operation after lr has been reduced
    min_lr=0.00001      # Minimum learning rate
)

In [11]:
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()  # Set model to training mode
    epoch_loss = 0  # Initialize epoch loss to zero at the start of each epoch
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # Reset gradients
        output = model(data)   # Forward pass to get output
        loss = criterion(output, target)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        epoch_loss += loss.item()  # Accumulate loss

    avg_loss = epoch_loss / len(train_loader)
    print(f'Epoch {epoch}: Average Loss = {avg_loss:.6f}')

        # Print loss every 10 batches
        # if batch_idx % 10 == 0:
        #     print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}]  Loss: {loss.item():.6f}')


4. Trian and Test

In [12]:
def test(model, device, test_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    test_loss = 0
    correct = 0
    with torch.no_grad():  # Disable gradient calculation for faster evaluation
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)  # Move data and target to GPU/CPU
            output = model(data)  # Forward pass: compute predictions
            test_loss += criterion(output, target).item()  # Accumulate loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the predicted class
            correct += pred.eq(target.view_as(pred)).sum().item()  # Count correct predictions

    test_loss /= len(test_loader.dataset)  # Average test loss
    accuracy = 100. * correct / len(test_loader.dataset)  # Calculate accuracy

    # Print test loss and accuracy
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')

In [13]:
n_epochs = 50               # Set the number of training epochs

for epoch in range(1, n_epochs +1):
  train(model,device, train_loader, optimizer, criterion, epoch)      # `train` function is called with the model, device, data loader, optimizer, loss criterion, and current epoch number
  test(model, device, test_loader, criterion)
  current_lr = optimizer.param_groups[0]['lr']       # Retrieve the current learning rate from the optimizer

  print(f'Learning Rate: {current_lr}')


Epoch 1: Average Loss = 0.594029

Test set: Average loss: 0.0005, Accuracy: 8199/10000 (81.99%)

Learning Rate: 0.01
Epoch 2: Average Loss = 0.471357

Test set: Average loss: 0.0005, Accuracy: 8386/10000 (83.86%)

Learning Rate: 0.01
Epoch 3: Average Loss = 0.459016

Test set: Average loss: 0.0005, Accuracy: 8314/10000 (83.14%)

Learning Rate: 0.01
Epoch 4: Average Loss = 0.430889

Test set: Average loss: 0.0005, Accuracy: 8385/10000 (83.85%)

Learning Rate: 0.01
Epoch 5: Average Loss = 0.428288

Test set: Average loss: 0.0005, Accuracy: 8379/10000 (83.79%)

Learning Rate: 0.01
Epoch 6: Average Loss = 0.402822

Test set: Average loss: 0.0005, Accuracy: 8426/10000 (84.26%)

Learning Rate: 0.01
Epoch 7: Average Loss = 0.409874

Test set: Average loss: 0.0005, Accuracy: 8482/10000 (84.82%)

Learning Rate: 0.01
Epoch 8: Average Loss = 0.407048

Test set: Average loss: 0.0004, Accuracy: 8504/10000 (85.04%)

Learning Rate: 0.01
Epoch 9: Average Loss = 0.392653

Test set: Average loss: 0.0005