In [90]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import torch.optim as optim
import random
import numpy as np

In [91]:
def set_seed(seed=42):

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) 
    np.random.seed(seed)
    random.seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

In [92]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device using: ", device)

device using:  cuda


In [93]:

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,),(0.5))])

train_data_ = datasets.FashionMNIST(root='./data', train = True, download=True, transform=transform)
test_data = datasets.FashionMNIST(root='./data', train = False, download=True, transform=transform)

train_len = int(0.9*len(train_data_))
val_len = len(train_data_)-train_len
train_data, val_data = random_split(train_data_, (train_len, val_len))


In [94]:
train_loader = DataLoader(train_data, batch_size=64, shuffle=True,
                              pin_memory=True, num_workers=4)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False, 
                            pin_memory=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False,
                             pin_memory=True, num_workers=4)

In [95]:
class first_cnn(nn.Module):
    
    def __init__(self, num_features):
        super().__init__()
        
        #cnn
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(num_features, 32, kernel_size = 3, padding='same'),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(32, 64, kernel_size=3, padding='same'),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64*7*7, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64,10)
        )
        
    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x  

In [96]:
Fashion_CNN = first_cnn(1).to(device)
Loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(Fashion_CNN.parameters(), lr = 0.01)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=4, factor=0.5)

In [None]:
epochs = 9
es_patience = 7
counter = 0
best_val_loss = float('inf')
delta = 1e-4

for epoch in range(epochs):
    Fashion_CNN.train()
    total_loss=0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        outputs = Fashion_CNN(images)
        loss = Loss(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        #train_accuracy logic
        _, predicted = torch.max(outputs, dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        #loss
        total_loss += loss.item()*labels.size(0) 
    avg_train_accuracy = 100 * correct / total
    avg_train_loss = total_loss/total

    #Validation
    Fashion_CNN.eval()
    total_val_loss = 0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = Fashion_CNN(images)
            
            #val loss logic
            batch_loss = Loss(outputs, labels).item() #returns mean loss per sample
            total_val_loss += batch_loss * labels.size(0)  # total loss = bl1*32+bl2*32+bl3*16
            
            
    #Note: We could also take the mean of the average loss from all batches, but this would be inaccurate if the last batch has fewer samples than the others.
            #val accuracy logic
            _, predicted = torch.max(outputs, dim=1)
            val_correct += (predicted == labels).sum().item()
            
            val_total += labels.size(0) #number of samples. Its generally equal to defined batch_size(=64) but last batch can be smaller. eg 32 32 16. 32+32+16=80
            
    avg_val_loss = total_val_loss/val_total #loss per sample
    lr_scheduler.step(avg_val_loss)  #schedular decides whether to change lr or not, epochwise, not batchwise
        
    avg_val_accuracy = 100 * val_correct / val_total #accuracy per sample 
    

    #Early Stopping Check
    if avg_val_loss < best_val_loss-delta:
        best_val_loss = avg_val_loss
        counter = 0
        torch.save(Fashion_CNN.state_dict(), "best_model.pth")
    else:
        counter+=1
        if counter>=es_patience:
            print("Early stopping triggered.")
            break
        
    print(f"Epoch {epoch+1}/{epochs} | Val Accuracy: {avg_val_accuracy:.4f}% | Train Accuracy: {avg_train_accuracy:.4f}% | Counter: {counter} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

Epoch 1/9 | Val Accuracy: 89.4833% | Train Accuracy: 84.9981% | Counter: 0 | Train Loss: 0.4273 | Val Loss: 0.2831
Epoch 2/9 | Val Accuracy: 90.9167% | Train Accuracy: 89.5889% | Counter: 0 | Train Loss: 0.2940 | Val Loss: 0.2478
Epoch 3/9 | Val Accuracy: 91.7667% | Train Accuracy: 90.8963% | Counter: 0 | Train Loss: 0.2564 | Val Loss: 0.2374
Epoch 4/9 | Val Accuracy: 92.2833% | Train Accuracy: 91.8926% | Counter: 0 | Train Loss: 0.2257 | Val Loss: 0.2173
Epoch 5/9 | Val Accuracy: 92.0833% | Train Accuracy: 92.6889% | Counter: 1 | Train Loss: 0.2055 | Val Loss: 0.2220
Epoch 6/9 | Val Accuracy: 92.6000% | Train Accuracy: 93.4037% | Counter: 0 | Train Loss: 0.1839 | Val Loss: 0.2062
Epoch 7/9 | Val Accuracy: 91.7833% | Train Accuracy: 94.0463% | Counter: 1 | Train Loss: 0.1649 | Val Loss: 0.2351
Epoch 8/9 | Val Accuracy: 92.4500% | Train Accuracy: 94.7407% | Counter: 2 | Train Loss: 0.1461 | Val Loss: 0.2318
Epoch 9/9 | Val Accuracy: 92.7000% | Train Accuracy: 95.1185% | Counter: 3 | Tra

In [100]:
Fashion_CNN.eval() 

with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device) 
        outputs = Fashion_CNN(images) 
        _, predicted = torch.max(outputs, dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 92.16%
