In [26]:



import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Load Fashion MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Define CNN architecture

# Define CNN architecture with Batch Normalization
class FashionMNISTModel(nn.Module):
    def __init__(self):
        super(FashionMNISTModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)  # Batch Norm
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)  # Batch Norm
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)  # Batch Norm
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.bn_fc1 = nn.BatchNorm1d(128)  # Batch Norm
        self.fc2 = nn.Linear(128, 64)
        self.bn_fc2 = nn.BatchNorm1d(64)  # Batch Norm
        self.fc3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # Apply Batch Norm
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # Apply Batch Norm
        x = self.pool(F.relu(self.bn3(self.conv3(x))))  # Apply Batch Norm
        x = x.view(-1, 128 * 3 * 3)
        x = F.relu(self.bn_fc1(self.fc1(x)))  # Apply Batch Norm
        x = self.dropout(x)
        x = F.relu(self.bn_fc2(self.fc2(x)))  # Apply Batch Norm
        x = self.fc3(x)
        x = F.softmax(x, dim=1)
        return x

# Check for GPU and use it if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FashionMNISTModel().to(device)
def save_checkpoint(epoch, model, optimizer, path='./fmnist/checkpoint.pth'):
    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    try:
        torch.save(state, path)
        print(f'Model saved to {path}')
    except Exception as e:
        print(f'Error saving model: {e}')
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) 
# Training loop
for epoch in range(100):
    running_loss = 0.0
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    save_checkpoint(epoch, model, optimizer, path=f'./fmnist/checkpoint_epoch_{epoch}.pth')
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader)}")
    # Testing the model
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Accuracy on test set: {100 * correct / total}%")

Model saved to ./fmnist/checkpoint_epoch_0.pth
Epoch 1, Loss: 1.6358853052419895
Accuracy on test set: 88.26%
Model saved to ./fmnist/checkpoint_epoch_1.pth
Epoch 2, Loss: 1.5671520116232605
Accuracy on test set: 89.65%
Model saved to ./fmnist/checkpoint_epoch_2.pth
Epoch 3, Loss: 1.555843675568668
Accuracy on test set: 89.73%
Model saved to ./fmnist/checkpoint_epoch_3.pth
Epoch 4, Loss: 1.5483821566933509
Accuracy on test set: 89.89%
Model saved to ./fmnist/checkpoint_epoch_4.pth
Epoch 5, Loss: 1.5436316428662362
Accuracy on test set: 89.42%
Model saved to ./fmnist/checkpoint_epoch_5.pth
Epoch 6, Loss: 1.5390878092251352
Accuracy on test set: 90.03%
Model saved to ./fmnist/checkpoint_epoch_6.pth
Epoch 7, Loss: 1.5369168391614072
Accuracy on test set: 90.46%
Model saved to ./fmnist/checkpoint_epoch_7.pth
Epoch 8, Loss: 1.532227820679069
Accuracy on test set: 90.2%
Model saved to ./fmnist/checkpoint_epoch_8.pth
Epoch 9, Loss: 1.5293533922766827
Accuracy on test set: 90.95%
Model saved t

# BLOT LOSS

In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

# Load Fashion MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define CNN architecture with Batch Normalization
class FashionMNISTModel(nn.Module):
    def __init__(self):
        super(FashionMNISTModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)  # Batch Norm
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)  # Batch Norm
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)  # Batch Norm
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.bn_fc1 = nn.BatchNorm1d(128)  # Batch Norm
        self.fc2 = nn.Linear(128, 64)
        self.bn_fc2 = nn.BatchNorm1d(64)  # Batch Norm
        self.fc3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # Apply Batch Norm
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # Apply Batch Norm
        x = self.pool(F.relu(self.bn3(self.conv3(x))))  # Apply Batch Norm
        x = x.view(-1, 128 * 3 * 3)
        x = F.relu(self.bn_fc1(self.fc1(x)))  # Apply Batch Norm
        x = self.dropout(x)
        x = F.relu(self.bn_fc2(self.fc2(x)))  # Apply Batch Norm
        x = self.fc3(x)
        x = F.softmax(x, dim=1)
        return x

# Custom Bayes Loss function
def Bayes_loss(output, target):
    batch_size = output.size(0)
    num_classes = output.size(1)
    
    # Create a mask for the losses
    mask = torch.arange(num_classes, device=target.device).expand(batch_size, num_classes)
    target_expanded = target.unsqueeze(1).expand_as(mask)
    
    # Calculate the loss
    loss = (mask >= target_expanded).float() * output
    loss += (mask == (target_expanded - 1)).float() * (1 - output)
    
    return loss.sum() / batch_size

# Initialize the model, optimizer, and scheduler
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model = FashionMNISTModel().to(device)

# Use RMSprop optimizer
optimizer = optim.RMSprop(model.parameters(), lr=0.001)

# Learning rate scheduler: Reduce LR when a metric has stopped improving
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5, verbose=True)

# Gradient clipping value
clip_value = 1.0

# Define the training function with gradient clipping and monitoring
def train(epoch, loader, model, optimizer, scheduler=None):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        
        # Forward pass
        output = model(data)
        
        # Calculate Bayes loss
        be = Bayes_loss(output[:, 1:10], target)
        
        # Backpropagation with gradient clipping
        be.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_value)
        optimizer.step()
        
        # Accumulate loss
        running_loss += be.item()
        
        # Monitor gradient norms
        total_norm = 0
        for p in model.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item() ** 2
        total_norm = total_norm ** 0.5
        
        # Print loss and gradient norm every 100 batches
        if batch_idx % 100 == 0:
            avg_loss = running_loss / (batch_idx + 1)
            print(f'Epoch {epoch} [{batch_idx * len(data)}/{len(loader.dataset)}] '
                  f'Loss: {avg_loss:.6f} | Grad Norm: {total_norm:.6f}')
            
    # Scheduler step if applicable
    if scheduler:
        scheduler.step(running_loss / len(loader))

# Evaluate the model
def test(loader, model, device):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # Disable gradient computation
        for data, targets in loader:
            data = data.to(device)
            targets = targets.to(device)
            
            # Forward pass
            outputs = model(data)
            
            # Get the predicted class
            _, predicted = torch.max(outputs.data, 1)
            
            # Accumulate total and correct predictions
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # Calculate accuracy
    accuracy = correct / total
    print(f'Accuracy on test images: {100 * accuracy:.2f}%')

# Training loop with test evaluation after each epoch
for epoch in range(1, 100):  # Increased number of epochs
    train(epoch, trainloader, model, optimizer, scheduler)
    test(testloader, model, device)


Epoch 1 [0/60000] Loss: 1.263420 | Grad Norm: 0.999999
Epoch 1 [6400/60000] Loss: 0.470093 | Grad Norm: 0.316115
Epoch 1 [12800/60000] Loss: 0.361460 | Grad Norm: 0.157285
Epoch 1 [19200/60000] Loss: 0.317149 | Grad Norm: 0.303545
Epoch 1 [25600/60000] Loss: 0.290204 | Grad Norm: 0.328989
Epoch 1 [32000/60000] Loss: 0.273240 | Grad Norm: 0.344299
Epoch 1 [38400/60000] Loss: 0.260978 | Grad Norm: 0.360410
Epoch 1 [44800/60000] Loss: 0.251338 | Grad Norm: 0.489539
Epoch 1 [51200/60000] Loss: 0.242995 | Grad Norm: 0.261258
Epoch 1 [57600/60000] Loss: 0.237226 | Grad Norm: 0.517022
Accuracy on test images: 83.45%
Epoch 2 [0/60000] Loss: 0.202385 | Grad Norm: 0.230545
Epoch 2 [6400/60000] Loss: 0.181345 | Grad Norm: 0.556004
Epoch 2 [12800/60000] Loss: 0.181857 | Grad Norm: 0.111720
Epoch 2 [19200/60000] Loss: 0.179320 | Grad Norm: 0.161669
Epoch 2 [25600/60000] Loss: 0.181892 | Grad Norm: 0.422458
Epoch 2 [32000/60000] Loss: 0.180496 | Grad Norm: 0.307625
Epoch 2 [38400/60000] Loss: 0.1813