In [None]:
import torch
from torch import nn
from adopt import ADOPT
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Use the first CUDA device when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Seed PyTorch's global random number generator.
torch.manual_seed(42)

# Hyperparameters read by the data loaders, the training loop and the optimizers.
batch_size, num_epochs, lr = 512, 300, 0.01

In [None]:
# FashionMNIST training split, downloaded into ./data if missing and converted
# to tensors by ToTensor.
train_dataset = FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# FashionMNIST test split (train=False), iterated in its stored order.
test_dataset = FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size)

In [None]:
class MLP(nn.Module):
    """Fully connected network: five sigmoid-activated hidden layers and a linear head.

    Args:
        input_dims: Number of features per sample once the input is flattened.
        hidden_dims: Width shared by all five hidden layers.
        output_dims: Number of values the final linear layer emits per sample.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        # Only the first layer consumes the flattened input; the other four
        # hidden layers map hidden_dims -> hidden_dims.
        self.layer1 = nn.Linear(in_features=input_dims, out_features=hidden_dims)
        self.layer2 = nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.layer3 = nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.layer4 = nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.layer5 = nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.output = nn.Linear(in_features=hidden_dims, out_features=output_dims)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the output of the final linear layer for a batch ``x``.

        Every dimension after the first is flattened into one feature axis,
        then each hidden layer is applied followed by a sigmoid. No activation
        is applied after the final ``output`` layer.
        """
        hidden = x.flatten(1)
        hidden_layers = (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5)
        for linear in hidden_layers:
            hidden = self.sigmoid(linear(hidden))
        return self.output(hidden)

In [None]:
def train_model(model, optimizer):
    """Train ``model`` with ``optimizer`` and plot per-epoch loss and accuracy.

    Reads the notebook-level ``num_epochs``, ``train_loader``, ``test_loader``
    and ``device``. Each epoch makes one optimisation pass over
    ``train_loader`` and then one evaluation pass over ``test_loader`` under
    ``torch.no_grad()``. The evaluation metrics are reported as "validation".
    A summary line is printed every 50 epochs, and a two-panel figure (loss on
    the left, accuracy on the right) is drawn after the last epoch.

    Args:
        model: Module to train. Batches are moved to ``device`` here but the
            model is not, so it must already be on ``device``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once per
            training batch; expected to manage ``model``'s parameters.

    Returns:
        None. The curves are shown in the figure only.
    """
    criterion = nn.CrossEntropyLoss()
    train_losses = []
    train_acc = []
    val_losses = []
    val_acc = []
    for epoch in range(num_epochs):
        model.train()
        t_loss = 0.0
        t_acc = 0
        cnt = 0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; scale by the batch size so the
            # epoch value is a per-sample mean even when the last batch is
            # smaller than the others.
            t_loss += loss.item() * len(y)
            t_acc += (torch.argmax(outputs, 1) == y).sum().item()
            cnt += len(y)
        t_loss /= cnt
        train_losses.append(t_loss)
        t_acc /= cnt
        train_acc.append(t_acc)

        model.eval()
        v_loss = 0.0
        v_acc = 0
        cnt = 0
        with torch.no_grad():
            for X, y in test_loader:
                X, y = X.to(device), y.to(device)
                outputs = model(X)
                loss = criterion(outputs, y)
                # Same per-sample weighting as in the training pass.
                v_loss += loss.item() * len(y)
                v_acc += (torch.argmax(outputs, 1) == y).sum().item()
                cnt += len(y)
        v_loss /= cnt
        val_losses.append(v_loss)
        v_acc /= cnt
        val_acc.append(v_acc)

        if (epoch+1) % 50 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Train_Loss: {t_loss:.4f}, Train_Acc: {t_acc:.4f}, Validation Loss: {v_loss:.4f}, Val_Acc: {v_acc:.4f}")

    fig, axes = plt.subplots(1, 2, figsize=(12, 8))
    fig.suptitle('Training and Validation Metrics Over Epochs', fontsize=16)

    # Left panel: loss curves for both splits.
    axes[0].plot(train_losses, label='Training Loss', color='blue')
    axes[0].plot(val_losses, label='Validation Loss', color='orange')
    axes[0].set_title('Loss', fontsize=14)
    axes[0].set_xlabel('Epochs', fontsize=12)
    axes[0].set_ylabel('Loss', fontsize=12)
    axes[0].legend(fontsize=12)
    axes[0].grid(True)

    # Right panel: accuracy curves, labelled as accuracy rather than loss.
    axes[1].plot(train_acc, label='Training Acc', color='blue')
    axes[1].plot(val_acc, label='Validation Acc', color='orange')
    axes[1].set_title('Accuracy', fontsize=14)
    axes[1].set_xlabel('Epochs', fontsize=12)
    axes[1].set_ylabel('Accuracy', fontsize=12)
    axes[1].legend(fontsize=12)
    axes[1].grid(True)

In [None]:
# Experiment: plain SGD.
# Reset the RNG to the notebook seed before building the model so the initial
# weights do not depend on which cells were executed earlier.
torch.manual_seed(42)
model = MLP(input_dims=784, hidden_dims=128, output_dims=10).to(device)
optimizer = optim.SGD(model.parameters(), lr=lr)

train_model(model, optimizer)

In [None]:
# Experiment: SGD with momentum 0.9.
# Reset the RNG to the notebook seed before building the model so the initial
# weights do not depend on which cells were executed earlier.
torch.manual_seed(42)
model = MLP(input_dims=784, hidden_dims=128, output_dims=10).to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

train_model(model, optimizer)

In [None]:
# Experiment: RMSprop.
# Reset the RNG to the notebook seed before building the model so the initial
# weights do not depend on which cells were executed earlier.
torch.manual_seed(42)
model = MLP(input_dims=784, hidden_dims=128, output_dims=10).to(device)
optimizer = optim.RMSprop(model.parameters(), lr=lr)

train_model(model, optimizer)

In [None]:
# Experiment: Adam.
# Reset the RNG to the notebook seed before building the model so the initial
# weights do not depend on which cells were executed earlier.
torch.manual_seed(42)
model = MLP(input_dims=784, hidden_dims=128, output_dims=10).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

train_model(model, optimizer)

In [None]:
# Experiment: ADOPT (optimizer imported from the `adopt` package).
# Reset the RNG to the notebook seed before building the model so the initial
# weights do not depend on which cells were executed earlier.
torch.manual_seed(42)
model = MLP(input_dims=784, hidden_dims=128, output_dims=10).to(device)
optimizer = ADOPT(model.parameters(), lr=lr)

train_model(model, optimizer)