In [1]:
import torch
import torch.nn as nn
from torch.optim.optimizer import Optimizer
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader


In [2]:

# Load Fashion MNIST dataset
train_dataset = FashionMNIST(root='./data', train=True, download=True, transform=ToTensor())
test_dataset = FashionMNIST(root='./data', train=False, download=True, transform=ToTensor())

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the model
class FashionMNISTModel(nn.Module):
    def __init__(self):
        super(FashionMNISTModel, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return nn.functional.softmax(x, dim=1)

# Create the model instance
model = FashionMNISTModel()


In [3]:

# Define the custom Adam optimizer
class AdamOptimizer(Optimizer):
    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8):
        defaults = dict(lr=lr, betas=betas, eps=eps)
        super(AdamOptimizer, self).__init__(params, defaults)

    def step(self, closure=None):
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:  # Check the grad of parameter is exist
                    continue
                grad = p.grad.data
                state = self.state[p]

                if 'step' not in state:
                    state['step'] = 0
                if 'm' not in state:
                    state['m'] = torch.zeros_like(p.data)
                if 'v' not in state:
                    state['v'] = torch.zeros_like(p.data)

                m = state['m']
                v = state['v']
                beta1, beta2 = group['betas']
                eps = group['eps']
                state['step'] += 1

                m.mul_(beta1).add_(1 - beta1, grad)
                v.mul_(beta2).addcmul_(1 - beta2, grad, grad)

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                lr = group['lr'] * (bias_correction2 ** 0.5) / bias_correction1

                p.data.addcdiv_(-lr, m, v.sqrt().add_(eps))


In [4]:

# Create the optimizer instance
optimizer = AdamOptimizer(model.parameters(), lr=0.01)

# Define the loss function (CrossEntropyLoss)
criterion = nn.CrossEntropyLoss()

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    # Print the loss after every epoch
    print(f"Epoch {epoch+1}/{num_epochs}------------> Loss: {loss.item()}")

# Evaluation
model.eval()
total_correct = 0
total_samples = 0

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\torch\csrc\utils\python_arg_parser.cpp:1485.)
  m.mul_(beta1).add_(1 - beta1, grad)


Epoch 1/10------------> Loss: 1.6744211912155151
Epoch 2/10------------> Loss: 1.6485497951507568
Epoch 3/10------------> Loss: 1.8048733472824097
Epoch 4/10------------> Loss: 1.8047927618026733
Epoch 5/10------------> Loss: 1.5884759426116943
Epoch 6/10------------> Loss: 1.648650050163269
Epoch 7/10------------> Loss: 1.6798999309539795
Epoch 8/10------------> Loss: 1.6799001693725586
Epoch 9/10------------> Loss: 1.8048882484436035
Epoch 10/10------------> Loss: 1.5861531496047974


In [5]:
accuracy = total_correct / total_samples
print(f"Test Accuracy: {accuracy}")

Test Accuracy: 0.7921
