## Load Data

In [25]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transforms
import numpy as np
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(42)

<torch._C.Generator at 0x1fecc397bf0>

In [26]:
batch_size = 512
num_epochs = 300
lr = 0.01

In [27]:
train_dataset = FashionMNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_dataset = FashionMNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
test_loader = DataLoader(test_dataset, batch_size)

## Modeling

In [28]:
class MLP(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dims):
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(input_dims, hidden_dims)
        self.layer2 = nn.Linear(hidden_dims, hidden_dims)
        self.layer3 = nn.Linear(hidden_dims, hidden_dims)
        self.layer4 = nn.Linear(hidden_dims, hidden_dims)
        self.layer5 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = nn.Flatten()(x)
        x = self.layer1(x)
        x = self.sigmoid(x)
        x = self.layer2(x)
        x = self.sigmoid(x)
        x = self.layer3(x)
        x = self.sigmoid(x)
        x = self.layer4(x)
        x = self.sigmoid(x)
        x = self.layer5(x)
        x = self.sigmoid(x)
        out = self.output(x)
        return out

In [29]:
model = MLP(input_dims=784, hidden_dims=128, output_dims=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)


In [30]:
train_losses = []
train_acc = []
val_losses = []
val_acc = []
for epoch in range(num_epochs):
    model.train()
    t_loss = 0
    t_acc = 0
    cnt = 0
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        t_loss += loss.item()
        t_acc += (torch.argmax(outputs, 1) == y).sum().item()
        cnt += len(y)
    t_loss /= len(train_loader)
    train_losses.append(t_loss)
    t_acc /= cnt
    train_acc.append(t_acc)

    model.eval()
    v_loss = 0
    v_acc = 0
    cnt = 0
    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            outputs = model(X)
            loss = criterion(outputs, y)
            v_loss += loss.item()
            v_acc += (torch.argmax(outputs, 1)==y).sum().item()
            cnt += len(y)
    v_loss /= len(test_loader)
    val_losses.append(v_loss)
    v_acc /= cnt
    val_acc.append(v_acc)
    print(f"Epoch {epoch+1}/{num_epochs}, Train_Loss: {t_loss:.4f}, Train_Acc: {t_acc:.4f}, Validation Loss: {v_loss:.4f}, Val_Acc: {v_acc:.4f}")

Epoch 1/300, Train_Loss: 1.5892, Train_Acc: 0.3003, Validation Loss: 0.9940, Val_Acc: 0.5932
Epoch 2/300, Train_Loss: 0.7816, Train_Acc: 0.6894, Validation Loss: 0.6931, Val_Acc: 0.7303
Epoch 3/300, Train_Loss: 0.6015, Train_Acc: 0.7751, Validation Loss: 0.6306, Val_Acc: 0.7772
Epoch 4/300, Train_Loss: 0.5038, Train_Acc: 0.8228, Validation Loss: 0.5227, Val_Acc: 0.8186
Epoch 5/300, Train_Loss: 0.4406, Train_Acc: 0.8504, Validation Loss: 0.4515, Val_Acc: 0.8438
Epoch 6/300, Train_Loss: 0.3888, Train_Acc: 0.8670, Validation Loss: 0.4355, Val_Acc: 0.8429
Epoch 7/300, Train_Loss: 0.3603, Train_Acc: 0.8726, Validation Loss: 0.3896, Val_Acc: 0.8648
Epoch 8/300, Train_Loss: 0.3380, Train_Acc: 0.8806, Validation Loss: 0.4012, Val_Acc: 0.8547
Epoch 9/300, Train_Loss: 0.3321, Train_Acc: 0.8818, Validation Loss: 0.4340, Val_Acc: 0.8484
Epoch 10/300, Train_Loss: 0.3252, Train_Acc: 0.8837, Validation Loss: 0.4328, Val_Acc: 0.8400
Epoch 11/300, Train_Loss: 0.3153, Train_Acc: 0.8864, Validation Loss: