In [1]:
from sklearn.datasets import fetch_openml

# Fetch Fashion-MNIST (version 1) from OpenML as plain arrays rather than a DataFrame.
fashion_mnist = fetch_openml(
    'Fashion-MNIST',
    version=1,
    as_frame=False,
)

# Features stay as delivered; the targets are cast to integers.
X = fashion_mnist.data
y = fashion_mnist.target.astype(int)

print(f"Data shape: {X.shape}, Labels shape: {y.shape}")


Data shape: (70000, 784), Labels shape: (70000,)


In [2]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
from sklearn.model_selection import train_test_split

In [3]:
# Seed PyTorch's default random number generator so that the randomised steps
# that draw from it are repeatable. As the last expression in the cell, the
# returned Generator object is echoed as the cell output.
torch.manual_seed(123)

<torch._C.Generator at 0x78fa6080c070>

In [4]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

cuda


In [5]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
class dataset(Dataset):
  """In-memory dataset pairing a float32 feature tensor with int64 labels.

  Args:
    x: Array-like of features; copied into a ``torch.float32`` tensor.
    y: Array-like of labels; copied into a ``torch.long`` tensor.

  Raises:
    ValueError: If ``x`` and ``y`` do not contain the same number of samples.
  """

  def __init__(self, x, y):

    self.x = torch.tensor(x, dtype=torch.float32)
    self.y = torch.tensor(y, dtype=torch.long)

    # __len__ is driven by x alone, so a shorter y would otherwise only surface
    # later as an IndexError from inside a DataLoader. Fail early instead.
    if len(self.x) != len(self.y):
      raise ValueError(
          f"x and y must have the same number of samples, "
          f"got {len(self.x)} and {len(self.y)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.x)

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.x[index], self.y[index]

In [7]:
# Wrap each split in the tensor-backed dataset class so it can be fed to a DataLoader.
train_dataset = dataset(x_train, y_train)
test_dataset = dataset(x_test, y_test)

In [8]:
# Page-locked host memory only helps when batches are copied to a CUDA device;
# requesting it on a CPU-only machine just triggers a warning.
use_pinned_memory = torch.cuda.is_available()

# Training batches are reshuffled every epoch. Evaluation does not depend on
# sample order, so the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=128, pin_memory=use_pinned_memory, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=use_pinned_memory, shuffle=False)

In [9]:
class Model(nn.Module):
  """Fully connected classifier: two hidden blocks followed by a 10-unit linear head.

  Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout(0.2), with
  widths 128 and 64. The head returns raw scores (no softmax is applied).

  Args:
    num_features: Size of each input vector.
  """

  def __init__(self,num_features):
    super().__init__()

    # Modules are created in forward order, so the Sequential indices
    # (layers.0 ... layers.8) and the order in which parameters are
    # initialised are the same as when the stack is written out by hand.
    stack = []
    width_in = num_features
    for width_out in (128, 64):
      stack += [
          nn.Linear(width_in, width_out),
          nn.BatchNorm1d(width_out),
          nn.ReLU(),
          nn.Dropout(0.2),
      ]
      width_in = width_out
    stack.append(nn.Linear(width_in, 10))

    self.layers = nn.Sequential(*stack)

  def forward(self, x):
    """Run ``x`` through the layer stack and return the 10 output scores per sample."""
    logits = self.layers(x)
    return logits

In [10]:
# Optimisation hyperparameters.
epochs = 25
lr = 1e-3

# The network takes one input per feature column of the training matrix.
model = Model(x_train.shape[1])
model = model.to(device)

# Cross-entropy on the raw model outputs; Adam with a small weight decay.
lossfn = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=lr,
    weight_decay=1e-5,
)

In [11]:
for i in range(epochs):

  # Make sure Dropout is active and BatchNorm uses batch statistics. Without
  # this, re-running the cell after the model has been put in eval mode would
  # silently train with both layers in inference behaviour.
  model.train()

  total_epoch_loss = 0

  for batch_features, batch_labels in train_loader:

    # non_blocking lets the copy overlap with compute when the loader hands
    # out pinned memory; it degrades to an ordinary copy otherwise.
    batch_features = batch_features.to(device, non_blocking=True)
    batch_labels = batch_labels.to(device, non_blocking=True)

    # Clear gradients left over from the previous step before the new backward pass.
    optimizer.zero_grad()

    output = model(batch_features)

    loss = lossfn(output, batch_labels)

    loss.backward()

    optimizer.step()

    total_epoch_loss += loss.item()

  # Mean of the per-batch losses for this epoch.
  print(f"Epoch: {i+1}, Loss: {total_epoch_loss/len(train_loader)}")


Epoch: 1, Loss: 0.6077312286858145
Epoch: 2, Loss: 0.41324491455266466
Epoch: 3, Loss: 0.3772756142975533
Epoch: 4, Loss: 0.3523756817459516
Epoch: 5, Loss: 0.33589363635675
Epoch: 6, Loss: 0.3202941038701088
Epoch: 7, Loss: 0.3077953222407598
Epoch: 8, Loss: 0.29657483168932947
Epoch: 9, Loss: 0.2933017376419072
Epoch: 10, Loss: 0.28408449684103876
Epoch: 11, Loss: 0.2751291892810227
Epoch: 12, Loss: 0.2697585779986425
Epoch: 13, Loss: 0.26223567910662526
Epoch: 14, Loss: 0.25653306527497016
Epoch: 15, Loss: 0.2501318236328151
Epoch: 16, Loss: 0.24648611196508147
Epoch: 17, Loss: 0.24011652814607098
Epoch: 18, Loss: 0.2339192908512403
Epoch: 19, Loss: 0.2306138962405185
Epoch: 20, Loss: 0.22895447248125186
Epoch: 21, Loss: 0.22401543172527122
Epoch: 22, Loss: 0.2206952725473332
Epoch: 23, Loss: 0.22065545712091608
Epoch: 24, Loss: 0.21330938409996902
Epoch: 25, Loss: 0.213516608833178


In [12]:
# Switch the model to evaluation mode so that its Dropout layers are disabled
# and its BatchNorm1d layers use their running statistics. Being the last
# expression in the cell, the returned module is echoed as the cell output.
model.eval()

Model(
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [13]:
# Set inference behaviour here rather than relying on a separate cell having
# been run beforehand (Dropout off, BatchNorm on running statistics).
model.eval()

total = 0
count = 0

# Gradients are not needed to measure accuracy.
with torch.no_grad():

  for batch_features, batch_labels in test_loader:

    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    output = model(batch_features)

    # Predicted class = index of the largest score along the class dimension.
    predicted = output.argmax(dim=1)

    total += batch_labels.size(0)

    count += (predicted == batch_labels).sum().item()

print(f"Accuracy: {count/total}")

Accuracy: 0.9012142857142857


In [14]:
# Set inference behaviour here rather than relying on a separate cell having
# been run beforehand (Dropout off, BatchNorm on running statistics).
model.eval()

total = 0
count = 0

# Gradients are not needed to measure accuracy.
with torch.no_grad():

  for batch_features, batch_labels in train_loader:

    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    output = model(batch_features)

    # Predicted class = index of the largest score along the class dimension.
    predicted = output.argmax(dim=1)

    total += batch_labels.size(0)

    count += (predicted == batch_labels).sum().item()

print(f"Accuracy: {count/total}")

Accuracy: 0.9453928571428571
