In [3]:
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor
from torch import nn
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from tqdm import tqdm
# from sklearn.metrics import accuracy_score

In [4]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
# Build the train and test splits with identical settings; only the `train`
# flag differs. Images are converted to tensors and the data is downloaded
# into ./dataset when it is not already present.
train_data, test_data = (
    FashionMNIST("./dataset", train=is_train, transform=ToTensor(), download=True)
    for is_train in (True, False)
)

In [6]:
# Unpack the first training sample into its image tensor and label, then echo
# both as the cell output.
image, label = train_data[0]
image, label

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0039, 0.0000, 0.0000, 0.0510,
           0.2863, 0.0000, 0.0000, 0.0039, 

In [7]:
# What's the shape of the image? ToTensor produces channel-first tensors, so
# this reads as (channels, height, width).
image.shape

torch.Size([1, 28, 28])

In [8]:
# Human-readable class names exposed by the dataset, echoed as the cell output.
# Presumably indexed by the integer label - confirm against torchvision docs.
class_names = train_data.classes
class_names

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [9]:
BATCH_SIZE = 32

# Shuffle only the training split: reshuffling every epoch helps SGD-style
# optimisation, whereas evaluation gains nothing from it and a fixed order
# keeps test batches reproducible between runs.
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
# Number of batches the training loader yields per epoch (not the sample count).
len(train_dataloader)

1875

In [11]:
# Pull a single batch from the training loader and echo the shapes of its
# image and label tensors.
batch_x, batch_y = next(iter(train_dataloader))
batch_x.shape, batch_y.shape

(torch.Size([32, 1, 28, 28]), torch.Size([32]))

In [12]:
# Sanity-check nn.Flatten on the sample batch: the batch axis is kept and the
# remaining axes collapse into the 784 features the first Linear layer takes.
ss = nn.Flatten()
ss(batch_x).shape

torch.Size([32, 784])

In [13]:
class FLatModel(nn.Module):
    """Fully connected classifier: flatten, four ReLU hidden layers, 10 logits.

    Layer widths are 784 -> 1000 -> 1000 -> 500 -> 100 -> 10. The output is raw
    logits; no softmax is applied inside the model.
    """

    def __init__(self, *args, **kwargs):
        """Create the layers. Positional and keyword arguments are accepted but ignored."""
        super().__init__()
        # Attribute names and creation order are kept stable so state_dict keys
        # and parameter ordering do not change.
        self.layer1 = nn.Flatten()
        self.layer2 = nn.Linear(in_features=784, out_features=1000)
        self.layer3 = nn.Linear(in_features=1000, out_features=1000)
        self.layer4 = nn.Linear(in_features=1000, out_features=500)
        self.layer5 = nn.Linear(in_features=500, out_features=100)
        self.layer6 = nn.Linear(in_features=100, out_features=10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of inputs to a (batch, 10) tensor of class logits.

        Args:
            x: Batch whose non-batch dimensions flatten to 784 values.

        Returns:
            Output of the final linear layer (no activation applied).
        """
        hidden = self.layer1(x)
        # Every hidden layer is followed by the same ReLU; the head is left linear.
        for linear in (self.layer2, self.layer3, self.layer4, self.layer5):
            hidden = self.relu(linear(hidden))
        return self.layer6(hidden)

In [14]:
# Instantiate the model and move its parameters to the selected device.
FlatNet = FLatModel().to(device=device)

In [15]:
# Confirm where the model lives by checking the device of its first parameter.
next(FlatNet.parameters()).device

device(type='cuda', index=0)

In [16]:
# batch_x, batch_y = next(iter(train_dataloader))
# FlatNet(batch_x)

In [17]:
# Cross-entropy on the model's raw logits, and an AdamW optimizer over every
# FlatNet parameter.
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
optim_fn = torch.optim.AdamW(FlatNet.parameters(), lr=0.005)

In [18]:
# Count every element across all of FlatNet's parameter tensors.
total_params = sum(param.numel() for param in FlatNet.parameters())
print(total_params)

2337610


In [19]:
def acc_fn(y_preds, y):
    """Return the fraction of predictions that equal their targets.

    Args:
        y_preds: Tensor of predicted class indices.
        y: Tensor of ground-truth class indices; its length is the denominator.

    Returns:
        A 0-dim tensor: the number of element-wise matches divided by ``len(y)``.
    """
    n_correct = (y_preds == y).sum()
    return n_correct / len(y)

In [20]:
# acc_fn(torch.argmax(torch.softmax(y_logits, dim=1), dim=1),batch_y)

In [21]:
# Train FlatNet for a fixed number of epochs and report, after each epoch, the
# mean of the per-batch losses and per-batch accuracies.
epochs = 5
for e in range(epochs):
    # Training mode only needs to be set once per epoch, not once per batch.
    FlatNet.train()
    total_train_loss = 0
    total_train_acc = 0
    for batch_x, batch_y in tqdm(train_dataloader):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        y_logits = FlatNet(batch_x)
        train_loss = loss_fn(y_logits, batch_y)
        # Accumulate a detached copy: adding the live loss tensor would chain
        # each batch's autograd history onto the running total for the whole
        # epoch. Staying on-device (no .item()) also avoids a sync per step.
        total_train_loss += train_loss.detach()

        # softmax is monotonic, so argmax over the logits picks the same class.
        train_acc = acc_fn(torch.argmax(y_logits, dim=1), batch_y)
        total_train_acc += train_acc

        optim_fn.zero_grad()
        train_loss.backward()
        optim_fn.step()

    # The accumulators carry no autograd history, so no inference_mode guard
    # is needed for the averaging below.
    total_train_loss = total_train_loss / len(train_dataloader)
    total_train_acc = total_train_acc / len(train_dataloader)
    print("Total Train loss: ",total_train_loss, "Train Acc: ",total_train_acc)
        

  0%|          | 0/1875 [00:00<?, ?it/s]

100%|██████████| 1875/1875 [00:06<00:00, 282.75it/s]


Total Train loss:  tensor(0.6149, device='cuda:0') Train Acc:  tensor(0.7737, device='cuda:0')


100%|██████████| 1875/1875 [00:06<00:00, 290.89it/s]


Total Train loss:  tensor(0.4626, device='cuda:0') Train Acc:  tensor(0.8386, device='cuda:0')


100%|██████████| 1875/1875 [00:06<00:00, 293.68it/s]


Total Train loss:  tensor(0.4219, device='cuda:0') Train Acc:  tensor(0.8511, device='cuda:0')


100%|██████████| 1875/1875 [00:06<00:00, 296.61it/s]


Total Train loss:  tensor(0.3933, device='cuda:0') Train Acc:  tensor(0.8609, device='cuda:0')


100%|██████████| 1875/1875 [00:06<00:00, 293.88it/s]

Total Train loss:  tensor(0.3895, device='cuda:0') Train Acc:  tensor(0.8627, device='cuda:0')





In [24]:
# for batch_test_x, batch_test_y in tqdm(test_dataloader):
#     print(batch_test_x)
#     print(batch_test_y)
#     break

In [23]:
# Evaluate FlatNet on the test loader and report the per-sample mean loss and
# accuracy.
total_test_loss, total_test_acc = 0, 0
num_test_samples = 0

# Switch to evaluation behaviour and disable autograd for the whole pass so no
# graph is built or retained while the metrics are accumulated.
FlatNet.eval()
with torch.inference_mode():
    for batch_test_x, batch_test_y in tqdm(test_dataloader):
        batch_test_x = batch_test_x.to(device)
        batch_test_y = batch_test_y.to(device)
        n_in_batch = len(batch_test_y)

        y_logits = FlatNet(batch_test_x)
        # loss_fn returns the batch mean (default reduction); re-weight by the
        # batch size so a shorter final batch is not over-represented.
        test_loss = loss_fn(y_logits, batch_test_y)
        total_test_loss += test_loss * n_in_batch

        # softmax is monotonic, so argmax over the logits picks the same class.
        test_acc = acc_fn(torch.argmax(y_logits, dim=1), batch_test_y)
        total_test_acc += test_acc * n_in_batch

        num_test_samples += n_in_batch

    # Divide by the number of samples seen, not the number of batches.
    total_test_loss = total_test_loss / num_test_samples
    total_test_acc = total_test_acc / num_test_samples
    print("Total Test loss: ",total_test_loss, "Test Acc: ",total_test_acc)

100%|██████████| 313/313 [00:00<00:00, 399.57it/s]

Total Test loss:  tensor(0.4177, device='cuda:0') Test Acc:  tensor(0.8568, device='cuda:0')



