In [3]:
import torch
import torchvision
from torchvision.transforms import transforms

import matplotlib.pyplot as plt

In [4]:
torch.cuda.is_available()

True

In [5]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

'cuda:0'

In [6]:
transforms = transforms.ToTensor()

batch_size = 8

trainset = torchvision.datasets.FashionMNIST(root='../data', train=True, download=True, transform=transforms)
testset = torchvision.datasets.FashionMNIST(root='../data', train=False, download=True, transform=transforms)

In [7]:
labels = ('T-shirt/top', 'Trouser/pants','Pullover shirt','Dress','Coat','Sandal',
           'Shirt','Sneaker','Bag','Ankle boot')

In [8]:
len(trainset)

60000

In [9]:
# split the train set into train and validation
trainset, valset = torch.utils.data.random_split(trainset, [50000, 10000])

In [10]:

len(trainset), len(valset), len(testset)

(50000, 10000, 10000)

In [11]:
print(f"Number of batches in the training set: { int(len(trainset) / batch_size) } ")

Number of batches in the training set: 6250 


In [12]:
# to iterate through the sets, we need to wrap them in a data loader
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=2)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

In [13]:
# check images
train_iter = iter(trainset)

img, label = next(train_iter)

img.shape, label

(torch.Size([1, 28, 28]), 9)

## Modeling

In [14]:
import torch.nn as nn
import torch.nn.functional as F

In [15]:
class NNet(nn.Module):
    # what it has
    def __init__(self) -> None:
        super().__init__()
        
        # input -> (1, 28, 28) Meaning a gray scale image with 1 channel
        # output -> the depth?  
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=256, kernel_size=3)
        self.pool1 = nn.MaxPool2d(2, 2)
        
        self.conv2 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3)
        self.pool2 = nn.MaxPool2d(2, 2)
        
        self.conv3 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=2)
        self.pool3 = nn.MaxPool2d(2, 2)
        
        self.flatten = nn.Flatten()
        
        self.fc1 = nn.Linear(in_features=4096, out_features=1024)
        self.drop1 = nn.Dropout(p=0.3)
        
        self.fc2 = nn.Linear(in_features=1024, out_features=1024)
        self.drop2 = nn.Dropout(p=0.3)
        
        self.out = nn.Linear(in_features=1024, out_features=10)
        
    # what to do 
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        
        x = self.flatten(x)
        
        x = F.relu(self.fc1(x))
        x = self.drop1(x)
        
        x = F.relu(self.fc2(x))
        x = self.drop2(x)
        
        x = self.out(x)
        
        return x

In [16]:
# init the network
net = NNet()
net.to(device)



NNet(
  (conv1): Conv2d(1, 256, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(512, 1024, kernel_size=(2, 2), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=4096, out_features=1024, bias=True)
  (drop1): Dropout(p=0.3, inplace=False)
  (fc2): Linear(in_features=1024, out_features=1024, bias=True)
  (drop2): Dropout(p=0.3, inplace=False)
  (out): Linear(in_features=1024, out_features=10, bias=True)
)

In [17]:
# data is a tuple of input and label
for i, data in enumerate(trainloader):
    inputs, labels = data[0].to(device), data[1].to(device)
    print(f"input shape: {inputs.shape}")
    print(f"after network shape: {net(inputs).shape}")
    break


# Result: input shape: torch.Size([8, 1, 28, 28]);   torch.Size([8, 256, 26, 26])
# 8 because we have a batch of 8

input shape: torch.Size([8, 1, 28, 28])
after network shape: torch.Size([8, 10])


In [18]:
num_params = 0
for x in net.parameters():
    num_params += len(torch.flatten(x))
    
print(f"Number of params: {num_params:,}")

Number of params: 8,536,074


In [19]:
# optimisers and loss fucntion
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

In [31]:
print(type(trainloader))

<class 'torch.utils.data.dataloader.DataLoader'>


In [30]:

print(isinstance(optimizer, torch.optim.Optimizer))

True


In [32]:
def train_epoch():
    # set NN into training mode
    net.train(True)
    
    running_loss = 0.0
    running_acc = 0.0
    
    # iterate over the data loader
    for batch_index, data in enumerate(trainloader):
        inputs, labels = data[0].to(device), data[1].to(device)
        
        optimizer.zero_grad()
        outputs = net(inputs) # shape: [batch_size, 10]
        correct = torch.sum(labels == torch.argmax(outputs, dim=1)).item()
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        
        running_acc += correct / batch_size
        running_loss += loss.item()

        # print every 500 batches
        if batch_index % 500 == 499:
            running_loss_500_batches = running_loss / 500
            running_acc_500_batches = (running_acc / 500) * 100
            print(f"Running loss: {running_acc_500_batches:.2f} - Running acc: {running_acc_500_batches:.2f}")
        
            # reset running loss and acc
            running_loss = 0.0
            running_acc = 0.0
    print()
        

def validate_epoch():
    # set network to validation mode
    net.train(False)
    running_loss = 0.0
    running_acc = 0.0
    
    for batch_index, data in enumerate(valloader):
        inputs, labels = data[0].to(device), data[1].to(device)
        
        # dont worry about calculating gradients
        with torch.no_grad():
            outputs = net(inputs)
            correct = torch.sum(labels == torch.argmax(outputs, dim=1)).item()
            loss = criterion(outputs, labels)
            
            running_acc += correct / batch_size
            running_loss += loss.item()
        # calculate average loss and acc
    avg_loss_all_batches = running_loss / len(valloader)
    avg_acc_all_batches =( running_acc / len(valloader)) * 100
    print(f"Val loss: {avg_loss_all_batches:.2f} - Val acc: {avg_acc_all_batches:.2f}")
    print()

In [33]:
# training loop

num_epochs = 10

for epoch in range(num_epochs):
    print(f"Epoch: {epoch}")
    train_epoch()
    validate_epoch()
    
print("Finished!!!")

Epoch: 0
Running loss: 95.97 - Running acc: 95.97
Running loss: 95.78 - Running acc: 95.78
Running loss: 95.93 - Running acc: 95.93
Running loss: 95.40 - Running acc: 95.40
Running loss: 95.25 - Running acc: 95.25
Running loss: 95.93 - Running acc: 95.93
Running loss: 96.17 - Running acc: 96.17
Running loss: 95.43 - Running acc: 95.43
Running loss: 95.17 - Running acc: 95.17
Running loss: 94.70 - Running acc: 94.70
Running loss: 94.90 - Running acc: 94.90
Running loss: 94.83 - Running acc: 94.83

Val loss: 0.27 - Val acc: 92.06

Epoch: 1
Running loss: 96.40 - Running acc: 96.40
Running loss: 96.38 - Running acc: 96.38
Running loss: 96.45 - Running acc: 96.45
Running loss: 96.47 - Running acc: 96.47
Running loss: 96.03 - Running acc: 96.03
Running loss: 96.47 - Running acc: 96.47
Running loss: 96.20 - Running acc: 96.20
Running loss: 95.05 - Running acc: 95.05
Running loss: 95.78 - Running acc: 95.78
Running loss: 95.85 - Running acc: 95.85
Running loss: 95.55 - Running acc: 95.55
Runni

In [None]:
# save model
torch.save(net, '../models/fashnion_mnist.pth')