# 0. Imports

In [22]:
# imports
# imports

import torch
from torch import nn
from torchvision.datasets import FashionMNIST
from torchvision import transforms
from torch.utils.data import random_split, DataLoader
from torchsummary import summary
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from tqdm.auto import tqdm
import time
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
# for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x1a4fcd2a910>

In [3]:
# setup device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device)
device

device(type='cuda')

# 1. Dataset and Dataloader

In [5]:
# get fashion mnsist
train_dataset = FashionMNIST("./data", download=True, transform=transforms.ToTensor())
val_dataset = FashionMNIST("./data", download=True, train=False, transform=transforms.ToTensor())
len(train_dataset), len(val_dataset) 

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:08<00:00, 3073254.49it/s]


Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 118867.13it/s]


Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:02<00:00, 1607330.02it/s]


Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<?, ?it/s]

Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw






(60000, 10000)

In [8]:
train_dataset[0][0].shape

torch.Size([1, 28, 28])

In [6]:
# mapping
train_dataset.class_to_idx

{'T-shirt/top': 0,
 'Trouser': 1,
 'Pullover': 2,
 'Dress': 3,
 'Coat': 4,
 'Sandal': 5,
 'Shirt': 6,
 'Sneaker': 7,
 'Bag': 8,
 'Ankle boot': 9}

In [7]:
# create dataloaders
batch_size = 128
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# 2. Create CNN

In [20]:
class FashionClassifier(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding="same"),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding="same"),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding="same"),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.block_4 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=128*3*3, out_features=64),
            nn.ReLU(),
        )
        self.last_layer = nn.Linear(in_features=64, out_features=10)
        
    def forward(self, x):
        features = self.block_4(self.block_3(self.block_2(self.block_1(x))))
        activations = self.last_layer(features)
        return (activations, features)

In [21]:
model = FashionClassifier().to(device)
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
       BatchNorm2d-2           [-1, 32, 28, 28]              64
              ReLU-3           [-1, 32, 28, 28]               0
         MaxPool2d-4           [-1, 32, 14, 14]               0
            Conv2d-5           [-1, 64, 14, 14]          18,496
       BatchNorm2d-6           [-1, 64, 14, 14]             128
              ReLU-7           [-1, 64, 14, 14]               0
         MaxPool2d-8             [-1, 64, 7, 7]               0
            Conv2d-9            [-1, 128, 7, 7]          73,856
      BatchNorm2d-10            [-1, 128, 7, 7]             256
             ReLU-11            [-1, 128, 7, 7]               0
        MaxPool2d-12            [-1, 128, 3, 3]               0
          Flatten-13                 [-1, 1152]               0
           Linear-14                   

# 3. Training Loop

In [23]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device
              ):
    
    start_time = time.time()

    train_loss = 0    
    model.to(device)
    model.train()
    
    for (X, y) in data_loader:
        # send data to GPU
        X, y = X.to(device), y.to(device)
        # X, y = X.to(device), y.type(torch.LongTensor).to(device)
        
        # 1. forward pass
        y_pred, _ = model(X)

        # 2. calculate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss
        
        # 3. optimizer zero grad
        optimizer.zero_grad()
        
        # 4. loss backward
        loss.backward()
        
        # 5. optimizer step
        optimizer.step()
    
    train_loss /= len(data_loader)

    end_time = time.time()

    return {"avg_batch_loss": train_loss, "time": (end_time - start_time)* 10**3}

def valid_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               device: torch.device
              ):
    
    # send the model to device
    model.to(device)

    # send the model in eval mode
    model.eval()

    # for confusion matrix and accuracy
    y_true = torch.Tensor([]).to(device)
    y_pred = torch.Tensor([]).to(device)

    with torch.inference_mode(): 
        for X, y in data_loader:
            # Send data to GPU
            X, y = X.to(device), y.to(device)
            
            # 1. Forward pass
            test_pred, _ = model(X)
            
            y_true = torch.cat((y_true, y), dim=0)
            y_pred = torch.cat((y_pred, test_pred.argmax(axis=1)), dim=0)
        
        # send back to cpu
        y_true = y_true.cpu()
        y_pred = y_pred.cpu()

        return {"accuracy": accuracy_score(y_true, y_pred), "confusion_matrix": confusion_matrix(y_true, y_pred, normalize="true")}


In [24]:
# create loss_fn
loss_fn = CrossEntropyLoss()

In [25]:
# create optimizer

lr = 3.2 * (10**-4)
optimizer = Adam(model.parameters(), lr=lr)

In [26]:
epochs = 32

for epoch in tqdm(range(epochs)):
        tres = train_step(model, train_dataloader, loss_fn, optimizer, device)
        print(f"epoch: {epoch}")
        print(f"avg_batch_loss: {tres['avg_batch_loss']}")
        print(f"time: {tres['time']}")   
        print("")

  0%|          | 0/32 [00:00<?, ?it/s]

epoch: 0
avg_batch_loss: 0.4434037506580353
time: 12172.55187034607

epoch: 1
avg_batch_loss: 0.267421156167984
time: 10890.377283096313

epoch: 2
avg_batch_loss: 0.22502896189689636
time: 10839.633703231812

epoch: 3
avg_batch_loss: 0.1948803961277008
time: 11439.167499542236

epoch: 4
avg_batch_loss: 0.16981291770935059
time: 11872.137546539307

epoch: 5
avg_batch_loss: 0.14761732518672943
time: 11043.886423110962

epoch: 6
avg_batch_loss: 0.12854616343975067
time: 11101.792097091675

epoch: 7
avg_batch_loss: 0.11301642656326294
time: 11059.082984924316

epoch: 8
avg_batch_loss: 0.10432501137256622
time: 11201.534271240234

epoch: 9
avg_batch_loss: 0.09881874918937683
time: 10837.589025497437

epoch: 10
avg_batch_loss: 0.08720887452363968
time: 11085.078716278076

epoch: 11
avg_batch_loss: 0.08123228698968887
time: 11431.962251663208

epoch: 12
avg_batch_loss: 0.08062945306301117
time: 11220.621347427368

epoch: 13
avg_batch_loss: 0.0734601616859436
time: 12004.87995147705

epoch: 14

In [27]:
# validation results
vres = valid_step(model, val_dataloader, device)
print(f"accuracy: {vres['accuracy']}")
print(f"confusion_matrix: \n{vres['confusion_matrix']}")

accuracy: 0.9071
confusion_matrix: 
[[0.878 0.    0.024 0.022 0.005 0.001 0.067 0.    0.003 0.   ]
 [0.005 0.974 0.001 0.009 0.005 0.    0.006 0.    0.    0.   ]
 [0.014 0.001 0.884 0.008 0.058 0.    0.035 0.    0.    0.   ]
 [0.022 0.003 0.015 0.877 0.046 0.001 0.036 0.    0.    0.   ]
 [0.002 0.    0.055 0.014 0.896 0.    0.032 0.    0.001 0.   ]
 [0.002 0.    0.    0.    0.    0.971 0.    0.02  0.    0.007]
 [0.115 0.    0.079 0.026 0.106 0.    0.67  0.    0.003 0.001]
 [0.    0.    0.    0.    0.    0.006 0.    0.968 0.    0.026]
 [0.006 0.001 0.001 0.004 0.005 0.002 0.006 0.002 0.972 0.001]
 [0.    0.    0.    0.    0.    0.003 0.001 0.015 0.    0.981]]


# 4. Using CNN as Feature Extractor

In [34]:
# get features
def get_features(model, data_loader):
    # send the model to device
    model.to(device)

    # send the model in eval mode
    model.eval()

    # for confusion matrix and accuracy
    all_y = torch.Tensor([]).to(device)
    all_x = torch.Tensor([]).to(device)

    with torch.inference_mode(): 
        for X, y in data_loader:
            # Send data to GPU
            X, y = X.to(device), y.to(device)
            
            # 1. Forward pass
            _, features = model(X)
            
            all_y = torch.cat((all_y, y), dim=0)
            all_x = torch.cat((all_x, features), dim=0)
        
        # send back to cpu
        return (all_x.cpu(), all_y.cpu())   

In [35]:
train_x, train_y = get_features(model, train_dataloader)
train_x.shape, train_y.shape

(torch.Size([60000, 64]), torch.Size([60000]))

In [36]:
val_x, val_y = get_features(model, val_dataloader)
val_x.shape, val_y.shape

(torch.Size([10000, 64]), torch.Size([10000]))

In [41]:
# try out sklearn models
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 

fmodel = LogisticRegression(max_iter=256)
fmodel.fit(train_x, train_y)
print("accuracy:", accuracy_score(val_y, fmodel.predict(val_x)))

accuracy: 0.9107


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [42]:
# random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score 

fmodel = RandomForestClassifier()
fmodel.fit(train_x, train_y)
print("accuracy:", accuracy_score(val_y, fmodel.predict(val_x)))

accuracy: 0.9132


In [43]:
# gaussian NB
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score 

fmodel = GaussianNB()
fmodel.fit(train_x, train_y)
print("accuracy:", accuracy_score(val_y, fmodel.predict(val_x)))

accuracy: 0.9066


In [44]:
# multinomial NB
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score 

fmodel = MultinomialNB()
fmodel.fit(train_x, train_y)
print("accuracy:", accuracy_score(val_y, fmodel.predict(val_x)))

accuracy: 0.9086
