In [1]:
import torch
import torchvision

In [None]:
# Downloading dataset
train_set = torchvision.datasets.FashionMNIST(root="data/", download=True)

# Defining util funtions

In [1]:
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import cross_val_score

def normalize(images):
    return ((images / 255.) - .5) *2

def load_fashionmnist(path, kind='train'):
    labels_path = os.path.join(path,
                              f'{kind}-labels-idx1-ubyte')
    images_path = os.path.join(path,
                              f'{kind}-images-idx3-ubyte')
    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
        
    with open(images_path, 'rb') as imgpath:
        magic, n, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)
        
    return images, labels

def plot_fashion_classes_samples(images, labels):
    fig, ax = plt.subplots(nrows=2, ncols=5, sharex=True, sharey=True)
    ax = ax.flatten()
    for i in range(10):
        img = images[labels == i][0].reshape(28, 28)
        ax[i].imshow(img, cmap='Greys')
        
    ax[0].set_xticks([])
    ax[0].set_yticks([])
    plt.tight_layout()
    plt.show()
    
def plot_fashion_samples_5x5(images, label):
    fig, ax = plt.subplots(nrows=5, ncols=5, sharex=True, sharey=True)
    ax = ax.flatten()
    for i in range(25):
        img = images[labels == label][i].reshape(28, 28)
        ax[i].imshow(img, cmap='Greys')
        
    ax[0].set_xticks([])
    ax[0].set_yticks([])
    plt.tight_layout()
    plt.show()
    
def classify_acc_f1(classifier, X_test, y_test):
    y_pred = classifier.predict(X_test)
    f1 = f1_score(y_test, y_pred, average='weighted')
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy, f1

def batch_generator(X, y, batch_size=64, shuffle=False, random_seed=None):
    idx = np.arange(y.shape[0])
    
    if shuffle:
        rng = np.random.RandomState(random_seed)
        rng.shuffle(idx)
        X = X[idx]
        y = y[idx]
    
    for i in range(0, X.shape[0], batch_size):
        yield (X[i: i + batch_size, :], y[i: i + batch_size])

## Visualisation

In [None]:
images, labels = load_fashionmnist('data/FashionMNIST/raw/')

In [None]:
plot_fashion_classes_samples(images, labels)

In [None]:
plot_fashion_samples_5x5(images, 0)

# Logistic Regression Model

## Data extraction

In [2]:
X_train, y_train = load_fashionmnist('data/FashionMNIST/raw/')
X_test, y_test = load_fashionmnist('data/FashionMNIST/raw/', kind='t10k')
X_train = normalize(X_train)
X_test = normalize(X_test)

## Features Extraction

In [12]:
from sklearn.decomposition import PCA
import numpy as np

pca = PCA(n_components=20)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.fit_transform(X_test)

## Model training and evaluation

### With Principal Component Analysis

In [13]:
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression

param_grid = {'C': [0.1, 1.0, 10],
              'fit_intercept': [True, False],
              'intercept_scaling': [0.1, 1.0, 2.0]}

classifier = LogisticRegression(n_jobs=-1)

gs = GridSearchCV(estimator=classifier,
                  param_grid=param_grid,
                  scoring='accuracy',
                  cv=5,
                  n_jobs=-1)

gs.fit(X_train_pca, y_train)
print(gs.best_score_)
print(gs.best_params_)

0.8090499999999998
{'C': 0.1, 'fit_intercept': True, 'intercept_scaling': 0.1}


In [14]:
lr_best = gs.best_estimator_
accuracy, f1 = classify_acc_f1(lr_best, X_test_pca, y_test)
print(f'F1 score: {f1}')
print(f'Accuracy: {accuracy}')

F1 score: 0.4287301458912836
Accuracy: 0.438


### Without Principal Component Analysis

In [4]:
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression

param_grid = {'C': [0.1, 1.0, 10],
              'fit_intercept': [True, False],
              'intercept_scaling': [0.1, 1.0, 2.0]}

classifier = LogisticRegression(n_jobs=-1)

gs = GridSearchCV(estimator=classifier,
                  param_grid=param_grid,
                  scoring='accuracy',
                  cv=5,
                  n_jobs=-1)

gs.fit(X_train, y_train)
print(gs.best_score_)
print(gs.best_params_)

0.8567333333333333
{'C': 0.1, 'fit_intercept': False, 'intercept_scaling': 0.1}


In [10]:
lr_best = gs.best_estimator_
acc, f1_ = classify_acc_f1(lr_best, X_test, y_test)
print(f'F1 score: {f1_}')
print(f'Accuracy: {accuracy}')

F1 score: 0.8431083469320838
Accuracy: 0.8439


# Convolutional Neural Network Model

## Features Extraction

In [15]:
X_train, y_train = load_fashionmnist('data/FashionMNIST/raw/')
X_test, y_test = load_fashionmnist('data/FashionMNIST/raw/', kind='t10k')

X_train = X_train.reshape(28, 28, 60000)
X_test = X_test.reshape(28, 28, 10000)

X_train_centered = normalize(X_train)
X_test_centered = normalize(X_test)

# mean_vals = np.mean(X_train, axis=0)
# std_val = np.std(X_train)
# X_train_standarized = (X_train - mean_vals) / std_val
# X_test_standarized = (X_test - mean_vals) / std_val

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose(
    [transforms.ToTensor()]
)

batch_size = 64

train_set = torchvision.datasets.FashionMNIST(root="data/", train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4)

test_set = torchvision.datasets.FashionMNIST(root="data/", train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4)

## Model

### Simple CNN Conv(1, 32, 5) Pool(2, 2) Conv(32, 64, 5) Pool(2, 2) FC(1024, 10) no Batch Norm, no Dropout

In [16]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(1, 32, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.fc1 = nn.Linear(64 * 4 * 4, 10)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x

### CNN Conv(1, 32, 5) Pool(2, 2) Conv(32, 64, 5) Pool(2, 2) FC(1024, 10) Batch Norm, no Dropout

In [44]:
import torch.nn as nn

class CNNBatchNorm(nn.Module):
    def __init__(self):
        super().__init__()

        self._conv_layer = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(32, 64, 5),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            nn.Flatten()
        )
        
        self._linear_layer = nn.Sequential(
            nn.Linear(1024, 10)
        )
    
    def forward(self, x):
        x = self._conv_layer(x)
        x = self._linear_layer(x)
        return x

### CNN Conv(1, 32, 5) Pool(2, 2) Conv(32, 64, 5) Pool(2, 2) FC(1024, 10) Batch Norm, Dropout(0.3)

In [50]:
import torch.nn as nn

class CNNBatchNormDropout(nn.Module):
    def __init__(self):
        super().__init__()

        self._conv_layer = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(32, 64, 5),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            nn.Flatten()
        )
        
        self._linear_layer = nn.Sequential(
            nn.Linear(1024, 10),
            nn.Dropout(0.3)
        )
    
    def forward(self, x):
        x = self._conv_layer(x)
        x = self._linear_layer(x)
        return x

### CNN Conv(1, 32, 5) Pool(2, 2) Conv(32, 64, 5) Pool(2, 2) FC(1024, 512) FC(512, 256) FC(256, 10) Batch Norm, no Dropout

In [86]:
import torch.nn as nn

class CNNBatchNormFC3(nn.Module):
    def __init__(self):
        super().__init__()

        self._conv_layer = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(32, 64, 5),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            nn.Flatten()
        )
        
        self._linear_layer = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, 10)
        )
    
    def forward(self, x):
        x = self._conv_layer(x)
        x = self._linear_layer(x)
        return x

### CNN Conv(1, 64, 3) Pool(2, 2) Conv(64, 32, 3) Pool(2, 2) FC(16, 10) Batch Norm, Dropout:

In [121]:
import torch.nn as nn

class CNNBatchNorm2(nn.Module):
    def __init__(self):
        super().__init__()

        self._conv_layer = nn.Sequential(
            nn.Conv2d(1, 64, 3),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(64, 32, 3),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),
            nn.Flatten()
        )
        
        self._linear_layer = nn.Sequential(
            nn.Linear(800, 10),
            nn.Dropout(0.1)
        )
    
    def forward(self, x):
        x = self._conv_layer(x)
        x = self._linear_layer(x)
        return x

### CNN Vgg Like Conv(1, 16, 3, pad='same') Conv(16, 16, 3, pad='same') Pool(2) Conv(16, 32, 3) Conv(32, 32, 3) Pool(2) FC(800, 512) FC(512, 256) FC(256, 10)

In [65]:
import torch.nn as nn

class CNNVggLike(nn.Module):
    def __init__(self):
        super().__init__()

        self._conv_layer = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            
            nn.Conv2d(16, 16, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(16, 32, 3),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            
            nn.Conv2d(32, 32, 3),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            
            nn.MaxPool2d(2, 2),
            
            nn.Flatten()
        )
        
        self._linear_layer = nn.Sequential(
            nn.Linear(800, 512),
            nn.ReLU(),
            
            nn.Linear(512, 256),
            nn.ReLU(),
            
            nn.Linear(256, 10)
        )
    
    def forward(self, x):
        x = self._conv_layer(x)
        x = self._linear_layer(x)
        return x

### CNN Vgg Like Conv(1, 16, 3, pad='same') Conv(16, 16, 3, pad='same') Pool(2) Conv(16, 32, 5) Conv(32, 32, 5) Pool(2) FC(800, 512) FC(512, 256) FC(256, 10)

In [25]:
import torch.nn as nn

class CNNVggLike2(nn.Module):
    def __init__(self):
        super().__init__()

        self._conv_layer = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=(1, 1)),
            nn.ReLU(),
            #nn.BatchNorm2d(16),
            
            nn.Conv2d(16, 16, 3, padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(16, 32, 5, padding=(1, 1)),
            nn.ReLU(),
            #nn.BatchNorm2d(32),
            
            nn.Conv2d(32, 32, 5, padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            
            nn.MaxPool2d(2, 2),
            
            nn.Flatten()
        )
        
        self._linear_layer = nn.Sequential(
            nn.Linear(800, 800),
            nn.ReLU(),
            
            nn.Linear(800, 512),
            nn.ReLU(),
            
            nn.Linear(512, 10)
        )
    
    def forward(self, x):
        x = self._conv_layer(x)
        x = self._linear_layer(x)
        return x

## Model Training

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [26]:
import torch.optim as optim

cnn = CNNVggLike2()
cnn = cnn.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.001)

In [27]:
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = cnn(inputs)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 200 == 199:
            print(f'Epoch: [{epoch + 1}, {i + 1}], loss: {running_loss / 199}')
            running_loss = 0.0

print('Finished Training')

Epoch: [1, 200], loss: 0.5500974816143812
Epoch: [1, 400], loss: 0.36868770188422656
Epoch: [1, 600], loss: 0.32034970696397763
Epoch: [1, 800], loss: 0.3086476062410441
Epoch: [2, 200], loss: 0.259161011957044
Epoch: [2, 400], loss: 0.24775518353410703
Epoch: [2, 600], loss: 0.2544451482481693
Epoch: [2, 800], loss: 0.23874065437209066
Epoch: [3, 200], loss: 0.20298004468631506
Epoch: [3, 400], loss: 0.20381918107744437
Epoch: [3, 600], loss: 0.20931758978708306
Epoch: [3, 800], loss: 0.21406435436714236
Epoch: [4, 200], loss: 0.16864517785084607
Epoch: [4, 400], loss: 0.17707917015606434
Epoch: [4, 600], loss: 0.18489056031907622
Epoch: [4, 800], loss: 0.17887498951287725
Epoch: [5, 200], loss: 0.14686024627980576
Epoch: [5, 400], loss: 0.14442190857388865
Epoch: [5, 600], loss: 0.15751738087671935
Epoch: [5, 800], loss: 0.15232041363964727
Epoch: [6, 200], loss: 0.12163447127232899
Epoch: [6, 400], loss: 0.12510268421066767
Epoch: [6, 600], loss: 0.1446788756073869
Epoch: [6, 800], 

torch.save(cnn.state_dict(), 'models/vgg_cnn.pth')

## Model Evaluation

cnn = CNNBatchNorm()
cnn.load_state_dict(torch.load('models/batch_norm_cnn.pth'))

In [28]:
from torch import Tensor

correct = 0
total = 0
with torch.no_grad():
    y_pred = []
    y_test = []
    for data in testloader:
        images, labels = data
        images = images.to(device)
        y_test.extend(labels)
        
        outputs = cnn(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        y_pred.extend(Tensor.cpu(predicted))
        
f1 = f1_score(y_test, y_pred, average='weighted')
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(f'F1-Score: {f1}')

Accuracy: 0.9203
F1-Score: 0.9196067290354151


# Results

##### LogisticRegression:
Accuracy: 84.39%<br>
F1-Score: 84.31%<br>

##### Simple CNN Conv(1, 32, 5) Pool(2, 2) Conv(32, 64, 5) Pool(2, 2) FC(1024, 10) no Batch Norm, no Dropout:
Accuracy: 87.75%<br>
F1-Score: 87.72%<br>

##### CNN Conv(1, 32, 5) Pool(2, 2) Conv(32, 64, 5) Pool(2, 2) FC(1024, 10) Batch Norm, no Dropout:
Accuracy: 90.07%<br>
F1-Score: 90.13%<br>

##### CNN Conv(1, 32, 5) Pool(2, 2) Conv(32, 64, 5) Pool(2, 2) FC(1024, 10) Batch Norm, Dropout(0.3):
Accuracy: 72.13%<br>
F1-Score: 72.73%<br>

##### CNN Conv(1, 32, 5) Pool(2, 2) Conv(32, 64, 5) Pool(2, 2) FC(1024, 512) FC(512, 256) FC(256, 10) Batch Norm, no Dropout:
Accuracy: 90.69%<br>
F1-Score: 90.67%<br>

##### CNN Conv(1, 64, 3) Pool(2, 2) Conv(64, 32, 3) Pool(2, 2) FC(16, 10) Batch Norm, no Dropout:
Accuracy: 89.83%<br>
F1-Score: 89.91%<br>

##### CNN Conv(1, 64, 3) Pool(2, 2) Conv(64, 32, 3) Pool(2, 2) FC(16, 10) Batch Norm, Dropout:
Accuracy: 86.61%<br>
F1-Score: 86.61%<br>

##### CNN Vgg Like Conv(1, 16, 3, pad='same') Conv(16, 16, 3, pad='same') Pool(2) Conv(16, 32, 3) Conv(32, 32, 3) Pool(2) FC(800, 512) FC(512, 256) FC(256, 10)
Accuracy: 92.20%<br>
F1-Score: 92.12%<br>

##### CNN Vgg Like Conv(1, 16, 3, pad='same') Conv(16, 16, 3, pad='same') Pool(2) Conv(16, 32, 5, pad=(1, 1)) Conv(32, 32, 5, pad=(1, 1)) Pool(2) FC(800, 512) FC(512, 256) FC(256, 10)
Accuracy: 92.62%<br>
F1-Score: 92.61%<br>