In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False

import matplotlib.pyplot as plt

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0' 

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

device = 'cuda' if torch.cuda.is_available() else 'cpu'

import random

random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

# Load & Set Data

## Download Data

In [3]:
batch_size = 512

fmnist_train = dset.FashionMNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
fmnist_test = dset.FashionMNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

## Ready for K-Fold CV

In [4]:
from sklearn.model_selection import StratifiedKFold

def reset_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform(m.weight)

n_split = 5
skf = StratifiedKFold(n_splits = n_split, shuffle = True)



# Test
for fold_num, (train_idx, valid_idx) in enumerate(skf.split(np.arange(fmnist_train.__len__()), fmnist_train.targets)) : 
    print(f'Fold {fold_num} Initiated')
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
    valid_subsampler = torch.utils.data.SubsetRandomSampler(valid_idx)

    train_data = DataLoader(dataset=fmnist_train, batch_size = batch_size, sampler = train_subsampler)
    valid_data = DataLoader(dataset=fmnist_train, batch_size = batch_size, sampler = valid_subsampler)
    test_data  = DataLoader(dataset = fmnist_test, batch_size  = batch_size , shuffle = False)
    

    dataloaders = {}
    dataloaders['train'] = train_data
    dataloaders['valid'] = valid_data
    dataloaders['test'] = test_data
    # model.apply(reset_weights)

    break;
    

Fold 0 Initiated


# Modeling

## Constructing Model

In [5]:
class FC_Block(nn.Module) : 
    def __init__(self, input_dim, output_dim) : 
        super(FC_Block, self).__init__()

        self.linear = nn.Linear(input_dim, output_dim, bias = True)
        self.relu = nn.functional.relu
        self.batch_norm = nn.BatchNorm1d(output_dim)
        self.dropout = nn.Dropout(0.1)
    
    def forward(self, x) : 
        x = self.linear(x)
        x = self.relu(x)
        x = self.batch_norm(x)
        x = self.dropout(x)
        return x

In [15]:
class AutoEncoder(nn.Module) : 
    def __init__(self) : 
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            FC_Block(28*28,512),
            FC_Block(512,256),
            FC_Block(256,64),
            FC_Block(64,16),
        )

        self.decoder = nn.Sequential(
            FC_Block(16,64),
            FC_Block(64,256),
            FC_Block(256,512),
            nn.Linear(512,28*28)
        )
    
    def forward(self,x) : 
        x = x.view(-1, 28*28)
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [16]:
class MLP_Classifier(nn.Module) : 
    def __init__(self) : 
        super(MLP_Classifier,self).__init__()
        self.fc1 = FC_Block(28*28, 512)
        self.fc2 = FC_Block(512,256)
        self.fc3 = FC_Block(256,128)
        self.fc4 = FC_Block(128, 64)
        self.out_linear = nn.Linear(64,10)
    
    def forward(self, x) :
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = self.out_linear(x)
        return x

In [17]:
class AE_MLP(nn.Module) : 
    def __init__(self) : 
        super(AE_MLP, self).__init__()
        self.AE = AutoEncoder()
        self.MLP = MLP_Classifier()
    def forward(self,x) : 
        x = x.view(-1,28*28)
        x = self.AE(x)
        x = self.MLP(x)
        return x

# Training

- https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

In [18]:
def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs, early_stop) : 
    import time
    import copy

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1e+4
    early_stop_epoch = 0

    for epoch in range(num_epochs) : 
        # print(f'Epoch {epoch}/{num_epochs -1}')
        # print('=' * 10)

        for phase in ['train','valid'] : 
            if phase == 'train' : 
                model.train()
            elif phase == 'valid' : 
                model.eval()

            running_loss = 0
            running_corr = 0

            for x,y in dataloaders[phase] : 
                x = x.to(device)
                y = y.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase =='train') : 
                    output = model(x)
                    loss = criterion(output, y)

                    if phase == 'train' : 
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * x.size(0)
                running_corr += sum(torch.argmax(output, 1) == y)

            if phase == 'train' : 
                scheduler.step()
            
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corr / len(dataloaders[phase].dataset)

            if phase == 'valid' and epoch_loss < best_loss : 
                print(f'On Epoch {epoch}, Best Model Saved with Valid Loss {round(epoch_loss, 4)} and Acc {round(epoch_acc.item(), 4)}')
                
                best_loss = epoch_loss
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                early_stop_epoch = 0
            elif phase == 'valid' : 
                early_stop_epoch += 1

        if early_stop_epoch >= early_stop : 
            "Early Stop Occured"
            break;
    time_elapsed = time.time() - since
    print(f'Training Complete in {time_elapsed//60}min {time_elapsed%60}sec')
    print(f'Best Validation Loss : {best_loss} with Accuracy {best_acc}')


    model.load_state_dict(best_model_wts)
    return model


In [19]:
def predict_test(model, dataloaders) : 
    with torch.no_grad():
        predictions = []
        for x,y in dataloaders['test'] : 
            x = x.view(-1,28*28).to(device)
            y = y.to(device)

            prediction = model(x)
            predictions.extend(prediction)
    return predictions

In [20]:
def return_dataloaders(train_idx, valid_idx) : 
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
    valid_subsampler = torch.utils.data.SubsetRandomSampler(valid_idx)

    train_data = DataLoader(dataset=fmnist_train, batch_size = batch_size, sampler = train_subsampler)
    valid_data = DataLoader(dataset=fmnist_train, batch_size = batch_size, sampler = valid_subsampler)
    test_data  = DataLoader(dataset = fmnist_test, batch_size  = batch_size , shuffle = False)
    

    dataloaders = {}
    dataloaders['train'] = train_data
    dataloaders['valid'] = valid_data
    dataloaders['test'] = test_data
    return dataloaders

In [21]:
test_predictions = []

n_split = 5
skf = StratifiedKFold(n_splits = n_split, shuffle = True)

# Test
for fold_num, (train_idx, valid_idx) in enumerate(skf.split(np.arange(fmnist_train.__len__()), fmnist_train.targets)) : 
    ae_mlp = AE_MLP().to(device)
    optimizer = optim.Adam(ae_mlp.parameters(), lr = 0.001)
    loss_fn = nn.CrossEntropyLoss().to(device)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    print(f'Fold {fold_num} Initiated')
    print('='*30)
    dataloaders = return_dataloaders(train_idx, valid_idx)
    ae_mlp = train_model(ae_mlp, dataloaders, loss_fn, optimizer, lr_scheduler, 100, early_stop = 5)
    test_prediction = predict_test(ae_mlp, dataloaders)
    test_predictions.append(test_prediction)
    

Fold 0 Initiated
On Epoch 0, Best Model Saved with Valid Loss 0.0962 and Acc 0.1665
On Epoch 1, Best Model Saved with Valid Loss 0.0865 and Acc 0.1695
On Epoch 3, Best Model Saved with Valid Loss 0.0858 and Acc 0.1696
On Epoch 5, Best Model Saved with Valid Loss 0.0812 and Acc 0.1711
On Epoch 6, Best Model Saved with Valid Loss 0.0739 and Acc 0.173
On Epoch 7, Best Model Saved with Valid Loss 0.0649 and Acc 0.1768
On Epoch 8, Best Model Saved with Valid Loss 0.0649 and Acc 0.1771
On Epoch 9, Best Model Saved with Valid Loss 0.0636 and Acc 0.1778
On Epoch 10, Best Model Saved with Valid Loss 0.0634 and Acc 0.1776
On Epoch 11, Best Model Saved with Valid Loss 0.0627 and Acc 0.1778
On Epoch 12, Best Model Saved with Valid Loss 0.0627 and Acc 0.1782
On Epoch 14, Best Model Saved with Valid Loss 0.0619 and Acc 0.1783
On Epoch 18, Best Model Saved with Valid Loss 0.0619 and Acc 0.1785
On Epoch 19, Best Model Saved with Valid Loss 0.0617 and Acc 0.1784
Training Complete in 2.0min 10.130306243

In [22]:
test_preds = []
for prediction in test_predictions : 
    test_preds.append(torch.cat([x for x in prediction], dim = 0).detach().cpu().numpy().reshape(-1,10))
test_pred = np.argmax(np.mean(test_preds, axis = 0), axis = 1)
test_accuracy = sum(test_pred == fmnist_test.targets.numpy()) / fmnist_test.__len__()
print('Test Accuracy : ', test_accuracy)

Test Accuracy :  0.8928
