In [None]:
!pip install efficientnet_pytorch

# Imports

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import transforms
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

from efficientnet_pytorch import EfficientNet

import os
import sys
import time
import random
from tqdm.auto import tqdm

# Settings

## Path Settings

In [None]:
# Input CSV files and the directory that receives model checkpoints.
TRAIN_DATA = '/kaggle/input/fashiondata/image_train_Kaggle.csv'
TEST_DATA = '/kaggle/input/fashiondata/image_test_Kaggle.csv'
SAVE_DIR = '/kaggle/working/saved_ckpt'

# makedirs(exist_ok=True) is idempotent and also creates missing parent
# directories, so re-running this cell can never fail.
os.makedirs(SAVE_DIR, exist_ok=True)

In [None]:
# Global run configuration.
SEED = 0  # handed to seed_everything() below
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # GPU when available, otherwise CPU
BATCH_SIZE = 128  # batch size passed to the DataLoaders
NUM_WORKERS = 4  # forwarded to DataLoader(num_workers=...)
PIN_MEMORY = True  # forwarded to DataLoader(pin_memory=...)

# Learning rate scheduling switches
lr_s = True  # gates creation and stepping of train_scheduler (MultiStepLR)
lr_w = True  # gates creation and stepping of warmup_scheduler (WarmUpLR)


def seed_everything(seed=0):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), then configures cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only child processes started afterwards pick this up; the hash seed of
    # the already running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run, which
    # defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False
    print('Set seed', seed)
    
seed_everything(SEED)

# Visualizing the Data

In [None]:
# Read the training and test CSV files configured in the path settings.
df_train = pd.read_csv(TRAIN_DATA)
df_test = pd.read_csv(TEST_DATA)

In [None]:
# Report how many rows each dataframe contains.
print(f'Number of training data items: {len(df_train)}')
print(f'Number of test data items: {len(df_test)}')

In [None]:
def plot_random_grid(df, n, m):
    """Plot an n x m grid of randomly chosen images from a dataframe.

    Args:
        df: Dataframe whose first column holds the label and whose remaining
            784 columns hold the pixels of one 28x28 image per row.
        n: Number of grid rows.
        m: Number of grid columns.
    """
    idxs = np.random.choice(len(df), n*m, replace=False)
    images = df.iloc[idxs, 1:].values.reshape(-1, 28, 28)
    labels = df.iloc[idxs, 0].values.astype(str)

    # squeeze=False keeps `axes` two-dimensional even for a single row/column.
    fig, axes = plt.subplots(n, m, figsize=(5,5), squeeze=False)
    for i in range(n):
        for j in range(m):
            # Row-major position of cell (i, j); every sampled image is shown
            # exactly once.
            k = i*m + j
            axes[i, j].imshow(images[k], cmap='gray')
            axes[i, j].axis('off')
            axes[i, j].set_title(labels[k])
            
plot_random_grid(df_train, 3, 4)

In [None]:
def plot_distrib(df):
    """Print and bar-plot the number of samples per class label.

    Args:
        df: Dataframe whose first column holds non-negative integer labels.
    """
    labels = df.iloc[:, 0].values.ravel()
    freq = np.bincount(labels)
    print('Class distribution:', freq)
    # bincount returns max(label) + 1 bins, so derive the x positions from its
    # length instead of assuming exactly ten classes.
    plt.bar(np.arange(len(freq)), freq)
    
plot_distrib(df_train)

# The Dataset

In [None]:
# Hold out 20% of the training rows for validation. A dedicated generator
# seeded with SEED keeps the split identical regardless of how much of the
# global NumPy random state earlier cells (e.g. the plotting cell) consumed.
split_rng = np.random.RandomState(SEED)
train_idx = np.arange(len(df_train))
val_idx = split_rng.choice(train_idx, int(len(df_train) * 0.2), replace=False)
# Everything that was not drawn for validation stays in the training split.
train_idx = train_idx[np.logical_not(np.isin(train_idx, val_idx))]
df_train_ = df_train.iloc[train_idx]
df_val_ = df_train.iloc[val_idx]

## Dataset Wrapper

In [None]:
class FashionDataset(Dataset):
    """Dataset over a dataframe of flattened 28x28 images.

    The first value of every row is the class label (``train=True``) or the
    sample id (``train=False``); the remaining values are the pixels.
    """

    def __init__(self, df, transforms=None, train=True):
        self.df = df
        self.transforms = transforms
        self.train = train

    def __len__(self):
        """Number of rows in the wrapped dataframe."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` in train mode, ``(image, id)`` otherwise."""
        values = self.df.iloc[index].values.ravel()
        # Position 0 carries the label in train mode and the sample id in
        # test mode; either way it is returned next to the image.
        target, pixels = values[0], values[1:]

        # Rebuild the 28x28 8-bit image and convert it to RGB.
        picture = Image.fromarray(pixels.reshape(28, 28).astype(np.uint8))
        picture = picture.convert('RGB')

        if self.transforms is not None:
            picture = self.transforms(picture)

        return picture, target

## Preprocessing

In [None]:
def calculate_mean_std(df):
    """Return ``(mean, std)`` of the pixel values in ``df``.

    The first column is skipped; the remaining values are divided by 255 and
    the statistics are taken over all entries at once.
    """
    pixels = df.iloc[:, 1:].to_numpy() / 255
    return np.mean(pixels), pixels.std()

# Pixel statistics of each split, printed one split per line for comparison.
train_mean, train_std = calculate_mean_std(df_train_)
val_mean, val_std = calculate_mean_std(df_val_)
test_mean, test_std = calculate_mean_std(df_test)
for stats in ((train_mean, train_std), (val_mean, val_std), (test_mean, test_std)):
    print(*stats)

In [None]:
# Training pipeline: light augmentation, then normalisation with the
# statistics of the training split.
train_transforms = transforms.Compose(
    [
        transforms.RandomRotation(10),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(train_mean, train_std)
    ]
)

# Validation pipeline: no augmentation. It normalises with the same
# training-split statistics as above so the network sees validation inputs
# on exactly the scale it was trained on (previously this mixed the
# validation mean with the test standard deviation).
val_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(train_mean, train_std)
    ]
)

## Dataloader

In [None]:
# Training split: augmented samples, shuffled by the loader.
data_train = FashionDataset(df_train_, transforms=train_transforms, train=True)
trainloader = DataLoader(
    data_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

In [None]:
# Validation split: train=True because its rows still carry labels; the
# loader keeps the dataframe order.
data_val = FashionDataset(df_val_, transforms=val_transforms, train=True)
valloader = DataLoader(
    data_val,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

# Model

In [None]:
def count_params(net):
    """Return the total number of elements over all of ``net.parameters()``."""
    total = 0
    for param in net.parameters():
        total += param.numel()
    return total

In [None]:
# Pretrained EfficientNet-B0 whose classification head is replaced by a
# 10-way linear layer.
net = EfficientNet.from_pretrained('efficientnet-b0')
# Read the feature width from the existing head instead of hard-coding 1280,
# so switching the backbone variant does not break the new classifier.
net._fc = nn.Linear(net._fc.in_features, 10, bias=True)
print('Params: {:.2f}M'.format(count_params(net)/1e6))

# Training

In [None]:
def save_ckpt(epoch):
    """Write the current model and optimizer state to ``SAVE_DIR/best.pth``.

    Uses the notebook-level ``net`` and ``optimizer``; any previous file at
    that path is overwritten.

    Args:
        epoch: Epoch number stored alongside the state dicts.
    """
    target = os.path.join(SAVE_DIR, 'best.pth')
    state = dict(
        epoch=epoch,
        model_state_dict=net.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
    )
    torch.save(state, target)
    print('Saved current best at', target)
    
def load_ckpt():
    """Restore model and optimizer state from ``SAVE_DIR/best.pth``.

    Loads the state dicts into the notebook-level ``net`` and ``optimizer``.
    Note that the optimizer state includes the learning rate stored in its
    parameter groups.

    Raises:
        FileNotFoundError: If no checkpoint has been written yet.
    """
    load_path = os.path.join(SAVE_DIR, 'best.pth')
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint written on GPU can be restored in a CPU-only session.
    ckpt = torch.load(load_path, map_location=DEVICE)
    net.load_state_dict(ckpt['model_state_dict'])
    optimizer.load_state_dict(ckpt['optimizer_state_dict'])
    print('Restored best val ckpt')

In [None]:
def train(start_epoch, end_epoch):
    """Train ``net`` for epochs ``start_epoch`` to ``end_epoch`` (inclusive).

    Relies on the notebook-level ``net``, ``optimizer``, ``criterion``,
    ``trainloader``, ``valloader``, ``eval_freq``, ``w``, and, depending on
    ``lr_w`` / ``lr_s``, on ``warmup_scheduler`` / ``train_scheduler``.

    Every ``eval_freq`` epochs the model is validated. A new best accuracy is
    checkpointed; otherwise model and optimizer state are rolled back to the
    best checkpoint while the currently scheduled learning rate is kept.

    Args:
        start_epoch: Number of the first epoch to run.
        end_epoch: Number of the last epoch to run (inclusive).
    """
    # -inf guarantees that the first evaluation always writes a checkpoint,
    # so the rollback branch can never run before a checkpoint exists.
    best_acc = float('-inf')

    net.train()
    for epoch in range(start_epoch, end_epoch+1):
        t0 = time.time()
        running_loss = 0.0

        # Train
        for images, labels in tqdm(trainloader):

            # Warm-up is stepped once per batch during the first `w` epochs.
            # NOTE: it is stepped before optimizer.step(); WarmUpLR yields a
            # rate of 0 at step 0, so stepping first gives the very first
            # batch a non-zero learning rate.
            if epoch <= w and lr_w:
                warmup_scheduler.step()

            images, labels = images.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            preds = net(images)
            loss = criterion(preds, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Validation
        if not epoch % eval_freq:
            val_acc, _ = evaluate(valloader)
            if val_acc > best_acc:
                best_acc = val_acc
                save_ckpt(epoch)
            else:
                # load_ckpt() restores the optimizer state, which includes the
                # learning rate stored at checkpoint time. Keep the currently
                # scheduled rate instead; otherwise a decay applied since the
                # checkpoint would be silently undone and never re-applied.
                current_lrs = [group['lr'] for group in optimizer.param_groups]
                load_ckpt()
                for group, lr in zip(optimizer.param_groups, current_lrs):
                    group['lr'] = lr

        # Scheduling: the epoch-level schedule only advances after warm-up.
        if epoch > w and lr_s:
            train_scheduler.step()

        logs = [
            f'Epoch: {epoch}/{end_epoch}',
            f'loss: {running_loss}',
            f'LR: {optimizer.param_groups[0]["lr"]}',
            f'Time: {time.time()-t0}s'
        ]

        print(' | '.join(logs))

In [None]:
def evaluate(dataloader):
    """Compute accuracy and summed loss of ``net`` over ``dataloader``.

    Uses the notebook-level ``net``, ``criterion`` and ``DEVICE``. The model
    is switched to eval mode for the pass and returned to whichever mode it
    was in before the call.

    Args:
        dataloader: Yields ``(images, labels)`` batches and exposes
            ``.dataset`` so the number of samples can be read.

    Returns:
        ``(accuracy, test_loss)`` as Python floats: the fraction of correctly
        classified samples and the sum of the per-batch criterion values.
    """
    was_training = net.training
    net.eval()
    test_loss = 0.0
    correct = 0

    with torch.no_grad():
        for images, labels in tqdm(dataloader):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            logits = net(images)
            test_loss += criterion(logits, labels).item()
            _, predicted = logits.max(1)
            # .item() keeps the running count a plain Python int instead of a
            # tensor living on DEVICE.
            correct += predicted.eq(labels).sum().item()

    # Restore the caller's mode rather than forcing training mode.
    net.train(was_training)

    accuracy = correct / len(dataloader.dataset)
    print('Accuracy:', accuracy, 'Test loss:', test_loss)
    return accuracy, test_loss

In [None]:
class WarmUpLR(lr_scheduler._LRScheduler):
    """Scheduler that scales every base learning rate by ``last_epoch / total_iters``."""

    def __init__(self, optimizer, total_iters, last_epoch=-1):
        # Assigned before the parent constructor runs because get_lr() reads it.
        self.total_iters = total_iters
        super(WarmUpLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return one learning rate per base rate for the current step."""
        # The tiny offset guards against a division by zero when
        # total_iters is 0.
        denom = self.total_iters + 1e-8
        step = self.last_epoch
        return [base_lr * step / denom for base_lr in self.base_lrs]

In [None]:
def LabelSmoothedCrossEntropy(preds, labels, epsilon=0.1, num_classes=10):
    """Cross entropy against label-smoothed targets, averaged over the batch.

    The true class receives weight ``1 - epsilon``; every other class
    receives ``epsilon / (num_classes - 1)``.

    Args:
        preds: Raw scores, one row per sample and one column per class.
        labels: Integer class index of each sample.
        epsilon: Total weight moved away from the true class.
        num_classes: Number of classes used to spread ``epsilon``.
    """
    hard_targets = torch.zeros_like(preds).scatter(1, labels.view(-1, 1), 1)
    on_target = hard_targets * (1 - epsilon)
    off_target = (1 - hard_targets) * epsilon / (num_classes - 1)
    soft_targets = on_target + off_target

    log_probs = F.log_softmax(preds, dim=1)
    per_sample = -(soft_targets * log_probs).sum(dim=1)
    return per_sample.mean()

In [None]:
# Model placement, optimizer and loss.
net = net.to(DEVICE)
optimizer = optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=5e-4,
)
criterion = LabelSmoothedCrossEntropy
eval_freq = 5  # run validation every `eval_freq` epochs

# Scheduling
w = 4  # number of warm-up epochs
if lr_w:
    # One warm-up step per training batch over the first `w` epochs.
    warmup_scheduler = WarmUpLR(optimizer, len(trainloader) * w)
    print('Warmup scheduler initialized')

if lr_s:
    # Multiply the learning rate by 0.2 at each milestone.
    train_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[60,100,140,160,180], gamma=0.2
    )
    print('Train scheduler initialized')

In [None]:
train(1, 200)

# Final Test Predictions

In [None]:
def make_preds(dataloader):
    """Predict a class for every sample yielded by ``dataloader``.

    Uses the notebook-level ``net`` (left in eval mode) and ``DEVICE``.

    Args:
        dataloader: Yields ``(images, ids)`` batches.

    Returns:
        A list of ``[id, predicted_class]`` pairs in iteration order.
    """
    net.eval()
    rows = []
    with torch.no_grad():
        for images, sample_ids in tqdm(dataloader):
            logits = net(images.to(DEVICE))
            _, classes = logits.max(1)
            ids_np = sample_ids.numpy().ravel()
            classes_np = classes.cpu().numpy().ravel()
            # Pair every id with its prediction, one pair per row.
            rows.extend(np.column_stack([ids_np, classes_np]).tolist())
    return rows

In [None]:
# Inference pipeline: no augmentation, normalised with the training-split
# statistics so test inputs are scaled exactly like the inputs the network
# was trained on (instead of statistics measured on the test set itself).
test_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(train_mean, train_std)
    ]
)

# train=False makes the dataset return (image, id) pairs instead of
# (image, label).
data_test = FashionDataset(df_test, transforms=test_transforms, train=False)
testloader = DataLoader(
    data_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

In [None]:
load_ckpt()

In [None]:
data_out = make_preds(testloader)

In [None]:
# Turn the [id, prediction] pairs from make_preds() into the submission
# table and preview its first rows.
df_out = pd.DataFrame(data_out, columns=['ID', 'label'])
df_out.head()

In [None]:
df_out.to_csv('submission.csv', index=False)