In [None]:
import random
import os
import pandas as pd
import numpy as np

from os import listdir
from os.path import isfile, isdir, join
from IPython.display import clear_output

from PIL import Image
import cv2

import torch
import torch.nn as nn
from torchvision import transforms
from efficientnet_pytorch import EfficientNet

# Detect once whether a CUDA device is usable; later cells read USE_CUDA to
# decide whether the model, the loss and the batches are moved to the GPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    print("CUDA Available")

CUDA Available


In [None]:
# Paths and settings shared by the cells below.
TRAIN_DATA = './train/simpsons_dataset/'  # sub-directories are listed as classes in the cat_list cell
TEST_DATA = './testset/testset/'  # directory of images classified by sort_images()
SORTED = './sorted/'  # sort_images() saves test images into per-class folders here
TRAIN_DIVISION = True  # NOTE(review): not referenced anywhere in the visible notebook

IMG_SIZE = 224  # side length in pixels used by the Resize / CenterCrop transforms

MODEL_PATH = './model_b0_t.pth'  # checkpoint written by the training cell and read back before inference
best_eval = None  # lowest validation loss seen so far; given a starting value before training

In [None]:
# RESIZE and CENTER_CROP are optional pre-steps that SimpsonsDataset.tfs()
# applies at random before the main pipeline below.
RESIZE = transforms.Resize(int(IMG_SIZE*1.5))
CENTER_CROP = transforms.CenterCrop((IMG_SIZE, IMG_SIZE))
# Main training pipeline: fixed-size resize, random rotation of up to 20
# degrees, conversion to a normalised tensor, then random flips.
# Note that the flips come after ToTensor/Normalize, so they run on tensors.
IMAGE_TRANSFORMS = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.3),
    ])

In [None]:
def get_list_from_dir(path, full=False, directory=False):
    ''' Return the entries located directly inside `path`, sorted by name.

        path      -- directory to scan (not recursive).
        full      -- if True, every entry is returned joined with `path`;
                     otherwise bare names are returned.
        directory -- if True, only sub-directories are listed;
                     otherwise only regular files.

        The result is sorted because os.listdir() returns entries in
        arbitrary order, and callers turn list positions into class indices,
        which must not change from one run to the next.
    '''
    keep = isdir if directory else isfile
    names = sorted(f for f in listdir(path) if keep(join(path, f)))
    if full:
        return [os.path.join(path, f) for f in names]
    return names

In [None]:
# Every sub-directory of TRAIN_DATA is treated as one class; its position in
# cat_list is the class index used by the dataset and by sort_images().
cat_list = get_list_from_dir(TRAIN_DATA, directory=True)
NUM_CLASSES = len(cat_list)

In [None]:
# Class name -> class index, following the order of cat_list.
cat_to_id = {cat_name: index for index, cat_name in enumerate(cat_list)}

In [None]:
# Number of image files in each class directory, in cat_list order.
counts = [
    len(get_list_from_dir(os.path.join(TRAIN_DATA, cat)))
    for cat in cat_list
]

In [None]:
# Show the class sizes in ascending order without sorting `counts` in place,
# so counts[i] keeps describing cat_list[i].
print(f'Counts: {sorted(counts)}')

Counts: [246, 310, 358, 457, 469, 498, 623, 877, 913, 986, 1079, 1193, 1194, 1206, 1291, 1342, 1354, 1452, 1454, 2246]


In [None]:
class SimpsonsDataset(torch.utils.data.Dataset):
    ''' Class-balanced image dataset usable with the standard DataLoader.

        The files of every class directory are shuffled with a fixed seed and
        split into a train part (the first `train_fraction` of the files) and
        a validation part (the rest). Two instances created with the same
        arguments except `train` therefore see complementary files.

        Items are drawn at random: `__getitem__` ignores the requested index,
        picks a class uniformly and then an image of that class, so that rare
        classes are seen as often as frequent ones.
    '''
    def __init__(self,
                 cat_list,
                 train_path=TRAIN_DATA,
                 test_path=TEST_DATA,
                 train=True,
                 train_fraction=0.85
                ):
        ''' cat_list       -- class directory names; list position = class index.
            train_path     -- directory containing one sub-directory per class.
            test_path      -- accepted for compatibility; not used by this class.
            train          -- True for the train part, False for the validation part.
            train_fraction -- share of each class assigned to the train part.
        '''
        self.path_images = train_path
        self.cat_list = cat_list
        self.files = []  # per class: file names
        self.ids = []    # per class: shuffled positions into self.files[class]
        self.id = 0      # cursor used by get_next_id()
        for cat in cat_list:
            files = get_list_from_dir(os.path.join(self.path_images, cat))
            self.files.append(files)
            self.ids.append(list(range(len(files))))

        # A private generator seeded with 0 yields the same shuffles as
        # random.seed(0) followed by random.shuffle, but leaves the global
        # random state of the notebook untouched.
        rng = random.Random(0)

        self._indices = []  # per class: positions belonging to this split
        self._len = 0
        for class_ids in self.ids:
            rng.shuffle(class_ids)
            train_size = int(len(class_ids) * train_fraction)
            part = class_ids[:train_size] if train else class_ids[train_size:]
            self._indices.append(part)
            self._len += len(part)

    def get_next_id(self):
        ''' Advance the internal class cursor cyclically and return it.
        '''
        self.id += 1
        if self.id >= len(self.ids):
            self.id = 0
        return self.id

    def get_one_hot(self, id):
        ''' One-hot encode the class index `id` as an integer tensor
            with one entry per class.
        '''
        out = [0 for _ in range(len(self.ids))]
        out[id] = 1
        return torch.tensor(out)

    def __len__(self):
        ''' Number of files in this split (train or validation).
        '''
        return self._len

    def tfs(self, image, rnd=True):
        ''' Apply the training transforms to a PIL image.

            With rnd=True, RESIZE and CENTER_CROP are each applied first with
            probability 0.45; IMAGE_TRANSFORMS is always applied.
        '''
        if rnd:
            if np.random.random() < 0.45:
                image = RESIZE(image)
            if np.random.random() < 0.45:
                image = CENTER_CROP(image)
        return IMAGE_TRANSFORMS(image)

    def __getitem__(self, i):
        ''' Return (image tensor, one-hot label) for a randomly drawn sample.

            The index `i` is ignored: a random class is chosen first, to
            negate class imbalance, then a random image of that class.
            Raises IndexError when the split contains no files at all.
        '''
        if self._len == 0:
            # Without this guard the resampling loop below would never end.
            raise IndexError('SimpsonsDataset is empty')

        idx = np.random.randint(0, len(self.ids))
        while len(self._indices[idx]) == 0:  # skip classes with no files in this split
            idx = np.random.randint(0, len(self.ids))
        file_id = np.random.randint(0, len(self._indices[idx]))

        name = self.files[idx][self._indices[idx][file_id]]
        img_name = os.path.join(self.path_images, self.cat_list[idx], name)

        # Force three channels so grayscale / RGBA / palette files match the
        # 3-channel Normalize, and close the file handle once decoded.
        with Image.open(img_name) as img:
            image = img.convert('RGB')
        image = self.tfs(image)

        return image, self.get_one_hot(idx)

In [None]:
# Two views of the same seeded per-class shuffle: with the default
# train_fraction, the first 85% of each class trains and the rest validates.
trainset = SimpsonsDataset(cat_list, train=True)
valset = SimpsonsDataset(cat_list, train=False)

In [None]:
# Sizes of the two splits; the second line reports valset, the validation split.
print(f'Train part: {len(trainset)}')
print(f'Test part : {len(valset)}')

Train part: 16608
Test part : 2940


In [None]:
# Smoke test: draw one sample to check that loading and the transforms work.
# The index is ignored by SimpsonsDataset.__getitem__, which samples at random.
image, mask = trainset[0]

In [None]:
# EfficientNet-B0 architecture; a larger variant can be substituted for higher quality.
# NOTE(review): from_name() builds the network without loading pretrained
# weights; EfficientNet.from_pretrained() is the call that loads them.
# Confirm which one was intended.
model = EfficientNet.from_name('efficientnet-b0')
# Replace the output layer with a new one sized for our number of classes.
model._fc = torch.nn.Linear(in_features=model._fc.in_features, out_features=NUM_CLASSES, bias=True)

In [None]:
def make_optimizer(model, lr=3e-4):
    ''' Build an Adam optimizer over all parameters of `model`
        with learning rate `lr`.
    '''
    parameters = model.parameters()
    optimizer = torch.optim.Adam(parameters, lr=lr)
    return optimizer

In [None]:
# Cross-entropy criterion; the train and eval loops pass it the model output
# together with the one-hot targets cast to float.
loss = torch.nn.CrossEntropyLoss()

In [None]:
# Move the model and the loss to the GPU when available, then create the
# optimizer over the model's parameters.
if USE_CUDA:
    model.cuda()
    loss.cuda()
optimizer = make_optimizer(model)

In [None]:
def train_loop(model, optimizer, loss, train_loader,
               n_iter, lr_scheduler=None, plot=None,
               plot_kwargs=None, use_cuda=False, plot_steps=10):
    ''' Train `model` for at most `n_iter` batches and return the mean loss.

        model        -- network to train; switched to train mode here.
        optimizer    -- optimizer stepped once per batch.
        loss         -- criterion called as loss(prediction, targets.float()).
        train_loader -- iterable of (images, targets) batches.
        n_iter       -- maximum number of batches to process; the loop also
                        ends when `train_loader` is exhausted.
        lr_scheduler -- optional scheduler stepped once per batch.
        plot, plot_kwargs -- accepted for compatibility; not used.
        use_cuda     -- move every batch to the GPU before the forward pass.
        plot_steps   -- number of trailing batches averaged in the summary line.

        Returns the mean loss over all processed batches, or NaN when no
        batch was processed.
    '''
    model.train()
    losses = []
    for i, (images, masks) in enumerate(train_loader):
        if i == n_iter:
            break
        if use_cuda:
            images = images.cuda()
            masks = masks.cuda()
        predicted = model(images)
        loss_value = loss(predicted, masks.float())

        optimizer.zero_grad()
        loss_value.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        losses.append(loss_value.item())
        if i % 10 == 0:
            # "\r" rewrites the same console line to show progress in place.
            print("Step {} / {}, loss: {:.4f}, learning rate: {:.4f}\r".format(i, n_iter, loss_value.item(), optimizer.param_groups[0]["lr"]), end="")
    print(" " * 50 + "\r", end="")

    # np.mean([]) emits a RuntimeWarning, so the empty case is handled
    # explicitly and reported as NaN.
    if losses:
        recent_loss = np.mean(losses[-plot_steps:])
        mean_loss = np.mean(losses)
    else:
        recent_loss = mean_loss = float('nan')
    print("Train loss: {:.4f}, learning rate: {:.5f}".format(recent_loss, optimizer.param_groups[0]["lr"]))
    return mean_loss

In [None]:
def eval_model(model, loss, testset, batch_size,
               use_cuda=False,
               num_workers=1):
    ''' Compute the mean per-batch loss of `model` over `testset`.

        The model is switched to eval mode and gradients are disabled.
        `num_workers` is accepted but not forwarded: the loader always runs
        with num_workers=0.

        Prints the result and returns it.
    '''
    model.eval()

    loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=0)

    batch_losses = []
    with torch.no_grad():
        for batch_images, batch_masks in loader:
            if use_cuda:
                batch_images, batch_masks = batch_images.cuda(), batch_masks.cuda()
            outputs = model(batch_images)
            batch_losses.append(loss(outputs, batch_masks.float()).item())

    test_loss = np.mean(batch_losses)
    print("Test loss:", test_loss)
    return test_loss

In [None]:
# # Used to empty the CUDA cache when choosing the right batch_size
# torch.cuda.empty_cache()
# if USE_CUDA:
#     model.cuda()
#     loss.cuda()
# optimizer = make_optimizer(model)

In [None]:
batch_size = 16
n_iters = 2000  # training steps per epoch (bound of the inner loop in the training cell)
eval_steps = 1000  # maximum number of steps per train_loop() call
loss_storage = []  # validation loss appended after each epoch
train_loss_storage = []  # training loss appended after each epoch

# The dataset draws its samples at random regardless of the requested index,
# so shuffle=True only changes which (ignored) indices are asked for.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                    shuffle=True, num_workers=0)
# eval_loss = eval_model(model, loss, valset, batch_size, use_cuda=USE_CUDA)
# loss_storage.append(eval_loss)

In [None]:
# Reset the epoch counter for the next run of the training cell.
epoch = 0
# Start from +inf instead of an arbitrary large number, so the first
# evaluation always counts as an improvement whatever the scale of the loss.
# A value already held by best_eval is kept.
if best_eval is None:
    best_eval = float('inf')

In [None]:
%%time
num_epochs = 1
# `epoch` and `best_eval` come from the previous cell; re-run that cell to
# reset `epoch` before training for further epochs.
while epoch < num_epochs:
    i = 0
    # Train n_iters steps, split into train_loop() calls of at most eval_steps steps.
    while i < n_iters:    
        train_steps = min(eval_steps, n_iters - i)
        train_loss = train_loop(model, optimizer, loss, train_loader, train_steps,
                   lr_scheduler=None,
                   use_cuda=USE_CUDA)
        i += train_steps
    # NOTE: train_loss now holds the mean loss of the last train_loop() call only.
    eval_loss = eval_model(model, loss, valset, batch_size,
                           use_cuda=USE_CUDA)
    # Write a checkpoint only when the validation loss improves on the best so far.
    if eval_loss < best_eval:
        best_eval = eval_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': eval_loss,
            }, MODEL_PATH)
        print('Saving...')
    print(f'Epoch: {epoch+1}')
    loss_storage.append(eval_loss)
    train_loss_storage.append(train_loss)
    epoch = epoch + 1

Test loss: 0.291168636163765
Epoch: 1
Wall time: 15min 2s


In [None]:
# Validation losses collected so far, one entry per completed epoch.
loss_storage

[1.583194928648679,
 0.8027988547540229,
 0.6389938513140964,
 0.5546677119463034,
 0.4849191670098504,
 0.36140497557252,
 0.3766913807434637,
 0.30669559792453505,
 0.31727622835213604,
 0.31575080315119325,
 0.28149506299118715,
 0.2939396672996024]

In [None]:
# Load the best evaluated model. map_location='cpu' lets a checkpoint saved
# during a GPU run be read on a CPU-only machine; load_state_dict() then
# copies the values into the model's existing parameters on their own device.
model.load_state_dict(torch.load(MODEL_PATH, map_location='cpu')['model_state_dict'])

<All keys matched successfully>

In [None]:
# Names (without directory) of all files located directly in the test directory.
name_list = get_list_from_dir(TEST_DATA, full=False)

In [None]:
UPSCALE = transforms.Resize((IMG_SIZE, IMG_SIZE)) # for display only
# Inference pipeline: same resize and normalisation as IMAGE_TRANSFORMS,
# without the random rotation and flips.
TEST_TRANSFORMS = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

In [None]:
def sort_images(model, name_list, cat_list, submission=None, verbose=False):
    ''' Classify the test images, sort them into class folders and write
        the submission file.

        Real validation is unavailable, so every image is saved into
        SORTED/<predicted class>/ for visual validation, and the predictions
        are written to "simpsons_submission.csv" (columns "Id", "Expected").

        model      -- trained network; switched to eval mode here.
        name_list  -- file names located in TEST_DATA.
        cat_list   -- class names; the predicted index selects from this list.
        submission -- ignored (a fresh table is always built); kept so that
                      existing calls passing it keep working.
        verbose    -- also display each image with its raw outputs,
                      probabilities and predicted class.
    '''
    submission = {"Id": [], "Expected": []}
    model.eval()
    for name in name_list:
        img_name = os.path.join(TEST_DATA, name)
        with Image.open(img_name) as img:
            # Force three channels so grayscale / RGBA / palette files match
            # the 3-channel Normalize in TEST_TRANSFORMS.
            batch = TEST_TRANSFORMS(img.convert('RGB')).unsqueeze(0)
            if USE_CUDA:
                batch = batch.cuda()
            with torch.no_grad():
                predicted = model(batch)

            probs = torch.softmax(predicted, dim=-1)
            image_id = torch.argmax(probs).item()
            category = cat_list[image_id]
            submission['Id'].append(name)
            submission['Expected'].append(category)

            if verbose:
                ima = UPSCALE(img)
                display(ima)
                print(predicted)
                print(probs)
                print(image_id)
                print(category)

            target_dir = os.path.join(SORTED, category)
            os.makedirs(target_dir, exist_ok=True)
            # Save by path: the image data is binary, so it must not go
            # through a file object opened in text mode.
            img.save(os.path.join(target_dir, name))
    pd.DataFrame(submission).to_csv("simpsons_submission.csv", index=False)

In [None]:
# `submission` is never defined in this notebook, and sort_images() builds its
# own table anyway, so pass None to avoid a NameError on a fresh kernel.
sort_images(model, name_list, cat_list, submission=None, verbose=False)