In [1]:
import gc
import os
import random
import time

import albumentations as albu
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import sklearn.metrics
import torch
import torch.nn.functional as F
import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch import nn
from torch import optim
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from tqdm import tqdm_notebook as tqdm2

def seed_everything(seed=42):
    """
    Seed every random number generator the notebook relies on.

    42 is the answer to everything.

    Args:
        seed: Integer seed applied to Python's ``random``, ``PYTHONHASHSEED``,
            NumPy and PyTorch (CPU and CUDA).
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also covers multi-GPU setups; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


# Must be called at module level: when this call was indented into the function
# body the notebook was never seeded and any call recursed without end.
seed_everything()

In [2]:
# Load Feather Data
# debug=True reads only the first feather shard so the whole notebook runs quickly.
debug = False

df = pd.read_csv('../data/train.csv')
files = [f'../data/train_128_feather/train_{i}.feather' for i in range(4)]

# The debug and full runs share one code path; they differ only in how many shards are read.
shards = [pd.read_feather(path) for path in (files[:1] if debug else files)]
# Skip the concat (and its extra copy) when there is a single shard.
data_full = shards[0] if len(shards) == 1 else pd.concat(shards, ignore_index=True)
del shards
gc.collect()

# Attach the train.csv columns to the pixel rows via the shared image_id key.
data_full = df.merge(data_full, on='image_id', how='inner')
del df
gc.collect()
print(data_full.shape)

(200840, 16389)


In [3]:
from sklearn.model_selection import train_test_split

# Debug runs keep only the first LIMIT rows so the split and training stay fast.
if debug:
    LIMIT = 10000
    data_full = data_full.head(LIMIT)

# Shuffled 80/20 hold-out split with a fixed seed.
train_df, valid_df = train_test_split(
    data_full,
    test_size=0.20,
    random_state=42,
    shuffle=True,
)

# The merged frame is no longer needed once both splits exist.
del data_full
gc.collect()

10

# Augmentations

In [4]:
import numpy as np
import torch
import torch.nn as nn

class ToTensor:
    """Convert a NumPy image, or a tuple of images, to float32 channel-first tensors."""

    def __call__(self, data):
        """Convert ``data``; a tuple input yields a tuple of tensors."""
        if not isinstance(data, tuple):
            return self._to_tensor(data)
        return tuple(self._to_tensor(item) for item in data)

    def _to_tensor(self, data):
        """Return ``data`` as a float32 tensor laid out channel-first.

        A 3-D array is treated as (H, W, C) and transposed to (C, H, W);
        anything else is indexed as 2-D and gains a leading channel axis.
        """
        if len(data.shape) == 3:
            channel_first = data.transpose(2, 0, 1)
        else:
            channel_first = data[None, :, :]
        return torch.from_numpy(channel_first.astype(np.float32))
        
class Normalize:
    """Scale a 0-255 image to [0, 1] and standardise it with a scalar mean/std.

    Args:
        mean: Scalar or sequence of means; a sequence is averaged to one scalar.
        std: Scalar or sequence of standard deviations; averaged the same way.
    """

    def __init__(self, mean, std):
        self.mean = np.average(mean)
        self.std = np.average(std)

    def __call__(self, image):
        """Return ``(image / 255 - mean) / std`` as a float32 array."""
        image = np.asarray(image).astype(np.float32) / 255.
        # np.average returns np.float64 scalars. Under NumPy >= 2 promotion rules
        # mixing those with a float32 array upcasts the result to float64, so the
        # statistics are cast down first to keep the output float32.
        mean = np.float32(self.mean)
        std = np.float32(self.std)
        return (image - mean) / std
    
# Training-time augmentation pipeline (albumentations). BengaliAI.__getitem__ calls it
# as `transform(image=img)['image']` before the image is normalised.
# NOTE(review): the dataset passes un-normalised pixel values here (the division by 255
# happens afterwards in Normalize), so the constant border fill `value =1` is on that raw
# scale — confirm this is the intended fill colour.
train_aug = albu.Compose([ 
    # Random shift / scale / rotate on 80% of samples; exposed borders get a constant fill.
    albu.ShiftScaleRotate(p=0.8, border_mode=cv2.BORDER_CONSTANT, value =1),
    # With probability 0.3, pick one of three mild geometric distortions.
    # NOTE(review): inside OneOf the per-transform `p` values presumably act only as
    # relative selection weights — confirm against the albumentations docs.
    albu.OneOf([
        albu.ElasticTransform(p=0.1, alpha=1, sigma=10, alpha_affine=10, border_mode=cv2.BORDER_CONSTANT,value =1),
        albu.GridDistortion(distort_limit =0.01 ,border_mode=cv2.BORDER_CONSTANT,value =1, p=0.1),
        albu.OpticalDistortion(p=0.1, distort_limit= 0.01, shift_limit=0.1, border_mode=cv2.BORDER_CONSTANT,value =1)                  
        ], p=0.3),
    # With probability 0.4, pick one of the two blur variants (Gaussian noise is disabled).
    albu.OneOf([
#         albu.GaussNoise(var_limit=0.5),
        albu.Blur(),
        albu.GaussianBlur(blur_limit=1)
        ], p=0.4),    
    # Random gamma adjustment on 80% of samples.
    albu.RandomGamma(p=0.8)
])    

# Dataset

In [5]:
class BengaliAI(Dataset):
    """Dataset yielding ``(image_tensor, (grapheme_root, vowel, consonant))`` samples.

    Args:
        data: DataFrame with the label columns ``grapheme_root``,
            ``vowel_diacritic`` and ``consonant_diacritic``; every column from
            position 5 onward is read as flattened pixel data.
        details: Optional ``(mean, std)`` pair used for normalisation; any falsy
            value selects the default ``(0.5, 0.5)``.
        transform: Optional albumentations-style callable, invoked as
            ``transform(image=img)['image']`` before normalisation.
        imgsize: ``(height, width)`` each flattened pixel row is reshaped to.
    """

    def __init__(self, data, details=False, transform=None, imgsize=(128, 128)):
        self.images = data.iloc[:, 5:].values
        self.grapheme_roots = data['grapheme_root'].values
        self.vowel_diacritics = data['vowel_diacritic'].values
        self.consonant_diacritics = data['consonant_diacritic'].values
        self.imgsize = imgsize
        self.transform = transform
        if details:
            self.mean, self.std = details
        else:
            self.mean, self.std = 0.5, 0.5
        # Always applied, with or without augmentation: normalise, then to tensor.
        self.reqtransform = transforms.Compose([
            Normalize(self.mean, self.std),
            transforms.ToTensor()
        ])

    def __getitem__(self, idx):
        """Return the (optionally augmented) normalised image and its label triple."""
        # np.float was removed in NumPy 1.24; it was an alias of the builtin float
        # (float64), so np.float64 keeps the original dtype.
        img = self.images[idx].reshape(self.imgsize).astype(np.float64)
        grapheme_root = self.grapheme_roots[idx]
        vowel_diacritic = self.vowel_diacritics[idx]
        consonant_diacritic = self.consonant_diacritics[idx]
        if self.transform is not None:
            img = self.transform(image=img)['image']
        # .float() is a no-op for float32 input; it guarantees the model never
        # receives a double tensor if normalisation happens to yield float64.
        img = self.reqtransform(img).float()
        label = (grapheme_root, vowel_diacritic, consonant_diacritic)
        return img, label

    def __len__(self):
        """Number of samples (rows of pixel data)."""
        return len(self.images)
    

if debug:
    # Visual sanity check: show the first few augmented training samples.
    dataset = BengaliAI(train_df, transform=train_aug)
    PREVIEW_LIMIT = 10

    # Indexing by range shows exactly PREVIEW_LIMIT images; the old counter
    # (`i > LIMIT` after incrementing) displayed one more than intended.
    for i in range(min(PREVIEW_LIMIT, len(dataset))):
        img, _labels = dataset[i]
        plt.imshow(img.numpy().reshape(128, 128))
        plt.show()

# Model

In [6]:
# pretrainedmodels.__dict__[model_name](pretrained='imagenet')

In [7]:
import pretrainedmodels
from efficientnet_pytorch import EfficientNet

# Number of classes for each of the three targets.
n_grapheme = 168
n_vowel = 11
n_consonant = 7
# Order matters: ClassifierCNN indexes this list as [grapheme root, vowel, consonant].
num_classes = [n_grapheme, n_vowel, n_consonant]

class ClassifierCNN(nn.Module):
    """CNN backbone with three linear heads (grapheme root, vowel, consonant).

    A 1->3 channel convolution adapts single-channel input to the 3-channel
    backbone. The backbone's feature map is average-pooled globally and fed to
    one linear head per target.

    Args:
        model_name: ``'efficientnet-*'`` selects ``efficientnet_pytorch``; any
            other name is looked up in ``pretrainedmodels``.
        num_classes: Sequence of three class counts, ordered
            (grapheme root, vowel diacritic, consonant diacritic).
        pretrained: Forwarded to ``pretrainedmodels``; not used on the
            EfficientNet branch, which is built with ``from_name``.
    """

    def __init__(self, model_name, num_classes=num_classes, pretrained='imagenet'):
        super().__init__()

        self.inconv = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=1, bias=True)
        if model_name.split('-')[0] == 'efficientnet':
            self.effnet = True
            # Swap in EfficientNet.from_pretrained(model_name) to start from
            # pretrained weights instead.
            self.model = EfficientNet.from_name(model_name)
            # Read the feature width from the backbone's own classifier layer
            # rather than hard-coding 1280, which is not correct for every variant.
            in_features = self.model._fc.in_features
        else:
            self.effnet = False
            self.model = pretrainedmodels.__dict__[model_name](pretrained=pretrained)
            in_features = self.model.last_linear.in_features

        self.head_grapheme_root = nn.Linear(in_features, num_classes[0])
        self.head_vowel_diacritic = nn.Linear(in_features, num_classes[1])
        self.head_consonant_diacritic = nn.Linear(in_features, num_classes[2])

    def freeze(self):
        """Stop gradient updates for the backbone (input conv and heads stay trainable)."""
        for param in self.model.parameters():
            param.requires_grad = False

    def unfreeze(self):
        """Re-enable gradient updates for the backbone."""
        for param in self.model.parameters():
            param.requires_grad = True

    def forward(self, x, logit=True):
        """Run the network on a batch of single-channel images.

        Args:
            x: Input batch with one channel.
            logit: If True return raw logits, otherwise softmax probabilities.

        Returns:
            Tuple ``(grapheme_root, vowel_diacritic, consonant_diacritic)``.
        """
        x = self.inconv(x)
        if self.effnet:
            features = self.model.extract_features(x)
        else:
            features = self.model.features(x)
        # Global average pool, then flatten to (batch, in_features).
        features = F.adaptive_avg_pool2d(features, 1)
        features = features.view(features.size(0), -1)

        logit_grapheme_root = self.head_grapheme_root(features)
        logit_vowel_diacritic = self.head_vowel_diacritic(features)
        logit_consonant_diacritic = self.head_consonant_diacritic(features)

        if logit:
            return logit_grapheme_root, logit_vowel_diacritic, logit_consonant_diacritic

        grapheme_root = F.softmax(logit_grapheme_root, 1)
        vowel_diacritic = F.softmax(logit_vowel_diacritic, 1)
        consonant_diacritic = F.softmax(logit_consonant_diacritic, 1)
        return grapheme_root, vowel_diacritic, consonant_diacritic

# Train

In [8]:
def mean_std(model_name):
    """Return the ``(mean, std)`` normalisation statistics for ``model_name``.

    The ImageNet settings registered in ``pretrainedmodels`` are used when
    present; models without such an entry (for example the EfficientNet
    names) fall back to ``(0.5, 0.5)``.
    """
    try:
        settings = pretrainedmodels.pretrained_settings[model_name]['imagenet']
        return (settings['mean'], settings['std'])
    except KeyError:
        # Only a missing entry is an expected condition; any other error should surface.
        return (0.5, 0.5)
    
# Training configuration: device, model, optimiser, loss and data loaders.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32
# model_name = 'se_resnext101_32x4d'
model_name = 'efficientnet-b0'
model = ClassifierCNN(model_name).to(device)
lr = 1e-3
n_epochs = 30
optimizer = optim.AdamW(
    model.parameters(),
    lr=lr
)
# scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.3) 
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, 1e-2, total_steps=None, epochs=n_epochs, steps_per_epoch=3139, pct_start=0.0,
#                                    anneal_strategy='cos', cycle_momentum=True,base_momentum=0.85, max_momentum=0.95,  div_factor=100.0)

criterion = nn.CrossEntropyLoss()

# Both datasets share the same normalisation statistics, so look them up once.
norm_details = mean_std(model_name)

train_dataset = BengaliAI(train_df, transform=train_aug, details=norm_details)
# Reshuffle every epoch: with shuffle=False the model saw identical batches in
# identical order each epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=4, shuffle=True)

val_dataset = BengaliAI(valid_df, details=norm_details)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4, shuffle=False)

In [9]:
# Smoke test: push one dummy batch through the model and print each head's output shape.
# The batch is created on `device`, so this also runs on CPU-only machines
# (the previous x.cuda() did not).
x = torch.zeros((batch_size, 1, 64, 64), device=device)
with torch.no_grad():
    output1, output2, output3 = model(x)
print(output1.shape)
print(output2.shape)
print(output3.shape)

torch.Size([32, 168])
torch.Size([32, 11])
torch.Size([32, 7])


In [10]:
def macro_recall_multi(preds, labels):
    """Weighted macro recall over the three prediction heads for one batch.

    Args:
        preds: Tuple of three score tensors (grapheme root, vowel, consonant),
            each with classes along dim 1.
        labels: Tuple of three tensors of true class indices, in the same order.

    Returns:
        ``(final_score, recall_grapheme, recall_vowel, recall_consonant)`` where
        ``final_score`` averages the three recalls with weights 2:1:1.
    """
    recalls = []
    for scores, target in zip(preds, labels):
        predicted = torch.argmax(scores, dim=1).cpu().numpy()
        actual = target.cpu().numpy()
        # recall_score expects (y_true, y_pred); passing them the other way
        # round, as before, computes macro precision instead of recall.
        recalls.append(sklearn.metrics.recall_score(actual, predicted, average='macro'))

    recall_grapheme, recall_vowel, recall_consonant = recalls
    final_score = np.average(recalls, weights=[2, 1, 1])
    return final_score, recall_grapheme, recall_vowel, recall_consonant


def calc_macro_recall(solution, submission):
    """Weighted macro recall computed from long-format solution/submission frames.

    NOTE(review): both frames are assumed to carry a ``component`` column naming
    the target of each row and a ``target`` column holding the class, with rows
    in matching order — confirm against the caller. The previous filter
    ``frame[component] == component`` compared a column's values with that
    column's own name and so could never select the intended rows.

    Args:
        solution: DataFrame of ground-truth rows.
        submission: DataFrame of predicted rows.

    Returns:
        Average of the per-component macro recalls, weighted 2:1:1 for
        grapheme root, consonant diacritic and vowel diacritic.

    Raises:
        KeyError: If either frame lacks the ``component`` or ``target`` column.
    """
    for frame_name, frame in (('solution', solution), ('submission', submission)):
        missing = {'component', 'target'} - set(frame.columns)
        if missing:
            raise KeyError(f"{frame_name} is missing column(s): {sorted(missing)}")

    scores = []
    for component in ['grapheme_root', 'consonant_diacritic', 'vowel_diacritic']:
        y_true_subset = solution[solution['component'] == component]['target'].values
        y_pred_subset = submission[submission['component'] == component]['target'].values
        scores.append(sklearn.metrics.recall_score(
            y_true_subset, y_pred_subset, average='macro'))
    final_score = np.average(scores, weights=[2, 1, 1])
    return final_score

In [None]:
# model.freeze()
# Loss weights for (grapheme root, vowel diacritic, consonant diacritic).
ws = [0.5, 0.25, 0.25]
# One row per epoch; columns are the train_* / val_* metrics recorded below.
history = pd.DataFrame()

if n_epochs:
    for epoch in tqdm2(range(n_epochs)):

        # ---------------- training pass ----------------
        # Put BatchNorm / Dropout layers in training mode; the validation pass
        # below switches them to evaluation mode.
        model.train()
        n_train_batches = len(train_loader)

        # Every running_* value is a mean over batches, accumulated as value / n_batches.
        running_loss = 0
        running_loss0 = 0
        running_loss1 = 0
        running_loss2 = 0

        running_acc0 = 0.0
        running_acc1 = 0.0
        running_acc2 = 0.0

        running_recall = 0.0
        running_recall0 = 0.0
        running_recall1 = 0.0
        running_recall2 = 0.0

        bar = tqdm2(train_loader)
        for i, (img, label) in enumerate(bar):
            img = img.to(device)
            label = [target.to(device) for target in label]

            optimizer.zero_grad()
            out = model(img)
            loss0 = criterion(out[0], label[0])
            loss1 = criterion(out[1], label[1])
            loss2 = criterion(out[2], label[2])
            loss = ws[0]*loss0 + ws[1]*loss1 + ws[2]*loss2
            loss.backward()
            optimizer.step()
#             scheduler.step()

            # Metrics use the outputs computed before the optimiser step.
            with torch.no_grad():
                running_loss += loss.item()/n_train_batches
                running_loss0 += loss0.item()/n_train_batches
                running_loss1 += loss1.item()/n_train_batches
                running_loss2 += loss2.item()/n_train_batches

                recall, recall_grapheme, recall_vowel, recall_consonant = macro_recall_multi(out, label)

                running_recall += recall/n_train_batches
                running_recall0 += recall_grapheme/n_train_batches
                running_recall1 += recall_vowel/n_train_batches
                running_recall2 += recall_consonant/n_train_batches

                running_acc0 += (out[0].argmax(1)==label[0]).float().mean().item()/n_train_batches
                running_acc1 += (out[1].argmax(1)==label[1]).float().mean().item()/n_train_batches
                running_acc2 += (out[2].argmax(1)==label[2]).float().mean().item()/n_train_batches

            bar.set_description(f"Recall: {recall:.3f}")

        print(f"Epoch: [{epoch+1}/{n_epochs}] Training...")
        print(f"Recall: {running_recall:.3f} | [{running_recall0:.3f} | {running_recall1:.3f} | {running_recall2:.3f}]")
        print(f"Acc:  [{100*running_acc0:.3f}% | {100*running_acc1:.3f}% | {100*running_acc2:.3f}%]")
        print(f"Loss: {running_loss:.3f} | [{running_loss0:.3f} | {running_loss1:.3f} | {running_loss2:.3f}]")

        history.loc[epoch, 'train_loss'] = running_loss
        history.loc[epoch, 'train_recall'] = running_recall
        history.loc[epoch, 'train_acc_grapheme'] = running_acc0
        history.loc[epoch, 'train_acc_vowel'] = running_acc1
        history.loc[epoch, 'train_acc_consonant'] = running_acc2

        # ---------------- validation pass ----------------
        # Evaluation mode: BatchNorm uses its running statistics and Dropout is
        # disabled, so validation metrics are not distorted by training-mode behaviour.
        model.eval()
        n_val_batches = len(val_loader)

        with torch.no_grad():
            running_acc0 = 0.0
            running_acc1 = 0.0
            running_acc2 = 0.0

            running_recall = 0.0
            running_recall0 = 0.0
            running_recall1 = 0.0
            running_recall2 = 0.0

            for i, (img, label) in enumerate(val_loader):
                img = img.to(device)
                label = [target.to(device) for target in label]
                out = model(img)

                recall, recall_grapheme, recall_vowel, recall_consonant = macro_recall_multi(out, label)
                running_recall += recall/n_val_batches
                running_recall0 += recall_grapheme/n_val_batches
                running_recall1 += recall_vowel/n_val_batches
                running_recall2 += recall_consonant/n_val_batches
                running_acc0 += (out[0].argmax(1)==label[0]).float().mean().item()/n_val_batches
                running_acc1 += (out[1].argmax(1)==label[1]).float().mean().item()/n_val_batches
                running_acc2 += (out[2].argmax(1)==label[2]).float().mean().item()/n_val_batches

            print(f"Epoch: [{epoch+1}/{n_epochs}] Validating...")
            print(f"Recall: {running_recall:.3f} | [{running_recall0:.3f} | {running_recall1:.3f} | {running_recall2:.3f}]")
            print(f"Acc:  [{100*running_acc0:.3f}% | {100*running_acc1:.3f}% | {100*running_acc2:.3f}%]")

            history.loc[epoch, 'val_recall'] = running_recall
            history.loc[epoch, 'val_acc_grapheme'] = running_acc0
            history.loc[epoch, 'val_acc_vowel'] = running_acc1
            history.loc[epoch, 'val_acc_consonant'] = running_acc2

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=5021), HTML(value='')))

  'recall', 'true', average, warn_for)


# Compare training logs: pretrained vs. not pretrained

In [1]:
import pandas as pd

In [2]:
!ls ../logs/

not-pretrained-exp1.csv  pretrained-exp1.csv


In [7]:
np = pd.read_csv("../logs/not-pretrained-exp1.csv").drop(['Unnamed: 0'], axis=1)
p = pd.read_csv("../logs/pretrained-exp1.csv").drop(['Unnamed: 0'], axis=1)

In [8]:
np

Unnamed: 0,train_loss,train_recall,train_acc_grapheme,train_acc_vowel,train_acc_consonant,val_recall,val_acc_grapheme,val_acc_vowel,val_acc_consonant
0,1.505188,0.476433,0.404644,0.754619,0.801698,0.743615,0.747239,0.912694,0.917646
1,0.515448,0.767685,0.776719,0.915721,0.921272,0.811384,0.82748,0.936008,0.935037
2,0.376225,0.825291,0.837834,0.939543,0.939769,0.851313,0.862586,0.952254,0.948646
3,0.311444,0.852571,0.865452,0.950254,0.949413,0.862851,0.875599,0.953523,0.956633
4,0.272624,0.869646,0.882967,0.956389,0.955902,0.877221,0.89095,0.961435,0.961012
5,0.245733,0.88095,0.893137,0.960847,0.959275,0.881372,0.894731,0.962082,0.960265
6,0.223936,0.890622,0.903265,0.963866,0.961967,0.890289,0.90426,0.964446,0.96263
7,0.210154,0.896671,0.908588,0.966583,0.964345,0.897883,0.911923,0.969123,0.964495
8,0.195881,0.902503,0.913735,0.968219,0.967006,0.902158,0.913715,0.970642,0.966636
9,0.185157,0.907468,0.918703,0.970246,0.968157,0.901726,0.913889,0.971264,0.968849


In [9]:
p

Unnamed: 0,train_loss,train_recall,train_acc_grapheme,train_acc_vowel,train_acc_consonant,val_recall,val_acc_grapheme,val_acc_vowel,val_acc_consonant
0,0.625307,0.743177,0.737785,0.900265,0.909795,0.836936,0.850693,0.942973,0.943844
1,0.305778,0.855398,0.866846,0.951049,0.951884,0.869518,0.883237,0.956259,0.954767
2,0.246591,0.879242,0.891446,0.960294,0.959119,0.878773,0.891647,0.963277,0.957753
3,0.215192,0.893393,0.904441,0.965787,0.964318,0.888841,0.898015,0.966959,0.961286
4,0.194135,0.903426,0.914053,0.968965,0.967372,0.896166,0.909212,0.966859,0.963774
5,0.179257,0.909576,0.920047,0.971166,0.96955,0.900414,0.913192,0.967855,0.966585
6,0.165977,0.91526,0.925356,0.973182,0.971465,0.893938,0.90548,0.967831,0.965366
7,0.156747,0.919602,0.929874,0.974443,0.972573,0.900766,0.912744,0.969546,0.965914
8,0.148719,0.923348,0.932575,0.975806,0.974275,0.907084,0.918765,0.971513,0.968327
9,0.142007,0.926574,0.935849,0.976433,0.974911,0.908876,0.918491,0.974325,0.971089
