# Environment Setup

In [0]:
import os
os.environ['KAGGLE_USERNAME'] = "larswigger" # username from the json file
os.environ['KAGGLE_KEY'] = "####" # key from the json file
!pip install -q kaggle
!kaggle datasets download -d larswigger/minimal-bengalipreprocessing
!unzip *.zip

Downloading minimal-bengalipreprocessing.zip to /content
 99% 977M/987M [00:16<00:00, 33.4MB/s]
100% 987M/987M [00:16<00:00, 61.6MB/s]
Archive:  minimal-bengalipreprocessing.zip
  inflating: Folded_Train.csv        
  inflating: resized_images.npy      


In [0]:
# Mount Google Drive at /content/gdrive; Colab asks for an authorization code interactively.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# Work inside the Drive output folder: the relative paths used later in this notebook
# (checkpoint.pt, history.csv, backup.total) resolve against this directory.
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Bengali/Output")

In [0]:
!pip install pretrainedmodels
#!pip install --upgrade efficientnet-pytorch

Collecting pretrainedmodels
[?25l  Downloading https://files.pythonhosted.org/packages/84/0e/be6a0e58447ac16c938799d49bfb5fb7a80ac35e137547fc6cee2c08c4cf/pretrainedmodels-0.7.4.tar.gz (58kB)
[K     |█████▋                          | 10kB 31.8MB/s eta 0:00:01[K     |███████████▏                    | 20kB 39.3MB/s eta 0:00:01[K     |████████████████▊               | 30kB 46.2MB/s eta 0:00:01[K     |██████████████████████▎         | 40kB 51.1MB/s eta 0:00:01[K     |███████████████████████████▉    | 51kB 50.1MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 10.7MB/s 
Collecting munch
  Downloading https://files.pythonhosted.org/packages/cc/ab/85d8da5c9a45e072301beb37ad7f833cd344e04c817d97e0cc75681d248f/munch-2.5.0-py2.py3-none-any.whl
Building wheels for collected packages: pretrainedmodels
  Building wheel for pretrainedmodels (setup.py) ... [?25l[?25hdone
  Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-cp36-none-any.whl size=60962 sha256=

In [0]:
from tqdm import tqdm
import numpy as np
import pandas as pd
import torch
import pretrainedmodels
import albumentations
import gc
import os
from PIL import Image
import sklearn.metrics
# define constants
# Image size before preprocessing (height x width) — presumably the raw competition
# images; these two values are not referenced in the visible part of the notebook.
ORIGINAL_HEIGHT = 137
ORIGINAL_WIDTH = 236
# Per-image shape of the preprocessed array; BengaliDataSet uses these to map resized_images.npy.
PROCESSED_HEIGHT = 95
PROCESSED_WIDTH = 165

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)

Device: cuda


# Dataset & Data Loader

In [0]:
class BengaliDataSet:
    """Map-style dataset over the preprocessed grapheme images.

    Rows of Folded_Train.csv whose ``Fold`` value is in ``folds`` are selected.
    Each item is a 4-tuple ``(image, root, vowel, consonant)``: a normalized
    channel-first image tensor and three ``torch.long`` label tensors.
    """
    # Shared by every instance: one read-only memory map over all preprocessed images.
    # NOTE(review): np.memmap reads raw bytes from offset 0 with its default dtype —
    # confirm the file really is a headerless dump with this exact shape.
    memmap = np.memmap("/content/resized_images.npy", mode="r", shape=(200840, PROCESSED_HEIGHT, PROCESSED_WIDTH))

    def __init__(self, folds, validation=False):
        """
        Args:
            folds: collection of fold numbers to include.
            validation: when True only normalization is applied; otherwise a
                random shift/scale/rotate augmentation runs before it.
        """
        fold_table = pd.read_csv("/content/Folded_Train.csv")
        selected = fold_table[fold_table["Fold"].isin(folds)].reset_index(drop=True)

        # image_id is used as the row index into the memory-mapped image array
        self.image_ids = selected["image_id"].values.astype(np.int32)
        self.root_labels = selected["grapheme_root"].values.astype(np.uint8)
        self.vowel_labels = selected["vowel_diacritic"].values.astype(np.uint8)
        self.consonant_labels = selected["consonant_diacritic"].values.astype(np.uint8)

        is_validation = (validation == True)
        steps = [] if is_validation else [albumentations.ShiftScaleRotate(rotate_limit=10)]
        steps.append(albumentations.Normalize(always_apply=True))
        self.transform = albumentations.Compose(steps)

    def __len__(self):
        """Number of selected samples."""
        return len(self.image_ids)

    def __getitem__(self, index):
        """Load, augment and tensorize the sample at position ``index``."""
        # look up the on-disk row for this sample, then read it from the memory map
        disk_row = self.image_ids[index]
        grayscale = self.memmap[disk_row]
        # the pretrained backbone expects three channels, so expand to RGB
        rgb = np.array(Image.fromarray(grayscale).convert("RGB"))
        augmented = self.transform(image=rgb)["image"]
        # height-width-channel -> channel-height-width
        channel_first = np.transpose(augmented, (2,0,1))

        image_tensor = torch.tensor(channel_first)
        root = torch.tensor(self.root_labels[index], dtype=torch.long)
        vowel = torch.tensor(self.vowel_labels[index], dtype=torch.long)
        consonant = torch.tensor(self.consonant_labels[index], dtype=torch.long)
        return (image_tensor, root, vowel, consonant)

In [0]:
# Train on folds 1-4 and hold out fold 0 for validation
# (validation=True switches the random augmentation off).
train_data = BengaliDataSet([1,2,3,4])
valid_data = BengaliDataSet([0], validation=True)

In [0]:
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=64,
                                           shuffle=True,
                                           num_workers=6)
# Validation order must stay fixed: valid_epoch compares the concatenated
# predictions against the dataset's label arrays position by position.
valid_loader = torch.utils.data.DataLoader(valid_data,
                                           batch_size=64,
                                           shuffle=False,
                                           num_workers=4)

# Model definition

In [0]:
from efficientnet_pytorch import EfficientNet

In [0]:
class Net(torch.nn.Module):
    """ImageNet-pretrained ResNet-50 feature extractor with three linear heads.

    ``forward`` returns a tuple of logits ``(root, vowel, consonant)`` with
    168, 11 and 7 outputs respectively.
    """

    def __init__(self):
        super().__init__()
        backbone_factory = pretrainedmodels.__dict__["resnet50"]
        self.transfer_model = backbone_factory(pretrained="imagenet")
        # alternative backbone:
        #self.transfer_model = EfficientNet.from_pretrained("efficientnet-b3")

        # one classifier per target, each fed the 2048 pooled backbone features
        self.root = torch.nn.Linear(2048, 168)
        self.vowel = torch.nn.Linear(2048, 11)
        self.consonant = torch.nn.Linear(2048, 7)

    def forward(self, x):
        """Compute the three logit tensors for a batch of images ``x``."""
        feature_map = self.transfer_model.features(x)
        # EfficientNet equivalent:
        #feature_map = self.transfer_model.extract_features(x)

        # global max pooling down to 1x1, then flatten to (batch, features)
        pooled = torch.nn.functional.adaptive_max_pool2d(feature_map, 1)
        flat = pooled.reshape(x.shape[0], -1)

        return (self.root(flat), self.vowel(flat), self.consonant(flat))

In [0]:
# Build the network and move its parameters to the selected device.
model = Net()
model = model.to(DEVICE)

In [0]:
# Display the module tree (the last expression of a cell is rendered as its output).
model

Net(
  (transfer_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
       

# Model Training

In [0]:
def rand_bbox(size, lam):
    """Sample a random box for CutMix that covers roughly ``1 - lam`` of the image.

    Args:
        size: batch shape; only ``size[2]`` and ``size[3]`` (the two spatial
            extents) are read.
        lam: mixing coefficient in [0, 1]; the box side lengths are the spatial
            extents scaled by ``sqrt(1 - lam)``.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)``: box limits along ``size[2]`` (x) and
        ``size[3]`` (y), clipped to the image, so the box may be smaller than
        requested when its centre lies near a border.
    """
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    # builtin int instead of np.int: the alias was removed from NumPy (1.24)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2
def cutmix(data, targets1, targets2, targets3, alpha):
    """Apply CutMix to a batch: paste one random box from shuffled samples into each image.

    Note: ``data`` is modified in place and also returned.

    Args:
        data: image batch with four dimensions; the box is cut along the last two.
        targets1, targets2, targets3: the three label tensors of the batch.
        alpha: parameter of the Beta(alpha, alpha) distribution the mixing
            coefficient is drawn from.

    Returns:
        ``(data, targets)`` where ``targets`` is
        ``[targets1, shuffled1, targets2, shuffled2, targets3, shuffled3, lam]``
        and ``lam`` is the fraction of each image that was left untouched.
    """
    indices = torch.randperm(data.size(0))
    shuffled_targets1 = targets1[indices]
    shuffled_targets2 = targets2[indices]
    shuffled_targets3 = targets3[indices]

    lam = np.random.beta(alpha, alpha)
    bbx1, bby1, bbx2, bby2 = rand_bbox(data.size(), lam)
    # Only the box region of the shuffled batch is needed; the right-hand side is
    # materialised before assignment, so no separate full copy of the batch is required.
    data[:, :, bbx1:bbx2, bby1:bby2] = data[indices, :, bbx1:bbx2, bby1:bby2]
    # adjust lambda to exactly match pixel ratio
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (data.size()[-1] * data.size()[-2]))

    targets = [targets1, shuffled_targets1, targets2, shuffled_targets2, targets3, shuffled_targets3, lam]
    return data, targets

def mixup(data, targets1, targets2, targets3, alpha):
    """Blend every sample of the batch with a randomly chosen partner sample.

    The blend weight ``lam`` is drawn from Beta(alpha, alpha). The result is
    ``(mixed_data, targets)`` with ``targets`` laid out as
    ``[targets1, partner1, targets2, partner2, targets3, partner3, lam]``.
    """
    partner_order = torch.randperm(data.size(0))
    partner_data = data[partner_order]

    # original labels interleaved with the labels of the partner samples
    paired_targets = []
    for labels in (targets1, targets2, targets3):
        paired_targets.append(labels)
        paired_targets.append(labels[partner_order])

    lam = np.random.beta(alpha, alpha)
    mixed = data * lam + partner_data * (1 - lam)
    paired_targets.append(lam)

    return mixed, paired_targets


def cutmix_criterion(preds1,preds2,preds3, targets):
    """Cross-entropy for CutMix batches, summed over the three heads.

    ``targets`` is the 7-element list ``[t1, t1_shuffled, t2, t2_shuffled,
    t3, t3_shuffled, lam]``; for each head the loss against the original labels
    is weighted by ``lam`` and the loss against the shuffled labels by ``1 - lam``.
    """
    lam = targets[6]
    criterion = torch.nn.CrossEntropyLoss(reduction='mean')

    # collect the six weighted terms in the order they are summed
    weighted_terms = []
    for head, preds in enumerate((preds1, preds2, preds3)):
        own_labels = targets[2 * head]
        shuffled_labels = targets[2 * head + 1]
        weighted_terms.append(lam * criterion(preds, own_labels))
        weighted_terms.append((1 - lam) * criterion(preds, shuffled_labels))

    total = weighted_terms[0]
    for term in weighted_terms[1:]:
        total = total + term
    return total

def mixup_criterion(preds1,preds2,preds3, targets):
    """Cross-entropy for mixup batches, summed over the three heads.

    ``targets`` holds, per head, the original labels followed by the partner
    labels, and the mixing weight ``lam`` as its seventh element. Original
    labels contribute with weight ``lam``, partner labels with ``1 - lam``.
    """
    lam = targets[6]
    label_pairs = ((targets[0], targets[1]), (targets[2], targets[3]), (targets[4], targets[5]))
    criterion = torch.nn.CrossEntropyLoss(reduction='mean')

    loss = None
    for preds, (own_labels, partner_labels) in zip((preds1, preds2, preds3), label_pairs):
        own_term = lam * criterion(preds, own_labels)
        partner_term = (1 - lam) * criterion(preds, partner_labels)
        # accumulate term by term to keep the summation order
        loss = own_term if loss is None else loss + own_term
        loss = loss + partner_term
    return loss

In [0]:
#modified to search for highest score instead of lowest loss
class EarlyStopping:
    """Stops training when the monitored validation score stops improving.

    The monitored quantity is a score where higher is better. Every time it
    improves, the model weights are written to 'checkpoint.pt'.
    """
    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How many consecutive calls without improvement to
                            tolerate before ``early_stop`` is set.
                            Default: 7
            verbose (bool): If True, prints a message for each score improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # Score of the last saved checkpoint, only used for the verbose message.
        # The attribute keeps its historical name so pickled instances stay compatible.
        # It starts at -inf because the score is maximised (np.Inf no longer exists in NumPy 2).
        self.val_loss_min = -np.inf
        self.delta = delta

    def __call__(self, val_loss, model):
        """Record one validation result.

        Args:
            val_loss: the validation score of the current epoch (higher is
                better, despite the parameter name).
            model: model whose ``state_dict`` is saved when the score improves.
        """
        score = val_loss

        if self.best_score is None:
            # first call: always counts as an improvement
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # no (sufficient) improvement
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves the model weights when the validation score improves.'''
        if self.verbose:
            print(f'Score improved ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss

In [0]:
def loss_function(output, target):
    """Compute the cross-entropy loss of each head separately.

    ``output`` and ``target`` are indexed 0/1/2 for root, vowel and consonant.
    Returns the tuple ``(root_loss, vowel_loss, consonant_loss)``.
    """
    criterion = torch.nn.CrossEntropyLoss()
    return tuple(criterion(output[head], target[head]) for head in range(3))

def final_loss_function(losses):
    """Combine the three per-head losses into their unweighted mean."""
    root_loss, vowel_loss, consonant_loss = losses[0], losses[1], losses[2]
    combined = root_loss + vowel_loss + consonant_loss
    return combined / 3

In [0]:
def competition_scores(output, target):
    """Macro-averaged recall of each head for one set of predictions.

    ``output`` holds the three logit tensors and ``target`` the three matching
    label collections (index 0/1/2 = root, vowel, consonant). The predicted
    class is the arg-max over axis 1 of the logits.

    Returns the tuple ``(root_score, vowel_score, consonant_score)``.
    """
    def _macro_recall(head):
        predicted = np.argmax(output[head].cpu().detach(), axis=1)
        return sklearn.metrics.recall_score(
            target[head], predicted, average='macro')

    return tuple(_macro_recall(head) for head in range(3))

In [0]:
# Cutout augmentation with the library's default hole settings, applied on every call
# (always_apply=True). train_epoch runs it per image on the batches that are not mixed up.
cutout = albumentations.Compose([albumentations.Cutout(always_apply=True)])

def train_epoch(dataset, dataloader, model, optimizer, mixup_prob=0.5, mixup_alpha=0.4, cutmix_alpha=0.4):
    """Train ``model`` for one pass over ``dataloader`` and return the mean batch loss.

    Each batch is augmented in one of two ways: with probability ``mixup_prob``
    it is mixed up on the device, otherwise cutout is applied to every image on
    the CPU before the batch is moved to the device.

    Args:
        dataset: kept for backward compatibility; the number of batches is
            taken from ``dataloader``.
        dataloader: yields ``(images, root_label, vowel_label, consonant_label)``.
        model: network returning ``(root, vowel, consonant)`` logits.
        optimizer: optimizer stepped once per batch.
        mixup_prob: probability of using mixup for a batch.
        mixup_alpha: Beta-distribution parameter passed to ``mixup``.
        cutmix_alpha: currently unused.

    Returns:
        float: sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    final_loss = 0
    # len(dataloader) also counts the final partial batch; len(dataset)//batch_size
    # dropped it, which skewed both the progress-bar total and the average.
    batch_count = len(dataloader)
    for images, root_label, vowel_label, consonant_label in tqdm(dataloader, total=batch_count):
        use_mixup = np.random.rand() < mixup_prob

        root_label = root_label.to(DEVICE)
        vowel_label = vowel_label.to(DEVICE)
        consonant_label = consonant_label.to(DEVICE)

        if use_mixup:
            #mixup
            images = images.to(DEVICE)
            images, targets = mixup(images, root_label, vowel_label, consonant_label, mixup_alpha)
            output1, output2, output3 = model(images)
            optimizer_loss = mixup_criterion(output1, output2, output3, targets)
        else:
            #apply cutout (albumentations works on channel-last numpy images)
            images = images.numpy()
            for idx, img in enumerate(images):
                img = np.transpose(img, (1,2,0))
                img = cutout(image=img)["image"]
                img = np.transpose(img, (2,0,1))
                images[idx] = img
            #move to GPU
            images = torch.tensor(images)
            images = images.to(DEVICE)
            #step
            root_pred, vowel_pred, consonant_pred = model(images)
            losses = loss_function((root_pred, vowel_pred, consonant_pred), (root_label, vowel_label, consonant_label))
            optimizer_loss = final_loss_function(losses)

        optimizer.zero_grad()
        final_loss += float(optimizer_loss)
        optimizer_loss.backward()
        optimizer.step()

    # guard against an empty loader instead of dividing by zero
    final_loss /= max(batch_count, 1)
    return final_loss

In [0]:
def valid_epoch(dataset, dataloader, model):
    """Evaluate ``model`` on the validation data.

    Args:
        dataset: provides ``root_labels``, ``vowel_labels`` and
            ``consonant_labels`` in the same order in which ``dataloader``
            yields the samples (the loader must not shuffle).
        dataloader: yields ``(images, root_label, vowel_label, consonant_label)``.
        model: network returning ``(root, vowel, consonant)`` logits.

    Returns:
        ``(final_losses, final_scores)``: two 3-element lists (root, vowel,
        consonant) holding the mean per-batch cross-entropy and the
        macro-averaged recall over the whole dataset.
    """
    model.eval()
    final_losses = [0,0,0]
    predictions = [[],[],[]]
    # len(dataloader) includes the final partial batch, unlike len(dataset)//batch_size
    batch_count = len(dataloader)
    # no gradients are needed for evaluation; skipping them saves memory and time
    with torch.no_grad():
        for images, root_label, vowel_label, consonant_label in tqdm(dataloader, total=batch_count):
            #move to GPU
            images = images.to(DEVICE)
            root_label = root_label.to(DEVICE)
            vowel_label = vowel_label.to(DEVICE)
            consonant_label = consonant_label.to(DEVICE)
            #prediction
            head_preds = model(images)
            #increment losses
            losses = loss_function(head_preds, (root_label, vowel_label, consonant_label))
            for head in range(3):
                final_losses[head] += float(losses[head])
                #add predictions
                predictions[head].append(head_preds[head].argmax(dim=1).cpu().numpy())
    #take average of the losses
    for head in range(3):
        final_losses[head] /= max(batch_count, 1)
    #combine predictions into one array per head
    predictions = [np.concatenate(head_batches, axis=0) for head_batches in predictions]
    final_scores = [0,0,0]
    final_scores[0] = sklearn.metrics.recall_score(dataset.root_labels, predictions[0], average='macro')
    final_scores[1] = sklearn.metrics.recall_score(dataset.vowel_labels, predictions[1], average='macro')
    final_scores[2] = sklearn.metrics.recall_score(dataset.consonant_labels, predictions[2], average='macro')
    return (final_losses, final_scores)

In [0]:
# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# mode="max": the scheduler is stepped with the validation score (higher is better)
# and multiplies the learning rate by 0.3 after 5 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", patience=5, factor=0.3, verbose=True)
early_stopping = EarlyStopping(patience=10, verbose=True)
EPOCHS = 30
# Start a fresh log: mode "w" truncates any previous history.csv, so no shell `rm`
# (which complains when the file does not exist yet) is needed.
with open("history.csv", "w") as history:
    history.write("Epoch,Loss,Valid_Loss,Valid_Root_Loss,Valid_Vowel_Loss,Valid_Consonant_Loss,Valid_Score,Valid_Root_Score,Valid_Vowel_Score,Valid_Consonant_Score\n")

In [0]:
# Load the bundle that the training loop writes after every epoch
# (keys: "model", "optimizer", "scheduler", "early_stopping").
# NOTE(review): the bundle contains a pickled EarlyStopping object, so that class must
# already be defined here, and only trusted files should be loaded.
backup = torch.load("backup.total")

In [0]:
# Resume training: restore optimizer, model and scheduler state in place and
# replace the early-stopping tracker with the saved instance.
optimizer.load_state_dict(backup["optimizer"])
model.load_state_dict(backup["model"])
scheduler.load_state_dict(backup["scheduler"])
early_stopping = backup["early_stopping"]

# Training Loop

In [0]:
# Main loop: one training pass and one validation pass per epoch, followed by logging,
# early stopping, learning-rate scheduling and a resumable backup.
# NOTE(review): the epoch counter always starts at 0, also after restoring backup.total,
# so a resumed run numbers its epochs from 1 again in history.csv.
for epoch in range(EPOCHS):
    #create data
    # The fold-rotation code below is disabled: it is a bare string literal with no runtime effect.
    """
    valid_fold = epoch % 5
    train_data = BengaliDataSet([x for x in range(5) if not x == valid_fold])
    valid_data = BengaliDataSet([valid_fold], validation=True)
    train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=64,
                                           shuffle=True,
                                           num_workers=6)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                           batch_size=64,
                                           shuffle=False,
                                           num_workers=4)
    """
    #print(f"Epoch {epoch+1}/{EPOCHS}:")
    train_loss = train_epoch(train_data, train_loader, model, optimizer, mixup_prob=0.5)
    #print(f"Loss: {total_loss} Root_Loss: {train_losses[0]} Vowel_Loss: {train_losses[1]} Consonant_Loss: {train_losses[2]}")
    # valid_scores[0] holds the three per-head losses, valid_scores[1] the three per-head recalls
    valid_scores = valid_epoch(valid_data, valid_loader, model)
    total_valid_loss = final_loss_function(valid_scores[0])
    #print(f"Valid_Loss: {total_valid_loss} Valid_Root_Loss: {valid_scores[0][0]} Valid_Vowel_Loss: {valid_scores[0][1]} Valid_Consonant_Loss: {valid_scores[0][2]}")
    # weighted mean of the recalls: the root head counts twice as much as vowel and consonant
    final_score = np.average(valid_scores[1], weights=[2,1,1])
    #print(f"Valid_Score: {final_score} Valid_Root_Score: {valid_scores[1][0]} Valid_Vowel_Score: {valid_scores[1][1]} Valid_Consonant_Score: {valid_scores[1][2]}")
    #Logging data to file
    # one CSV row per epoch, in the column order of the header written when the file was created
    with open("history.csv", "a") as history:
        history.write(f"{epoch+1},{train_loss},")
        history.write(f"{total_valid_loss},{valid_scores[0][0]},{valid_scores[0][1]},{valid_scores[0][2]},")
        history.write(f"{final_score},{valid_scores[1][0]},{valid_scores[1][1]},{valid_scores[1][2]}\n")
    #early stopping
    # tracks the validation score (higher is better) and saves checkpoint.pt on improvement
    early_stopping(final_score, model)
    # NOTE(review): breaking here skips the scheduler step and the backup of the final epoch
    if early_stopping.early_stop:
            print("Early stopping")
            break
    #learning rate scheduler
    scheduler.step(final_score)
    #backup every epoch to be able to continue training
    torch.save({
                "model":model.state_dict(),
                "optimizer":optimizer.state_dict(),
                "scheduler":scheduler.state_dict(),
                "early_stopping": early_stopping
                },
                "backup.total")
    #cleanup
    gc.collect()

2511it [14:42,  3.49it/s]                          
628it [01:03, 10.65it/s]                         


Score improved (inf --> 0.894087).  Saving model ...


2511it [14:44,  3.45it/s]                          
628it [01:02, 10.73it/s]                         


Score improved (0.894087 --> 0.934528).  Saving model ...


2511it [14:47,  3.62it/s]                          
628it [01:02,  9.97it/s]                         


Score improved (0.934528 --> 0.935051).  Saving model ...


2511it [14:39,  3.40it/s]                          
628it [01:03,  9.90it/s]                         


Score improved (0.935051 --> 0.943959).  Saving model ...


2511it [14:42,  3.39it/s]                          
628it [01:03,  9.84it/s]                         


Score improved (0.943959 --> 0.953844).  Saving model ...


2511it [14:41,  3.41it/s]                          
628it [01:02, 10.01it/s]                         


EarlyStopping counter: 1 out of 10


2511it [14:47,  3.43it/s]                          
628it [01:02, 10.76it/s]                         


Score improved (0.953844 --> 0.955165).  Saving model ...


  8%|▊         | 211/2510 [01:14<14:48,  2.59it/s]