# Voting!

# First model (SE_Resnext_32: 0.9696)

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [2]:
import torch
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms as torchtransforms
import cv2
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import time

In [3]:
# Use the first GPU when CUDA is usable; otherwise run on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Kaggle input locations: competition data root and the first model's checkpoint.
root_path = "/kaggle/input/bengaliai-cv19"
modelpath = "/kaggle/input/se-resnext50-32x4d-fold2/se_resnext50_32x4d_fold2.pkl"

cuda:0


In [4]:
# Evaluation-time preprocessing: convert to a tensor, then normalise each of
# the three channels with the fixed mean / std values below.
simple_transform_valid = torchtransforms.Compose([
    torchtransforms.ToTensor(),
    torchtransforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225]),
])

In [5]:
# Geometry used to un-flatten one stored image row: HEIGHT x WIDTH pixels.
HEIGHT = 137
WIDTH = 236
SIZE = 128


class ClsTestDataset(Dataset):
    """Test-time dataset over one DataFrame of flattened images.

    Column 0 of ``df`` is used as the per-row identifier; all remaining
    columns are treated as the flattened pixel values of that row's image.

    Args:
        df: DataFrame with the identifier in column 0 and pixels after it.
        torchtransforms: callable applied to the PIL image in ``__getitem__``.
    """

    def __init__(self, df, torchtransforms):
        self.df = df
        self.torchtransforms = torchtransforms
        # Pull ids and pixels out once so item access is plain array indexing.
        self.pathes = df.iloc[:, 0].values
        self.data = df.iloc[:, 1:].values

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(identifier, transformed_image)`` for row ``idx``."""
        path = self.pathes[idx]
        pixels = self.data[idx, :].reshape(HEIGHT, WIDTH).astype(np.uint8)
        inverted = 255 - pixels  # swap dark and bright intensities
        # Expand the single gray plane to three channels before the transform.
        three_channel = cv2.cvtColor(inverted, cv2.COLOR_GRAY2BGR)
        pil_image = torchtransforms.ToPILImage()(three_channel)
        return path, self.torchtransforms(pil_image)

def make_loader(
        data_folder,
        batch_size=64,
        num_workers=2,
        is_shuffle = False,
):
    """Build a ``DataLoader`` over a test DataFrame.

    Args:
        data_folder: DataFrame handed to ``ClsTestDataset`` (despite the name,
            it is passed as the dataset's ``df``).
        batch_size: number of samples per batch.
        num_workers: number of loader worker processes.
        is_shuffle: whether the loader shuffles the samples.

    Returns:
        A ``DataLoader`` with pinned memory that applies
        ``simple_transform_valid`` to every image.
    """
    dataset = ClsTestDataset(df=data_folder,
                             torchtransforms=simple_transform_valid)
    loader_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=is_shuffle,
    )
    return DataLoader(dataset, **loader_options)

In [6]:
from __future__ import print_function, division, absolute_import
from collections import OrderedDict
import math

import torch.nn as nn
from torch.utils import model_zoo
__all__ = ['SENet', 'se_resnext50_32x4d']
class SEModule(nn.Module):
    """Squeeze-and-Excitation gate.

    The input is average-pooled to 1x1, passed through two 1x1 convolutions
    (``channels -> channels // reduction -> channels``) with a ReLU between
    them and a sigmoid at the end, and the result rescales the input
    channel-wise.
    """

    def __init__(self, channels, reduction):
        super().__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, squeezed, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeezed, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate."""
        gate = self.relu(self.fc1(self.avg_pool(x)))
        gate = self.sigmoid(self.fc2(gate))
        return x * gate


class Bottleneck(nn.Module):
    """
    Shared ``forward()`` for the bottleneck variants.

    Subclasses are expected to define ``conv1``/``bn1``, ``conv2``/``bn2``,
    ``conv3``/``bn3``, ``relu``, ``se_module`` and ``downsample``.
    """
    def forward(self, x):
        # Main branch: three conv+bn stages, ReLU after the first two only.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Shortcut branch: identity unless a projection was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        # The SE gate is applied to the main branch before the residual sum.
        return self.relu(self.se_module(out) + shortcut)


class SEBottleneck(Bottleneck):
    """
    Bottleneck for SENet154.

    Channel plan: ``inplanes -> 2 * planes -> 4 * planes -> 4 * planes``,
    with ``stride`` and ``groups`` applied in the 3x3 convolution.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super().__init__()
        mid_channels = planes * 2
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)

        self.conv2 = nn.Conv2d(mid_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, groups=groups,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.conv3 = nn.Conv2d(out_channels, out_channels, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNetBottleneck(Bottleneck):
    """
    ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe
    implementation and uses `stride=stride` in `conv1` and not in `conv2`
    (the latter is used in the torchvision implementation of ResNet).
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super().__init__()
        out_channels = planes * 4

        # The stride sits on the first 1x1 convolution (see class docstring).
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
                               stride=stride)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNeXtBottleneck(Bottleneck):
    """
    ResNeXt bottleneck type C with a Squeeze-and-Excitation module.

    The inner width is ``floor(planes * base_width / 64) * groups``; the
    block outputs ``4 * planes`` channels.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None, base_width=4):
        super().__init__()
        width = math.floor(planes * (base_width / 64)) * groups
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False,
                               stride=1)
        self.bn1 = nn.BatchNorm2d(width)

        # Grouped 3x3 convolution; this is where the stride is applied.
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)

        self.conv3 = nn.Conv2d(width, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SENet(nn.Module):
    """Squeeze-and-Excitation network assembled from a bottleneck ``block``.

    Structure: ``layer0`` (stem convolution(s) followed by max-pooling),
    four stages ``layer1``..``layer4`` built by ``_make_layer`` with 64, 128,
    256 and 512 planes, then average pooling, optional dropout and a final
    linear layer (``last_linear``).
    """

    def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
                 inplanes=128, input_3x3=True, downsample_kernel_size=3,
                 downsample_padding=1, num_classes=1000):
        """
        Args:
            block: bottleneck class; must expose ``expansion`` and accept
                ``(inplanes, planes, groups, reduction, stride, downsample)``.
            layers: four integers, the number of blocks in each stage.
            groups: ``groups`` argument forwarded to every block.
            reduction: SE reduction ratio forwarded to every block.
            dropout_p: dropout probability before ``last_linear``; ``None``
                disables dropout.
            inplanes: number of channels produced by ``layer0``.
            input_3x3: if True the stem is three 3x3 convolutions, otherwise
                a single 7x7 convolution.
            downsample_kernel_size: kernel size of the shortcut projection in
                ``layer2``..``layer4``.
            downsample_padding: padding of that shortcut projection.
            num_classes: output size of ``last_linear``.
        """
        super(SENet, self).__init__()
        self.inplanes = inplanes
        if input_3x3:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
                                    bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        else:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                    padding=3, bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        # To preserve compatibility with Caffe weights `ceil_mode=True`
        # is used instead of `padding=1`.
        layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,
                                                    ceil_mode=True)))
        self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
        # The first stage keeps the resolution (default stride=1) and always
        # uses a 1x1 shortcut projection.
        self.layer1 = self._make_layer(
            block,
            planes=64,
            blocks=layers[0],
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=1,
            downsample_padding=0
        )
        # Stages 2-4 each start with a stride-2 block.
        self.layer2 = self._make_layer(
            block,
            planes=128,
            blocks=layers[1],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer3 = self._make_layer(
            block,
            planes=256,
            blocks=layers[2],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer4 = self._make_layer(
            block,
            planes=512,
            blocks=layers[3],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        # Fixed 7x7 average pooling window.
        self.avg_pool = nn.AvgPool2d(7, stride=1)
        self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
        self.last_linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
                    downsample_kernel_size=1, downsample_padding=0):
        """Build one stage of ``blocks`` consecutive ``block`` modules.

        Only the first block receives ``stride`` and, when the stride is not 1
        or the channel count changes, a conv+bn shortcut projection. As a side
        effect ``self.inplanes`` is updated to ``planes * block.expansion``.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=downsample_kernel_size, stride=stride,
                          padding=downsample_padding, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, groups, reduction, stride,
                            downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups, reduction))

        return nn.Sequential(*layers)

    def features(self, x):
        """Run the stem and the four stages; returns the final feature map."""
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def logits(self, x):
        """Pool a feature map, apply dropout if configured, flatten and classify."""
        x = self.avg_pool(x)
        if self.dropout is not None:
            x = self.dropout(x)
        x = x.view(x.size(0), -1)
        x = self.last_linear(x)
        return x

    def forward(self, x):
        """Compute ``logits(features(x))``."""
        x = self.features(x)
        x = self.logits(x)
        return x
    
def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'):
    """Construct the SENet configuration with ``SEResNeXtBottleneck`` blocks
    in a [3, 4, 6, 3] layout (32 groups, reduction 16, 7x7 stem, no dropout).

    ``pretrained`` is accepted but not used by this function: no weights are
    downloaded or loaded here, so callers must load a state dict themselves.
    """
    return SENet(
        SEResNeXtBottleneck,
        [3, 4, 6, 3],
        groups=32,
        reduction=16,
        dropout_p=None,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        num_classes=num_classes,
    )

In [7]:
# Rebuild the network and swap in the heads the checkpoint expects:
# adaptive pooling (any feature-map size collapses to 1x1) and a 186-way
# linear layer (168 + 11 + 7, the three slices taken in getmodeleval).
model = se_resnext50_32x4d(pretrained=None)
model.avg_pool = nn.AdaptiveAvgPool2d(1)
model.last_linear = nn.Linear(model.last_linear.in_features, 186)

# Map the tensors onto the device selected earlier instead of a hard-coded
# 'cuda:0', so the checkpoint also loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
modelvalue = torch.load(modelpath, map_location=device)

# Drop the first four characters of every key before loading.
# NOTE(review): this assumes every key carries a 4-character wrapper prefix
# that the bare model does not have - TODO confirm against the checkpoint.
newmodelvalue = {kv[4:]: modelvalue[kv] for kv in modelvalue}
model.load_state_dict(newmodelvalue)
model = model.to(device)

In [8]:
def getmodeleval(model, dataloaders):
    """Run ``model`` in eval mode over ``dataloaders`` and collect softmax scores.

    Each batch output is cut column-wise into three heads - the first 168
    columns, the next 11 and everything after that - and each head is
    soft-maxed per row.

    Args:
        model: network called on each image batch.
        dataloaders: iterable yielding ``(path, img)`` batches.

    Returns:
        ``(pathes, alllogit1, alllogit2, alllogit3)``: the collected batch
        identifiers and one NumPy array of probabilities per head.
    """
    model.eval()
    first_end = 168
    second_end = 168 + 11

    pathes = []
    alllogit1, alllogit2, alllogit3 = [], [], []
    buckets = (alllogit1, alllogit2, alllogit3)

    for path, img in tqdm(dataloaders):
        img = img.to(device)
        pathes.extend(path)
        with torch.no_grad():
            output = model(img)
        heads = (
            output[:, :first_end],
            output[:, first_end:second_end],
            output[:, second_end:],
        )
        for bucket, head in zip(buckets, heads):
            # softmax over each row
            probs = F.softmax(head, dim=1).cpu().numpy()
            bucket.extend(probs.tolist())

    alllogit1 = np.array(alllogit1)
    alllogit2 = np.array(alllogit2)
    alllogit3 = np.array(alllogit3)

    for label, scores in (("getmodeleval::alllogit1.shape", alllogit1),
                          ("getmodeleval::alllogit2.shape", alllogit2),
                          ("getmodeleval::alllogit3.shape", alllogit3)):
        print(label, scores.shape)
    return pathes, alllogit1, alllogit2, alllogit3

In [9]:
# Accumulators over all test shards: ids and the arg-max class of each head.
allpathes, allpreds_root, allpreds_vowel, allpreds_consonant = [], [], [], []

tAllBegin = time.time()
for i in range(4):
    # One parquet shard per iteration.
    test_csv = pd.read_parquet(os.path.join(root_path, f'test_image_data_{i}.parquet'))
    tBegin = time.time()
    dataloaders = make_loader(data_folder=test_csv,
                              batch_size=8,
                              num_workers=2,
                              is_shuffle=False)
    pathes, logit1, logit2, logit3 = getmodeleval(model, dataloaders)

    # axis=1: pick the highest-scoring class along each row
    preds_root = logit1.argmax(axis=1)
    preds_vowel = logit2.argmax(axis=1)
    preds_consonant = logit3.argmax(axis=1)

    allpathes += pathes
    allpreds_root += preds_root.tolist()
    allpreds_vowel += preds_vowel.tolist()
    allpreds_consonant += preds_consonant.tolist()

    tEnd = time.time()
    print(i, int(round(tEnd * 1000)) - int(round(tBegin * 1000)), "ms")
tAllEnd = time.time()
print(len(allpathes), len(allpreds_root), len(allpreds_vowel), len(allpreds_consonant),
      int(round(tAllEnd * 1000)) - int(round(tAllBegin * 1000)), "ms")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (3, 168)
getmodeleval::alllogit2.shape (3, 11)
getmodeleval::alllogit3.shape (3, 7)
0 1097 ms


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (3, 168)
getmodeleval::alllogit2.shape (3, 11)
getmodeleval::alllogit3.shape (3, 7)
1 173 ms


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (3, 168)
getmodeleval::alllogit2.shape (3, 11)
getmodeleval::alllogit3.shape (3, 7)
2 165 ms


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (3, 168)
getmodeleval::alllogit2.shape (3, 11)
getmodeleval::alllogit3.shape (3, 7)
3 162 ms
12 12 12 12 370440 ms


In [10]:
# Sanity check: the four accumulators must have the same length.
print(*(len(seq) for seq in (allpathes, allpreds_root, allpreds_vowel, allpreds_consonant)))

12 12 12 12


In [11]:
# Flatten the per-image predictions into submission rows. Every image
# contributes three rows in the order consonant, root, vowel.
row_id = []
target = []
for idx, image_id in enumerate(allpathes):
    prefix = str(image_id)
    row_id += [prefix + '_consonant_diacritic',
               prefix + '_grapheme_root',
               prefix + '_vowel_diacritic']
    target += [allpreds_consonant[idx], allpreds_root[idx], allpreds_vowel[idx]]

submission_df = pd.read_csv(root_path + '/sample_submission.csv')
# NOTE(review): targets are assigned by position; `row_id` is built above but
# not used for alignment, so this relies on sample_submission.csv listing its
# rows in the same order - confirm.
# `np.int` was removed from NumPy (1.24); use an explicit 64-bit integer type.
# Item assignment replaces the attribute-style column assignment.
submission_df['target'] = np.asarray(target, dtype=np.int64)
print(submission_df.head(10))
#submission_df.to_csv('submission.csv', index=False)

                       row_id  target
0  Test_0_consonant_diacritic       0
1        Test_0_grapheme_root       3
2      Test_0_vowel_diacritic       0
3  Test_1_consonant_diacritic       0
4        Test_1_grapheme_root      93
5      Test_1_vowel_diacritic       2
6  Test_2_consonant_diacritic       0
7        Test_2_grapheme_root      19
8      Test_2_vowel_diacritic       0
9  Test_3_consonant_diacritic       0


In [12]:
import sys

# Print a small table of the public top-level names whose object size, as
# reported by sys.getsizeof, is above 10000 bytes.
print("{}{: >25}{}{: >10}{}".format('|','Variable Name','|','Memory','|'))
print(" ------------------------------------ ")
for var_name in dir():
    if var_name.startswith("_"):
        continue  # skip private / IPython bookkeeping names
    if sys.getsizeof(eval(var_name)) > 10000: #over 10000 byte data pick up
        print("{}{: >25}{}{: >10}{}".format('|',var_name,'|',sys.getsizeof(eval(var_name)),'|'))

|            Variable Name|    Memory|
 ------------------------------------ 
|               modelvalue|     39072|
|            newmodelvalue|     18528|
|                 test_csv|     97235|


In [13]:
# Drop the names reported as large above; the first model's weights now live
# in `model` and the last test shard is no longer needed.
del modelvalue, newmodelvalue, test_csv

# Second model (SEResNeXt prediction with PyTorch: 0.9663)

In [14]:
# Install the `pretrainedmodels` 0.7.4 package from the local ../input
# directory; stdout is redirected to /dev/null to keep the cell quiet.
!pip install ../input/pretrainedmodels/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4/ > /dev/null # no output

In [15]:
import gc
import os
from pathlib import Path
import random
import sys

from tqdm.notebook import tqdm
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns

from IPython.core.display import display, HTML

# --- plotly ---
from plotly import tools, subplots
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff

# --- models ---
from sklearn import preprocessing
from sklearn.model_selection import KFold
import lightgbm as lgb
import xgboost as xgb
import catboost as cb

# --- setup ---
# Use the fully-qualified option name: the abbreviated 'max_columns' pattern
# matches more than one option on recent pandas versions and raises OptionError.
pd.set_option('display.max_columns', 50)

In [16]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

In [17]:
# Run-time switches for the second model.
debug=False
submission=True
batch_size=256
# Rebinding `device` here replaces the value chosen for the first model, so
# keep the same CUDA-or-CPU fallback instead of hard-coding 'cuda:0'.
device='cuda:0' if torch.cuda.is_available() else 'cpu'
out='.'

In [18]:
# Input / output locations for the second model.
# NOTE(review): `featherdir` presumably holds a feather-format copy of the
# competition data - verify against the attached dataset.
datadir = Path('/kaggle/input/bengaliai-cv19')
featherdir = Path('/kaggle/input/bengaliaicv19feather')
outdir = Path('.')

In [19]:
import numpy
import six
import torch
from torch.utils.data.dataset import Dataset


class DatasetMixin(Dataset):
    """Dataset base class adding slice / list / array indexing and a transform.

    Subclasses implement ``__len__`` and ``get_example``; ``__getitem__``
    dispatches on the index type and routes every element through
    ``get_example_wrapper``, which applies ``transform`` when one is set.
    """

    def __init__(self, transform=None):
        self.transform = transform

    def __getitem__(self, index):
        """Returns an example or a sequence of examples.

        A tensor index is first converted with ``tolist()``. A slice, list or
        NumPy array yields a list of examples; anything else is treated as a
        single index.
        """
        if torch.is_tensor(index):
            index = index.tolist()
        if isinstance(index, slice):
            # The notebook is Python 3 only, so the builtin range replaces
            # six.moves.range.
            return [self.get_example_wrapper(i)
                    for i in range(*index.indices(len(self)))]
        if isinstance(index, (list, numpy.ndarray)):
            return [self.get_example_wrapper(i) for i in index]
        return self.get_example_wrapper(index)

    def __len__(self):
        """Returns the number of data points."""
        raise NotImplementedError

    def get_example_wrapper(self, i):
        """Wrapper of `get_example`, to apply `transform` if necessary"""
        example = self.get_example(i)
        if self.transform:
            example = self.transform(example)
        return example

    def get_example(self, i):
        """Returns the i-th example.

        Implementations should override it. It should raise :class:`IndexError`
        if the index is invalid.

        Args:
            i (int): The index of the example.

        Returns:
            The i-th example.

        """
        raise NotImplementedError

In [20]:
import numpy as np


class BengaliAIDataset(DatasetMixin):
    """Image dataset with optional labels and an optional index subset.

    Args:
        images: indexable collection of images.
        labels: indexable collection of labels, or ``None`` for unlabeled data.
        transform: forwarded to ``DatasetMixin``.
        indices: positions into ``images`` exposed by this dataset; defaults
            to every position.
    """

    def __init__(self, images, labels=None, transform=None, indices=None):
        super().__init__(transform=transform)
        self.images = images
        self.labels = labels
        self.indices = np.arange(len(images)) if indices is None else indices
        # Labeled mode makes get_example return (x, y) instead of x alone.
        self.train = labels is not None

    def __len__(self):
        """return length of this dataset"""
        return len(self.indices)

    def get_example(self, i):
        """Return i-th data"""
        source = self.indices[i]
        # Invert the intensities and divide by 255 (float32 result).
        x = (255 - self.images[source]).astype(np.float32) / 255.
        if not self.train:
            return x
        return x, self.labels[source]

In [21]:
import cv2
from skimage.transform import AffineTransform, warp
import numpy as np


def affine_image(img):
    """
    Apply one randomly drawn affine warp (scale, rotation, shear, shift).

    Args:
        img: (h, w) or (1, h, w)

    Returns:
        img: (h, w)
    """
    if img.ndim == 3:
        img = img[0]  # keep only the first plane

    # The draws below stay in this order (scale x, scale y, rotation, shear,
    # shift x, shift y) so results are unchanged under a fixed NumPy seed.

    # --- scale: uniform in [0.8, 1.2] per axis ---
    scale_low, scale_high = 0.8, 1.2
    sx = np.random.uniform(scale_low, scale_high)
    sy = np.random.uniform(scale_low, scale_high)

    # --- rotation: uniform in [-7, 7] degrees, converted to radians ---
    rot_limit = 7
    rot_angle = np.random.uniform(-rot_limit, rot_limit) * np.pi / 180.

    # --- shear: uniform in [-10, 10] degrees, converted to radians ---
    shear_limit = 10
    shear_angle = np.random.uniform(-shear_limit, shear_limit) * np.pi / 180.

    # --- translation: randint excludes the upper bound, so shifts are -4..3 ---
    shift_limit = 4
    tx = np.random.randint(-shift_limit, shift_limit)
    ty = np.random.randint(-shift_limit, shift_limit)

    transform = AffineTransform(scale=(sx, sy), rotation=rot_angle,
                                shear=shear_angle, translation=(tx, ty))
    warped = warp(img, transform)
    assert warped.ndim == 2
    return warped


def crop_char_image(image, threshold=40./255.):
    """Trim border rows and columns that contain no pixel above ``threshold``.

    Args:
        image: 2-D array.
        threshold: a pixel counts as ink when its value is strictly greater.

    Returns:
        The sub-array spanning the first to last row and column with ink.
        When no pixel exceeds the threshold the image is returned uncropped.
    """
    assert image.ndim == 2
    ink = image > threshold

    cols_with_ink = ink.sum(axis=0) > 0
    rows_with_ink = ink.sum(axis=1) > 0

    # argmax on a boolean vector gives the position of its first True
    # (or 0 when there is none); the reversed vector gives the trailing margin.
    first_row = np.argmax(rows_with_ink)
    rows_after = np.argmax(rows_with_ink[::-1])
    first_col = np.argmax(cols_with_ink)
    cols_after = np.argmax(cols_with_ink[::-1])

    n_rows, n_cols = image.shape
    return image[first_row:n_rows - rows_after, first_col:n_cols - cols_after]


def resize(image, size=(128, 128)):
    """Resize ``image`` with ``cv2.resize``; ``size`` is passed as its ``dsize``."""
    resized = cv2.resize(image, size)
    return resized

In [22]:
import numpy as np


def add_gaussian_noise(x, sigma):
    """Return ``x`` plus zero-mean Gaussian noise, clipped to [0, 1].

    The noise is ``np.random.randn(*x.shape) * sigma``. The input array is
    left untouched: the previous in-place ``x +=`` modified the caller's array
    and left it noised but unclipped.

    Args:
        x: NumPy array.
        sigma: standard deviation of the added noise.

    Returns:
        A new array with the shape of ``x``. For floating-point input the
        dtype of ``x`` is kept, matching what the in-place version produced.
    """
    noisy = x + np.random.randn(*x.shape) * sigma
    if np.issubdtype(x.dtype, np.floating):
        # The sum is computed in float64; cast back to the input precision.
        noisy = noisy.astype(x.dtype, copy=False)
    return np.clip(noisy, 0., 1.)


class Transform:
    """Callable preprocessing pipeline for one example.

    Steps, each optional: random affine warp, crop to the inked area, resize,
    Gaussian noise, normalisation. ``threshold`` and ``sigma`` are given on a
    0-255 scale and stored divided by 255.

    Args:
        affine: apply ``affine_image``.
        crop: apply ``crop_char_image`` with the stored threshold.
        size: target size for ``resize``; ``None`` skips resizing.
        normalize: subtract 0.0692 and divide by 0.2051.
        train: when True the example is an ``(x, y)`` pair and a pair is
            returned; otherwise the example is ``x`` alone.
        threshold: crop threshold on the 0-255 scale.
        sigma: noise level on the 0-255 scale; non-positive disables noise.
    """

    def __init__(self, affine=True, crop=True, size=(64, 64),
                 normalize=True, train=True, threshold=40.,
                 sigma=-1.):
        self.affine, self.crop = affine, crop
        self.size = size
        self.normalize = normalize
        self.train = train
        self.threshold = threshold / 255.
        self.sigma = sigma / 255.

    def __call__(self, example):
        x, y = example if self.train else (example, None)

        # --- Augmentation ---
        if self.affine:
            x = affine_image(x)

        # --- Train/Test common preprocessing ---
        if self.crop:
            x = crop_char_image(x, threshold=self.threshold)
        if self.size is not None:
            x = resize(x, size=self.size)
        if self.sigma > 0.:
            x = add_gaussian_noise(x, sigma=self.sigma)
        if self.normalize:
            x = (x.astype(np.float32) - 0.0692) / 0.2051

        # Add a leading channel axis to 2-D images.
        if x.ndim == 2:
            x = x[None, :, :]
        x = x.astype(np.float32)

        if not self.train:
            return x
        return x, y.astype(np.int64)

In [23]:
import torch


def residual_add(lhs, rhs):
    """Add ``rhs`` onto ``lhs`` when their sizes along dim 1 may differ.

    The result always has as many dim-1 entries as ``lhs``: surplus entries
    of ``rhs`` are dropped, and entries of ``lhs`` beyond the width of
    ``rhs`` are passed through unchanged.
    """
    lhs_ch = lhs.shape[1]
    rhs_ch = rhs.shape[1]
    if lhs_ch == rhs_ch:
        return lhs + rhs
    if lhs_ch < rhs_ch:
        return lhs + rhs[:, :lhs_ch]
    overlap = lhs[:, :rhs_ch] + rhs
    return torch.cat([overlap, lhs[:, rhs_ch:]], dim=1)

In [24]:
from typing import List

import torch
from torch import nn
from torch.nn.parameter import Parameter


class LazyLoadModule(nn.Module):
    """Module whose listed buffers / parameters take their shape from a state dict.

    Subclasses name the lazy entries in `_lazy_buffer_keys` /
    `_lazy_parameter_keys`. At construction each lazy buffer is registered as
    an empty tensor and each lazy parameter as None. A load_state_dict
    pre-hook then re-registers them from the incoming state dict, so a tensor
    of any shape can be loaded.
    """
    _lazy_buffer_keys: List[str] = []     # override to declare lazy buffers
    _lazy_parameter_keys: List[str] = []  # override to declare lazy parameters

    def __init__(self):
        super().__init__()
        for name in self._lazy_buffer_keys:
            self.register_buffer(name, torch.tensor([]))
        for name in self._lazy_parameter_keys:
            self.register_parameter(name, None)
        self._register_load_state_dict_pre_hook(self._hook)

    def _hook(self, state_dict, prefix, local_metadata, strict, missing_keys,
             unexpected_keys, error_msgs):
        """Pre-load hook: adopt the stored tensors for every lazy entry.

        Each lazy key is looked up as ``prefix + key`` in ``state_dict``.
        """
        for name in self._lazy_buffer_keys:
            stored = state_dict[prefix + name]
            self.register_buffer(name, stored)

        for name in self._lazy_parameter_keys:
            stored = state_dict[prefix + name]
            self.register_parameter(name, Parameter(stored))

In [25]:
import math
import torch
from torch.nn import init
from torch.nn.parameter import Parameter
import torch.nn.functional as F


class LazyLinear(LazyLoadModule):
    """Linear layer whose input width may be left unspecified.

    With ``in_features=None`` the weight stays unset until the first forward
    call, where it is created from the last dimension of the input.
    """

    __constants__ = ['bias', 'in_features', 'out_features']
    _lazy_parameter_keys = ['weight']

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.Tensor(out_features))

        # Without a known input width the weight is created lazily in forward().
        if in_features is not None:
            self.weight = Parameter(torch.Tensor(out_features, in_features))
            self.reset_parameters()

    def reset_parameters(self):
        """Initialise the weight (Kaiming uniform) and, if present, the bias."""
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is None:
            return
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        limit = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -limit, limit)

    def forward(self, input):
        if self.weight is None:
            # First call: size the weight from the input's last dimension.
            self.in_features = input.shape[-1]
            self.weight = Parameter(torch.Tensor(self.out_features, self.in_features))
            self.reset_parameters()

            # The freshly created parameter has to follow the input's device.
            self.to(input.device)
        return F.linear(input, self.weight, self.bias)

    def extra_repr(self):
        template = 'in_features={}, out_features={}, bias={}'
        return template.format(self.in_features, self.out_features,
                               self.bias is not None)

In [26]:
from torch import nn
import torch.nn.functional as F


class LinearBlock(nn.Module):
    """Linear layer followed by optional batch-norm, activation, residual add
    and dropout, applied in that order.

    Args:
        in_features: input width; ``None`` selects ``LazyLinear``.
        out_features: output width.
        bias: whether the linear layer has a bias.
        use_bn: apply ``BatchNorm1d`` after the linear layer.
        activation: callable applied after batch-norm; ``None`` to skip.
        dropout_ratio: dropout probability; values <= 0 disable dropout.
        residual: add the block input to the output via ``residual_add``.
    """

    def __init__(self, in_features, out_features, bias=True,
                 use_bn=True, activation=F.relu, dropout_ratio=-1, residual=False,):
        super(LinearBlock, self).__init__()
        if in_features is None:
            self.linear = LazyLinear(in_features, out_features, bias=bias)
        else:
            self.linear = nn.Linear(in_features, out_features, bias=bias)
        if use_bn:
            self.bn = nn.BatchNorm1d(out_features)
        if dropout_ratio > 0.:
            self.dropout = nn.Dropout(p=dropout_ratio)
        else:
            self.dropout = None
        self.activation = activation
        self.use_bn = use_bn
        self.dropout_ratio = dropout_ratio
        self.residual = residual

    # Implemented as `forward` rather than an override of `__call__`, so that
    # nn.Module.__call__ keeps running registered hooks. `block(x)` still
    # works exactly as before.
    def forward(self, x):
        h = self.linear(x)
        if self.use_bn:
            h = self.bn(h)
        if self.activation is not None:
            h = self.activation(h)
        if self.residual:
            h = residual_add(h, x)
        if self.dropout_ratio > 0:
            h = self.dropout(h)
        return h

In [27]:
import pretrainedmodels
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn import Sequential


class PretrainedCNN(nn.Module):
    """Backbone from ``pretrainedmodels`` with a small fully-connected head.

    A 3x3 convolution first maps ``in_channels`` to the 3 channels the
    backbone consumes. The backbone's feature map is summed over its two
    trailing dimensions and passed through two ``LinearBlock`` layers
    (hidden width 512, then ``out_dim``).
    """

    def __init__(self, model_name='se_resnext101_32x4d',
                 in_channels=1, out_dim=10, use_bn=True,
                 pretrained=None):
        super().__init__()
        self.conv0 = nn.Conv2d(
            in_channels, 3, kernel_size=3, stride=1, padding=1, bias=True)
        self.base_model = pretrainedmodels.__dict__[model_name](pretrained=pretrained)
        self.do_pooling = True
        # With pooling the head input width equals the backbone's classifier
        # input width; without it the width is left to be inferred lazily.
        inch = self.base_model.last_linear.in_features if self.do_pooling else None
        hdim = 512
        hidden = LinearBlock(inch, hdim, use_bn=use_bn,
                             activation=F.leaky_relu, residual=False)
        output = LinearBlock(hdim, out_dim, use_bn=use_bn,
                             activation=None, residual=False)
        self.lin_layers = Sequential(hidden, output)

    def forward(self, x):
        h = self.base_model.features(self.conv0(x))

        if self.do_pooling:
            # Sum-pool over the two trailing dimensions.
            h = torch.sum(h, dim=(-1, -2))
        else:
            # [128, 2048, 4, 4] when input is (128, 128)
            bs, ch, height, width = h.shape
            h = h.view(bs, ch*height*width)

        for block in self.lin_layers:
            h = block(h)
        return h

In [28]:
import torch
from torch import nn
import torch.nn.functional as F
from tqdm import tqdm


def accuracy(y, t):
    """Fraction of rows of ``y`` whose arg-max along dim 1 equals ``t``.

    Returns a float32 scalar tensor.
    """
    hits = (torch.argmax(y, dim=1) == t).sum().type(torch.float32)
    return hits / y.shape[0]


class BengaliClassifier(nn.Module):
    """Three-component classifier (grapheme root, vowel, consonant) around a predictor.

    The predictor may return either one tensor whose second dimension is
    ``n_grapheme + n_vowel + n_consonant`` or a tuple of three tensors, one per
    component in that order.

    Args:
        predictor: module mapping an input batch to the scores described above.
        n_grapheme: number of grapheme-root classes.
        n_vowel: number of vowel classes.
        n_consonant: number of consonant classes.
    """

    def __init__(self, predictor, n_grapheme=168, n_vowel=11, n_consonant=7):
        super(BengaliClassifier, self).__init__()
        self.n_grapheme = n_grapheme
        self.n_vowel = n_vowel
        self.n_consonant = n_consonant
        self.n_total_class = self.n_grapheme + self.n_vowel + self.n_consonant
        self.predictor = predictor

        self.metrics_keys = [
            'loss', 'loss_grapheme', 'loss_vowel', 'loss_consonant',
            'acc_grapheme', 'acc_vowel', 'acc_consonant']

    def forward(self, x, y=None):
        """Run the predictor and, when labels are given, compute loss and metrics.

        Args:
            x: input batch for the predictor.
            y: optional label tensor indexed as ``y[:, 0]`` (grapheme),
                ``y[:, 1]`` (vowel) and ``y[:, 2]`` (consonant).

        Returns:
            ``(loss, metrics, pred)``. ``pred`` is the raw predictor output.
            Without labels ``loss`` is ``None`` and ``metrics`` is empty.
        """
        pred = self.predictor(x)
        if isinstance(pred, tuple):
            assert len(pred) == 3
            preds = pred
        else:
            assert pred.shape[1] == self.n_total_class
            preds = torch.split(pred, [self.n_grapheme, self.n_vowel, self.n_consonant], dim=1)
        if y is None:
            # The signature allows omitting labels; previously this crashed on y[:, 0].
            return None, {}, pred
        loss_grapheme = F.cross_entropy(preds[0], y[:, 0])
        loss_vowel = F.cross_entropy(preds[1], y[:, 1])
        loss_consonant = F.cross_entropy(preds[2], y[:, 2])
        loss = loss_grapheme + loss_vowel + loss_consonant
        metrics = {
            'loss': loss.item(),
            'loss_grapheme': loss_grapheme.item(),
            'loss_vowel': loss_vowel.item(),
            'loss_consonant': loss_consonant.item(),
            'acc_grapheme': accuracy(preds[0], y[:, 0]),
            'acc_vowel': accuracy(preds[1], y[:, 1]),
            'acc_consonant': accuracy(preds[2], y[:, 2]),
        }
        return loss, metrics, pred

    def calc(self, data_loader):
        """Run the predictor over ``data_loader`` in eval mode without gradients.

        Returns:
            Tuple of three tensors (grapheme, vowel, consonant scores) covering
            every batch of the loader, concatenated along dim 0.
        """
        device: torch.device = next(self.parameters()).device
        self.eval()
        output_list = []
        with torch.no_grad():
            for batch in tqdm(data_loader):
                # TODO: support general preprocessing.
                # If `data` is not `Data` instance, `to` method is not supported!
                batch = batch.to(device)
                pred = self.predictor(batch)
                if isinstance(pred, tuple):
                    # Same tuple form forward() accepts: join the heads so the
                    # split below recovers them.
                    pred = torch.cat(pred, dim=1)
                output_list.append(pred)
        output = torch.cat(output_list, dim=0)
        preds = torch.split(output, [self.n_grapheme, self.n_vowel, self.n_consonant], dim=1)
        return preds

    def predict_proba(self, data_loader):
        """Return the per-component soft-max probabilities as a list of three tensors."""
        preds = self.calc(data_loader)
        return [F.softmax(p, dim=1) for p in preds]

    def predict(self, data_loader):
        """Return the per-component arg-max labels as a list of three tensors."""
        preds = self.calc(data_loader)
        pred_labels = [torch.argmax(p, dim=1) for p in preds]
        return pred_labels

In [29]:
def build_predictor(arch, out_dim, model_name=None):
    """Create the predictor network for ``arch``.

    Only ``arch == 'pretrained'`` is supported; it builds a single-channel
    ``PretrainedCNN``. Any other value raises ``ValueError``.
    """
    if arch != 'pretrained':
        raise ValueError("[ERROR] Unexpected value arch={}".format(arch))
    return PretrainedCNN(in_channels=1, out_dim=out_dim, model_name=model_name)


def build_classifier(arch, load_model_path, n_total, model_name='', device='cuda:0'):
    """Build a ``BengaliClassifier`` and optionally load predictor weights.

    Args:
        arch: architecture key passed to ``build_predictor``.
        load_model_path: path of a saved predictor state dict; when falsy no
            weights are loaded and a warning is printed.
        n_total: output width of the predictor.
        model_name: backbone name passed to ``build_predictor``.
        device: target device, as a string or ``torch.device``.

    Returns:
        The classifier, moved to ``device``.
    """
    if isinstance(device, str):
        device = torch.device(device)
    predictor = build_predictor(arch, out_dim=n_total, model_name=model_name)
    print('predictor', type(predictor))
    classifier = BengaliClassifier(predictor)
    if load_model_path:
        # map_location lets a checkpoint saved on one device (e.g. a GPU) be
        # restored on whichever device this run uses.
        predictor.load_state_dict(torch.load(load_model_path, map_location=device))
    else:
        print("[WARNING] Unexpected value load_model_path={}"
              .format(load_model_path))
    classifier.to(device)
    return classifier

In [30]:
import gc
import numpy as np
import pandas as pd


def prepare_image(datadir, featherdir, data_type='train',
                  submission=False, indices=[0, 1, 2, 3]):
    """Load image shards and return them as one ``(n, 137, 236)`` array.

    Args:
        datadir: directory holding ``{data_type}_image_data_{i}.parquet``
            (read when ``submission`` is true); joined with ``/``.
        featherdir: directory holding ``{data_type}_image_data_{i}.feather``
            (read otherwise); joined with ``/``.
        data_type: ``'train'`` or ``'test'``.
        submission: selects parquet (true) or feather (false) input.
        indices: shard numbers to load, in order.
    """
    assert data_type in ['train', 'test']
    image_df_list = []
    for i in indices:
        if submission:
            shard = pd.read_parquet(datadir / f'{data_type}_image_data_{i}.parquet')
        else:
            shard = pd.read_feather(featherdir / f'{data_type}_image_data_{i}.feather')
        image_df_list.append(shard)

    print('image_df_list', len(image_df_list))
    height, width = 137, 236
    # Column 0 is skipped; the remaining columns are the flattened pixels.
    images = []
    for df in image_df_list:
        images.append(df.iloc[:, 1:].values.reshape(-1, height, width))
    del image_df_list
    gc.collect()
    return np.concatenate(images, axis=0)

In [31]:
def predict_core(test_images, image_size, threshold,
                 arch, n_total, model_name, load_model_path, batch_size=512, device='cuda:0', **kwargs):
    """Predict class probabilities for ``test_images`` with one saved model.

    Builds the classifier from ``load_model_path``, wraps the images in a
    ``BengaliAIDataset`` with a non-training crop transform, and returns the
    result of ``classifier.predict_proba``. Extra keyword arguments are
    accepted and ignored.
    """
    classifier = build_classifier(arch, load_model_path, n_total, model_name, device=device)
    eval_transform = Transform(affine=False, crop=True, size=(image_size, image_size),
                               threshold=threshold, train=False)
    test_dataset = BengaliAIDataset(test_images, None, transform=eval_transform)
    print('test_dataset', len(test_dataset))
    # shuffle=False keeps predictions in the same order as test_images.
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return classifier.predict_proba(test_loader)

In [32]:
device = torch.device(device)
# Class counts per component; n_total is the width of the joint output.
n_grapheme, n_vowel, n_consonant = 168, 11, 7
n_total = sum((n_grapheme, n_vowel, n_consonant))
print('n_total', n_total)

n_total 186


In [33]:
from torch.utils.data.dataloader import DataLoader
from chainer_chemistry.utils import save_json, load_json


#--- Prediction ---
traindir = '/kaggle/input/bengaliaicv19-trainedmodels/'
data_type = 'test'
test_preds_list = []

for i in range(4):
    # --- prepare data ---
    # One shard per pass; the images are released at the end of the pass.
    indices = [i]
    test_images = prepare_image(
        datadir, featherdir, data_type=data_type, submission=submission, indices=indices)
    n_dataset = len(test_images)
    print(f'n_dataset={n_dataset}')

    model_preds_list = []
    for j in range(4):
        # --- Depends on train configuration ---
        # Start from the saved arguments of model j, then override the runtime settings.
        train_args_dict = load_json(os.path.join(traindir, f'args_{j}.json'))
        train_args_dict.update(
            load_model_path=os.path.join(traindir, f'predictor_{j}.pt'),
            device=device,
            batch_size=batch_size,
            debug=debug,
        )
        print(f'j {j} updated train_args_dict {train_args_dict}')
        test_preds = predict_core(
            test_images=test_images, n_total=n_total, **train_args_dict)
        model_preds_list.append(test_preds)

    # --- ensemble ---
    # Average each component's probabilities over the models, then take the arg-max.
    proba0, proba1, proba2 = (
        torch.stack([model_preds[k] for model_preds in model_preds_list], dim=0).mean(dim=0)
        for k in range(3)
    )
    p0, p1, p2 = (
        torch.argmax(proba, dim=1).cpu().numpy() for proba in (proba0, proba1, proba2)
    )
    print('p0', p0.shape, 'p1', p1.shape, 'p2', p2.shape)

    test_preds_list.append([p0, p1, p2])
    if debug:
        break
    del test_images
    gc.collect()


A module chainer_chemistry.datasets was not imported, probably because RDKit is not installed. To install RDKit, please follow instruction in https://github.com/pfnet-research/chainer-chemistry#installation.



image_df_list 1
n_dataset=3
j 0 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_0.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}
predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 25.30it/s]

test_dataset 3
j 1 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_1.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}





predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 42.23it/s]

test_dataset 3
j 2 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_2.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}





predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 42.69it/s]

test_dataset 3
j 3 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_3.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}





predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 42.45it/s]

test_dataset 3
p0 (3,) p1 (3,) p2 (3,)





image_df_list 1
n_dataset=3
j 0 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_0.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}


  0%|          | 0/1 [00:00<?, ?it/s]

predictor <class '__main__.PretrainedCNN'>
test_dataset 3


100%|██████████| 1/1 [00:00<00:00, 34.87it/s]


j 1 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_1.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}
predictor <class '__main__.PretrainedCNN'>
test_dataset 3


100%|██████████| 1/1 [00:00<00:00, 26.88it/s]


j 2 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_2.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}


  0%|          | 0/1 [00:00<?, ?it/s]

predictor <class '__main__.PretrainedCNN'>
test_dataset 3


100%|██████████| 1/1 [00:00<00:00, 40.71it/s]


j 3 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_3.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}
predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 41.29it/s]

test_dataset 3
p0 (3,) p1 (3,) p2 (3,)





image_df_list 1
n_dataset=3
j 0 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_0.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}
predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 41.59it/s]

test_dataset 3
j 1 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_1.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}



  0%|          | 0/1 [00:00<?, ?it/s]

predictor <class '__main__.PretrainedCNN'>
test_dataset 3


100%|██████████| 1/1 [00:00<00:00, 39.54it/s]


j 2 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_2.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}


  0%|          | 0/1 [00:00<?, ?it/s]

predictor <class '__main__.PretrainedCNN'>
test_dataset 3


100%|██████████| 1/1 [00:00<00:00, 36.28it/s]


j 3 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_3.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}
predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 27.74it/s]

test_dataset 3
p0 (3,) p1 (3,) p2 (3,)





image_df_list 1
n_dataset=3
j 0 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_0.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}
predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 37.99it/s]

test_dataset 3
j 1 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_1.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}



100%|██████████| 1/1 [00:00<00:00, 43.07it/s]

predictor <class '__main__.PretrainedCNN'>
test_dataset 3





j 2 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_2.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}
predictor <class '__main__.PretrainedCNN'>


100%|██████████| 1/1 [00:00<00:00, 42.91it/s]

test_dataset 3
j 3 updated train_args_dict {'image_size': 128, 'threshold': 20.0, 'arch': 'pretrained', 'model_name': 'se_resnext50_32x4d', 'load_model_path': '/kaggle/input/bengaliaicv19-trainedmodels/predictor_3.pt', 'device': device(type='cuda', index=0), 'batch_size': 256, 'debug': False}



  0%|          | 0/1 [00:00<?, ?it/s]

predictor <class '__main__.PretrainedCNN'>
test_dataset 3


100%|██████████| 1/1 [00:00<00:00, 39.39it/s]

p0 (3,) p1 (3,) p2 (3,)





In [34]:
# Join the per-shard label arrays of each component into one array.
p0, p1, p2 = (
    np.concatenate([shard_preds[k] for shard_preds in test_preds_list], axis=0)
    for k in range(3)
)
print('concat:', 'p0', p0.shape, 'p1', p1.shape, 'p2', p2.shape)

# Three rows per image: grapheme root, vowel, consonant.
row_id = []
target = []
for i in tqdm(range(len(p0))):
    row_id.extend([f'Test_{i}_grapheme_root', f'Test_{i}_vowel_diacritic',
                   f'Test_{i}_consonant_diacritic'])
    target.extend([p0[i], p1[i], p2[i]])
submission_df2 = pd.DataFrame({'row_id': row_id, 'target': target})
#submission_df.to_csv('submission.csv', index=False)

100%|██████████| 12/12 [00:00<00:00, 8688.36it/s]

concat: p0 (12,) p1 (12,) p2 (12,)





In [35]:
import sys

# Table of notebook-level variables whose sys.getsizeof exceeds 10000 bytes.
print("{}{: >25}{}{: >10}{}".format('|','Variable Name','|','Memory','|'))
print(" ------------------------------------ ")
for var_name in dir():
    if var_name.startswith("_"):
        # Skip private / IPython bookkeeping names.
        continue
    if sys.getsizeof(eval(var_name)) > 10000: #over 10000 byte data pick up
        print("{}{: >25}{}{: >10}{}".format('|',var_name,'|',sys.getsizeof(eval(var_name)),'|'))

|            Variable Name|    Memory|
 ------------------------------------ 


# Third model(Multi_Output_CNN:0.9518)

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm.auto import tqdm
from glob import glob
import time, gc
import cv2

from tensorflow import keras
import matplotlib.image as mpimg
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.models import clone_model
from keras.layers import Dense,Conv2D,Flatten,MaxPool2D,Dropout,BatchNormalization, Input
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import PIL.Image as Image, PIL.ImageDraw as ImageDraw, PIL.ImageFont as ImageFont
from matplotlib import pyplot as plt
import seaborn as sns

import os
# Print the path of every file under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Competition CSV files. The trailing underscore on train_df_/test_df_ keeps them
# apart from the per-shard train_df built in the training loop.
train_df_ = pd.read_csv('/kaggle/input/bengaliai-cv19/train.csv')
test_df_ = pd.read_csv('/kaggle/input/bengaliai-cv19/test.csv')
class_map_df = pd.read_csv('/kaggle/input/bengaliai-cv19/class_map.csv')
sample_sub_df = pd.read_csv('/kaggle/input/bengaliai-cv19/sample_submission.csv')

# Canvas size read by image_from_char. Both names are reassigned to 137 / 236
# further down, before the training loop.
HEIGHT = 236
WIDTH = 236

def get_n(df, field, n, top=True):
    """Return the ``n`` most (or least) frequent values of ``field`` with their counts.

    Args:
        df: frame containing the column ``field``.
        field: column to count; also matched against
            ``class_map_df['component_type']``.
        n: number of rows to return.
        top: most frequent first when true, least frequent first otherwise.

    Returns:
        Rows of ``class_map_df`` for the selected entries, without the
        ``component_type`` and ``label`` columns and with an added ``count``.
    """
    counts = df.groupby([field]).size().reset_index(name='counts')['counts']
    selected = counts.sort_values(ascending=not top)[:n]
    field_rows = class_map_df[class_map_df['component_type'] == field].reset_index()
    # The positions from the sorted counts are used as row positions in the class map.
    result = field_rows.iloc[selected.index].drop(['component_type', 'label'], axis=1)
    result.loc[:, 'count'] = selected.values
    return result

def image_from_char(char):
    """Render ``char`` with the Kalpurush font onto a new ``WIDTH`` x ``HEIGHT`` RGB image."""
    canvas = Image.new('RGB', (WIDTH, HEIGHT))
    pen = ImageDraw.Draw(canvas)
    font = ImageFont.truetype('/kaggle/input/kalpurush-fonts/kalpurush-2.ttf', 120)
    text_w, text_h = pen.textsize(char, font=font)
    # Centred horizontally; placed a third of the free height from the top.
    origin = ((WIDTH - text_w) / 2, (HEIGHT - text_h) / 3)
    pen.text(origin, char, font=font)
    return canvas

# The 'grapheme' column is dropped; only the three component labels are kept.
train_df_ = train_df_.drop(columns=['grapheme'])

# Store the three label columns as uint8.
train_df_[['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']] = (
    train_df_[['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].astype('uint8')
)

# Side length and channel count of the images fed to the CNN.
IMG_SIZE = 64
N_CHANNELS = 1

def _crop_to_content(image, size):
    """Crop ``image`` to the union of its contour boxes and resize to ``size`` x ``size``.

    Returns the resized crop flattened to 1-D.
    """
    _, thresh = cv2.threshold(image, 30, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # [-2:] keeps (contours, hierarchy) whether findContours returns two or three values.
    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    if len(contours) > 0:
        boxes = [cv2.boundingRect(cnt) for cnt in contours]
        xmin = min(x for x, y, w, h in boxes)
        ymin = min(y for x, y, w, h in boxes)
        xmax = max(x + w for x, y, w, h in boxes)
        ymax = max(y + h for x, y, w, h in boxes)
        roi = image[ymin:ymax, xmin:xmax]
    else:
        # No contour at all (e.g. a blank image): use the whole frame rather than
        # failing on min() of an empty list.
        roi = image

    return cv2.resize(roi, (size, size), interpolation=cv2.INTER_AREA).reshape(-1)


def resize(df, size=64, need_progress_bar=True):
    """Crop every row image of ``df`` to its content and resize it.

    Each row is reshaped to 137 x 236, cropped to the bounding box of all
    contours found after an inverted Otsu threshold, and resized to
    ``size`` x ``size``.

    Args:
        df: frame with one flattened 137 x 236 image per row.
        size: side length of the output images. Previously this argument was
            ignored and 64 was always used; the default keeps that behaviour.
        need_progress_bar: wrap the row loop in ``tqdm`` when true.

    Returns:
        DataFrame indexed like ``df`` with ``size * size`` pixel columns per row.
    """
    resized = {}
    row_positions = range(df.shape[0])
    if need_progress_bar:
        row_positions = tqdm(row_positions)
    for i in row_positions:
        image = df.loc[df.index[i]].values.reshape(137, 236)
        resized[df.index[i]] = _crop_to_content(image, size)
    resized = pd.DataFrame(resized).T
    return resized

def get_dummies(df):
    """One-hot encode every column of ``df`` (values cast to ``str``) and join the results column-wise."""
    encoded = [pd.get_dummies(df[col].astype(str)) for col in df]
    return pd.concat(encoded, axis=1)

# Functional-API CNN with one shared trunk and three soft-max heads.
# NOTE: the name `model` is first used for the running tensor and is rebound to
# the final Model object at the end of this cell.
inputs = Input(shape = (IMG_SIZE, IMG_SIZE, 1))

# Stage 1 (32 filters): four 3x3 convs, batch norm, 2x2 max-pool, one 5x5 conv, dropout.
# Unlike the later stages there is no batch norm after the 5x5 conv here.
model = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1))(inputs)
model = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = MaxPool2D(pool_size=(2, 2))(model)
model = Conv2D(filters=32, kernel_size=(5, 5), padding='SAME', activation='relu')(model)
model = Dropout(rate=0.3)(model)

# Stage 2 (64 filters): same layout, plus a batch norm after the 5x5 conv.
model = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = MaxPool2D(pool_size=(2, 2))(model)
model = Conv2D(filters=64, kernel_size=(5, 5), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = Dropout(rate=0.3)(model)

# Stage 3 (128 filters).
model = Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = MaxPool2D(pool_size=(2, 2))(model)
model = Conv2D(filters=128, kernel_size=(5, 5), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = Dropout(rate=0.3)(model)

# Stage 4 (256 filters).
model = Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = MaxPool2D(pool_size=(2, 2))(model)
model = Conv2D(filters=256, kernel_size=(5, 5), padding='SAME', activation='relu')(model)
model = BatchNormalization(momentum=0.15)(model)
model = Dropout(rate=0.3)(model)

# Shared dense trunk.
model = Flatten()(model)
model = Dense(1024, activation = "relu")(model)
model = Dropout(rate=0.3)(model)
dense = Dense(512, activation = "relu")(model)

# One soft-max head per target: 168 roots, 11 vowels, 7 consonants.
# NOTE(review): the callbacks and the fit call elsewhere in this notebook refer to
# these heads as 'dense_3', 'dense_4', 'dense_5' -- presumably Keras's auto-generated
# names, which depend on the order in which the Dense layers are created here. Confirm
# before reordering or re-running this cell in a used kernel.
head_root = Dense(168, activation = 'softmax')(dense)
head_vowel = Dense(11, activation = 'softmax')(dense)
head_consonant = Dense(7, activation = 'softmax')(dense)

# Output order is (root, vowel, consonant).
model = Model(inputs=inputs, outputs=[head_root, head_vowel, head_consonant])

# The single loss string is given for a model with three outputs.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#Set a learning rate annealer. Learning rate will be half after 3 epochs if accuracy is not increased
# All three callbacks share the same schedule and differ only in the metric they watch.
_lr_plateau_kwargs = dict(patience=3, verbose=1, factor=0.5, min_lr=0.00001)
learning_rate_reduction_root = ReduceLROnPlateau(monitor='dense_3_accuracy', **_lr_plateau_kwargs)
learning_rate_reduction_vowel = ReduceLROnPlateau(monitor='dense_4_accuracy', **_lr_plateau_kwargs)
learning_rate_reduction_consonant = ReduceLROnPlateau(monitor='dense_5_accuracy', **_lr_plateau_kwargs)

# Training batch size and number of epochs per data shard.
batch_size, epochs = 256, 30

class MultiOutputDataGenerator(keras.preprocessing.image.ImageDataGenerator):
    """``ImageDataGenerator`` whose ``flow`` accepts a dict of label arrays.

    The label arrays are concatenated column-wise so that the parent generator
    can batch (and shuffle) them together with the images; every yielded batch
    is split back into a dict keyed like the input.
    """

    def flow(self,
             x,
             y=None,
             batch_size=32,
             shuffle=True,
             sample_weight=None,
             seed=71,
             save_to_dir=None,
             save_prefix='',
             save_format='png',
             subset=None):
        """Yield ``(images, {output_name: labels})`` batches.

        Args:
            x: image array passed to the parent ``flow``.
            y: dict mapping output name to a 2-D label array. When ``None`` or
                empty, the parent's batches are yielded unchanged.
            batch_size, shuffle, sample_weight, seed, save_to_dir, save_prefix,
            save_format, subset: forwarded to the parent ``flow``. Previously
                only ``batch_size`` and ``shuffle`` were forwarded and the rest
                (including the default ``seed``) were silently dropped.

        Yields:
            ``(batch_x, batch_targets)``, followed by any extra items the
            parent yields (e.g. sample weights).
        """
        parent_kwargs = dict(
            batch_size=batch_size,
            shuffle=shuffle,
            sample_weight=sample_weight,
            seed=seed,
            save_to_dir=save_to_dir,
            save_prefix=save_prefix,
            save_format=save_format,
            subset=subset,
        )

        if y is None or len(y) == 0:
            # Nothing to split: behave like the parent generator.
            yield from super().flow(x, None, **parent_kwargs)
            return

        # Remember the output order and widths so batches can be split back.
        ordered_outputs = list(y)
        target_lengths = {output: y[output].shape[1] for output in ordered_outputs}
        targets = np.concatenate([y[output] for output in ordered_outputs], axis=1)

        for batch in super().flow(x, targets, **parent_kwargs):
            flowx, flowy = batch[0], batch[1]
            target_dict = {}
            i = 0
            for output in ordered_outputs:
                target_length = target_lengths[output]
                target_dict[output] = flowy[:, i: i + target_length]
                i += target_length

            # batch[2:] is empty unless the parent also yields sample weights.
            yield (flowx, target_dict) + tuple(batch[2:])

# Raw image size of the competition data.
HEIGHT = 137
WIDTH = 236

# Train the same model on the four parquet shards in turn; one History per shard.
histories = []
for i in range(4):
    train_df = pd.merge(pd.read_parquet(f'/kaggle/input/bengaliai-cv19/train_image_data_{i}.parquet'), train_df_, on='image_id').drop(['image_id'], axis=1)
    
    # Visualize few samples of current training dataset
    fig, ax = plt.subplots(nrows=3, ncols=4, figsize=(16, 8))
    count=0
    for row in ax:
        for col in row:
            col.imshow(resize(train_df.drop(['grapheme_root', 'vowel_diacritic', 'consonant_diacritic'], axis=1).iloc[[count]], need_progress_bar=False).values.reshape(-1).reshape(IMG_SIZE, IMG_SIZE).astype(np.float64))
            count += 1
    plt.show()
    
    X_train = train_df.drop(['grapheme_root', 'vowel_diacritic', 'consonant_diacritic'], axis=1)
    X_train = resize(X_train)/255
    
    # CNN takes images in shape `(batch_size, h, w, channels)`, so reshape the images
    X_train = X_train.values.reshape(-1, IMG_SIZE, IMG_SIZE, N_CHANNELS)
    
    # One-hot encode with a fixed width per target (168 / 11 / 7, the sizes of the
    # model heads). pd.get_dummies would only create columns for the classes that
    # happen to occur in this shard, which breaks the fit if any class is absent.
    Y_train_root = np.eye(168, dtype=np.uint8)[train_df['grapheme_root'].values]
    Y_train_vowel = np.eye(11, dtype=np.uint8)[train_df['vowel_diacritic'].values]
    Y_train_consonant = np.eye(7, dtype=np.uint8)[train_df['consonant_diacritic'].values]

    print(f'Training images: {X_train.shape}')
    print(f'Training labels root: {Y_train_root.shape}')
    print(f'Training labels vowel: {Y_train_vowel.shape}')
    print(f'Training labels consonants: {Y_train_consonant.shape}')

    # Divide the data into training and validation set
    x_train, x_test, y_train_root, y_test_root, y_train_vowel, y_test_vowel, y_train_consonant, y_test_consonant = train_test_split(X_train, Y_train_root, Y_train_vowel, Y_train_consonant, test_size=0.08, random_state=71)
    del train_df
    del X_train
    del Y_train_root, Y_train_vowel, Y_train_consonant

    # Data augmentation for creating more training data
    datagen = MultiOutputDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=8,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.15, # Randomly zoom image 
        width_shift_range=0.15,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.15,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images


    # This will just calculate parameters required to augment the given data. This won't perform any augmentations
    datagen.fit(x_train)

    # Fit the model
    # The dict keys must match the names of the three output layers of `model`.
    history = model.fit_generator(datagen.flow(x_train, {'dense_3': y_train_root, 'dense_4': y_train_vowel, 'dense_5': y_train_consonant}, batch_size=batch_size),
                              epochs = epochs, validation_data = (x_test, [y_test_root, y_test_vowel, y_test_consonant]), 
                              steps_per_epoch=x_train.shape[0] // batch_size, 
                              callbacks=[learning_rate_reduction_root, learning_rate_reduction_vowel, learning_rate_reduction_consonant])

    histories.append(history)
    
    # Delete to reduce memory usage
    del x_train
    del x_test
    del y_train_root
    del y_test_root
    del y_train_vowel
    del y_test_vowel
    del y_train_consonant
    del y_test_consonant
    gc.collect()

# Key order matches the order of the model outputs (root, vowel, consonant).
preds_dict = {
    'grapheme_root': [],
    'vowel_diacritic': [],
    'consonant_diacritic': []
}

# Order in which the three rows of each image are written to the submission.
components = ['consonant_diacritic', 'grapheme_root', 'vowel_diacritic']
target=[] # model predictions placeholder
row_id=[] # row_id place holder
for shard_idx in range(4):
    df_test_img = pd.read_parquet('/kaggle/input/bengaliai-cv19/test_image_data_{}.parquet'.format(shard_idx)) 
    df_test_img.set_index('image_id', inplace=True)

    X_test = resize(df_test_img, need_progress_bar=False)/255
    X_test = X_test.values.reshape(-1, IMG_SIZE, IMG_SIZE, N_CHANNELS)
    
    preds = model.predict(X_test)

    # Distinct loop names: the original reused `i` for every nested loop and bound
    # `id`, which shadowed the builtin for the rest of the notebook.
    for head_idx, component in enumerate(preds_dict):
        preds_dict[component] = np.argmax(preds[head_idx], axis=1)

    for k, image_id in enumerate(df_test_img.index.values):
        for comp in components:
            row_id.append(image_id + '_' + comp)
            target.append(preds_dict[comp][k])
    del df_test_img
    del X_test
    gc.collect()

submission_df3 = pd.DataFrame(
    {
        'row_id': row_id,
        'target':target
    },
    columns = ['row_id','target'] 
)
#df_sample.to_csv('submission.csv',index=False)

# NOTE(review): this displays `submission_df`, not the `submission_df3` built just
# above -- confirm which frame was meant.
submission_df.head()

# Fourth model(Grapheme fast.ai starter [inference]:0.9639)  
ref:https://www.kaggle.com/iafoss/grapheme-fast-ai-starter-inference/data

import os
# Print the path of every file under /kaggle/input (same listing as in the
# third-model section).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [36]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import cv2
from tqdm import tqdm_notebook as tqdm
import fastai
from fastai.vision import *
import os
#from mish_activation import *
import warnings
warnings.filterwarnings("ignore")

In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MishFunction(torch.autograd.Function):
    """Mish activation, ``x * tanh(softplus(x))``, with a hand-written backward pass.

    Only the input is saved for backward; the intermediate values are
    recomputed there.
    """

    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x * torch.tanh(F.softplus(x))   # x * tanh(ln(1 + exp(x)))

    @staticmethod
    def backward(ctx, grad_output):
        # `saved_tensors` replaces the deprecated `saved_variables` accessor.
        x, = ctx.saved_tensors
        sigmoid = torch.sigmoid(x)
        tanh_sp = torch.tanh(F.softplus(x))
        # d/dx [x * tanh(sp(x))] = tanh(sp) + x * sigmoid(x) * (1 - tanh(sp)^2)
        return grad_output * (tanh_sp + x * sigmoid * (1 - tanh_sp * tanh_sp))

class Mish(nn.Module):
    """Module wrapper that applies ``MishFunction`` to its input."""

    def forward(self, x):
        activated = MishFunction.apply(x)
        return activated

def to_Mish(model):
    """Recursively replace every ``nn.ReLU`` child of ``model`` with ``Mish``, in place."""
    for name, module in model.named_children():
        if not isinstance(module, nn.ReLU):
            # Not a ReLU: look for ReLUs further down this sub-module.
            to_Mish(module)
            continue
        setattr(model, name, Mish())

In [38]:
# Configuration for the fourth model.
HEIGHT = 137
WIDTH = 236
SIZE = 128  # default `size` of crop_resize below
bs = 128
stats = (0.0692, 0.2051)  # presumably (mean, std) for input normalisation -- TODO confirm
arch = models.densenet121  # default backbone constructor of Dnet_1ch
MODEL = '../input/grapheme-fast-ai-starter-lb-0-964/model_0.pth'  # weights loaded into Dnet_1ch
nworkers = 2

# The four test image shards.
TEST = ['/kaggle/input/bengaliai-cv19/test_image_data_0.parquet',
        '/kaggle/input/bengaliai-cv19/test_image_data_1.parquet',
        '/kaggle/input/bengaliai-cv19/test_image_data_2.parquet',
        '/kaggle/input/bengaliai-cv19/test_image_data_3.parquet']

LABELS = '../input/bengaliai-cv19/train.csv'

# Unique-value count of every train.csv column except the first and the last;
# Dnet_1ch uses the first three entries as the sizes of its heads.
df = pd.read_csv(LABELS)
nunique = list(df.nunique())[1:-1]

In [39]:
class Head(nn.Module):
    """Classification head: concat-pooling, Mish, flatten, then two ``bn_drop_lin`` groups.

    Args:
        nc: number of channels of the incoming feature map (the pooled vector
            has ``nc * 2`` features).
        n: number of output classes.
        ps: dropout probability passed to both ``bn_drop_lin`` groups.
    """

    def __init__(self, nc, n, ps=0.5):
        super().__init__()
        layers = [AdaptiveConcatPool2d(), Mish(), Flatten()]
        layers += bn_drop_lin(nc*2, 512, True, ps, Mish())
        layers += bn_drop_lin(512, n, True, ps)
        self.fc = nn.Sequential(*layers)
        self._init_weight()

    def _init_weight(self):
        """Re-initialise any ``Conv2d`` / ``BatchNorm2d`` sub-modules."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                torch.nn.init.kaiming_normal_(module.weight)
                continue
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1.0)
                module.bias.data.zero_()

    def forward(self, x):
        """Return the class scores for the feature map ``x``."""
        return self.fc(x)

# Change the first conv to accept 1-channel input
class Dnet_1ch(nn.Module):
    """DenseNet-style backbone with a single-channel stem and three classification heads.

    Args:
        arch: callable that builds the backbone; its ``features`` attribute must
            provide ``conv0``, ``norm0``, ``denseblock1..4``, ``transition1..3``
            and ``norm5``.
        n: sequence holding the number of classes for head1, head2 and head3.
        pre: when true the backbone is built with ``arch(True)``, otherwise ``arch()``.
        ps: accepted but not used by this class.
    """

    def __init__(self, arch=arch, n=nunique, pre=True, ps=0.5):
        super().__init__()
        backbone = arch(True) if pre else arch()
        feats = backbone.features

        # Single-channel stem: its kernel is the original stem kernel summed
        # over the input-channel axis.
        stem = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        stem.weight = nn.Parameter(feats.conv0.weight.sum(1).unsqueeze(1))

        self.layer0 = nn.Sequential(stem, feats.norm0, nn.ReLU(inplace=True))
        pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        self.layer1 = nn.Sequential(pool, feats.denseblock1)
        self.layer2 = nn.Sequential(feats.transition1, feats.denseblock2)
        self.layer3 = nn.Sequential(feats.transition2, feats.denseblock3)
        self.layer4 = nn.Sequential(feats.transition3, feats.denseblock4, feats.norm5)

        # Channel count of the final feature map, read from the last norm layer.
        nc = self.layer4[-1].weight.shape[0]
        for k in range(3):
            setattr(self, f'head{k + 1}', Head(nc, n[k]))
        # The ReLU -> Mish conversion of the backbone (to_Mish) is left disabled.

    def forward(self, x):
        """Return the outputs of the three heads as a tuple ``(x1, x2, x3)``."""
        feats = x
        for stage in (self.layer0, self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        return self.head1(feats), self.head2(feats), self.head3(feats)

In [40]:
# Build the network without pretrained backbone weights (arch() is called
# with no arguments) and move it to the GPU.
model = Dnet_1ch(pre=False).cuda()
# Load the checkpoint at MODEL; tensors are mapped to the CPU while loading.
model.load_state_dict(torch.load(MODEL, map_location=torch.device('cpu')));
# Switch to evaluation mode; the trailing ';' suppresses the cell output.
model.eval();

In [41]:
#check https://www.kaggle.com/iafoss/image-preprocessing-128x128

def bbox(img):
    """Return ``(rmin, rmax, cmin, cmax)`` of the non-zero region of a 2-D array.

    The bounds are inclusive indices of the first/last row and column that
    contain at least one truthy element. Raises ``IndexError`` when ``img``
    has no truthy element.
    """
    row_hits = np.flatnonzero(np.any(img, axis=1))
    col_hits = np.flatnonzero(np.any(img, axis=0))
    return row_hits[0], row_hits[-1], col_hits[0], col_hits[-1]

def crop_resize(img0, size=SIZE, pad=16):
    """Crop ``img0`` around its bright pixels, pad to a square and resize.

    Args:
        img0: 2-D image array (presumably uint8 of shape (HEIGHT, WIDTH) — confirm).
        size: side length of the returned square image.
        pad: extra margin added to the longer side before squaring.

    Returns:
        The ``size`` x ``size`` image produced by ``cv2.resize``.

    Note: pixels below 28 are zeroed on a slice of ``img0``, so the input
    itself is presumably modified when it is a NumPy array.
    """
    # Bounding box of the pixels above the threshold; a 5-pixel border is
    # ignored because some images contain lines at the sides.
    top, bottom, left, right = bbox(img0[5:-5, 5:-5] > 60)
    # Cropping may cut too much, so widen the box again, clamped to the image.
    left = max(left - 13, 0)
    top = max(top - 10, 0)
    right = min(right + 13, WIDTH)
    bottom = min(bottom + 10, HEIGHT)
    crop = img0[top:bottom, left:right]
    # Treat low-intensity pixels as noise.
    crop[crop < 28] = 0
    box_w, box_h = right - left, bottom - top
    side = max(box_w, box_h) + pad
    # Pad symmetrically so that the aspect ratio survives the resize.
    crop = np.pad(crop, [((side - box_h)//2,), ((side - box_w)//2,)], mode='constant')
    return cv2.resize(crop, (size, size))

In [42]:
class GraphemeDataset(Dataset):
    """Dataset over one parquet file of flattened grapheme images.

    The first column of the file is returned as the sample name; the remaining
    columns are reshaped to ``(HEIGHT, WIDTH)`` images and inverted (``255 - v``).
    """

    def __init__(self, fname):
        self.df = pd.read_parquet(fname)
        pixels = self.df.iloc[:, 1:].values
        self.data = 255 - pixels.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, name)`` with the image cropped, resized and normalized."""
        name = self.df.iloc[idx, 0]
        sample = self.data[idx]
        # Rescale so that the brightest pixel of this image becomes 255.
        scaled = (sample * (255.0 / sample.max())).astype(np.uint8)
        cropped = crop_resize(scaled)
        mean, std = stats[0], stats[1]
        normed = (cropped.astype(np.float32) / 255.0 - mean) / std
        return normed, name

In [43]:
# Run the DenseNet model over every test parquet file and collect three
# (row_id, target) pairs per image: root, vowel and consonant predictions.
row_id, target = [], []
for fname in TEST:
    ds = GraphemeDataset(fname)
    dl = DataLoader(ds, batch_size=bs, num_workers=nworkers, shuffle=False)
    with torch.no_grad():
        for x, y in tqdm(dl):
            # Add the channel axis expected by the 1-channel stem.
            logits = model(x.unsqueeze(1).cuda())
            roots, vowels, consonants = (
                p.argmax(-1).view(-1).cpu().tolist() for p in logits
            )
            for name, root, vowel, consonant in zip(y, roots, vowels, consonants):
                row_id.extend([f'{name}_grapheme_root', f'{name}_vowel_diacritic',
                               f'{name}_consonant_diacritic'])
                target.extend([root, vowel, consonant])

submission_df3 = pd.DataFrame({'row_id': row_id, 'target': target})
#sub_df.to_csv('submission.csv', index=False)
#sub_df.head()

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [44]:
import sys

# Print a table of the notebook-level variables whose shallow size
# (sys.getsizeof) exceeds 10000 bytes.
print("{}{: >25}{}{: >10}{}".format('|','Variable Name','|','Memory','|'))
print(" ------------------------------------ ")
for var_name in dir():
    if var_name.startswith("_"):
        continue
    if sys.getsizeof(eval(var_name)) <= 10000:
        continue
    print("{}{: >25}{}{: >10}{}".format('|',var_name,'|',sys.getsizeof(eval(var_name)),'|'))

|            Variable Name|    Memory|
 ------------------------------------ 
|                       df|  38722817|


In [45]:
# Drop the training-label DataFrame: it is the only variable the memory
# report above listed as larger than 10000 bytes.
del df

# Fifth model([Keras]:Grapheme GridMask+AugMix In EfficientNet: 0.9554)  
ref:https://www.kaggle.com/ipythonx/keras-grapheme-gridmask-augmix-in-efficientnet/output

In [46]:
import os
import cv2
import gc
import math
import random
import warnings
import numpy as np
import pandas as pd
from PIL import Image
from glob import glob
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm

#sklearns 
from sklearn.metrics import cohen_kappa_score, accuracy_score
from sklearn.model_selection import train_test_split 

# keras modules 
import tensorflow as tf
import keras
from keras.optimizers import Adam, Nadam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Conv2D, GlobalMaxPooling2D, concatenate
from keras.layers import (MaxPooling2D, Input, Average, Activation, MaxPool2D,
                          Flatten, LeakyReLU, BatchNormalization)
from keras import models
from keras import layers
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

from keras.utils import Sequence
from keras import utils as np_utils
from keras.callbacks import (Callback, ModelCheckpoint,
                                        LearningRateScheduler,EarlyStopping, 
                                        ReduceLROnPlateau,CSVLogger)

# Silence Python warnings from here on.
warnings.simplefilter('ignore')
# Use seaborn's white-grid style for plots.
sns.set_style('whitegrid')

Using TensorFlow backend.


# Parameters of the Keras EfficientNet section.
SEED = 2020             # seed passed to seed_all and train_test_split
epoch = 100
batch_size = 12         # batch size of the GraphemeGenerator instances
dim = (128, 128)        # size the generator resizes every image to
SIZE = 128              # output size used by the AugMix affine operations
stats = (0.0692, 0.2051)
HEIGHT = 137 
WIDTH = 236

def seed_all(SEED):
    """Seed Python's ``random`` module and NumPy's global RNG with ``SEED``."""
    for seeder in (random.seed, np.random.seed):
        seeder(SEED)
    
# Seed Python's and NumPy's random number generators.
seed_all(SEED)

# Load the label files; `im_path` is the directory of the pre-rendered
# 128x128 PNG images.
im_path = '../input/grapheme-imgs-128x128/'
train = pd.read_csv('../input/bengaliai-cv19/train.csv')
test = pd.read_csv('../input/bengaliai-cv19/test.csv')
# Full PNG path of every training image, built from its image_id.
train['filename'] = train.image_id.apply(lambda filename: im_path + filename + '.png')

In [47]:
## Grid Mask
# code taken from https://www.kaggle.com/haqishen/gridmask

import albumentations
from albumentations.core.transforms_interface import DualTransform, ImageOnlyTransform
from albumentations.augmentations import functional as F

class GridMask(DualTransform):
    """GridMask augmentation for image classification and object detection.

    Args:
        num_grid (int or (int, int)): number of grid cells in a row or column.
            A pair is treated as an inclusive range; one mask is built per value
            and one of them is picked at random on every call.
        fill_value (int, float, list of int, list of float): value written into
            the dropped cells of the mask. The mask is multiplied with the image,
            so the default 0 blanks those pixels.
        rotate ((int, int) or int): range from which a random angle is picked.
            A single int ``r`` means ``(-r, r)``. The mask is only rotated when
            the upper bound is positive.
        mode (int):
            0 - crop out a quarter of the square of each grid (left top)
            1 - reserve a quarter of the square of each grid (left top)
            2 - crop out 2 quarters of the square of each grid (left top & right bottom)

    Targets:
        image, mask

    Image types:
        uint8, float32

    Reference:
    |  https://arxiv.org/abs/2001.04086
    |  https://github.com/akuxcw/GridMask
    """

    def __init__(self, num_grid=3, fill_value=0, rotate=0, mode=0, always_apply=False, p=0.5):
        super(GridMask, self).__init__(always_apply, p)
        self.num_grid = (num_grid, num_grid) if isinstance(num_grid, int) else num_grid
        self.rotate = (-rotate, rotate) if isinstance(rotate, int) else rotate
        self.fill_value = fill_value
        self.mode = mode
        # Masks are built lazily on the first image, see init_masks.
        self.masks = None
        self.rand_h_max = []
        self.rand_w_max = []

    def init_masks(self, height, width):
        """Build one mask per grid count the first time an image size is seen."""
        if self.masks is not None:
            return

        self.masks = []
        first, last = self.num_grid[0], self.num_grid[1]
        for n_g in range(first, last + 1):
            grid_h = height / n_g
            grid_w = width / n_g
            # One extra grid cell per axis leaves room for the random offset.
            mask_shape = (int((n_g + 1) * grid_h), int((n_g + 1) * grid_w))
            this_mask = np.ones(mask_shape).astype(np.uint8)
            for i in range(n_g + 1):
                top = i * grid_h
                for j in range(n_g + 1):
                    left = j * grid_w
                    this_mask[
                        int(top) : int(top + grid_h / 2),
                        int(left) : int(left + grid_w / 2)
                    ] = self.fill_value
                    if self.mode == 2:
                        this_mask[
                            int(top + grid_h / 2) : int(top + grid_h),
                            int(left + grid_w / 2) : int(left + grid_w)
                        ] = self.fill_value

            if self.mode == 1:
                this_mask = 1 - this_mask

            self.masks.append(this_mask)
            self.rand_h_max.append(grid_h)
            self.rand_w_max.append(grid_w)

    def apply(self, image, mask, rand_h, rand_w, angle, **params):
        """Multiply ``image`` in place by a window of ``mask`` and return it."""
        h, w = image.shape[:2]
        if self.rotate[1] > 0:
            mask = F.rotate(mask, angle)
        if image.ndim == 3:
            mask = mask[:, :, np.newaxis]
        window = mask[rand_h:rand_h + h, rand_w:rand_w + w]
        image *= window.astype(image.dtype)
        return image

    def get_params_dependent_on_targets(self, params):
        """Pick a mask, a random offset inside one grid cell and a rotation angle."""
        img = params['image']
        height, width = img.shape[:2]
        self.init_masks(height, width)

        mid = np.random.randint(len(self.masks))
        rand_h = np.random.randint(self.rand_h_max[mid])
        rand_w = np.random.randint(self.rand_w_max[mid])
        if self.rotate[1] > 0:
            angle = np.random.randint(self.rotate[0], self.rotate[1])
        else:
            angle = 0

        return {'mask': self.masks[mid], 'rand_h': rand_h, 'rand_w': rand_w, 'angle': angle}

    @property
    def targets_as_params(self):
        return ['image']

    def get_transform_init_args_names(self):
        return ('num_grid', 'fill_value', 'rotate', 'mode')

In [48]:
# augmix : https://github.com/google-research/augmix

from PIL import Image
from PIL import ImageOps
import numpy as np

def int_parameter(level, maxval):
    """Scale ``maxval`` by ``level / 10`` and truncate the result to an int.

    Args:
        level: Level of the operation, on a scale whose maximum is 10.
        maxval: Maximum value that the operation can have.

    Returns:
        ``int(level * maxval / 10)``.
    """
    scaled = level * maxval / 10
    return int(scaled)


def float_parameter(level, maxval):
    """Scale ``maxval`` by ``level / 10`` and return the result as a float.

    Args:
        level: Level of the operation, on a scale whose maximum is 10.
        maxval: Maximum value that the operation can have.

    Returns:
        ``float(level) * maxval / 10.``.
    """
    level_as_float = float(level)
    return level_as_float * maxval / 10.

def sample_level(n):
    """Draw a level uniformly from ``[0.1, n)`` with NumPy's global RNG."""
    lowest = 0.1
    return np.random.uniform(low=lowest, high=n)

def autocontrast(pil_img, _):
    """Return ``ImageOps.autocontrast(pil_img)``; the second argument is ignored."""
    stretched = ImageOps.autocontrast(pil_img)
    return stretched

def equalize(pil_img, _):
    """Return ``ImageOps.equalize(pil_img)``; the second argument is ignored."""
    equalized = ImageOps.equalize(pil_img)
    return equalized

def posterize(pil_img, level):
    """Posterize ``pil_img``, keeping ``4 - k`` bits, with ``k`` sampled from ``level``."""
    bits_removed = int_parameter(sample_level(level), 4)
    return ImageOps.posterize(pil_img, 4 - bits_removed)

def rotate(pil_img, level):
    """Rotate ``pil_img`` by a random angle of at most 30 degrees scaled by ``level``.

    The sign of the angle is flipped with probability 0.5.
    """
    degrees = int_parameter(sample_level(level), 30)
    negate = np.random.uniform() > 0.5
    return pil_img.rotate(-degrees if negate else degrees, resample=Image.BILINEAR)

def solarize(pil_img, level):
    """Solarize ``pil_img`` with threshold ``256 - k``, ``k`` sampled from ``level``."""
    reduction = int_parameter(sample_level(level), 256)
    threshold = 256 - reduction
    return ImageOps.solarize(pil_img, threshold)

def shear_x(pil_img, level):
    """Apply a horizontal shear of random sign and magnitude up to 0.3 * level / 10."""
    shear = float_parameter(sample_level(level), 0.3)
    if np.random.uniform() > 0.5:
        shear = -shear
    affine = (1, shear, 0, 0, 1, 0)
    return pil_img.transform((SIZE, SIZE), Image.AFFINE, affine,
                             resample=Image.BILINEAR)

def shear_y(pil_img, level):
    """Apply a vertical shear of random sign and magnitude up to 0.3 * level / 10."""
    shear = float_parameter(sample_level(level), 0.3)
    if np.random.uniform() > 0.5:
        shear = -shear
    affine = (1, 0, 0, shear, 1, 0)
    return pil_img.transform((SIZE, SIZE), Image.AFFINE, affine,
                             resample=Image.BILINEAR)

def translate_x(pil_img, level):
    """Shift ``pil_img`` horizontally by a random offset of at most ``SIZE / 3`` scaled by ``level``."""
    offset = int_parameter(sample_level(level), SIZE / 3)
    if np.random.random() > 0.5:
        offset = -offset
    affine = (1, 0, offset, 0, 1, 0)
    return pil_img.transform((SIZE, SIZE), Image.AFFINE, affine,
                             resample=Image.BILINEAR)


def translate_y(pil_img, level):
    """Shift ``pil_img`` vertically by a random offset of at most ``SIZE / 3`` scaled by ``level``."""
    offset = int_parameter(sample_level(level), SIZE / 3)
    if np.random.random() > 0.5:
        offset = -offset
    affine = (1, 0, 0, 0, 1, offset)
    return pil_img.transform((SIZE, SIZE), Image.AFFINE, affine,
                             resample=Image.BILINEAR)

# Pool of operations from which augment_and_mix draws at random.
augmentations = [
    autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
    translate_x, translate_y
]

# Per-channel mean and standard deviation used by normalize(); the same value
# is repeated for each of the three channels.
# taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]

def normalize(image):
    """Subtract ``MEAN`` and divide by ``STD`` channel-wise on an (h, w, c) image."""
    channels_first = image.transpose(2, 0, 1)
    mean = np.array(MEAN).reshape(-1, 1, 1)
    std = np.array(STD).reshape(-1, 1, 1)
    standardized = (channels_first - mean) / std
    # Back to channel-last layout.
    return standardized.transpose(1, 2, 0)


def apply_op(image, op, severity):
    """Run one PIL-based operation on ``image`` and return the result divided by 255.

    ``image`` is multiplied by 255, clipped to [0, 255] and cast to uint8 before
    it is handed to ``op(pil_img, severity)``.
    """
    as_uint8 = np.clip(image * 255., 0, 255).astype(np.uint8)
    augmented = op(Image.fromarray(as_uint8), severity)
    return np.asarray(augmented) / 255.


def augment_and_mix(image, severity=1, width=3, depth=1, alpha=1.):
    """Perform AugMix augmentations and compute the mixture.

    Args:
        image: Raw input image as float32 np.ndarray of shape (h, w, c).
        severity: Severity of the underlying augmentation operators (1 to 10).
        width: Number of augmentation chains that are mixed.
        depth: Number of operations per chain. A value <= 0 enables stochastic
            depth: every chain draws its own depth uniformly from [1, 3].
        alpha: Probability coefficient for the Beta and Dirichlet distributions.

    Returns:
        mixed: Augmented and mixed image (normalized with ``normalize``).
    """
    ws = np.float32(
      np.random.dirichlet([alpha] * width))
    m = np.float32(np.random.beta(alpha, alpha))

    mix = np.zeros_like(image)
    for i in range(width):
        image_aug = image.copy()
        # Keep the sampled depth in its own variable. Overwriting `depth` here
        # would freeze the first sampled value for all remaining chains.
        chain_depth = depth if depth > 0 else np.random.randint(1, 4)

        for _ in range(chain_depth):
            op = np.random.choice(augmentations)
            image_aug = apply_op(image_aug, op, severity)
        # Accumulate the weighted chain output into `mix` in place.
        mix = np.add(mix, ws[i] * normalize(image_aug), out=mix,
                     casting="unsafe")

    mixed = (1 - m) * normalize(image) + m * mix
    return mixed

In [49]:
class GraphemeGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of grayscale images with three one-hot targets.

    Args:
        data: DataFrame with ``image_id``, ``grapheme_root``, ``vowel_diacritic``
            and ``consonant_diacritic`` columns.
        batch_size: number of samples per batch; a trailing partial batch is dropped.
        dim: ``(width, height)`` every image is resized to.
        shuffle: reshuffle the sample order at the end of every epoch.
        transform: optional callable used as ``transform(image=img)['image']``.
            When given, each sample gets either this transform or
            ``augment_and_mix``, each with probability 0.5.
    """

    def __init__(self, data, batch_size, dim, shuffle=True, transform=None):
        self._data = data
        # One-hot encode each target column once, up front.
        self._label_1 = pd.get_dummies(self._data['grapheme_root'],
                                       columns = ['grapheme_root'])
        self._label_2 = pd.get_dummies(self._data['vowel_diacritic'],
                                       columns = ['vowel_diacritic'])
        self._label_3 = pd.get_dummies(self._data['consonant_diacritic'],
                                       columns = ['consonant_diacritic'])
        self._list_idx = data.index.values
        self._batch_size = batch_size
        self._dim = dim
        self._shuffle = shuffle
        self.transform = transform
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch."""
        return len(self._data) // self._batch_size

    def __getitem__(self, index):
        """Return ``(Data, [Target_1, Target_2, Target_3])`` for batch ``index``."""
        start = index * self._batch_size
        positions = self._indices[start:start + self._batch_size]
        row_labels = self._list_idx[positions]

        Data     = np.empty((self._batch_size, *self._dim, 1))
        Target_1 = np.empty((self._batch_size, 168), dtype = int)
        Target_2 = np.empty((self._batch_size, 11 ), dtype = int)
        Target_3 = np.empty((self._batch_size,  7 ), dtype = int)

        def to_gray(rgb):
            # Weighted sum of the first three channels.
            return np.dot(rgb[... , :3] , [0.299 , 0.587, 0.114])

        for i, k in enumerate(row_labels):
            # Load the image file with cv2 and bring it to the target size.
            image = cv2.imread(im_path + self._data['image_id'][k] + '.png')
            image = cv2.resize(image,  self._dim)

            if self.transform is not None:
                if np.random.rand() > 0.5:
                    # albumentations: grid mask
                    image = self.transform(image=image)['image']
                else:
                    # AugMix augmentation.
                    # NOTE(review): augment_and_mix documents a float32 input,
                    # but the array passed here comes straight from cv2 — confirm.
                    image = augment_and_mix(image)

            # Convert to grayscale and add a trailing channel axis.
            Data[i,:, :, :] = to_gray(image)[:, :, np.newaxis]

            Target_1[i,:] = self._label_1.loc[k, :].values
            Target_2[i,:] = self._label_2.loc[k, :].values
            Target_3[i,:] = self._label_3.loc[k, :].values

        return Data, [Target_1, Target_2, Target_3]

    def on_epoch_end(self):
        """Reset the sample order and shuffle it when shuffling is enabled."""
        self._indices = np.arange(len(self._list_idx))
        if self._shuffle:
            np.random.shuffle(self._indices)

!pip install ../input/efficientnet-keras-source-code/repository/qubvel-efficientnet-c993591

import efficientnet.keras as efn 

# We will be using EfficientNetB0 as the backbone, built without its
# classification top for 128x128 three-channel input. The weights are read
# from the local file `wg`.
wg = '../input/efficientnet-keras-weights-b0b5/efficientnet-b0_imagenet_1000_notop.h5'
efnet = efn.EfficientNetB0(weights=wg, 
                           include_top = False, 
                           input_shape=(128, 128, 3))

In [50]:
from keras.engine import Layer

class GroupNormalization(Layer):
    """Group normalization layer
    Group Normalization divides the channels into groups and computes within each group
    the mean and variance for normalization. GN's computation is independent of batch sizes,
    and its accuracy is stable in a wide range of batch sizes
    # Arguments
        groups: Integer, the number of groups for Group Normalization.
        axis: Integer, the axis that should be normalized
            (typically the features axis).
            For instance, after a `Conv2D` layer with
            `data_format="channels_first"`,
            set `axis=1` in `BatchNormalization`.
        epsilon: Small float added to variance to avoid dividing by zero.
        center: If True, add offset of `beta` to normalized tensor.
            If False, `beta` is ignored.
        scale: If True, multiply by `gamma`.
            If False, `gamma` is not used.
            When the next layer is linear (also e.g. `nn.relu`),
            this can be disabled since the scaling
            will be done by the next layer.
        beta_initializer: Initializer for the beta weight.
        gamma_initializer: Initializer for the gamma weight.
        beta_regularizer: Optional regularizer for the beta weight.
        gamma_regularizer: Optional regularizer for the gamma weight.
        beta_constraint: Optional constraint for the beta weight.
        gamma_constraint: Optional constraint for the gamma weight.
    # Input shape
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.
    # Output shape
        Same shape as input.
    # References
        - [Group Normalization](https://arxiv.org/abs/1803.08494)
    """
    # NOTE(review): this class uses `initializers`, `regularizers`, `constraints`,
    # `InputSpec` and `K`; none of them is imported in this cell — confirm they
    # are in scope before the layer is instantiated.

    def __init__(self,
                 groups=32,
                 axis=-1,
                 epsilon=1e-5,
                 center=True,
                 scale=True,
                 beta_initializer='zeros',
                 gamma_initializer='ones',
                 beta_regularizer=None,
                 gamma_regularizer=None,
                 beta_constraint=None,
                 gamma_constraint=None,
                 **kwargs):
        super(GroupNormalization, self).__init__(**kwargs)
        self.supports_masking = True
        self.groups = groups
        self.axis = axis
        self.epsilon = epsilon
        self.center = center
        self.scale = scale
        self.beta_initializer = initializers.get(beta_initializer)
        self.gamma_initializer = initializers.get(gamma_initializer)
        self.beta_regularizer = regularizers.get(beta_regularizer)
        self.gamma_regularizer = regularizers.get(gamma_regularizer)
        self.beta_constraint = constraints.get(beta_constraint)
        self.gamma_constraint = constraints.get(gamma_constraint)

    def build(self, input_shape):
        """Validate the channel dimension and create the `gamma` / `beta` weights.

        Raises:
            ValueError: if the normalized axis has no defined size, has fewer
                channels than `groups`, or is not divisible by `groups`.
        """
        dim = input_shape[self.axis]

        if dim is None:
            raise ValueError('Axis ' + str(self.axis) + ' of '
                             'input tensor should have a defined dimension '
                             'but the layer received an input with shape ' +
                             str(input_shape) + '.')

        if dim < self.groups:
            raise ValueError('Number of groups (' + str(self.groups) + ') cannot be '
                             'more than the number of channels (' +
                             str(dim) + ').')

        if dim % self.groups != 0:
            # The channel count has to be divisible by the group count; the
            # message used to state the relation the wrong way round.
            raise ValueError('Number of channels (' + str(dim) + ') must be a '
                             'multiple of the number of groups (' +
                             str(self.groups) + ').')

        self.input_spec = InputSpec(ndim=len(input_shape),
                                    axes={self.axis: dim})
        shape = (dim,)

        if self.scale:
            self.gamma = self.add_weight(shape=shape,
                                         name='gamma',
                                         initializer=self.gamma_initializer,
                                         regularizer=self.gamma_regularizer,
                                         constraint=self.gamma_constraint)
        else:
            self.gamma = None
        if self.center:
            self.beta = self.add_weight(shape=shape,
                                        name='beta',
                                        initializer=self.beta_initializer,
                                        regularizer=self.beta_regularizer,
                                        constraint=self.beta_constraint)
        else:
            self.beta = None
        self.built = True

    def call(self, inputs, **kwargs):
        """Normalize `inputs` per group, then apply `gamma` / `beta` if enabled."""
        input_shape = K.int_shape(inputs)
        tensor_input_shape = K.shape(inputs)

        # Prepare broadcasting shape.
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
        broadcast_shape.insert(1, self.groups)

        reshape_group_shape = K.shape(inputs)
        group_axes = [reshape_group_shape[i] for i in range(len(input_shape))]
        group_axes[self.axis] = input_shape[self.axis] // self.groups
        group_axes.insert(1, self.groups)

        # reshape inputs to new group shape
        group_shape = [group_axes[0], self.groups] + group_axes[2:]
        group_shape = K.stack(group_shape)
        inputs = K.reshape(inputs, group_shape)

        # Statistics are taken over every axis after the (batch, group) pair.
        group_reduction_axes = list(range(len(group_axes)))
        group_reduction_axes = group_reduction_axes[2:]

        mean = K.mean(inputs, axis=group_reduction_axes, keepdims=True)
        variance = K.var(inputs, axis=group_reduction_axes, keepdims=True)

        inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))

        # prepare broadcast shape
        inputs = K.reshape(inputs, group_shape)
        outputs = inputs

        # In this case we must explicitly broadcast all parameters.
        if self.scale:
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            outputs = outputs * broadcast_gamma

        if self.center:
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            outputs = outputs + broadcast_beta

        # Restore the original tensor shape.
        outputs = K.reshape(outputs, tensor_input_shape)

        return outputs

    def get_config(self):
        """Return the layer configuration merged with the base layer's config."""
        config = {
            'groups': self.groups,
            'axis': self.axis,
            'epsilon': self.epsilon,
            'center': self.center,
            'scale': self.scale,
            'beta_initializer': initializers.serialize(self.beta_initializer),
            'gamma_initializer': initializers.serialize(self.gamma_initializer),
            'beta_regularizer': regularizers.serialize(self.beta_regularizer),
            'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
            'beta_constraint': constraints.serialize(self.beta_constraint),
            'gamma_constraint': constraints.serialize(self.gamma_constraint)
        }
        base_config = super(GroupNormalization, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        """The output has the same shape as the input."""
        # Was `return input_shap`, an undefined name that raised NameError.
        return input_shape

# Replace all Batch Normalization layers by Group Normalization layers.
# Only layers whose name contains "batch_normalization" are touched.
# NOTE(review): this assigns a new layer object into a slot of `efnet.layers`;
# presumably that does not rewire the already-built model graph, and the
# name filter may not match the backbone's layer names — confirm that the
# replacement actually takes effect.
for i, layer in enumerate(efnet.layers):
    if "batch_normalization" in layer.name:
        efnet.layers[i] = GroupNormalization(groups=32, 
                                             axis=-1, 
                                             epsilon=0.00001)

def create_model(input_dim, output_dim, base_model):
    """Build the three-headed classifier on top of ``base_model``.

    Args:
        input_dim: shape of one input sample, e.g. ``(128, 128, 1)``.
        output_dim: ``(n_gra, n_vow, n_cons)``, the number of classes of the
            'gra', 'vow' and 'cons' outputs. ``None`` selects the previously
            hard-coded ``(168, 11, 7)``.
        base_model: feature extractor that is called on a 3-channel tensor.

    Returns:
        A ``Model`` mapping the input to ``[gra, vow, cons]`` softmax outputs.
    """
    # `output_dim` used to be ignored in favour of hard-coded sizes.
    if output_dim is None:
        output_dim = (168, 11, 7)
    n_gra, n_vow, n_cons = output_dim

    input_tensor = Input(input_dim)

    # Map the input to the 3 channels the backbone is called with.
    x = Conv2D(3, (3, 3), padding='same',  kernel_initializer='he_uniform',
               bias_initializer='zeros')(input_tensor)
    curr_output = base_model(x)
    curr_output = GlobalAveragePooling2D()(curr_output)
    curr_output = Dropout(0.5)(curr_output)
    curr_output = Dense(512, activation='elu')(curr_output)
    curr_output = Dropout(0.5)(curr_output)

    # One softmax head per target; the names are used as keys in model.compile.
    output1 = Dense(n_gra, activation='softmax', name='gra')(curr_output)
    output2 = Dense(n_vow, activation='softmax', name='vow')(curr_output)
    output3 = Dense(n_cons, activation='softmax', name='cons')(curr_output)
    output_tensor = [output1, output2, output3]

    model = Model(input_tensor, output_tensor)

    return model

# Building the complete model.
# Note: this rebinds the notebook-level name `model`, which earlier referred
# to the PyTorch Dnet_1ch instance.
model = create_model(input_dim=(128,128,1), output_dim=(168,11,7), base_model = efnet)
model.summary()

In [51]:
# code: https://github.com/titu1994/keras-adabound   
class AdaBound(keras.optimizers.Optimizer):
    """AdaBound optimizer.
    Default parameters follow those provided in the original paper.
    # Arguments
        lr: float >= 0. Learning rate.
        final_lr: float >= 0. Final learning rate.
        beta_1: float, 0 < beta < 1. Generally close to 1.
        beta_2: float, 0 < beta < 1. Generally close to 1.
        gamma: float >= 0. Convergence speed of the bound function.
        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
        decay: float >= 0. Learning rate decay over each update.
        weight_decay: Weight decay weight.
        amsbound: boolean. Whether to apply the AMSBound variant of this
            algorithm.
    # References
        - [Adaptive Gradient Methods with Dynamic Bound of Learning Rate]
          (https://openreview.net/forum?id=Bkg3g2R9FX)
        - [Adam - A Method for Stochastic Optimization]
          (https://arxiv.org/abs/1412.6980v8)
        - [On the Convergence of Adam and Beyond]
          (https://openreview.net/forum?id=ryQu7f-RZ)
    """

    def __init__(self, learning_rate=0.001, final_lr=0.1, beta_1=0.9, beta_2=0.999, gamma=1e-3,
                 epsilon=None, decay=0., amsbound=False, weight_decay=0.0, **kwargs):
        """Create the optimizer state variables.

        Raises:
            ValueError: if `gamma` is outside the [0, 1] range.
        """
        super(AdaBound, self).__init__(**kwargs)

        if not 0. <= gamma <= 1.:
            raise ValueError("Invalid `gamma` parameter. Must lie in [0, 1] range.")

        # Backend variables, created under a name scope named after the class.
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.learning_rate = K.variable(learning_rate, name='learning_rate')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')

        self.final_lr = final_lr
        self.gamma = gamma

        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        self.initial_decay = decay
        self.amsbound = amsbound

        self.weight_decay = float(weight_decay)
        # Initial learning rate as a plain float; used to rescale `final_lr`.
        self.base_lr = float(learning_rate)

    def get_updates(self, loss, params):
        """Build and return the list of update ops for one optimization step."""
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        learning_rate = self.learning_rate
        if self.initial_decay > 0:
            # Inverse-time decay of the learning rate over the iterations.
            learning_rate = learning_rate * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        # 1-based step counter.
        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = learning_rate * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        # Scale the final learning rate by the same factor as the current one.
        final_lr = self.final_lr * learning_rate / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        # First- and second-moment accumulators, one per parameter.
        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            # Placeholders so that the zip below has the same length.
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                # AMSBound: use the running maximum of the second moment.
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            # Clip the per-element step size into [lower_bound, upper_bound].
            bounded_lr_t = m_t * K.minimum(K.maximum(step_size_p_bound,
                                                     lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates

    def get_config(self):
        """Return the optimizer hyper-parameters merged with the base config."""
        config = {'learning_rate': float(K.get_value(self.learning_rate)),
                  'final_lr': float(self.final_lr),
                  'beta_1': float(K.get_value(self.beta_1)),
                  'beta_2': float(K.get_value(self.beta_2)),
                  'gamma': float(self.gamma),
                  'decay': float(K.get_value(self.decay)),
                  'epsilon': self.epsilon,
                  'weight_decay': self.weight_decay,
                  'amsbound': self.amsbound}
        base_config = super(AdaBound, self).get_config()
        return dict(list(base_config.items()) + list(config.items())) 

from keras import backend as K

# Compile the model: one categorical cross-entropy loss and one accuracy
# metric per named output ('gra', 'vow', 'cons'), all losses weighted equally,
# optimised with AdaBound.
model.compile(

    optimizer = AdaBound(learning_rate=0.0001,
                    final_lr=.1,
                    gamma=1e-3,
                    weight_decay=5e-4,
                    amsbound=False),

    loss = {'gra' : 'categorical_crossentropy',
            'vow' : 'categorical_crossentropy',
            'cons': 'categorical_crossentropy'},

    loss_weights = {'gra' : 1.0,
                    'vow' : 1.0,
                    'cons': 1.0},

    metrics={'gra' : 'accuracy',
             'vow' : 'accuracy',
             'cons': 'accuracy'}
)

# model.summary() prints the layer table itself and returns None, so wrapping
# it in print() only appended a stray "None" line to the cell output.
model.summary()

In [52]:
# Augmentation pipeline for training: GridMask is the only transform.
transforms_train = albumentations.Compose(
    [GridMask(num_grid=3, rotate=15, p=1)]
)

# Way one (data generator): hold out 20% of `train` for validation, seeded.
train_labels, val_labels = train_test_split(
    train, test_size=0.20, random_state=SEED
)

# Training batches: shuffled, with the augmentation pipeline attached.
train_generator = GraphemeGenerator(
    train_labels, batch_size, dim, shuffle=True, transform=transforms_train
)

# Validation batches: fixed order, no transform passed.
val_generator = GraphemeGenerator(
    val_labels, batch_size, dim, shuffle=False
)

NameError: name 'train' is not defined

from keras.callbacks import (ModelCheckpoint, LearningRateScheduler,
                             EarlyStopping, ReduceLROnPlateau, TensorBoard, CSVLogger)

# Callbacks handed to the training call; extend the returned list to experiment.
def Call_Back():
    """Build the Keras callbacks used while fitting the model.

    Returns:
        list: a ModelCheckpoint, a CSVLogger and an EarlyStopping instance,
        in that order.
    """
    return [
        # Keep only the weights of the epoch with the lowest val_loss.
        ModelCheckpoint(
            'E0Train_B0.h5',
            monitor='val_loss',
            mode='min',
            save_best_only=True,
            save_weights_only=True,
            verbose=0,
        ),
        # Log the per-epoch metrics to a CSV file.
        CSVLogger('E0.csv'),
        # Stop after 5 epochs without a val_loss improvement.
        EarlyStopping(monitor='val_loss', mode='min', patience=5),
    ]

# Run configuration
epochs = 60       # increase for a longer run, e.g. 100/200
training = False  # set to True to train instead of loading saved weights

# Build all callbacks
callbacks = Call_Back()

if not training:
    # Inference-only run: restore the previously trained weights.
    model.load_weights('../input/efficientb/E0Train_B0.h5')
else:
    # Actual training (fitting); step counts drop the last partial batch.
    train_history = model.fit_generator(
        train_generator,
        steps_per_epoch=len(train_labels) // batch_size,
        validation_data=val_generator,
        validation_steps=len(val_labels) // batch_size,
        epochs=epochs,
        callbacks=callbacks,
    )

# Training log saved alongside the weights
log_file = "../input/efficientb/E0.csv"
history = pd.read_csv(log_file)

# Show the logged row with the lowest validation loss, then preview the log.
print(history.loc[history['val_loss'].idxmin()])
history.head()

In [53]:
import pandas 
# Apply the "seaborn" style sheet to the figures drawn after this point.
# NOTE(review): newer matplotlib releases renamed this style ("seaborn-v0_8");
# confirm against the installed version before re-running.
plt.style.use("seaborn")

def plot_log(filename, show=True):
    """Plot the loss and accuracy curves stored in a CSV training log.

    Every column whose name contains 'loss' is drawn against the 'epoch'
    column in the upper panel; every column whose name contains 'acc' is
    drawn in the lower panel.

    Args:
        filename: Path of the CSV log. It needs an 'epoch' column whenever a
            loss or accuracy column is present.
        show: When True, call ``plt.show()`` after drawing.
    """
    data = pandas.read_csv(filename)

    fig = plt.figure(figsize=(8, 10))
    fig.subplots_adjust(top=0.95, bottom=0.05, right=0.95)

    # (subplot position, column-name fragment, panel title)
    panels = [
        (211, 'loss', 'Training and Validation Loss'),
        (212, 'acc', 'Training and Validation Accuracy'),
    ]
    for position, fragment, title in panels:
        # Draw on the Axes object directly instead of relying on pyplot's
        # implicit "current axes" state.
        ax = fig.add_subplot(position)
        for key in data.keys():
            if fragment in key:
                ax.plot(data['epoch'].values, data[key].values, label=key)
        ax.legend()
        ax.set_title(title)

    if show:
        plt.show()
        
# Draw the loss/accuracy curves for the CSV training log at `log_file`.
plot_log(log_file)

NameError: name 'log_file' is not defined

In [54]:
# helper function
def bbox(img):
    """Return the inclusive bounds of the truthy region of a 2-D array.

    Returns:
        (rmin, rmax, cmin, cmax): first/last row index and first/last column
        index that contain at least one truthy element.
    """
    occupied_rows = np.where(np.any(img, axis=1))[0]
    occupied_cols = np.where(np.any(img, axis=0))[0]
    return (occupied_rows[0], occupied_rows[-1],
            occupied_cols[0], occupied_cols[-1])

def crop_resize(img0, size=SIZE, pad=16):
    """Crop an image to its content, pad it to a square and resize it.

    The input array is not modified: the crop is copied before low-intensity
    pixels are zeroed.

    Args:
        img0: 2-D image array. The margins below are clamped against the
            module constants WIDTH and HEIGHT, so the image is assumed to
            have that size — TODO confirm for other callers.
        size: Side length of the square output.
        pad: Extra pixels added to the longer side of the crop before the
            square padding is computed.

    Returns:
        The cropped, square-padded image resized to (size, size).
    """
    # Bounding box of the pixels above the threshold; a 5-pixel border is
    # ignored because some images contain a line at the sides.
    # NOTE(review): the box is relative to the trimmed image but is applied to
    # img0 unshifted; left as-is, presumably to match the preprocessing used
    # when the model was trained — confirm.
    ymin, ymax, xmin, xmax = bbox(img0[5:-5, 5:-5] > 60)  # 80)
    # Cropping may cut too much, so widen the box again (clamped to the image).
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT
    # Copy the crop: the slice shares memory with img0, so zeroing it in place
    # would otherwise overwrite the caller's pixels as well.
    img = img0[ymin:ymax, xmin:xmax].copy()
    # Remove low-intensity pixels as noise.
    img[img < 28] = 0
    lx, ly = xmax - xmin, ymax - ymin
    l = max(lx, ly) + pad
    # Pad both axes symmetrically so the aspect ratio is kept when rescaling.
    img = np.pad(img, [((l - ly) // 2,), ((l - lx) // 2,)], mode='constant')

    return cv2.resize(img, (size, size))

def test_batch_generator(df, batch_size):
    """Yield preprocessed image batches and their names from a test frame.

    Column 0 of `df` is read as the image name and the remaining columns as
    the flattened pixels, reshaped to (HEIGHT, WIDTH).

    Yields:
        (X_batch, names_batch): an array of shape (n, SIZE, SIZE, 1) and the
        matching names, where n is `batch_size` except possibly for the last
        batch.
    """
    total = len(df)

    for start in range(0, total, batch_size):
        stop = min(total, start + batch_size)
        rows = np.arange(start, stop)

        names_batch = df.iloc[rows, 0].values
        pixels = df.iloc[rows, 1:].values
        # Invert the intensities.
        inverted = 255 - pixels.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)
        X_batch = np.zeros((stop - start, SIZE, SIZE, 1))

        for j, raw in enumerate(inverted):
            # Stretch each image so that its brightest pixel becomes 255.
            stretched = (raw * (255.0 / raw.max())).astype(np.uint8)
            cropped = crop_resize(stretched, size=SIZE)
            X_batch[j,] = cropped[:, :, np.newaxis]

        yield X_batch, names_batch


# Parquet files holding the test images
TEST = [
    "../input/bengaliai-cv19/test_image_data_0.parquet",
    "../input/bengaliai-cv19/test_image_data_1.parquet",
    "../input/bengaliai-cv19/test_image_data_2.parquet",
    "../input/bengaliai-cv19/test_image_data_3.parquet",
]

# Placeholders for the submission columns
row_id = []
target = []

# Iterate over the test sets
for fname in tqdm(TEST):
    test_ = pd.read_parquet(fname)
    test_gen = test_batch_generator(test_, batch_size=batch_size)

    for batch_x, batch_name in test_gen:
        batch_predict = model.predict(batch_x)
        # Reduce each output to class indices once per batch; the previous
        # code redid all three whole-batch argmax calls for every image.
        grapheme_pred = np.argmax(batch_predict[0], axis=1)
        vowel_pred = np.argmax(batch_predict[1], axis=1)
        consonant_pred = np.argmax(batch_predict[2], axis=1)

        for idx, name in enumerate(batch_name):
            # Three submission rows per image; the order of the targets
            # below must match the order of the row ids.
            row_id += [
                f"{name}_consonant_diacritic",
                f"{name}_grapheme_root",
                f"{name}_vowel_diacritic",
            ]
            target += [
                consonant_pred[idx],
                grapheme_pred[idx],
                vowel_pred[idx],
            ]

    # Release the frame before the next file is loaded.
    del test_
    gc.collect()


submission_df3 = pd.DataFrame(
    {
        'row_id': row_id,
        'target':target
    },
    columns = ['row_id','target']
)

#df_sample.to_csv('submission.csv',index=False)
gc.collect()

# Merge results

In [55]:
# Join the three models' predictions on row_id. The clashing "target" columns
# of the first two frames become target_x / target_y; the third keeps "target".
merge = (
    submission_df
    .merge(submission_df2, on="row_id")
    .merge(submission_df3, on="row_id")
)

In [56]:
# Preview the first rows of the merged predictions.
merge.head()

Unnamed: 0,row_id,target_x,target_y,target
0,Test_0_consonant_diacritic,0,0,0
1,Test_0_grapheme_root,3,3,3
2,Test_0_vowel_diacritic,0,0,0
3,Test_1_consonant_diacritic,0,0,0
4,Test_1_grapheme_root,93,93,93


# Voting

In [57]:
# One prediction column per model, in the order they were merged.
model1, model2, model3 = (
    merge[column] for column in ("target_x", "target_y", "target")
)

In [58]:
# Spot-check: the first model's prediction at index 1.
model1[1]

3

In [59]:
# Voting weight of each model (model1, model2, model3)
k1, k2, k3 = 1.2, 1.0, 0.8

# Convert every model's predicted class indices to one-hot rows of width 168
from keras.utils import to_categorical
model1_onehot, model2_onehot, model3_onehot = (
    to_categorical(predicted, 168) for predicted in (model1, model2, model3)
)

In [60]:
# Weighted vote: each model adds its weight to the class it predicted, and the
# class with the largest total is kept for that row.
voting_result = [
    np.argmax(votes1 * k1 + votes2 * k2 + votes3 * k3)
    for votes1, votes2, votes3 in zip(model1_onehot, model2_onehot, model3_onehot)
]


In [61]:
# Display the voted class index for every row.
voting_result

[0,
 3,
 0,
 0,
 93,
 2,
 0,
 19,
 0,
 0,
 115,
 0,
 0,
 79,
 4,
 0,
 115,
 2,
 5,
 147,
 9,
 0,
 137,
 7,
 0,
 119,
 9,
 0,
 133,
 10,
 4,
 148,
 1,
 0,
 21,
 2]

In [62]:
# Show the first 40 rows of per-model predictions for comparison with the vote.
merge.head(40)

Unnamed: 0,row_id,target_x,target_y,target
0,Test_0_consonant_diacritic,0,0,0
1,Test_0_grapheme_root,3,3,3
2,Test_0_vowel_diacritic,0,0,0
3,Test_1_consonant_diacritic,0,0,0
4,Test_1_grapheme_root,93,93,93
5,Test_1_vowel_diacritic,2,2,2
6,Test_2_consonant_diacritic,0,0,0
7,Test_2_grapheme_root,19,19,19
8,Test_2_vowel_diacritic,0,0,0
9,Test_3_consonant_diacritic,0,0,0


In [63]:
# Discard the per-model prediction columns; only row_id is kept.
merge = merge.drop(columns=["target_x", "target_y", "target"])

In [64]:
# Store the voted class indices as the final "target" column.
merge["target"]=voting_result

In [65]:
# Show the first 40 rows of the final row_id / target table.
merge.head(40)

Unnamed: 0,row_id,target
0,Test_0_consonant_diacritic,0
1,Test_0_grapheme_root,3
2,Test_0_vowel_diacritic,0
3,Test_1_consonant_diacritic,0
4,Test_1_grapheme_root,93
5,Test_1_vowel_diacritic,2
6,Test_2_consonant_diacritic,0
7,Test_2_grapheme_root,19
8,Test_2_vowel_diacritic,0
9,Test_3_consonant_diacritic,0


In [66]:
# Write the voted predictions to submission.csv without the index column.
merge.to_csv('submission.csv',index=False)