In [1]:
!mkdir -p /tmp/pip/cache/
!cp ../input/steel-my-models/efficientnet_pytorch-0.5.1.xyz /tmp/pip/cache/efficientnet_pytorch-0.5.1.tar.gz

In [2]:
!pip install --no-index --find-links /tmp/pip/cache/ efficientnet-pytorch

Looking in links: /tmp/pip/cache/
Processing /tmp/pip/cache/efficientnet_pytorch-0.5.1.tar.gz
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.5.1-cp36-none-any.whl size=11768 sha256=03739dd48f8b40120ad1075cb334d96c3dae508b0a5ac76abe0ee29abdcaadf1
  Stored in directory: /tmp/.cache/pip/wheels/5c/5a/43/cd0c920c44f367c447b35c79f795910683cb26cd51579b328f
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.5.1


In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import time

import albumentations as albu
from albumentations.pytorch import ToTensor
import PIL
import cv2 as cv

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms.functional as TF
from torch import nn
import torch.nn.functional as F
from torch.utils.data import SubsetRandomSampler
from torch.optim import Adam,lr_scheduler

from efficientnet_pytorch import EfficientNet
from efficientnet_pytorch.utils import Conv2dStaticSamePadding, get_model_params

from tqdm import tqdm_notebook, tqdm

## Load Data

In [4]:
# setup the input data folder
DATA_PATH = '../input/bengaliai-cv19/'

# load the dataframes with labels
train_labels = pd.read_csv(DATA_PATH + 'train.csv')
test_labels = pd.read_csv(DATA_PATH + 'test.csv')
class_map = pd.read_csv(DATA_PATH + 'class_map.csv')
sample_submission = pd.read_csv(DATA_PATH + 'sample_submission.csv')

In [5]:
def load_images():
    '''
    Helper function to load all train and test images
    '''
    test_list = []
    for i in range(0,4):
        test_list.append(pd.read_parquet(DATA_PATH + 'test_image_data_{}.parquet'.format(i)))
    test = pd.concat(test_list, ignore_index=True)
    
    return test

In [6]:
test = load_images()

## Image Preprocessing and Data Augmentation

In [7]:
# setup image hight and width
HEIGHT = 137
WIDTH = 236

def threshold_image(img):
    '''
    Helper function for thresholding the images
    '''
    gray = PIL.Image.fromarray(np.uint8(img), 'L')
    ret,th = cv.threshold(np.array(gray),0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
    return th

def train_transforms(p=.5):
    '''
    Function returns the training pipeline of augmentations
    '''
    return albu.Compose([
        # compose the random cropping and random rotation
        albu.RandomSizedCrop(min_max_height=(int(HEIGHT // 1.1), HEIGHT), height = HEIGHT, width = WIDTH, p=p),
        albu.Rotate(limit=5, p=p),
        albu.Resize(height = HEIGHT, width = WIDTH)
    ], p=1.0)

def valid_transforms():
    '''
    Function returns the training pipeline of augmentations
    '''
    return albu.Compose([
        # compose the random cropping and random rotation
        albu.Resize(height = HEIGHT, width = WIDTH)
    ], p=1.0)

## Define the Dataset

In [8]:
def get_image(idx, df, labels):
    '''
    Helper function to get the image and label from the training set
    '''
    # get the image id by idx
    image_id = df.iloc[idx].image_id
    # get the image by id
    img = df[df.image_id == image_id].values[:, 1:].reshape(HEIGHT, WIDTH).astype(float)
    # get the labels
    row = labels[labels.image_id == image_id]
    labels = row['grapheme_root'].values[0], \
    row['vowel_diacritic'].values[0], \
    row['consonant_diacritic'].values[0]
    
    return img, labels

def get_validation(idx, df):
    '''
    Helper function to get the validation image and image_id from the test set
    '''
    # get the image id by idx
    image_id = df.iloc[idx].image_id
    # get the image by id
    img = df[df.image_id == image_id].values[:, 1:].reshape(HEIGHT, WIDTH).astype(float)
    return img, image_id

In [9]:
class BengaliDataset(Dataset):
    '''
    Create custom Bengali dataset
    '''
    def __init__(self, df_images, transforms, df_labels = None, validation = False):
        self.df_images = df_images
        self.df_labels = df_labels
        self.transforms = transforms
        self.validation = validation

    def __len__(self):
        return len(self.df_images)

    def __getitem__(self, idx):
        if not self.validation:
            img, label = get_image(idx, self.df_images, self.df_labels)
            img = threshold_image(img)
            aug = self.transforms(image = img)
            return TF.to_tensor(aug['image']), label
        else:
            img, image_id = get_validation(idx, self.df_images)
            img = threshold_image(img)
            aug = self.transforms(image = img)
            return TF.to_tensor(aug['image']), image_id

## Define the Model

In [10]:
efficientnet_b0 = EfficientNet.from_name('efficientnet-b0')

In [11]:
class BengaliModel(nn.Module):
    def __init__(self, backbone_model):
        super(BengaliModel, self).__init__()
        self.conv = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3)
        self.backbone_model = backbone_model
        self.fc1 = nn.Linear(in_features=1000, out_features=168) # grapheme_root
        self.fc2 = nn.Linear(in_features=1000, out_features=11) # vowel_diacritic
        self.fc3 = nn.Linear(in_features=1000, out_features=7) # consonant_diacritic
        
    def forward(self, x):
        # pass through the backbone model
        y = self.conv(x)
        y = self.backbone_model(y)
        
        # multi-output
        grapheme_root = self.fc1(y)
        vowel_diacritic = self.fc2(y)
        consonant_diacritic = self.fc3(y)
        
        return grapheme_root, vowel_diacritic, consonant_diacritic

In [12]:
model = BengaliModel(efficientnet_b0)

## Train the Model

In [13]:
# setup training device
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [14]:
#train the model
model.to(device)

BengaliModel(
  (conv): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
  (backbone_model): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          32, 8, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          8, 32, kernel_size=(1, 1), stride=

In [15]:
state = torch.load('../input/bengaliaiutils/efficientnet_b0_10.pth', map_location=lambda storage, loc: storage)
model.load_state_dict(state["state_dict"])

<All keys matched successfully>

## Create Submission

In [16]:
# initialize train dataset
test_dataset = BengaliDataset(test, valid_transforms(), test_labels, validation = True)

In [17]:
validloader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=0)

In [18]:
def get_predicted_label(ps):
    ps = F.softmax(ps)
    top_p, top_class = ps.topk(1, dim=1)
        
    top_p = top_p.detach().numpy()
    top_class = top_class.detach().numpy()
    
    return np.array(top_class).reshape(ps.shape[0])

In [19]:
submission = pd.DataFrame(columns=['row_id', 'target'])

for imgs, image_ids in validloader:
    
    imgs = imgs.to(device)
    grapheme_root, vowel_diacritic, consonant_diacritic  = model.forward(imgs)
    imgs = imgs.cpu()
    grapheme_root = grapheme_root.cpu()
    vowel_diacritic = vowel_diacritic.cpu()
    consonant_diacritic = consonant_diacritic.cpu()
    
    grapheme_root_labels = get_predicted_label(grapheme_root)
    vowel_diacritic_labels = get_predicted_label(vowel_diacritic)
    consonant_diacritic_labels = get_predicted_label(consonant_diacritic)
        
    for i in range(imgs.shape[0]):
        img = imgs[i]
        image_id = image_ids[i]
        
        grapheme_root_label = grapheme_root_labels[i]
        vowel_diacritic_label = vowel_diacritic_labels[i]
        consonant_diacritic_label = consonant_diacritic_labels[i]

        submission = submission.append({'row_id':str(image_id)+'_grapheme_root', 'target':grapheme_root_label}, 
                                       ignore_index=True)
        submission = submission.append({'row_id':str(image_id)+'_vowel_diacritic', 'target':vowel_diacritic_label}, 
                                       ignore_index=True)
        submission = submission.append({'row_id':str(image_id)+'_consonant_diacritic', 'target':consonant_diacritic_label}, 
                                       ignore_index=True)

  


In [20]:
submission.head()

Unnamed: 0,row_id,target
0,Test_0_grapheme_root,64
1,Test_0_vowel_diacritic,1
2,Test_0_consonant_diacritic,0
3,Test_1_grapheme_root,93
4,Test_1_vowel_diacritic,2


In [21]:
submission.to_csv('submission.csv', index=False)