In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

import warnings
warnings.filterwarnings('ignore')

from torch.utils.data import Dataset
from sklearn.metrics import recall_score
import matplotlib.pyplot as plt
import joblib
from tqdm.notebook import tqdm
########## YOUR DIR
import sys
sys.path.append("..") ## to import parent's folder
from Local import DIR 
########### YOUR DIR

In [5]:
import os

# Configure CUDA device selection through environment variables; these are set
# before any later cell touches the GPU.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "0",
})

## Read & Split data

In [6]:
# Read the training table from the directory configured in Local.DIR.
train_csv_path = f"{DIR}/train.csv"
df_train = pd.read_csv(train_csv_path)


Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো
...,...,...,...,...,...
200835,Train_200835,22,7,2,র্খে
200836,Train_200836,65,9,0,ত্তো
200837,Train_200837,2,1,4,অ্যা
200838,Train_200838,152,9,0,স্নো


In [23]:
from sklearn.model_selection import train_test_split

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same
# train/validation partition. Without it every run reshuffles rows between the
# two sets, which makes metrics incomparable across runs and leaks validation
# rows into training whenever a checkpoint is resumed in a fresh kernel.
SPLIT_SEED = 42

# 80% of the rows go to training, 20% are held out for validation.
X_train, X_val = train_test_split(df_train, test_size=0.2, random_state=SPLIT_SEED)

## Define Dataset

In [16]:
class BengaliDataset(Dataset):
    """Map-style dataset yielding ``(image, labels)`` pairs, one per row of ``csv``.

    Each image is loaded from ``{img_dir}/{image_id}.pkl``, reshaped to
    ``(img_height, img_width)``, cast to ``uint8``, inverted (``255 - img``) and
    replicated to three channels before the optional transform is applied.

    Parameters
    ----------
    csv : pandas.DataFrame
        Must contain an ``image_id`` column and the three label columns listed
        in ``LABEL_COLUMNS``.
    img_height, img_width : int
        Shape each loaded pixel array is reshaped to.
    transform : callable or None
        Either an albumentations-style callable (``transform(image=img)``
        returning a mapping with an ``'image'`` key) or a torchvision-style
        callable (``transform(img)`` returning the image). ``None`` disables
        augmentation.
    img_dir : str, optional
        Directory holding the per-image ``.pkl`` files. Defaults to the
        location that was previously hard-coded.
    """

    LABEL_COLUMNS = ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')

    def __init__(self, csv, img_height, img_width, transform,
                 img_dir='../input/train_images'):
        self.csv = csv.reset_index()
        self.img_ids = csv['image_id'].values
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform
        self.img_dir = img_dir
        # Extract the labels once; building a row Series with ``iloc`` for
        # every label of every sample is needlessly slow in the loading path.
        self.labels = self.csv[list(self.LABEL_COLUMNS)].values

    def __len__(self):
        """Return the number of samples (rows of the label table)."""
        return len(self.csv)

    def _load_pixels(self, img_id):
        """Load the stored pixel array for ``img_id`` from ``img_dir``."""
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code -- only point img_dir at trusted data.
        return joblib.load(f'{self.img_dir}/{img_id}.pkl')

    def _apply_transform(self, img):
        """Run ``self.transform`` on ``img`` using whichever call style it supports."""
        try:
            augmented = self.transform(image=img)
        except TypeError:
            # torchvision-style pipelines reject the ``image=`` keyword with a
            # TypeError and expect the image as the single positional argument.
            return self.transform(img)
        return augmented['image']

    def __getitem__(self, index):
        """Return ``(image, labels)`` for the sample at position ``index``.

        ``labels`` is a length-3 array ordered as ``LABEL_COLUMNS``.
        """
        img_id = self.img_ids[index]
        img = self._load_pixels(img_id)
        img = img.reshape(self.img_height, self.img_width).astype(np.uint8)
        # Invert the intensities.
        img = 255 - img
        # (H, W) -> (H, W, 3): repeat the single channel three times.
        img = np.repeat(img[:, :, np.newaxis], 3, axis=2)
        if self.transform is not None:
            img = self._apply_transform(img)

        # Copy so callers never alias the dataset's internal label table.
        return img, self.labels[index].copy()

## Define augmentations


In [22]:
import torch
import torchvision.transforms as T

# Training pipeline: tensor conversion first, then a random rotation
# parameterised with 20.
_train_steps = [
    T.ToTensor(),
    T.RandomRotation(20),
    ##we can add more augmentation##
]
train_augmentation = T.Compose(_train_steps)

# Validation pipeline: tensor conversion only, no random augmentation.
_valid_steps = [T.ToTensor()]
valid_augmentation = T.Compose(_valid_steps)

## Make Data Loader

In [26]:
from torch.utils.data import Dataset, DataLoader

# Height/width every image is reshaped to by the dataset.
_image_shape = dict(img_height=137, img_width=236)
# Settings shared by the training and validation loaders.
_loader_settings = dict(num_workers=4, batch_size=128)

train_dataset = BengaliDataset(csv=X_train, transform=train_augmentation, **_image_shape)
valid_dataset = BengaliDataset(csv=X_val, transform=valid_augmentation, **_image_shape)

# Only the training loader shuffles; the validation loader keeps its order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_settings)
valid_loader = DataLoader(valid_dataset, shuffle=False, **_loader_settings)

## Make model, optimizer