## Train

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

import warnings
warnings.filterwarnings('ignore')

from torch.utils.data import Dataset
from sklearn.metrics import recall_score
import matplotlib.pyplot as plt
import joblib

## 1. Read Dataset

In [2]:
df_train = pd.read_csv('./input/train.csv')

In [3]:
df_train.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


## 2. Splits dataset

In [4]:
# fold 만드는 방법은 2 파일에 있음
df_train['fold'] = pd.read_csv('./input/df_folds.csv')['fold']

In [7]:
trn_fold = [i for i in range(6) if i not in [5]]
vld_fold = [5]
trn_idx = df_train.loc[df_train['fold'].isin(trn_fold)].index
vld_idx = df_train.loc[df_train['fold'].isin(vld_fold)].index

## 3. Define dataset

In [35]:
class BengaliDataset(Dataset):
    def __init__(self, csv, img_height, img_width, transform):
        self.csv = csv.reset_index()
        self.img_ids = csv['image_id'].values
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform
    
    def __len__(self):
        return len(self.csv)
    
    def __getitem__(self, index):
        img_id = self.img_ids[index]
        img = joblib.load(f'./input/train_images/{img_id}.pkl')
        img = img.reshape(self.img_height, self.img_width).astype(np.uint8)
        img = 255 - img
        
        img = img[:,:,np.newaxis]
        img = np.repeat(img,3,2)
        
        if self.transform is not None:
            img = self.transform(image=img)['image']
            
        label_1 = self.csv.iloc[index].grapheme_root
        label_2 = self.csv.iloc[index].vowel_diacritic
        label_3 = self.csv.iloc[index].consonant_diacritic
        
        return img, np.array([label_1, label_2, label_3])

## 4. Define Augmentations

In [19]:
from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, RandomBrightnessContrast, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, Flip, OneOf, Compose, Rotate
)

from albumentations.pytorch import ToTensor

In [20]:
train_augmentation = Compose([
    Rotate(20),
    ToTensor()  # pytorch 형식 채널 바꾸기
    ])

valid_augmentation = Compose([
    ToTensor()
])

## 5. Make dataloader

In [21]:
from torch.utils.data import Dataset, DataLoader

In [131]:
trn_dataset = BengaliDataset(#csv = df_train.loc[trn_idx],
                            csv = df_train.loc[trn_idx][:1000],# test 용으로 1000개만 뽑기
                            img_height = 137,
                            img_width = 236,
                            transform = train_augmentation)

vld_dataset = BengaliDataset(#csv = df_train.loc[vld_idx],
                             csv = df_train.loc[vld_idx][:1000],# test 용으로 1000개만 뽑기
                            img_height = 137,
                            img_width = 236,
                            transform = train_augmentation)

In [132]:
trn_loader = DataLoader(trn_dataset,
                       shuffle=True,
                       num_workers=2,
                        batch_size=256)

vld_loader = DataLoader(vld_dataset,
                       shuffle=False,
                       num_workers=2,
                        batch_size=256)

In [30]:
trn_dataset[0][0].shape

torch.Size([3, 137, 236])

In [42]:
for inputs, targets in trn_loader:
    break

In [40]:
inputs.shape  #256은 batch size

torch.Size([256, 3, 137, 236])

## 5. Make model, optimizer, criterion

In [43]:
import pretrainedmodels

In [44]:
model_name = 'resnet34'
model = pretrainedmodels.__dict__[model_name](pretrained = 'imagenet')

In [45]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [47]:
in_features = model.last_linear.in_features
model.last_linear = torch.nn.Linear(in_features, 186)

In [48]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [49]:
in_features

512

In [50]:
model = model.cuda()

In [52]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

In [53]:
# ReduceOnPlateau 는 현재 metric 을 지켜보면서 특정 epoch(patience) 에도 좋아지지 않으면, 
# 정해놓은 factor 만큼 감소. (lr = factor * lr)
loss_fn = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                      mode='max',
                                                      verbose=True,
                                                      patience=7,
                                                      factor=0.5)

## Training

In [54]:
from tqdm import tqdm_notebook

In [55]:
train_loss = []
model.train()  # 학습하기 전에 시작해야 함

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [56]:
for inputs, targets in trn_loader:
    break

In [57]:
inputs = inputs.cuda()
targets = targets.cuda()

In [59]:
logits = model(inputs)

In [None]:
grapheme = logits

In [62]:
logits.shape

torch.Size([256, 186])

In [65]:
grapheme = logits[:,:168]
vowel = logits[:,168:179]
cons = logits[:,179:]

In [71]:
loss = loss_fn(grapheme, targets[:,0]) + loss_fn(vowel, targets[:,1]) + loss_fn(cons, targets[:, 2])
# loss = 0.5*loss_fn(grapheme, targets[:,0]) + 0.25*loss_fn(vowel, targets[:,1]) + 0.25*loss_fn(cons, targets[:, 2])

In [78]:
loss.backward()

In [130]:
# 1 batch 학습
# loss.item()  loss 객체에서 숫자만 빼주기
train_loss = []
model.train()  # 학습하기 전에 시작해야 함

for inputs, targets in tqdm_notebook(trn_loader):

    inputs = inputs.cuda()
    targets = targets.cuda()

    logits = model(inputs)

    grapheme = logits[:,:168]
    vowel = logits[:,168:179]
    cons = logits[:,179:]

    loss = loss_fn(grapheme, targets[:,0]) + loss_fn(vowel, targets[:,1]) + loss_fn(cons, targets[:, 2])
    loss.backward()

    optimizer.step()
    optimizer.zero_grad()
    train_loss.append(loss.item())

HBox(children=(FloatProgress(value=0.0, max=654.0), HTML(value='')))

KeyboardInterrupt: 

## validation

In [114]:
val_loss = []
val_true = []
val_pred = []

model.eval()

with torch.no_grad():
    
    for inputs, targets in tqdm_notebook(vld_loader):

        inputs = inputs.cuda()
        targets = targets.cuda()

        logits = model(inputs)

        grapheme = logits[:,:168]
        vowel = logits[:,168:179]
        cons = logits[:,179:]

        loss = loss_fn(grapheme, targets[:,0]) + loss_fn(vowel, targets[:,1]) + loss_fn(cons, targets[:, 2])

        val_loss.append(loss.item())
        
        grapheme = grapheme.cpu().argmax(dim=1).data.numpy()
        vowel = vowel.cpu().argmax(dim=1).data.numpy()
        cons = cons.cpu().argmax(dim=1).data.numpy()
        
        val_true.append(targets.cpu().numpy())
        val_pred.append(np.stack([grapheme, vowel, cons], axis=1))
    

HBox(children=(FloatProgress(value=0.0, max=131.0), HTML(value='')))




In [135]:
# 각 셋들이 가지고 있는 168 개의 확률에 대해서, 가장 높은 확률값을 가지는 index
grapheme = grapheme.cpu().argmax(dim=1).data.numpy()
vowel = vowel.cpu().argmax(dim=1).data.numpy()
cons = cons.cpu().argmax(dim=1).data.numpy()

AttributeError: 'numpy.ndarray' object has no attribute 'cpu'

In [137]:
grapheme.cpu()

AttributeError: 'numpy.ndarray' object has no attribute 'cpu'

In [113]:
val_true.append(targets.cpu().numpy())
val_pred.append(np.stack([grapheme, vowel, cons], axis=1))

## 확인

In [120]:
val_true = np.concatenate(val_true)
val_pred = np.concatenate(val_pred)

In [121]:
print(val_true.shape, val_pred.shape)

(33473, 3) (33473, 3)


In [126]:
val_loss = np.mean(val_loss)
train_loss = np.mean(train_loss)

TypeError: unsupported operand type(s) for +: 'builtin_function_or_method' and 'builtin_function_or_method'

In [None]:
print(val_loss, train_loss)

In [None]:
score_g = recall_score(val_true[:,0], val_pred[:,0], average='macro')
score_v = recall_score(val_true[:,1], val_pred[:,1], average='macro')
score_c = recall_score(val_true[:,2], val_pred[:,2], average='macro')

In [None]:
final_score = np.average([score_g, score_v, score_c], weights=[2,1,1])

In [None]:
print(f'train_loss: {train_loss:.5f}; val_loss:{val_loss:.5f}, score:{final_score:.5f}')
print(f'score_g: {score_g:.5f}; score_v: {score_v:.5f}, score_c: {score_c:.5f}')

---

## Final Training Code

In [140]:
best_score = -1

for epoch in range(10):
    # 1 batch 학습
    # loss.item()  loss 객체에서 숫자만 빼주기
    train_loss = []
    model.train()  # 학습하기 전에 시작해야 함

    for inputs, targets in tqdm_notebook(trn_loader):

        inputs = inputs.cuda()
        targets = targets.cuda()

        logits = model(inputs)

        grapheme = logits[:,:168]
        vowel = logits[:,168:179]
        cons = logits[:,179:]

        loss = loss_fn(grapheme, targets[:,0]) + loss_fn(vowel, targets[:,1]) + loss_fn(cons, targets[:, 2])
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()
        train_loss.append(loss.item())

    val_loss = []
    val_true = []
    val_pred = []

    model.eval()

    with torch.no_grad():

        for inputs, targets in tqdm_notebook(vld_loader):

            inputs = inputs.cuda()
            targets = targets.cuda()

            logits = model(inputs)

            grapheme = logits[:,:168]
            vowel = logits[:,168:179]
            cons = logits[:,179:]

            loss = loss_fn(grapheme, targets[:,0]) + loss_fn(vowel, targets[:,1]) + loss_fn(cons, targets[:, 2])

            val_loss.append(loss.item())

            grapheme = grapheme.cpu().argmax(dim=1).data.numpy()
            vowel = vowel.cpu().argmax(dim=1).data.numpy()
            cons = cons.cpu().argmax(dim=1).data.numpy()

            val_true.append(targets.cpu().numpy())
            val_pred.append(np.stack([grapheme, vowel, cons], axis=1))

    val_true = np.concatenate(val_true)
    val_pred = np.concatenate(val_pred)

    val_loss = np.mean(val_loss)
    train_loss = np.mean(train_loss)

    score_g = recall_score(val_true[:,0], val_pred[:,0], average='macro')
    score_v = recall_score(val_true[:,1], val_pred[:,1], average='macro')
    score_c = recall_score(val_true[:,2], val_pred[:,2], average='macro')

    final_score = np.average([score_g, score_v, score_c], weights=[2,1,1])

    print(f'train_loss: {train_loss:.5f}; val_loss:{val_loss:.5f}, score:{final_score:.5f}')
    print(f'score_g: {score_g:.5f}; score_v: {score_v:.5f}, score_c: {score_c:.5f}')
    
    if final_score > best_score:
        best_score = final_score

        state_dict = model.cpu().state_dict()
        model = model.cuda()
        torch.save(state_dict, 'model.pt')

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.04411; val_loss:0.75693, score:0.85710
score_g: 0.77145; score_v: 0.95440, score_c: 0.93109


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.03919; val_loss:0.75907, score:0.85577
score_g: 0.76934; score_v: 0.95427, score_c: 0.93011


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.05014; val_loss:0.75630, score:0.86488
score_g: 0.77365; score_v: 0.94784, score_c: 0.96438


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.03895; val_loss:0.73671, score:0.86762
score_g: 0.77511; score_v: 0.95838, score_c: 0.96189


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.03789; val_loss:0.76482, score:0.87184
score_g: 0.78316; score_v: 0.95898, score_c: 0.96204


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.03537; val_loss:0.75144, score:0.84766
score_g: 0.75151; score_v: 0.95836, score_c: 0.92925


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.02469; val_loss:0.80322, score:0.86087
score_g: 0.76901; score_v: 0.94240, score_c: 0.96307


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.03080; val_loss:0.74556, score:0.86618
score_g: 0.77498; score_v: 0.94546, score_c: 0.96930


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.02558; val_loss:0.74541, score:0.87054
score_g: 0.77736; score_v: 0.96562, score_c: 0.96182


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))


train_loss: 0.02898; val_loss:0.76731, score:0.86395
score_g: 0.76311; score_v: 0.96466, score_c: 0.96491


In [141]:
!ls

 1_data_prepare.ipynb					 input
 2_pytorch_dataset.ipynb				 model
'3_image_augmentation (albumentations, pytorch).ipynb'	 model.pt
 4_train.ipynb						 submission.csv
 efficientnet.ipynb


In [142]:
model.load_state_dict(torch.load('./model.pt'))

<All keys matched successfully>