## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision.models import resnet18
from torchvision import transforms

from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import timm

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [36]:
CFG = {
    'IMG_HEIGHT_SIZE':64,
    'IMG_WIDTH_SIZE':224,
    'EPOCHS':20,
    'LEARNING_RATE':1e-3,
    'BATCH_SIZE':64,
    'NUM_WORKERS':0, # 본인의 GPU, CPU 환경에 맞게 설정
    'SEED':27
}

## Fixed RandomSeed

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Load & Train/Validation Split

In [5]:
import os
os.getcwd()

'/data/Dacon/Text_recognition'

In [6]:
df = pd.read_csv('/data/HDD/ljy/open/train.csv')

In [7]:
# 제공된 학습데이터 중 1글자 샘플들의 단어사전이 학습/테스트 데이터의 모든 글자를 담고 있으므로 학습 데이터로 우선 배치
df['len'] = df['label'].str.len()
train_v1 = df[df['len']==1]

In [8]:
# 제공된 학습데이터 중 2글자 이상의 샘플들에 대해서 단어길이를 고려하여 Train (80%) / Validation (20%) 분할
df = df[df['len']>1]
train_v2, val, _, _ = train_test_split(df, df['len'], test_size=0.2, random_state=CFG['SEED'])

In [9]:
# 학습 데이터로 우선 배치한 1글자 샘플들과 분할된 2글자 이상의 학습 샘플을 concat하여 최종 학습 데이터로 사용
train = pd.concat([train_v1, train_v2])
print(len(train), len(val))

66251 10637


## Get Vocabulary

In [10]:
# 학습 데이터로부터 단어 사전(Vocabulary) 구축
train_gt = [gt for gt in train['label']]
train_gt = "".join(train_gt)
letters = sorted(list(set(list(train_gt))))
print(len(letters))

2349


In [11]:
vocabulary = ["-"] + letters
print(len(vocabulary))
idx2char = {k:v for k,v in enumerate(vocabulary, start=0)}
char2idx = {v:k for k,v in idx2char.items()}

2350


## CustomDataset

In [12]:
os.chdir("/data/HDD/ljy/open")

In [13]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, train_mode=True):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.train_mode = train_mode
        
    def __len__(self):
        return len(self.img_path_list)
    
    def __getitem__(self, index):
        image = Image.open(self.img_path_list[index]).convert('RGB')
        
        if self.train_mode:
            image = self.train_transform(image)
        else:
            image = self.test_transform(image)
            
        if self.label_list is not None:
            text = self.label_list[index]
            return image, text
        else:
            return image
    
    # Image Augmentation
    def train_transform(self, image):
        transform_ops = transforms.Compose([
            transforms.Resize((CFG['IMG_HEIGHT_SIZE'],CFG['IMG_WIDTH_SIZE'])),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        ])
        return transform_ops(image)
    
    def test_transform(self, image):
        transform_ops = transforms.Compose([
            transforms.Resize((CFG['IMG_HEIGHT_SIZE'],CFG['IMG_WIDTH_SIZE'])),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        ])
        return transform_ops(image)

In [14]:
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, True)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=CFG['NUM_WORKERS'])

val_dataset = CustomDataset(val['img_path'].values, val['label'].values, False)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=CFG['NUM_WORKERS'])

In [15]:
image_batch, text_batch = iter(train_loader).next()
print(image_batch.size(), text_batch)

torch.Size([64, 3, 64, 224]) ('합리적', '뜻', '끊다', '튈', '구분되다', '책임자', '다하다', '뵌', '이', '잡히다', '닭', '텅', '축구장', '아쉬움', '적성', '전날', '걱정스럽다', '뇝', '오', '지방', '여쭈다', '깔리다', '현관', '형', '쥐다', '숨기다', '땜', '예', '카페', '만화가', '살아오다', '딧', '살펴보다', '술', '전개되다', '시기', '같다', '삠', '긋', '남', '달리다', '폭', '고프다', '구입', '쎌', '꾀', '인제', '친해지다', '고교', '컬', '앉히다', '과정', '요', '되돌아오다', '이', '뵙다', '트이다', '본래', '폼', '자체', '어머니', '올', '국민', '흙')


## Model Define

In [16]:
timm.list_models(pretrained=True)

['adv_inception_v3',
 'bat_resnext26ts',
 'beit_base_patch16_224',
 'beit_base_patch16_224_in22k',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_224_in22k',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'botnet26t_256',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_tiny',
 'convit_base',
 'convit_small',
 'convit_tiny',
 'convmixer_768_32',
 'convmixer_1024_20_ks9_p14',
 'convmixer_1536_20',
 'convnext_base',
 'convnext_base_384_in22ft1k',
 'convnext_base_in22ft1k',
 'convnext_base_in22k',
 'convnext_large',
 'convnext_large_384_in22ft1k',
 'convnext_large_in22ft1k',
 'convnext_large_in22k',
 'convnext_small',
 'convnext_tiny',
 'convnext_xlarge_384_in22ft1k',
 'convnext_xlarge_in22ft1k',
 'convnext_xlarge_in22k',
 'crossvit_9_240',
 'crossv

In [17]:
list(timm.create_model('densenet121', pretrained=True).children())

[Sequential(
   (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
   (norm0): BatchNormAct2d(
     64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
     (act): ReLU(inplace=True)
   )
   (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
   (denseblock1): DenseBlock(
     (denselayer1): DenseLayer(
       (norm1): BatchNormAct2d(
         64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
         (act): ReLU(inplace=True)
       )
       (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
       (norm2): BatchNormAct2d(
         128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
         (act): ReLU(inplace=True)
       )
       (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
     )
     (denselayer2): DenseLayer(
       (norm1): BatchNormAct2d(
         96, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [18]:
len(char2idx)

2350

In [26]:
class RecognitionModel(nn.Module):
    def __init__(self, num_chars=len(char2idx), rnn_hidden_size=256):
        super(RecognitionModel, self).__init__()
        self.num_chars = num_chars
        self.rnn_hidden_size = rnn_hidden_size
        
        # CNN Backbone = 사전학습된 resnet18 활용
        # https://arxiv.org/abs/1512.03385
        #resnet = pyramidnet()
        dense = timm.create_model('densenet121', pretrained=True)
        # CNN Feature Extract
        dense_modules = list(dense.children())[:-2]
        self.feature_extract = nn.Sequential(
            *dense_modules,
            #nn.Conv2d(1024, 64, kernel_size=(3,6), stride=1, padding=1),
            #nn.BatchNorm2d(64),
            #nn.ReLU(inplace=True)
        )

        self.linear1 = nn.Linear(2048, rnn_hidden_size)
        
        # RNN
        self.rnn = nn.RNN(input_size=rnn_hidden_size, 
                            hidden_size=rnn_hidden_size,
                            bidirectional=True, 
                            batch_first=True)
        self.linear2 = nn.Linear(self.rnn_hidden_size*2, num_chars)
        
        
    def forward(self, x):
        # CNN
        x = self.feature_extract(x) # [batch_size, channels, height, width]
        x = x.permute(0, 3, 1, 2) # [batch_size, width, channels, height]

        batch_size = x.size(0)
        T = x.size(1)
        x = x.view(batch_size, T, -1) # [batch_size, T==width, num_features==channels*height]
        #print(x.size())
        x = self.linear1(x)
        #print(x.size())
        
        # RNN
        x, hidden = self.rnn(x)
        
        output = self.linear2(x)
        #print(output.size())
        output = output.permute(1, 0, 2) # [T==10, batch_size, num_classes==num_features]
        
        return output

## Define CTC Loss

In [27]:
criterion = nn.CTCLoss(blank=0) # idx 0 : '-'

In [28]:
def encode_text_batch(text_batch):
    text_batch_targets_lens = [len(text) for text in text_batch]
    text_batch_targets_lens = torch.IntTensor(text_batch_targets_lens)
    
    text_batch_concat = "".join(text_batch)
    text_batch_targets = [char2idx[c] for c in text_batch_concat]
    text_batch_targets = torch.IntTensor(text_batch_targets)
    
    return text_batch_targets, text_batch_targets_lens

In [29]:
def compute_loss(text_batch, text_batch_logits):
    """
    text_batch: list of strings of length equal to batch size
    text_batch_logits: Tensor of size([T, batch_size, num_classes])
    """
    text_batch_logps = F.log_softmax(text_batch_logits, 2) # [T, batch_size, num_classes]  
    text_batch_logps_lens = torch.full(size=(text_batch_logps.size(1),), 
                                       fill_value=text_batch_logps.size(0), 
                                       dtype=torch.int32).to(device) # [batch_size] 

    text_batch_targets, text_batch_targets_lens = encode_text_batch(text_batch)
    loss = criterion(text_batch_logps, text_batch_targets, text_batch_logps_lens, text_batch_targets_lens)

    return loss

## Train

In [30]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    
    best_loss = 999999
    best_model = None
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for image_batch, text_batch in tqdm(iter(train_loader)):
            image_batch = image_batch.to(device)
            
            optimizer.zero_grad()
            
            text_batch_logits = model(image_batch)
            loss = compute_loss(text_batch, text_batch_logits)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        _train_loss = np.mean(train_loss)
        
        _val_loss = validation(model, val_loader, device)
        print(f'Epoch : [{epoch}] Train CTC Loss : [{_train_loss:.5f}] Val CTC Loss : [{_val_loss:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_loss)
        
        if best_loss > _val_loss:
            best_loss = _val_loss
            best_model = model
            torch.save(model.state_dict(), '/data/Dacon/Text_recognition/PyramidNet_weights.pth')
    
    return best_model

## Validation

In [31]:
def validation(model, val_loader, device):
    model.eval()
    val_loss = []
    with torch.no_grad():
        for image_batch, text_batch in tqdm(iter(val_loader)):
            image_batch = image_batch.to(device)
            
            text_batch_logits = model(image_batch)
            loss = compute_loss(text_batch, text_batch_logits)
            
            val_loss.append(loss.item())
    
    _val_loss = np.mean(val_loss)
    return _val_loss

## Run!!

In [37]:
model = RecognitionModel()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [1] Train CTC Loss : [4.09030] Val CTC Loss : [1.00787]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [2] Train CTC Loss : [1.38325] Val CTC Loss : [0.47668]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [3] Train CTC Loss : [0.74478] Val CTC Loss : [0.32615]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [4] Train CTC Loss : [0.47638] Val CTC Loss : [0.28799]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [5] Train CTC Loss : [0.35906] Val CTC Loss : [0.24972]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [6] Train CTC Loss : [0.29881] Val CTC Loss : [0.27645]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [7] Train CTC Loss : [0.26617] Val CTC Loss : [0.21381]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [8] Train CTC Loss : [0.22495] Val CTC Loss : [0.26351]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [9] Train CTC Loss : [0.19941] Val CTC Loss : [0.21338]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [10] Train CTC Loss : [0.19154] Val CTC Loss : [0.19785]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [11] Train CTC Loss : [0.18949] Val CTC Loss : [0.25360]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [12] Train CTC Loss : [0.16792] Val CTC Loss : [0.22474]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [13] Train CTC Loss : [0.16822] Val CTC Loss : [0.23853]
Epoch    13: reducing learning rate of group 0 to 5.0000e-04.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [14] Train CTC Loss : [0.04465] Val CTC Loss : [0.11435]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [15] Train CTC Loss : [0.02292] Val CTC Loss : [0.12785]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [16] Train CTC Loss : [0.03164] Val CTC Loss : [0.13250]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [17] Train CTC Loss : [0.03504] Val CTC Loss : [0.14748]
Epoch    17: reducing learning rate of group 0 to 2.5000e-04.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [18] Train CTC Loss : [0.01173] Val CTC Loss : [0.09682]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [19] Train CTC Loss : [0.00459] Val CTC Loss : [0.09586]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [20] Train CTC Loss : [0.00517] Val CTC Loss : [0.10802]


In [38]:
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [1] Train CTC Loss : [0.00785] Val CTC Loss : [0.10977]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [2] Train CTC Loss : [0.00854] Val CTC Loss : [0.09703]
Epoch    22: reducing learning rate of group 0 to 1.2500e-04.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [3] Train CTC Loss : [0.00259] Val CTC Loss : [0.08979]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [4] Train CTC Loss : [0.00125] Val CTC Loss : [0.08444]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [5] Train CTC Loss : [0.00079] Val CTC Loss : [0.08654]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [6] Train CTC Loss : [0.00111] Val CTC Loss : [0.09566]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [7] Train CTC Loss : [0.00183] Val CTC Loss : [0.09526]
Epoch    27: reducing learning rate of group 0 to 6.2500e-05.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [8] Train CTC Loss : [0.00382] Val CTC Loss : [0.08888]


  0%|          | 0/1036 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [39]:
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [1] Train CTC Loss : [0.00063] Val CTC Loss : [0.08701]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [2] Train CTC Loss : [0.00044] Val CTC Loss : [0.09194]
Epoch    30: reducing learning rate of group 0 to 3.1250e-05.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [3] Train CTC Loss : [0.00033] Val CTC Loss : [0.08783]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [4] Train CTC Loss : [0.00023] Val CTC Loss : [0.08951]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [5] Train CTC Loss : [0.00018] Val CTC Loss : [0.08519]
Epoch    33: reducing learning rate of group 0 to 1.5625e-05.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [6] Train CTC Loss : [0.00019] Val CTC Loss : [0.08600]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [7] Train CTC Loss : [0.00016] Val CTC Loss : [0.08472]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [8] Train CTC Loss : [0.00013] Val CTC Loss : [0.08516]
Epoch    36: reducing learning rate of group 0 to 7.8125e-06.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [9] Train CTC Loss : [0.00013] Val CTC Loss : [0.08546]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [10] Train CTC Loss : [0.00012] Val CTC Loss : [0.08511]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [11] Train CTC Loss : [0.00015] Val CTC Loss : [0.08913]
Epoch    39: reducing learning rate of group 0 to 3.9063e-06.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [12] Train CTC Loss : [0.00011] Val CTC Loss : [0.08495]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [13] Train CTC Loss : [0.00011] Val CTC Loss : [0.08607]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [14] Train CTC Loss : [0.00011] Val CTC Loss : [0.08498]
Epoch    42: reducing learning rate of group 0 to 1.9531e-06.


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [15] Train CTC Loss : [0.00011] Val CTC Loss : [0.08597]


  0%|          | 0/1036 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [16] Train CTC Loss : [0.00011] Val CTC Loss : [0.08601]


  0%|          | 0/1036 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  0%|          | 0/167 [00:00<?, ?it/s]

Epoch : [20] Train CTC Loss : [0.00009] Val CTC Loss : [0.08556]
Epoch    48: reducing learning rate of group 0 to 4.8828e-07.


## Inference

In [24]:
test = pd.read_csv('./test.csv')

NameError: name 'pd' is not defined

In [25]:
test_dataset = CustomDataset(test['img_path'].values, None, False)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=CFG['NUM_WORKERS'])

NameError: name 'CustomDataset' is not defined

In [26]:
def decode_predictions(text_batch_logits):
    text_batch_tokens = F.softmax(text_batch_logits, 2).argmax(2) # [T, batch_size]
    text_batch_tokens = text_batch_tokens.numpy().T # [batch_size, T]

    text_batch_tokens_new = []
    for text_tokens in text_batch_tokens:
        text = [idx2char[idx] for idx in text_tokens]
        text = "".join(text)
        text_batch_tokens_new.append(text)

    return text_batch_tokens_new

def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for image_batch in tqdm(iter(test_loader)):
            image_batch = image_batch.to(device)
            
            text_batch_logits = model(image_batch)
            
            text_batch_pred = decode_predictions(text_batch_logits.cpu())
            
            preds.extend(text_batch_pred)
    return preds

In [27]:
predictions = inference(infer_model, test_loader, device)

NameError: name 'infer_model' is not defined

## Submission

In [None]:
# 샘플 별 추론결과를 독립적으로 후처리
def remove_duplicates(text):
    if len(text) > 1:
        letters = [text[0]] + [letter for idx, letter in enumerate(text[1:], start=1) if text[idx] != text[idx-1]]
    elif len(text) == 1:
        letters = [text[0]]
    else:
        return ""
    return "".join(letters)

def correct_prediction(word):
    parts = word.split("-")
    parts = [remove_duplicates(part) for part in parts]
    corrected_word = "".join(parts)
    return corrected_word

In [None]:
submit = pd.read_csv('./sample_submission.csv')
submit['label'] = predictions
submit['label'] = submit['label'].apply(correct_prediction)

In [27]:
submit.to_csv('./submission.csv', index=False)