## Import

In [None]:
!pip install transformers

In [None]:
import random
import pandas as pd
import numpy as np
import os
import cv2

from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torch
import numpy as np
from transformers import AutoTokenizer
import argparse
import random
from sklearn.model_selection import StratifiedKFold
import torch.optim as optim
from transformers.optimization import get_cosine_schedule_with_warmup
from tqdm import tqdm
from tqdm.auto import tqdm

import albumentations as A # fast image agumentation library
from albumentations.pytorch.transforms import ToTensorV2 # 이미지 형 변환
import torchvision.models as models
from google.colab import drive

drive.mount('/content/drive')
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings(action='ignore')

file_path = '/content/drive/MyDrive/DACON/2022 관광데이터 AI 경진대회/data'

Mounted at /content/drive


In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Data Load & Train/Validation Split

In [None]:
# Load the training table (train.csv under file_path) into a DataFrame.
# Later cells read the id, img_path, overview, cat1, cat2 and cat3 columns from it.
df = pd.read_csv(file_path + '/train.csv')

In [None]:
# Turn the image paths stored in train.csv into absolute Drive paths by dropping
# the first character of each entry and prepending file_path.
# Only rows that do not already start with file_path are rewritten, so running
# this cell more than once does not stack the prefix or eat another character.
_needs_prefix = ~df['img_path'].str.startswith(file_path, na=False)
df.loc[_needs_prefix, 'img_path'] = file_path + df.loc[_needs_prefix, 'img_path'].str[1:]
df['img_path']

0        /content/drive/MyDrive/DACON/2022 관광데이터...
1        /content/drive/MyDrive/DACON/2022 관광데이터...
2        /content/drive/MyDrive/DACON/2022 관광데이터...
3        /content/drive/MyDrive/DACON/2022 관광데이터...
4        /content/drive/MyDrive/DACON/2022 관광데이터...
                               ...                        
16981    /content/drive/MyDrive/DACON/2022 관광데이터...
16982    /content/drive/MyDrive/DACON/2022 관광데이터...
16983    /content/drive/MyDrive/DACON/2022 관광데이터...
16984    /content/drive/MyDrive/DACON/2022 관광데이터...
16985    /content/drive/MyDrive/DACON/2022 관광데이터...
Name: img_path, Length: 16986, dtype: object

## Label-Encoding

* 3종류의 Category(cat1, cat2, cat3)를 모두 label encoding을 이용해서 transform을 진행

In [None]:
# Cat 3 label encoding: fit the encoder on the cat3 column and replace the
# column with the resulting integer class ids in a single call.
# Note: the name `le` is rebound by the following encoding cell.
le = preprocessing.LabelEncoder()
df['cat3'] = le.fit_transform(df['cat3'].values)

In [None]:
# Cat 2 label encoding: replace the cat2 column with integer class ids.
le = preprocessing.LabelEncoder()
df['cat2'] = le.fit_transform(df['cat2'].values)

# Cat 1 label encoding: a fresh encoder is created, so once this cell has run
# `le` refers to the cat1 encoder only.
le = preprocessing.LabelEncoder()
df['cat1'] = le.fit_transform(df['cat1'].values)

## KFold

In [None]:
from sklearn.model_selection import StratifiedKFold

# Assign every row to exactly one of 5 validation folds, stratified on cat3.
# The split is generated once and consumed lazily instead of being rebuilt and
# fully materialised on every loop iteration.
folds = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
df['kfold'] = -1
kfold_col = df.columns.get_loc('kfold')
for i, (df_idx, valid_idx) in enumerate(folds.split(df.values, df['cat3'])):
    # valid_idx holds positional row numbers, so write the fold id by position.
    # This avoids the round trip through `id`, which would mislabel rows if an
    # id value ever appeared more than once.
    df.iloc[valid_idx, kfold_col] = i

## CustomDataset

* 분석에 사용할 수 있는 Image와 text data가 있는데 Image는 사용하지 않고 Text data만 사용
>Image Data를 포함해 분석하게 될 시 시간이 훨씬 많이 소요되는 점과 Text Data만으로 좋은 performance를 낼 수 있기 때문

In [None]:
import torch
import cv2
from torch.utils.data import Dataset, DataLoader

class CategoryDataset(Dataset):
  """Text-only dataset pairing a tokenized text with its three category labels.

  The image branch is disabled: `feature_extractor` is stored on the instance
  but never called, and no image is read.
  """

  def __init__(self, text, cats1, cats2, cats3, tokenizer, feature_extractor, max_len):
    # Indexable containers, one entry per sample.
    self.text = text
    self.cats1, self.cats2, self.cats3 = cats1, cats2, cats3
    # Tokenizer settings used by __getitem__.
    self.tokenizer = tokenizer
    self.max_len = max_len
    # Kept only so the constructor signature stays compatible with callers.
    self.feature_extractor = feature_extractor

  def __len__(self):
    """Number of samples, taken from the text container."""
    return len(self.text)

  def __getitem__(self, item):
    """Return the encoded text and the three labels of sample `item` as tensors."""
    raw_text = str(self.text[item])
    labels = {
      'cats1': self.cats1[item],
      'cats2': self.cats2[item],
      'cats3': self.cats3[item],
    }

    # Pad/truncate every text to exactly max_len tokens; the tokenizer returns
    # tensors with a leading batch axis of size 1.
    tokens = self.tokenizer.encode_plus(
      raw_text,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding='max_length',
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt',
    )

    # flatten() removes that leading axis so the DataLoader can stack samples.
    sample = {key: tokens[key].flatten() for key in ('input_ids', 'attention_mask')}
    for key, value in labels.items():
      sample[key] = torch.tensor(value, dtype=torch.long)
    return sample

def create_data_loader(df, tokenizer, feature_extractor, max_len, batch_size, shuffle_=False, num_workers=6):
    """Wrap a DataFrame in a CategoryDataset and return a DataLoader over it.

    Args:
        df: DataFrame providing the `overview`, `cat1`, `cat2` and `cat3` columns.
        tokenizer: tokenizer handed to CategoryDataset.
        feature_extractor: forwarded to CategoryDataset, which only stores it
            (the image branch is disabled).
        max_len: token length every text is padded/truncated to.
        batch_size: number of samples per batch.
        shuffle_: whether the loader reshuffles the data every epoch.
        num_workers: number of loader worker processes. Defaults to 6, the
            value that used to be hard-coded; lower it on machines with fewer
            CPU cores, or pass 0 to load in the main process.

    Returns:
        A torch DataLoader yielding the dict batches produced by CategoryDataset.
    """
    ds = CategoryDataset(
        text=df.overview.to_numpy(),
        cats1=df.cat1.to_numpy(),
        cats2=df.cat2.to_numpy(),
        cats3=df.cat3.to_numpy(),
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle_
    )

## Model Define

* 역시 text만을 이용하여 분석 진행 (Vision Transformer는 이용하지 않음)

In [None]:
from transformers import AutoModel,ViTModel,ViTFeatureExtractor
import torch.nn as nn

class TourClassifier(nn.Module):
  """Text-only classifier with three heads (cat1, cat2, cat3) on a shared encoder.

  Pipeline: pretrained text model -> 2-layer TransformerEncoder -> first-token
  vector -> dropout -> one MLP head per category level.

  The TransformerEncoder is registered as a submodule in __init__. Building it
  inside forward() would create freshly initialised weights on every call, so
  they would never be trained, never be stored in state_dict(), and would stay
  in training mode (dropout active) during evaluation.

  Note: state_dict() therefore contains `transformer_encoder.*` entries.
  Checkpoints written by a version of this class without that submodule lack
  those keys and need `load_state_dict(..., strict=False)`.

  Args:
    n_classes1: number of cat1 classes.
    n_classes2: number of cat2 classes.
    n_classes3: number of cat3 classes.
    text_model_name: model id passed to AutoModel.from_pretrained.
  """

  def __init__(self, n_classes1, n_classes2, n_classes3, text_model_name):
    super(TourClassifier, self).__init__()
    self.text_model = AutoModel.from_pretrained(text_model_name).to(device) # pretrained text backbone

    # Trade compute for memory in the backbone.
    self.text_model.gradient_checkpointing_enable()

    hidden_size = self.text_model.config.hidden_size

    # d_model must equal the backbone hidden size. batch_first=True because the
    # backbone returns (batch, seq, hidden); with the default layout attention
    # would run across the batch axis instead of across tokens.
    encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=8, batch_first=True)
    self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)

    self.drop = nn.Dropout(p=0.1)

    def get_cls(target_size):
        # Two-layer MLP head mapping the pooled vector to `target_size` logits.
        return nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Dropout(p = 0.1),
            nn.ReLU(),
            nn.Linear(hidden_size, target_size),
        )
    self.cls = get_cls(n_classes1) #cat1
    self.cls2 = get_cls(n_classes2) #cat2
    self.cls3 = get_cls(n_classes3) #cat3

  def forward(self, input_ids, attention_mask):
    """Return the (cat1, cat2, cat3) logits for a batch of tokenized texts.

    Args:
      input_ids: token ids, (batch, seq).
      attention_mask: 1 for real tokens and 0 for padding, (batch, seq).

    Returns:
      Tuple (out1, out2, out3) of logits for cat1, cat2 and cat3.
    """
    text_output = self.text_model(input_ids=input_ids, attention_mask=attention_mask)

    # True marks padding positions that attention must ignore.
    padding_mask = (attention_mask == 0) if attention_mask is not None else None
    outputs = self.transformer_encoder(
      text_output.last_hidden_state,
      src_key_padding_mask=padding_mask,
    )
    # Use the first (CLS) token as the sequence representation.
    outputs = outputs[:,0]
    output = self.drop(outputs)

    out1 = self.cls(output) #cat1
    out2 = self.cls2(output) #cat2
    out3 = self.cls3(output) #cat3
    return out1,out2,out3
    

In [None]:
from sklearn.metrics import f1_score
import time
import math
import torch

def calc_tour_acc(pred, label):
    """Compute accuracy and weighted F1 for a batch of class scores.

    Args:
        pred: score tensor whose dim 1 indexes the classes.
        label: tensor of true class ids, one per row of `pred`.

    Returns:
        Tuple (acc, f1_acc): fraction of rows whose highest-scoring class
        equals the label, and the weighted-average F1 from sklearn.
    """
    predicted = pred.max(1)[1]  # index of the highest score in each row
    n_correct = torch.eq(predicted, label).sum().item()
    acc = n_correct / predicted.size()[0]

    # sklearn works on NumPy arrays, so both tensors are moved to the CPU.
    y_true = label.cpu().numpy()
    y_pred = predicted.cpu().numpy()
    f1_acc = f1_score(y_true, y_pred, average='weighted')
    return acc,f1_acc


class AverageMeter(object):
    """Keeps the most recent value together with a count-weighted running mean.

    Attributes:
        val: last value passed to update().
        sum: weighted sum of all values seen since the last reset().
        count: total weight seen since the last reset().
        avg: sum / count.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as the newest value, weighted as `n` observations."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    # %d truncates, so fractional seconds are dropped in the output.
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe elapsed time and the projected time still remaining.

    Args:
        since: start timestamp as returned by time.time().
        percent: fraction of the work completed so far; it is used as a
            divisor, so it must be non-zero.

    Returns:
        A string of the form '<elapsed> (remain <remaining>)', with both
        parts formatted by asMinutes.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the progress so far, then subtract
    # the time already spent.
    remaining = elapsed / (percent) - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

## Train

In [None]:
def train_epoch(model,data_loader,loss_fn,optimizer,device,scheduler,n_examples,epoch):
  """Train `model` for one pass over `data_loader` and log progress.

  Each batch is run through the model, the three per-category losses are
  combined with fixed weights (0.05 / 0.1 / 0.85 for cat1 / cat2 / cat3),
  gradients are clipped to a max norm of 1.0, and both the optimizer and the
  scheduler are stepped once per batch.

  Args:
    model: module called with input_ids / attention_mask and returning three
      logit tensors (cat1, cat2, cat3).
    data_loader: iterable of dict batches with the keys "input_ids",
      "attention_mask", "cats1", "cats2" and "cats3".
    loss_fn: loss applied to each (logits, labels) pair.
    optimizer: optimizer stepped after every batch.
    device: device the batch tensors are moved to.
    scheduler: learning-rate scheduler stepped after every batch.
    n_examples: divisor used for the returned accuracy.
    epoch: epoch number, used only in the log line.

  Returns:
    Tuple (accuracy, loss): number of correct cat3 predictions divided by
    n_examples, and the batch-size-weighted mean of the combined loss.
  """

  # Running statistics for the progress log.
  batch_time = AverageMeter()
  data_time = AverageMeter()
  losses = AverageMeter()
  accuracies = AverageMeter()
  f1_accuracies = AverageMeter()

  sent_count = AverageMeter()


  start = end = time.time()

  model = model.train()
  # Starts as an int and becomes a tensor after the first batch; the final
  # .double() call therefore needs at least one batch to have been processed.
  correct_predictions = 0
  for step,d in enumerate(data_loader):
    # Time spent waiting for the loader to deliver this batch.
    data_time.update(time.time() - end)
    batch_size = d["input_ids"].size(0)

    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    # pixel_values = d['pixel_values'].to(device)
    cats1 = d["cats1"].to(device)
    cats2 = d["cats2"].to(device)
    cats3 = d["cats3"].to(device)

    outputs,outputs2,outputs3 = model(
      input_ids=input_ids,
      attention_mask=attention_mask,
    #   pixel_values=pixel_values
    )
    # Predicted cat3 class = index of the largest logit.
    _, preds = torch.max(outputs3, dim=1)

    loss1 = loss_fn(outputs, cats1)
    loss2 = loss_fn(outputs2, cats2)
    loss3 = loss_fn(outputs3, cats3)

    # Weighted multi-task loss; cat3 carries most of the weight.
    loss = loss1 * 0.05 + loss2 * 0.1 + loss3 * 0.85

    correct_predictions += torch.sum(preds == cats3)
    losses.update(loss.item(), batch_size)
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    # Gradients are cleared after the step so the next batch starts clean.
    optimizer.zero_grad()

    batch_time.update(time.time() - end)
    end = time.time()

    sent_count.update(batch_size)
    # Log every 200 steps and on the last step. The accuracy / F1 meters are
    # only updated here, so their running averages cover the logged batches
    # only, not the whole epoch.
    if step % 200 == 0 or step == (len(data_loader)-1):
                acc,f1_acc = calc_tour_acc(outputs3, cats3)
                accuracies.update(acc, batch_size)
                f1_accuracies.update(f1_acc, batch_size)


                print('Epoch: [{0}][{1}/{2}] '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                      'Elapsed {remain:s} '
                      'Loss: {loss.val:.3f}({loss.avg:.3f}) '
                      'Acc: {acc.val:.3f}({acc.avg:.3f}) '   
                      'f1_Acc: {f1_acc.val:.3f}({f1_acc.avg:.3f}) '           
                      'sent/s {sent_s:.0f} '
                      .format(
                      epoch, step+1, len(data_loader),
                      data_time=data_time, loss=losses,
                      acc=accuracies,
                      f1_acc=f1_accuracies,
                      remain=timeSince(start, float(step+1)/len(data_loader)),
                      sent_s=sent_count.avg/batch_time.avg
                      ))

  return correct_predictions.double() / n_examples, losses.avg

def validate(model,data_loader,loss_fn,optimizer,device,scheduler,n_examples):
  """Evaluate `model` on `data_loader` without updating any weights.

  Args:
    model: module called with input_ids / attention_mask and returning three
      logit tensors (cat1, cat2, cat3).
    data_loader: iterable of dict batches with the keys "input_ids",
      "attention_mask", "cats1", "cats2" and "cats3".
    loss_fn: loss applied to each (logits, labels) pair.
    optimizer: unused; kept so existing callers keep working.
    device: device the batch tensors are moved to.
    scheduler: unused; kept so existing callers keep working.
    n_examples: unused; kept so existing callers keep working.

  Returns:
    Tuple (f1, loss): weighted F1 of the cat3 predictions over the whole
    loader, and the unweighted mean of the per-batch combined losses.

  Raises:
    ValueError: if `data_loader` yields no batches.
  """
  model = model.eval()
  losses = []
  logit_chunks = []
  label_chunks = []

  # No gradients are needed for evaluation. Gradient clipping is intentionally
  # absent: it only matters right before an optimizer step.
  with torch.no_grad():
    for d in tqdm(data_loader):
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      cats1 = d["cats1"].to(device)
      cats2 = d["cats2"].to(device)
      cats3 = d["cats3"].to(device)

      outputs,outputs2,outputs3 = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
      )

      loss1 = loss_fn(outputs, cats1)
      loss2 = loss_fn(outputs2, cats2)
      loss3 = loss_fn(outputs3, cats3)

      # Same task weighting as in training.
      loss = loss1 * 0.05 + loss2 * 0.1 + loss3 * 0.85
      losses.append(loss.item())

      # Collect per-batch results and concatenate once after the loop instead
      # of re-copying an ever-growing tensor on every batch.
      logit_chunks.append(outputs3)
      label_chunks.append(cats3)

  if not logit_chunks:
    raise ValueError('validate() received an empty data_loader')

  acc,f1_acc = calc_tour_acc(torch.cat(logit_chunks, 0), torch.cat(label_chunks, 0))
  return f1_acc, np.mean(losses)

## Run!!

In [None]:
# Hold out fold 0 for validation and train on the remaining folds.
train = df[df["kfold"] != 0].reset_index(drop=True)
valid = df[df["kfold"] == 0].reset_index(drop=True)

tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")
# The feature extractor is only passed through to CategoryDataset, which stores
# it without calling it (the image branch is disabled).
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-large-patch32-384')
# Positional arguments: max_len=256 tokens, batch_size=16. Only the training
# loader is shuffled.
train_data_loader = create_data_loader(train, tokenizer, feature_extractor, 256, 16, shuffle_=True)
valid_data_loader = create_data_loader(valid, tokenizer, feature_extractor, 256, 16)


EPOCHS = 30
# NOTE(review): 6 / 18 / 128 are presumably the numbers of distinct cat1 / cat2 /
# cat3 labels in train.csv - confirm against the label encoders.
model = TourClassifier(n_classes1 = 6, n_classes2 = 18, n_classes3 = 128, text_model_name = "klue/roberta-large").to(device)
optimizer = optim.AdamW(model.parameters(), lr= 3e-5)
# The scheduler is stepped once per batch in train_epoch, so the schedule
# length is batches-per-epoch times the number of epochs.
total_steps = len(train_data_loader) * EPOCHS
# Cosine decay with a warmup over the first 10% of all steps.
scheduler = get_cosine_schedule_with_warmup(
optimizer,
num_warmup_steps=int(total_steps*0.1),
num_training_steps=total_steps
)
loss_fn = nn.CrossEntropyLoss().to(device)

Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

In [None]:
# Best validation score seen so far. validate() returns the weighted F1 of the
# cat3 predictions as its first value, so `max_acc` / `validate_acc` hold an F1
# score even though they are named and printed as "accuracy".
max_acc = 0
for epoch in range(EPOCHS):
    print('-' * 10)
    print(f'Epoch {epoch}/{EPOCHS-1}')
    print('-' * 10)
    train_acc, train_loss = train_epoch(
        model,
        train_data_loader,
        loss_fn,
        optimizer,
        device,
        scheduler,
        len(train),
        epoch
    )
    validate_acc, validate_loss = validate(
        model,
        valid_data_loader,
        loss_fn,
        optimizer,
        device,
        scheduler,
        len(valid)
    )

    # Keep only the checkpoint with the best validation score; the file is
    # overwritten each time the score improves.
    if validate_acc > max_acc:
        max_acc = validate_acc
        torch.save(model.state_dict(),f'tourbaseline_fold0.pt')

    print(f'Train loss {train_loss} accuracy {train_acc}')
    print(f'Validate loss {validate_loss} accuracy {validate_acc}')
    print("")
    print("")

----------
Epoch 0/29
----------
Epoch: [0][1/850] Data 0.369 (0.369) Elapsed 0m 1s (remain 17m 17s) Loss: 4.488(4.488) Acc: 0.000(0.000) f1_Acc: 0.000(0.000) sent/s 13 
Epoch: [0][201/850] Data 0.003 (0.005) Elapsed 2m 37s (remain 8m 30s) Loss: 3.302(4.299) Acc: 0.500(0.250) f1_Acc: 0.333(0.167) sent/s 20 
Epoch: [0][401/850] Data 0.004 (0.004) Elapsed 5m 14s (remain 5m 52s) Loss: 2.051(3.735) Acc: 0.688(0.396) f1_Acc: 0.633(0.322) sent/s 20 
Epoch: [0][601/850] Data 0.003 (0.003) Elapsed 7m 51s (remain 3m 15s) Loss: 2.121(3.161) Acc: 0.562(0.438) f1_Acc: 0.564(0.383) sent/s 20 
Epoch: [0][801/850] Data 0.003 (0.003) Elapsed 10m 28s (remain 0m 38s) Loss: 0.489(2.754) Acc: 0.938(0.537) f1_Acc: 0.917(0.489) sent/s 20 
Epoch: [0][850/850] Data 0.002 (0.003) Elapsed 11m 6s (remain 0m 0s) Loss: 1.372(2.677) Acc: 1.000(0.560) f1_Acc: 1.000(0.514) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 2.6765088320199553 accuracy 0.4626876655872829
Validate loss 1.1792427578442533 accuracy 0.6938954870956101


----------
Epoch 1/29
----------
Epoch: [1][1/850] Data 0.373 (0.373) Elapsed 0m 1s (remain 17m 3s) Loss: 1.088(1.088) Acc: 0.875(0.875) f1_Acc: 0.877(0.877) sent/s 13 
Epoch: [1][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 30s) Loss: 1.599(1.169) Acc: 0.688(0.781) f1_Acc: 0.642(0.759) sent/s 20 
Epoch: [1][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 52s) Loss: 0.766(1.105) Acc: 0.812(0.792) f1_Acc: 0.818(0.779) sent/s 20 
Epoch: [1][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 0.960(1.060) Acc: 0.750(0.781) f1_Acc: 0.777(0.778) sent/s 20 
Epoch: [1][801/850] Data 0.002 (0.003) Elapsed 10m 59s (remain 0m 40s) Loss: 0.944(1.022) Acc: 0.750(0.775) f1_Acc: 0.700(0.763) sent/s 19 
Epoch: [1][850/850] Data 0.003 (0.003) Elapsed 11m 37s (remain 0m 0s) Loss: 0.271(1.013) Acc: 1.000(0.786) f1_Acc: 1.000(0.774) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 1.0131570794300222 accuracy 0.7825287017957021
Validate loss 0.8975718234416464 accuracy 0.7554199180172406


----------
Epoch 2/29
----------
Epoch: [2][1/850] Data 0.391 (0.391) Elapsed 0m 1s (remain 17m 21s) Loss: 0.890(0.890) Acc: 0.750(0.750) f1_Acc: 0.750(0.750) sent/s 13 
Epoch: [2][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.688(0.710) Acc: 0.812(0.781) f1_Acc: 0.792(0.771) sent/s 20 
Epoch: [2][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.773(0.702) Acc: 0.875(0.812) f1_Acc: 0.854(0.799) sent/s 20 
Epoch: [2][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 1.236(0.717) Acc: 0.688(0.781) f1_Acc: 0.688(0.771) sent/s 20 
Epoch: [2][801/850] Data 0.003 (0.003) Elapsed 10m 29s (remain 0m 38s) Loss: 1.272(0.727) Acc: 0.625(0.750) f1_Acc: 0.594(0.735) sent/s 20 
Epoch: [2][850/850] Data 0.003 (0.003) Elapsed 11m 7s (remain 0m 0s) Loss: 0.040(0.729) Acc: 1.000(0.762) f1_Acc: 1.000(0.748) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.728504963166527 accuracy 0.8241095083897557
Validate loss 0.7980026344598179 accuracy 0.7789350682321508


----------
Epoch 3/29
----------
Epoch: [3][1/850] Data 0.379 (0.379) Elapsed 0m 1s (remain 17m 20s) Loss: 0.455(0.455) Acc: 0.875(0.875) f1_Acc: 0.896(0.896) sent/s 13 
Epoch: [3][201/850] Data 0.003 (0.005) Elapsed 3m 38s (remain 11m 45s) Loss: 0.717(0.587) Acc: 0.812(0.844) f1_Acc: 0.812(0.854) sent/s 15 
Epoch: [3][401/850] Data 0.003 (0.004) Elapsed 6m 15s (remain 7m 0s) Loss: 0.524(0.568) Acc: 0.938(0.875) f1_Acc: 0.917(0.875) sent/s 17 
Epoch: [3][601/850] Data 0.002 (0.003) Elapsed 8m 52s (remain 3m 40s) Loss: 0.283(0.564) Acc: 0.938(0.891) f1_Acc: 0.958(0.896) sent/s 18 
Epoch: [3][801/850] Data 0.003 (0.003) Elapsed 11m 29s (remain 0m 42s) Loss: 0.146(0.562) Acc: 0.938(0.900) f1_Acc: 0.938(0.904) sent/s 19 
Epoch: [3][850/850] Data 0.003 (0.003) Elapsed 12m 7s (remain 0m 0s) Loss: 0.074(0.568) Acc: 1.000(0.905) f1_Acc: 1.000(0.909) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.5683061474929285 accuracy 0.8566382101854578
Validate loss 0.8077426442412983 accuracy 0.7841739896884494


----------
Epoch 4/29
----------
Epoch: [4][1/850] Data 0.381 (0.381) Elapsed 0m 1s (remain 17m 4s) Loss: 0.423(0.423) Acc: 0.875(0.875) f1_Acc: 0.850(0.850) sent/s 13 
Epoch: [4][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.865(0.436) Acc: 0.875(0.875) f1_Acc: 0.847(0.849) sent/s 20 
Epoch: [4][401/850] Data 0.003 (0.004) Elapsed 5m 45s (remain 6m 27s) Loss: 0.116(0.427) Acc: 1.000(0.917) f1_Acc: 1.000(0.899) sent/s 19 
Epoch: [4][601/850] Data 0.003 (0.003) Elapsed 8m 22s (remain 3m 28s) Loss: 0.838(0.441) Acc: 0.875(0.906) f1_Acc: 0.854(0.888) sent/s 19 
Epoch: [4][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.058(0.436) Acc: 1.000(0.925) f1_Acc: 1.000(0.910) sent/s 19 
Epoch: [4][850/850] Data 0.003 (0.003) Elapsed 11m 38s (remain 0m 0s) Loss: 2.307(0.438) Acc: 0.500(0.905) f1_Acc: 0.500(0.891) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.4384572821981001 accuracy 0.8871798645863999
Validate loss 0.8383630593941315 accuracy 0.8015343828341632


----------
Epoch 5/29
----------
Epoch: [5][1/850] Data 0.378 (0.378) Elapsed 0m 1s (remain 17m 8s) Loss: 0.919(0.919) Acc: 0.812(0.812) f1_Acc: 0.812(0.812) sent/s 13 
Epoch: [5][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.429(0.325) Acc: 0.812(0.812) f1_Acc: 0.792(0.802) sent/s 20 
Epoch: [5][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.319(0.328) Acc: 0.938(0.854) f1_Acc: 0.938(0.847) sent/s 20 
Epoch: [5][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 0.136(0.323) Acc: 0.938(0.875) f1_Acc: 0.938(0.870) sent/s 20 
Epoch: [5][801/850] Data 0.003 (0.003) Elapsed 10m 59s (remain 0m 40s) Loss: 0.582(0.333) Acc: 0.875(0.875) f1_Acc: 0.875(0.871) sent/s 19 
Epoch: [5][850/850] Data 0.003 (0.003) Elapsed 11m 37s (remain 0m 0s) Loss: 0.495(0.336) Acc: 0.750(0.869) f1_Acc: 0.750(0.865) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.3359826865419855 accuracy 0.9133058581100972
Validate loss 0.8509238002681886 accuracy 0.7995614814183725


----------
Epoch 6/29
----------
Epoch: [6][1/850] Data 0.382 (0.382) Elapsed 0m 1s (remain 16m 39s) Loss: 0.067(0.067) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 14 
Epoch: [6][201/850] Data 0.002 (0.005) Elapsed 2m 38s (remain 8m 30s) Loss: 0.525(0.276) Acc: 0.875(0.938) f1_Acc: 0.896(0.948) sent/s 20 
Epoch: [6][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.053(0.278) Acc: 1.000(0.958) f1_Acc: 1.000(0.965) sent/s 20 
Epoch: [6][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 0.277(0.284) Acc: 0.938(0.953) f1_Acc: 0.917(0.953) sent/s 20 
Epoch: [6][801/850] Data 0.004 (0.003) Elapsed 10m 29s (remain 0m 38s) Loss: 0.726(0.278) Acc: 0.812(0.925) f1_Acc: 0.784(0.919) sent/s 20 
Epoch: [6][850/850] Data 0.003 (0.003) Elapsed 11m 7s (remain 0m 0s) Loss: 0.041(0.277) Acc: 1.000(0.929) f1_Acc: 1.000(0.923) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.27674443589801434 accuracy 0.9306005298793053
Validate loss 0.9201979486647766 accuracy 0.8112048261652288


----------
Epoch 7/29
----------
Epoch: [7][1/850] Data 0.391 (0.391) Elapsed 0m 1s (remain 17m 6s) Loss: 0.031(0.031) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [7][201/850] Data 0.003 (0.012) Elapsed 3m 40s (remain 11m 50s) Loss: 0.276(0.221) Acc: 0.938(0.969) f1_Acc: 0.938(0.969) sent/s 15 
Epoch: [7][401/850] Data 0.003 (0.007) Elapsed 6m 17s (remain 7m 2s) Loss: 1.034(0.216) Acc: 0.812(0.917) f1_Acc: 0.792(0.910) sent/s 17 
Epoch: [7][601/850] Data 0.003 (0.006) Elapsed 8m 54s (remain 3m 41s) Loss: 0.510(0.217) Acc: 0.938(0.922) f1_Acc: 0.938(0.917) sent/s 18 
Epoch: [7][801/850] Data 0.003 (0.005) Elapsed 11m 31s (remain 0m 42s) Loss: 0.256(0.220) Acc: 0.938(0.925) f1_Acc: 0.938(0.921) sent/s 19 
Epoch: [7][850/850] Data 0.003 (0.005) Elapsed 12m 9s (remain 0m 0s) Loss: 0.043(0.220) Acc: 1.000(0.929) f1_Acc: 1.000(0.925) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.21990114051277299 accuracy 0.9452458051221667
Validate loss 1.0371629646563894 accuracy 0.8088062022607417


----------
Epoch 8/29
----------
Epoch: [8][1/850] Data 0.380 (0.380) Elapsed 0m 1s (remain 16m 39s) Loss: 0.412(0.412) Acc: 0.875(0.875) f1_Acc: 0.875(0.875) sent/s 14 
Epoch: [8][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.071(0.167) Acc: 1.000(0.938) f1_Acc: 1.000(0.938) sent/s 20 
Epoch: [8][401/850] Data 0.003 (0.004) Elapsed 5m 46s (remain 6m 27s) Loss: 0.007(0.180) Acc: 1.000(0.958) f1_Acc: 1.000(0.958) sent/s 19 
Epoch: [8][601/850] Data 0.003 (0.003) Elapsed 8m 23s (remain 3m 28s) Loss: 0.256(0.184) Acc: 0.875(0.938) f1_Acc: 0.825(0.925) sent/s 19 
Epoch: [8][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.004(0.179) Acc: 1.000(0.950) f1_Acc: 1.000(0.940) sent/s 19 
Epoch: [8][850/850] Data 0.003 (0.003) Elapsed 11m 38s (remain 0m 0s) Loss: 0.006(0.178) Acc: 1.000(0.952) f1_Acc: 1.000(0.943) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.17819230853461226 accuracy 0.9583455990579924
Validate loss 1.06504667600936 accuracy 0.8204338385107652


----------
Epoch 9/29
----------
Epoch: [9][1/850] Data 0.394 (0.394) Elapsed 0m 1s (remain 17m 28s) Loss: 0.137(0.137) Acc: 0.938(0.938) f1_Acc: 0.938(0.938) sent/s 13 
Epoch: [9][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 32s) Loss: 0.004(0.138) Acc: 1.000(0.969) f1_Acc: 1.000(0.969) sent/s 20 
Epoch: [9][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.003(0.130) Acc: 1.000(0.979) f1_Acc: 1.000(0.979) sent/s 20 
Epoch: [9][601/850] Data 0.003 (0.003) Elapsed 7m 53s (remain 3m 16s) Loss: 0.007(0.145) Acc: 1.000(0.984) f1_Acc: 1.000(0.984) sent/s 20 
Epoch: [9][801/850] Data 0.003 (0.003) Elapsed 10m 30s (remain 0m 38s) Loss: 0.127(0.142) Acc: 0.938(0.975) f1_Acc: 0.938(0.975) sent/s 20 
Epoch: [9][850/850] Data 0.003 (0.003) Elapsed 11m 8s (remain 0m 0s) Loss: 0.003(0.144) Acc: 1.000(0.976) f1_Acc: 1.000(0.976) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.14373457585713664 accuracy 0.9659994112452165
Validate loss 1.087125134910129 accuracy 0.8215962598311061


----------
Epoch 10/29
----------
Epoch: [10][1/850] Data 0.385 (0.385) Elapsed 0m 1s (remain 17m 18s) Loss: 0.268(0.268) Acc: 0.938(0.938) f1_Acc: 0.938(0.938) sent/s 13 
Epoch: [10][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.006(0.093) Acc: 1.000(0.969) f1_Acc: 1.000(0.969) sent/s 20 
Epoch: [10][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.005(0.106) Acc: 1.000(0.979) f1_Acc: 1.000(0.979) sent/s 20 
Epoch: [10][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 0.200(0.102) Acc: 0.938(0.969) f1_Acc: 0.938(0.969) sent/s 20 
Epoch: [10][801/850] Data 0.003 (0.003) Elapsed 10m 30s (remain 0m 38s) Loss: 0.013(0.108) Acc: 1.000(0.975) f1_Acc: 1.000(0.975) sent/s 20 
Epoch: [10][850/850] Data 0.003 (0.003) Elapsed 11m 8s (remain 0m 0s) Loss: 0.003(0.111) Acc: 1.000(0.976) f1_Acc: 1.000(0.976) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.11058923210370228 accuracy 0.9748307329997057
Validate loss 1.223678102572514 accuracy 0.8150769578806176


----------
Epoch 11/29
----------
Epoch: [11][1/850] Data 0.384 (0.384) Elapsed 0m 31s (remain 441m 52s) Loss: 0.002(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 1 
Epoch: [11][201/850] Data 0.003 (0.005) Elapsed 3m 8s (remain 10m 9s) Loss: 0.002(0.084) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 17 
Epoch: [11][401/850] Data 0.003 (0.004) Elapsed 5m 46s (remain 6m 27s) Loss: 0.258(0.094) Acc: 0.938(0.979) f1_Acc: 0.958(0.986) sent/s 19 
Epoch: [11][601/850] Data 0.003 (0.003) Elapsed 8m 23s (remain 3m 28s) Loss: 0.003(0.091) Acc: 1.000(0.984) f1_Acc: 1.000(0.990) sent/s 19 
Epoch: [11][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.416(0.092) Acc: 0.938(0.975) f1_Acc: 0.938(0.979) sent/s 19 
Epoch: [11][850/850] Data 0.002 (0.003) Elapsed 11m 38s (remain 0m 0s) Loss: 0.002(0.091) Acc: 1.000(0.976) f1_Acc: 1.000(0.980) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.09087771091582503 accuracy 0.9785104503974096
Validate loss 1.2737939784471646 accuracy 0.8163537069138486


----------
Epoch 12/29
----------
Epoch: [12][1/850] Data 0.382 (0.382) Elapsed 0m 1s (remain 16m 39s) Loss: 0.454(0.454) Acc: 0.938(0.938) f1_Acc: 0.938(0.938) sent/s 14 
Epoch: [12][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.370(0.080) Acc: 0.938(0.938) f1_Acc: 0.938(0.938) sent/s 20 
Epoch: [12][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.039(0.076) Acc: 0.938(0.938) f1_Acc: 0.938(0.938) sent/s 20 
Epoch: [12][601/850] Data 0.003 (0.003) Elapsed 8m 23s (remain 3m 28s) Loss: 0.042(0.075) Acc: 1.000(0.953) f1_Acc: 1.000(0.953) sent/s 19 
Epoch: [12][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.002(0.069) Acc: 1.000(0.963) f1_Acc: 1.000(0.963) sent/s 19 
Epoch: [12][850/850] Data 0.003 (0.003) Elapsed 11m 38s (remain 0m 0s) Loss: 0.002(0.070) Acc: 1.000(0.964) f1_Acc: 1.000(0.964) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.07016465904275167 accuracy 0.9852811304091846
Validate loss 1.3142246450655526 accuracy 0.8186235877972475


----------
Epoch 13/29
----------
Epoch: [13][1/850] Data 0.400 (0.400) Elapsed 0m 1s (remain 16m 59s) Loss: 0.002(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [13][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.019(0.048) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [13][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.004(0.050) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [13][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 0.002(0.056) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [13][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.001(0.054) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [13][850/850] Data 0.003 (0.003) Elapsed 11m 38s (remain 0m 0s) Loss: 0.068(0.055) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.054806129789671594 accuracy 0.9877097438916692
Validate loss 1.3162965913584124 accuracy 0.8195788672140264


----------
Epoch 14/29
----------
Epoch: [14][1/850] Data 0.398 (0.398) Elapsed 0m 1s (remain 16m 51s) Loss: 0.002(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [14][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.099(0.057) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [14][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.004(0.058) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [14][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 0.001(0.055) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [14][801/850] Data 0.003 (0.003) Elapsed 10m 30s (remain 0m 38s) Loss: 0.001(0.050) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [14][850/850] Data 0.002 (0.003) Elapsed 11m 8s (remain 0m 0s) Loss: 0.001(0.050) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0502855731999915 accuracy 0.9893288195466589
Validate loss 1.357759236740555 accuracy 0.8279039861380622


----------
Epoch 15/29
----------
Epoch: [15][1/850] Data 0.405 (0.405) Elapsed 0m 1s (remain 17m 35s) Loss: 0.001(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [15][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.002(0.031) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [15][401/850] Data 0.003 (0.004) Elapsed 6m 16s (remain 7m 1s) Loss: 0.001(0.034) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 17 
Epoch: [15][601/850] Data 0.003 (0.003) Elapsed 8m 53s (remain 3m 40s) Loss: 0.416(0.031) Acc: 0.938(0.984) f1_Acc: 0.912(0.978) sent/s 18 
Epoch: [15][801/850] Data 0.003 (0.003) Elapsed 11m 30s (remain 0m 42s) Loss: 0.002(0.032) Acc: 1.000(0.988) f1_Acc: 1.000(0.982) sent/s 19 
Epoch: [15][850/850] Data 0.003 (0.003) Elapsed 12m 8s (remain 0m 0s) Loss: 0.001(0.031) Acc: 1.000(0.988) f1_Acc: 1.000(0.983) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.031080830665594496 accuracy 0.9938916691198116
Validate loss 1.4500630081986092 accuracy 0.8236798050024453


----------
Epoch 16/29
----------
Epoch: [16][1/850] Data 0.397 (0.397) Elapsed 0m 1s (remain 16m 56s) Loss: 0.001(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [16][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 32s) Loss: 0.001(0.012) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [16][401/850] Data 0.003 (0.004) Elapsed 5m 16s (remain 5m 53s) Loss: 0.001(0.015) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [16][601/850] Data 0.003 (0.003) Elapsed 8m 23s (remain 3m 28s) Loss: 0.001(0.018) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [16][801/850] Data 0.003 (0.003) Elapsed 11m 1s (remain 0m 40s) Loss: 0.001(0.021) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [16][850/850] Data 0.003 (0.003) Elapsed 11m 39s (remain 0m 0s) Loss: 0.001(0.023) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.02268047647819105 accuracy 0.9950691786870769
Validate loss 1.4201348107299951 accuracy 0.8210942951237703


----------
Epoch 17/29
----------
Epoch: [17][1/850] Data 0.405 (0.405) Elapsed 0m 1s (remain 17m 0s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [17][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.000(0.009) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [17][401/850] Data 0.003 (0.004) Elapsed 5m 16s (remain 5m 53s) Loss: 0.000(0.011) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [17][601/850] Data 0.003 (0.003) Elapsed 7m 53s (remain 3m 16s) Loss: 0.006(0.015) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [17][801/850] Data 0.003 (0.003) Elapsed 10m 30s (remain 0m 38s) Loss: 0.000(0.021) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [17][850/850] Data 0.002 (0.003) Elapsed 11m 8s (remain 0m 0s) Loss: 0.000(0.022) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.022031291575338084 accuracy 0.9954371504268473
Validate loss 1.4245407767510512 accuracy 0.8244924116011066


----------
Epoch 18/29
----------
Epoch: [18][1/850] Data 0.418 (0.418) Elapsed 1m 1s (remain 870m 26s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 0 
Epoch: [18][201/850] Data 0.003 (0.005) Elapsed 3m 38s (remain 11m 46s) Loss: 0.000(0.012) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 15 
Epoch: [18][401/850] Data 0.003 (0.004) Elapsed 6m 16s (remain 7m 1s) Loss: 0.003(0.016) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 17 
Epoch: [18][601/850] Data 0.003 (0.003) Elapsed 8m 53s (remain 3m 41s) Loss: 0.001(0.014) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 18 
Epoch: [18][801/850] Data 0.004 (0.003) Elapsed 11m 31s (remain 0m 42s) Loss: 0.000(0.013) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [18][850/850] Data 0.002 (0.003) Elapsed 12m 9s (remain 0m 0s) Loss: 0.004(0.013) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.012673023562990228 accuracy 0.9974241978216074
Validate loss 1.4969527493425707 accuracy 0.8254634738872296


----------
Epoch 19/29
----------
Epoch: [19][1/850] Data 0.397 (0.397) Elapsed 0m 1s (remain 16m 52s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [19][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.000(0.009) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [19][401/850] Data 0.003 (0.004) Elapsed 5m 46s (remain 6m 27s) Loss: 0.000(0.009) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [19][601/850] Data 0.003 (0.003) Elapsed 8m 23s (remain 3m 28s) Loss: 0.000(0.010) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [19][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.001(0.010) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [19][850/850] Data 0.003 (0.003) Elapsed 11m 39s (remain 0m 0s) Loss: 0.000(0.010) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.010448246968185013 accuracy 0.9974241978216074
Validate loss 1.4864011660254857 accuracy 0.8303664077429399


----------
Epoch 20/29
----------
Epoch: [20][1/850] Data 0.397 (0.397) Elapsed 0m 1s (remain 17m 25s) Loss: 0.001(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [20][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.000(0.008) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [20][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.000(0.012) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [20][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 0.000(0.009) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [20][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.000(0.010) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [20][850/850] Data 0.003 (0.003) Elapsed 11m 38s (remain 0m 0s) Loss: 0.000(0.009) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0092769434471342 accuracy 0.9980865469531941
Validate loss 1.5083083427319384 accuracy 0.8291658650136442


----------
Epoch 21/29
----------
Epoch: [21][1/850] Data 0.396 (0.396) Elapsed 0m 1s (remain 16m 52s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [21][201/850] Data 0.004 (0.005) Elapsed 2m 38s (remain 8m 32s) Loss: 0.001(0.004) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [21][401/850] Data 0.003 (0.004) Elapsed 5m 16s (remain 5m 53s) Loss: 0.096(0.003) Acc: 0.938(0.979) f1_Acc: 0.938(0.979) sent/s 20 
Epoch: [21][601/850] Data 0.003 (0.004) Elapsed 7m 53s (remain 3m 16s) Loss: 0.000(0.005) Acc: 1.000(0.984) f1_Acc: 1.000(0.984) sent/s 20 
Epoch: [21][801/850] Data 0.003 (0.003) Elapsed 10m 31s (remain 0m 38s) Loss: 0.001(0.006) Acc: 1.000(0.988) f1_Acc: 1.000(0.988) sent/s 20 
Epoch: [21][850/850] Data 0.003 (0.003) Elapsed 11m 9s (remain 0m 0s) Loss: 0.000(0.006) Acc: 1.000(0.988) f1_Acc: 1.000(0.988) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.005964497666418594 accuracy 0.9985281130409185
Validate loss 1.4891244605086043 accuracy 0.8277402003778971


----------
Epoch 22/29
----------
Epoch: [22][1/850] Data 0.402 (0.402) Elapsed 0m 1s (remain 16m 55s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [22][201/850] Data 0.003 (0.005) Elapsed 3m 38s (remain 11m 46s) Loss: 0.000(0.004) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 15 
Epoch: [22][401/850] Data 0.003 (0.004) Elapsed 6m 16s (remain 7m 1s) Loss: 0.000(0.003) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 17 
Epoch: [22][601/850] Data 0.004 (0.003) Elapsed 8m 53s (remain 3m 41s) Loss: 0.000(0.003) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 18 
Epoch: [22][801/850] Data 0.004 (0.003) Elapsed 11m 30s (remain 0m 42s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [22][850/850] Data 0.003 (0.003) Elapsed 12m 9s (remain 0m 0s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0023595797741770017 accuracy 0.998969679128643
Validate loss 1.5175752971177054 accuracy 0.8306093605332849


----------
Epoch 23/29
----------
Epoch: [23][1/850] Data 0.405 (0.405) Elapsed 0m 1s (remain 17m 17s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [23][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [23][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [23][601/850] Data 0.003 (0.003) Elapsed 8m 23s (remain 3m 28s) Loss: 0.000(0.003) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [23][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.000(0.004) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [23][850/850] Data 0.002 (0.003) Elapsed 11m 38s (remain 0m 0s) Loss: 0.000(0.003) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.003377862514862596 accuracy 0.9992640565204594
Validate loss 1.5529223724956074 accuracy 0.8303496773232327


----------
Epoch 24/29
----------
Epoch: [24][1/850] Data 0.406 (0.406) Elapsed 0m 1s (remain 17m 1s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [24][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [24][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [24][601/850] Data 0.003 (0.003) Elapsed 7m 53s (remain 3m 15s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [24][801/850] Data 0.003 (0.003) Elapsed 11m 0s (remain 0m 40s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [24][850/850] Data 0.003 (0.003) Elapsed 11m 38s (remain 0m 0s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0014763847041895618 accuracy 0.9992640565204594
Validate loss 1.5521261989470674 accuracy 0.8332684308301226


----------
Epoch 25/29
----------
Epoch: [25][1/850] Data 0.399 (0.399) Elapsed 0m 1s (remain 17m 33s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [25][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 31s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [25][401/850] Data 0.003 (0.004) Elapsed 5m 15s (remain 5m 53s) Loss: 0.000(0.003) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [25][601/850] Data 0.003 (0.003) Elapsed 7m 52s (remain 3m 15s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [25][801/850] Data 0.003 (0.003) Elapsed 10m 30s (remain 0m 38s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [25][850/850] Data 0.002 (0.003) Elapsed 11m 8s (remain 0m 0s) Loss: 0.001(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0019290121646006755 accuracy 0.9995584339122756
Validate loss 1.558105951735052 accuracy 0.8301737126412848


----------
Epoch 26/29
----------
Epoch: [26][1/850] Data 0.400 (0.400) Elapsed 0m 1s (remain 16m 54s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [26][201/850] Data 0.003 (0.005) Elapsed 3m 38s (remain 11m 46s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 15 
Epoch: [26][401/850] Data 0.003 (0.004) Elapsed 6m 15s (remain 7m 0s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 17 
Epoch: [26][601/850] Data 0.003 (0.003) Elapsed 8m 53s (remain 3m 40s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 18 
Epoch: [26][801/850] Data 0.003 (0.003) Elapsed 11m 30s (remain 0m 42s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [26][850/850] Data 0.003 (0.003) Elapsed 12m 8s (remain 0m 0s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0008753111558168841 accuracy 0.9996320282602297
Validate loss 1.5728555188782853 accuracy 0.8290749809442561


----------
Epoch 27/29
----------
Epoch: [27][1/850] Data 0.403 (0.403) Elapsed 0m 1s (remain 16m 58s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [27][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 32s) Loss: 0.000(0.002) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [27][401/850] Data 0.003 (0.004) Elapsed 5m 16s (remain 5m 54s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [27][601/850] Data 0.003 (0.004) Elapsed 8m 24s (remain 3m 28s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [27][801/850] Data 0.004 (0.003) Elapsed 11m 1s (remain 0m 40s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 
Epoch: [27][850/850] Data 0.003 (0.003) Elapsed 11m 39s (remain 0m 0s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 19 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0010323101284926467 accuracy 0.9995584339122756
Validate loss 1.570065488003931 accuracy 0.8314275400349862


----------
Epoch 28/29
----------
Epoch: [28][1/850] Data 0.406 (0.406) Elapsed 0m 1s (remain 17m 5s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [28][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 32s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [28][401/850] Data 0.003 (0.004) Elapsed 5m 16s (remain 5m 54s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [28][601/850] Data 0.003 (0.003) Elapsed 7m 53s (remain 3m 16s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [28][801/850] Data 0.003 (0.003) Elapsed 10m 31s (remain 0m 38s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [28][850/850] Data 0.003 (0.003) Elapsed 11m 9s (remain 0m 0s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0005454862979792463 accuracy 0.9997792169561378
Validate loss 1.5707292995488935 accuracy 0.8319444168585036


----------
Epoch 29/29
----------
Epoch: [29][1/850] Data 0.402 (0.402) Elapsed 0m 1s (remain 16m 58s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 13 
Epoch: [29][201/850] Data 0.003 (0.005) Elapsed 2m 38s (remain 8m 32s) Loss: 0.000(0.000) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [29][401/850] Data 0.003 (0.004) Elapsed 5m 16s (remain 5m 54s) Loss: 0.001(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [29][601/850] Data 0.003 (0.003) Elapsed 7m 53s (remain 3m 16s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [29][801/850] Data 0.003 (0.003) Elapsed 10m 31s (remain 0m 38s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 
Epoch: [29][850/850] Data 0.003 (0.003) Elapsed 11m 9s (remain 0m 0s) Loss: 0.000(0.001) Acc: 1.000(1.000) f1_Acc: 1.000(1.000) sent/s 20 


  0%|          | 0/213 [00:00<?, ?it/s]

Train loss 0.0006591976280863572 accuracy 0.9997792169561378
Validate loss 1.5726112512531703 accuracy 0.831801184863589




## Inference

In [None]:
class CategoryDataset(Dataset):
  """Text-only dataset that tokenizes one description per requested item.

  Args:
    text: Indexable collection of raw descriptions; every entry is passed
      through ``str()`` before it is tokenized.
    tokenizer: Object exposing ``encode_plus``.
    feature_extractor: Stored on the instance but not used by this class
      (the image branch is disabled).
    max_len: Length every sequence is padded or truncated to.
  """

  def __init__(self, text, tokenizer, feature_extractor, max_len):
    self.text = text
    self.tokenizer = tokenizer
    self.feature_extractor = feature_extractor
    self.max_len = max_len

  def __len__(self):
    """Number of descriptions held by the dataset."""
    return len(self.text)

  def __getitem__(self, item):
    """Tokenize entry ``item`` and return its flattened id and mask tensors."""
    tokenizer_options = dict(
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding='max_length',
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt',
    )
    encoded = self.tokenizer.encode_plus(str(self.text[item]), **tokenizer_options)
    # flatten() collapses each returned tensor to 1-D before it is handed out.
    return {
      field: encoded[field].flatten()
      for field in ('input_ids', 'attention_mask')
    }

def create_data_loader(df, tokenizer, feature_extractor, max_len, batch_size, shuffle_=False):
    """Build a ``DataLoader`` over the ``overview`` column of ``df``.

    Args:
        df: Frame-like object whose ``overview`` attribute supports ``to_numpy()``.
        tokenizer: Forwarded to ``CategoryDataset``.
        feature_extractor: Forwarded to ``CategoryDataset``.
        max_len: Forwarded to ``CategoryDataset`` as the sequence length.
        batch_size: Number of items per batch.
        shuffle_: Whether the loader shuffles; defaults to ``False``.

    Returns:
        A ``DataLoader`` using 4 worker processes.
    """
    overview_texts = df.overview.to_numpy()
    dataset = CategoryDataset(
        text=overview_texts,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
        max_len=max_len,
    )
    loader = DataLoader(dataset, batch_size=batch_size, num_workers=4, shuffle=shuffle_)
    return loader

In [None]:
def inference(model, data_loader, device, n_examples):
  """Run ``model`` over ``data_loader`` and collect class predictions per output.

  Args:
    model: Module called with ``input_ids`` and ``attention_mask`` keyword
      arguments and returning three score tensors; it is switched to eval
      mode here.
    data_loader: Iterable of batches, each a mapping holding ``input_ids``
      and ``attention_mask`` tensors.
    device: Device the batch tensors are moved to before the forward pass.
    n_examples: Unused; kept so existing callers keep working.

  Returns:
    Three lists (one per model output, in order). Each list holds one NumPy
    array of predicted class indices per batch.
  """
  model.eval()
  preds_arr, preds_arr2, preds_arr3 = [], [], []

  # Prediction never needs autograd, so it is disabled for the whole loop.
  # No gradient clipping either: that belongs to a training step and would
  # only rescale leftover gradients here.
  with torch.no_grad():
    for d in tqdm(data_loader):
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)

      outputs, outputs2, outputs3 = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
      )

      # The index of the highest score along dim 1 is the predicted class.
      for collected, scores in (
        (preds_arr, outputs),
        (preds_arr2, outputs2),
        (preds_arr3, outputs3),
      ):
        collected.append(scores.argmax(dim=1).cpu().numpy())

  return preds_arr, preds_arr2, preds_arr3

In [None]:
# Read the test split (test.csv) from the data directory given by file_path.
test = pd.read_csv(file_path + '/test.csv')

In [None]:
# Sequences are padded/truncated to 256 tokens. With batch_size=1 every array
# in the returned prediction lists holds exactly one prediction.
eval_data_loader = create_data_loader(
    df=test,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
    max_len=256,
    batch_size=1,
)

# One list of per-batch predictions for each of the model's three outputs.
preds_arr, preds_arr2, preds_arr3 = inference(
    model=model,
    data_loader=eval_data_loader,
    device=device,
    n_examples=len(test),
)

  0%|          | 0/7280 [00:00<?, ?it/s]

## Submission

In [None]:
# Submission template; its 'cat3' column is filled from the predictions later in this notebook.
sample_submission = pd.read_csv(file_path + '/sample_submission.csv')
# Class-index -> cat3 label lookup: position k is the label written out for predicted class k.
# NOTE(review): the order presumably mirrors the label encoding used at training time —
# confirm before adding, removing or reordering entries.
arr = ['5일장', 'ATV', 'MTB', '강', '게스트하우스', '계곡', '고궁', '고택', '골프', '공연장',
       '공예,공방', '공원', '관광단지', '국립공원', '군립공원', '기념관', '기념탑/기념비/전망대',
       '기암괴석', '기타', '기타행사', '농.산.어촌 체험', '다리/대교', '대중콘서트', '대형서점',
       '도립공원', '도서관', '동굴', '동상', '등대', '래프팅', '면세점', '모텔', '문', '문화관광축제',
       '문화원', '문화전수시설', '뮤지컬', '미술관/화랑', '민물낚시', '민박', '민속마을', '바/까페',
       '바다낚시', '박람회', '박물관', '발전소', '백화점', '번지점프', '복합 레포츠', '분수', '빙벽등반',
       '사격장', '사찰', '산', '상설시장', '생가', '서비스드레지던스', '서양식', '섬', '성',
       '수련시설', '수목원', '수상레포츠', '수영', '스노쿨링/스킨스쿠버다이빙', '스카이다이빙', '스케이트',
       '스키(보드) 렌탈샵', '스키/스노보드', '승마', '식음료', '썰매장', '안보관광', '야영장,오토캠핑장',
       '약수터', '연극', '영화관', '온천/욕장/스파', '외국문화원', '요트', '윈드서핑/제트스키',
       '유람선/잠수함관광', '유명건물', '유스호스텔', '유원지', '유적지/사적지', '이색거리', '이색찜질방',
       '이색체험', '인라인(실내 인라인 포함)', '일반축제', '일식', '자동차경주', '자연생태관광지',
       '자연휴양림', '자전거하이킹', '전문상가', '전시관', '전통공연', '종교성지', '중식', '채식전문점',
       '카약/카누', '카지노', '카트', '컨벤션', '컨벤션센터', '콘도미니엄', '클래식음악회', '클럽',
       '터널', '테마공원', '트래킹', '특산물판매점', '패밀리레스토랑', '펜션', '폭포', '학교', '한식',
       '한옥스테이', '항구/포구', '해수욕장', '해안절경', '헬스투어', '헹글라이딩/패러글라이딩', '호수',
       '홈스테이', '희귀동.식물']

In [None]:
# Flatten the per-batch prediction arrays so every prediction is used, whatever
# batch size produced them (reading only element 0 of each batch would drop the rest).
cat3_indices = (
    np.concatenate(preds_arr3) if len(preds_arr3) > 0 else np.array([], dtype=int)
)

# Fail loudly rather than writing a partially filled submission file.
if len(cat3_indices) != len(sample_submission):
    raise ValueError(
        f"Got {len(cat3_indices)} predictions for {len(sample_submission)} submission rows"
    )

# Map class indices to label strings and fill the column in a single assignment.
sample_submission['cat3'] = [arr[class_idx] for class_idx in cat3_indices]

sample_submission.to_csv(file_path + '/submit_1016_1.csv', index=False)