[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1NLtpspxcoo467TNaVOHJHC6bQki3ECNK?usp=sharing)

https://www.kaggle.com/code/rhtsingh/utilizing-transformer-representations-efficiently

# Import Libraries and Setup

In [1]:
from IPython.display import clear_output
# Install the Hugging Face libraries this notebook imports below.
!pip install transformers
!pip install datasets

# Needed for jarvisx17/japanese-sentiment-analysis
!pip install fugashi
!pip install unidic_lite

# Wipe the pip logs from the cell output once installation is done.
clear_output()

In [2]:
import os
import sys
import pandas as pd
import argparse
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

from sklearn.metrics import (accuracy_score, f1_score,
                             precision_score, recall_score,)

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import time
from datetime import datetime
import pytz

import datasets
from datasets import load_dataset
from transformers import (AutoModel, AutoTokenizer, AutoConfig,
                          AutoModelForSequenceClassification,)
from transformers import (Trainer, TrainingArguments,
                          DataCollatorWithPadding, EarlyStoppingCallback)
from transformers.modeling_outputs import SequenceClassifierOutput

from tqdm import tqdm_notebook as tqdm
from IPython.display import clear_output
import warnings
warnings.filterwarnings("ignore")
import gc
gc.collect();

# Raise pandas' display limits (rows, columns, line width) to 100 each.
for option_name, option_value in (('display.max_rows', 100),
                                  ('display.max_columns', 100),
                                  ('display.width', 100)):
    pd.set_option(option_name, option_value)

In [3]:
# Project name; SET_Dir() below creates /content/<PROJECT> and its sub-folders.
PROJECT = 'CDLE_NLP'
def SET_Dir(PROJECT, base_dir='/content'):
  """Create the project folder tree (if missing) and return its paths.

  The layout is ``<base_dir>/<PROJECT>`` with ``input``, ``output`` and
  ``model`` sub-folders. Directories are created with ``os.makedirs`` on the
  same absolute paths that are returned, so the result does not depend on the
  notebook's current working directory and no shell command is built from
  ``PROJECT``.

  Args:
    PROJECT: name of the project folder.
    base_dir: parent directory of the project folder. Defaults to
      ``/content``.

  Returns:
    Tuple ``(PROJECTDIR, INPUTDIR, OUTPUTDIR, MODELDIR)`` of path strings.
  """
  PROJECTDIR = f'{base_dir}/{PROJECT}'
  INPUTDIR = f'{PROJECTDIR}/input'
  OUTPUTDIR = f'{PROJECTDIR}/output'
  MODELDIR = f'{PROJECTDIR}/model'
  for folder in (PROJECTDIR, INPUTDIR, OUTPUTDIR, MODELDIR):
    # exist_ok makes the cell safe to re-run.
    os.makedirs(folder, exist_ok=True)
  return PROJECTDIR, INPUTDIR, OUTPUTDIR, MODELDIR
# Create the folders and keep their paths as notebook-level constants.
PROJECTDIR, INPUTDIR, OUTPUTDIR, MODELDIR = SET_Dir(PROJECT)

In [4]:
# Mount Google Drive at /content/drive.
from google.colab import drive 
drive.mount('/content/drive')

Mounted at /content/drive


# Helper

In [5]:
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch (CPU and current CUDA device) with `seed`.

    Also exports PYTHONHASHSEED and switches cuDNN to deterministic mode.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

def set_output_dir(cfg, now=None):
  """Set ``cfg.output_dir`` to a timestamped run name and print it.

  The name is ``<YYYYMMDD-HH-MM-SS>-<cfg.model_path_or_name>-finetuned-emotion``.
  Every timestamp field is zero-padded, so names are unambiguous and sort
  chronologically.

  Args:
    cfg: config object exposing ``model_path_or_name``; its ``output_dir``
      attribute is overwritten.
    now: optional ``datetime`` to stamp the name with. Defaults to the current
      time in the Asia/Tokyo timezone.
  """
  if now is None:
    now = datetime.now(pytz.timezone('Asia/Tokyo'))
  dttime = now.strftime('%Y%m%d-%H-%M-%S')
  cfg.output_dir = f"{dttime}-{cfg.model_path_or_name}-finetuned-emotion"
  print(f'SET output_dir: {cfg.output_dir}')

# Set Config

* tip
  * casedは大文字と小文字を区別  
  * uncasedは大文字と小文字を区別しない

In [6]:
class Config:
    # Notebook-wide settings, read as class attributes (the class is never instantiated here).

    # Checkpoint handed to the tokenizer / config / model `from_pretrained` calls.
    model_path_or_name = 'jarvisx17/japanese-sentiment-analysis'
    # Initial run name; set_output_dir() replaces it with a timestamped one.
    output_dir = f"{model_path_or_name}-finetuned-emotion"

    num_classes = 3
    max_seq_length = 512
    # NOTE(review): create_dataset() passes padding=True and does not read this value.
    padding = 'max_length'

    num_epochs = 30
    early_stopping_patience = 3 # epochs
    batch_size = 128
    gradient_accumulation_steps = 1 # Memory saving (actual batch_size = batch_size / gradient_accumulation_steps)
    lr = 2e-5
    weight_decay = 0.01
    warmup_steps = 500
    
    # Use the GPU when one is visible to torch, otherwise fall back to CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    num_workers = 1
    seed = 72
# Seed every RNG once the seed value is known.
seed_everything(Config.seed)

# Load Dataset

In [7]:
# Load the "japanese" configuration of tyqiangz/multilingual-sentiments and
# expose every split as a pandas DataFrame.
dataset = load_dataset("tyqiangz/multilingual-sentiments", "japanese")
dataset.set_format(type="pandas")

# The 'label' column is renamed to 'labels' in every split.
# DEMO: train on a 10,000-row subset. random_state pins the subset so that
# re-running this cell alone selects the same rows instead of advancing the
# global NumPy RNG.
train_df = dataset['train'][:].rename(columns={'label': 'labels'}).sample(n=10000, random_state=Config.seed) # DEMO
valid_df = dataset['validation'][:].rename(columns={'label': 'labels'})
test_df = dataset['test'][:].rename(columns={'label': 'labels'})

# Drop the DatasetDict reference and clear the download logs.
del dataset; gc.collect();
clear_output()

In [8]:
# Show the size and the first two rows of each split.
for split_name, split_df in (('train_df', train_df),
                             ('valid_df', valid_df),
                             ('test_df', test_df)):
    print(f'{split_name} len: {len(split_df)}')
    display(split_df.head(2))

train_df len: 10000


Unnamed: 0,text,source,labels
9548,OSをバージョンアップすると、途端に使えなくなる。どんな仕組みで作動するのか知らないが、期待...,amazon_reviews_multi,2
28735,設置位置の調整中に気付いたのですが、ダッシュボード上に白い粉が付着していました。息で軽く飛ば...,amazon_reviews_multi,2


valid_df len: 3000


Unnamed: 0,text,source,labels
0,味自体及び吸い心地は良いのだが、不良品が多過ぎる。私の場合５本のうち２本が蒸気も出ず、吸い込...,amazon_reviews_multi,2
1,ホームボタン周りの気泡が全く抜けません。 返金をお願いしましたが、断られた。,amazon_reviews_multi,2


test_df len: 3000


Unnamed: 0,text,source,labels
0,購入、貼付け後2週間もたたないうちに、上側から剥がれてきて画面から浮いた状態になってしまった...,amazon_reviews_multi,2
1,以下の3点の理由により、期待はずれの粗悪品。 ①他の方のレビューにもある通り、天板の外観が掲...,amazon_reviews_multi,2


# Set Tokenizer

In [9]:
def LoadTokenizer(cfg, *, pretrained:bool=True, model_path:str=None):
  """Load a tokenizer and print a short summary of it.

  Args:
    cfg: config object; ``cfg.model_path_or_name`` is used when
      ``pretrained`` is True.
    pretrained: load from ``cfg.model_path_or_name`` when True, otherwise
      from ``model_path``.
    model_path: tokenizer location used when ``pretrained`` is False.

  Returns:
    The tokenizer returned by ``AutoTokenizer.from_pretrained``.

  Raises:
    ValueError: if ``pretrained`` is False and ``model_path`` is missing.
  """
  # https://zenn.dev/hellorusk/articles/7fd588cae5b173
  if not pretrained and not model_path:
    # Fail early with a clear message instead of passing None downstream.
    raise ValueError('model_path must be provided when pretrained=False')

  source = cfg.model_path_or_name if pretrained else model_path
  tokenizer = AutoTokenizer.from_pretrained(source)

  ## information
  clear_output()
  print('*'*20)
  source_label = 'model name' if pretrained else 'model path'
  print(f'{source_label}: {source}')
  print(f'tokenizer語彙数: {tokenizer.vocab_size}, tokenizer最大長: {tokenizer.model_max_length}')
  print(f'tokenizer input_names: {tokenizer.model_input_names}')
  print('*'*20)

  return tokenizer

In [10]:
def create_dataset(df:pd.DataFrame, tokenizer, *, cfg=None, remove_columns:list=None):
  """Turn a DataFrame with a 'text' column into a tokenized `datasets.Dataset`.

  Args:
    df: DataFrame holding at least a 'text' column.
    tokenizer: callable tokenizer applied to each batch of texts.
    cfg: optional config; ``cfg.max_seq_length`` caps the sequence length.
      When ``cfg`` is None (the declared default) no explicit ``max_length``
      is passed and the tokenizer's own default applies.
    remove_columns: columns dropped from the result after tokenization.

  Returns:
    The mapped dataset containing the tokenizer outputs plus any columns
    that were not removed.
  """
  # `cfg` defaults to None, so read the limit defensively instead of
  # dereferencing it unconditionally.
  max_length = getattr(cfg, 'max_seq_length', None)

  def preprocess(examples):
    # NOTE(review): padding=True is used here; cfg.padding is not consulted.
    return tokenizer(examples['text'],
                     add_special_tokens=True,
                     padding=True,
                     max_length=max_length,
                     truncation='longest_first',
                     return_tensors='pt',
                     return_attention_mask=True)

  raw_dataset = datasets.Dataset.from_pandas(df)
  return raw_dataset.map(preprocess, batched=True, remove_columns=remove_columns,
                         desc="Running tokenizer on dataset",)

In [11]:
# Load the tokenizer for Config.model_path_or_name (pretrained=True is the default).
tokenizer = LoadTokenizer(Config)

********************
model name: jarvisx17/japanese-sentiment-analysis
tokenizer語彙数: 32768, tokenizer最大長: 1000000000000000019884624838656
tokenizer input_names: ['input_ids', 'token_type_ids', 'attention_mask']
********************


# Create Dataset

In [12]:
# Tokenize the train and validation frames, dropping the raw text/source columns.
remove_columns = ['text', 'source']
trn_dataset, val_dataset = (
    create_dataset(split_df, tokenizer, cfg=Config, remove_columns=remove_columns)
    for split_df in (train_df, valid_df)
)

Running tokenizer on dataset:   0%|          | 0/10000 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/3000 [00:00<?, ? examples/s]

# Model

In [13]:
def masked_mean(hidden_state, mask):
    """Average `hidden_state` over dim 1 using only positions where `mask` is 1.

    Args:
        hidden_state: tensor indexed as (batch, position, feature).
        mask: 2-D tensor indexed as (batch, position); it is cast to float and
            used as a per-position weight.

    Returns:
        Tensor indexed as (batch, feature).
    """
    weights = mask.unsqueeze(-1).float()
    summed = (hidden_state * weights).sum(dim=1)
    # Clamp the denominator so a row whose mask is all zero cannot divide by 0.
    counts = weights.sum(dim=1).clamp(min=1e-5)
    return summed / counts


def masked_max(hidden_state, mask):
    """Max of `hidden_state` over dim 1 using only positions where `mask` is non-zero.

    Masked-out positions are replaced with the most negative finite value of
    the working dtype, so they can never be selected however large their
    activation is (subtracting a fixed offset such as 1e3 does not guarantee
    that).

    Args:
        hidden_state: floating-point tensor indexed as (batch, position, feature).
        mask: 2-D tensor indexed as (batch, position); non-zero marks a valid
            position.

    Returns:
        Tensor indexed as (batch, feature).
    """
    valid = mask.unsqueeze(-1).bool()
    # Work in at least float32 so the result dtype matches what arithmetic
    # with a float32 mask would produce for half-precision inputs.
    work_dtype = torch.promote_types(hidden_state.dtype, torch.float32)
    hidden_state = hidden_state.to(work_dtype)
    fill_value = torch.finfo(work_dtype).min
    return hidden_state.masked_fill(~valid, fill_value).max(dim=1)[0]


class _CustomModelBase(nn.Module):
  """Shared implementation of the classifiers below: encoder + dropout/linear head.

  Subclasses only decide which encoder output feeds the head (`_pool`) and,
  when it is not `hidden_size`, how wide that input is (`_head_input_size`).

  Args:
    cfg: config object providing `num_classes`, `model_path_or_name` and
      `device`.
    dropout: dropout probability applied before the linear head.
    use_multi_layer: slice start into the encoder's `hidden_states`; only
      read by CustomModelMultiHiddenLayer.
    requires_grad_layer: if True, un-freeze
      `encoder.encoder.layer[requires_grad_layer_slit:]`.
    requires_grad_pooler: if True, un-freeze `encoder.pooler`.
    requires_grad_layer_slit: slice start used by `requires_grad_layer`.
    pretrained: if True, load weights from `cfg.model_path_or_name` and
      freeze the encoder; otherwise load weights from `model_path` and leave
      `requires_grad` untouched.
    model_path: weight location used when `pretrained` is False.

  Raises:
    ValueError: if `pretrained` is False and `model_path` is missing.
  """

  # Whether forward() asks the encoder for every layer's hidden states.
  _output_hidden_states = False

  def __init__(self, cfg , *,
               dropout:float=0.1, use_multi_layer:int=None,
               requires_grad_layer:bool=False, requires_grad_pooler:bool=False,
               requires_grad_layer_slit:int=None,
               pretrained:bool=True, model_path:str=None,):
    super().__init__()
    if not pretrained and not model_path:
      raise ValueError('model_path must be provided when pretrained=False')

    self.num_classes = cfg.num_classes
    self.dropout = dropout
    self.use_multi_layer = use_multi_layer
    self.model_path = model_path
    self.requires_grad_layer = requires_grad_layer
    self.requires_grad_pooler = requires_grad_pooler
    self.requires_grad_layer_slit = requires_grad_layer_slit

    # model: the config always comes from cfg.model_path_or_name, the weights
    # from either that name or the explicit model_path.
    self.model_path_or_name = cfg.model_path_or_name
    self.config = AutoConfig.from_pretrained(cfg.model_path_or_name)
    weights_source = cfg.model_path_or_name if pretrained else self.model_path
    self.encoder = (AutoModel
                    .from_pretrained(weights_source, config=self.config)
                    .to(cfg.device))
    if pretrained:
      self._freeze_encoder()

    # head: moved to cfg.device as well, so encoder and head are on the same
    # device even when the model is called without an external .to(device).
    self.classifier = nn.Sequential(
        nn.Dropout(self.dropout),
        nn.Linear(self._head_input_size(), self.num_classes),
    ).to(cfg.device)

    self._print_summary(cfg, pretrained)

  def _freeze_encoder(self):
    """Freeze the whole encoder, then re-enable the requested sub-modules."""
    for param in self.encoder.parameters():
      param.requires_grad = False
    if self.requires_grad_layer:
      for param in self.encoder.encoder.layer[self.requires_grad_layer_slit:].parameters():
        param.requires_grad = True
    if self.requires_grad_pooler:
      for param in self.encoder.pooler.parameters():
        param.requires_grad = True

  def _head_input_size(self):
    """Width of the tensor returned by `_pool` (input size of the linear head)."""
    return self.encoder.config.hidden_size

  def _pool(self, outputs, attention_mask):
    """Reduce the encoder outputs to one feature vector per example."""
    raise NotImplementedError

  def _print_summary(self, cfg, pretrained):
    """Clear the cell output and print what was loaded and what is trainable."""
    clear_output()
    print('*'*20)
    if pretrained:
      print(f'model name: {cfg.model_path_or_name}, device: {cfg.device}')
    else:
      print(f'model path: {self.model_path}, device: {cfg.device}')
    print(f'model hidden_size: {self.config.hidden_size}, num_hidden_layers: {self.config.num_hidden_layers}')
    print(f'=> num_classes: {self.num_classes}')
    # NOTE(review): this branch looks only at the two flags; with
    # pretrained=False nothing is frozen, yet "ALL False" is still printed.
    if self.requires_grad_layer or self.requires_grad_pooler:
      print('=> requires_grad is True list')
      for name, param in self.encoder.named_parameters():
        if param.requires_grad:
          print(f'   {name}')
    else:
      print('=> requires_grad is ALL False')
    print('*'*20)

  def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, labels=None, **kwargs):
    """Encode, pool, classify; also compute cross-entropy loss when `labels` is given.

    Returns:
      SequenceClassifierOutput with `logits` and `loss` (None without labels).
    """
    # body
    outputs = self.encoder(input_ids=input_ids,
                           attention_mask=attention_mask,
                           token_type_ids=token_type_ids,
                           output_hidden_states=self._output_hidden_states)
    # head
    logits = self.classifier(self._pool(outputs, attention_mask))

    loss = None
    if labels is not None:
      loss_fct = nn.CrossEntropyLoss()
      loss = loss_fct(logits.view(-1, self.num_classes), labels.view(-1))

    return SequenceClassifierOutput(loss=loss, logits=logits)


class CustomModelPooler(_CustomModelBase):
  """Classifier whose head reads the encoder's `pooler_output`."""

  def _pool(self, outputs, attention_mask):
    return outputs.pooler_output


class CustomModelLastHiddenLayer(_CustomModelBase):
  """Classifier whose head reads position 0 of `last_hidden_state`."""

  def _pool(self, outputs, attention_mask):
    return outputs.last_hidden_state[:, 0]


class CustomModelMultiHiddenLayer(_CustomModelBase):
  """Classifier whose head reads position 0 of several hidden states, concatenated.

  `use_multi_layer` is the slice start applied to the encoder's
  `hidden_states` tuple, e.g. -4 selects the last four entries.
  """

  _output_hidden_states = True

  def _head_input_size(self):
    if self.use_multi_layer is None:
      raise ValueError('use_multi_layer must be set for CustomModelMultiHiddenLayer (e.g. -4)')
    # Assumes `hidden_states` holds num_hidden_layers + 1 tensors (embedding
    # output plus one per layer), as BERT-style encoders return.
    num_states = self.encoder.config.num_hidden_layers + 1
    # Count exactly what hidden_states[use_multi_layer:] will yield in
    # forward(), so the head width matches for any slice start.
    num_selected = len(range(num_states)[self.use_multi_layer:])
    if num_selected == 0:
      raise ValueError(f'use_multi_layer={self.use_multi_layer} selects no hidden states')
    return self.encoder.config.hidden_size * num_selected

  def _pool(self, outputs, attention_mask):
    # Take position 0 of each selected state first, then concatenate along
    # the feature axis; same values as concatenating whole sequences and
    # slicing afterwards, without materialising the large intermediate.
    selected = outputs.hidden_states[self.use_multi_layer:]
    return torch.cat([state[:, 0] for state in selected], 1)


class CustomModelLastHiddenLayerMeanPooling(_CustomModelBase):
  """Classifier whose head reads the attention-masked mean of `last_hidden_state`."""

  def _pool(self, outputs, attention_mask):
    return masked_mean(outputs.last_hidden_state, attention_mask)


class CustomModelLastHiddenLayerMaxPooling(_CustomModelBase):
  """Classifier whose head reads the attention-masked max of `last_hidden_state`."""

  def _pool(self, outputs, attention_mask):
    return masked_max(outputs.last_hidden_state, attention_mask)


class CustomModelLastHiddenLayerMeanMaxPooling(_CustomModelBase):
  """Classifier whose head reads masked mean and masked max, concatenated."""

  def _head_input_size(self):
    # Mean and max vectors are concatenated, doubling the feature width.
    return self.encoder.config.hidden_size * 2

  def _pool(self, outputs, attention_mask):
    return torch.cat(
        (
            masked_mean(outputs.last_hidden_state, attention_mask),
            masked_max(outputs.last_hidden_state, attention_mask),
        ), 1)

# Set Train Args

In [14]:
def compute_metrics(pred):
  """Score predictions: accuracy plus weighted F1, precision and recall.

  `pred` must expose `label_ids` and `predictions`; the predicted class is
  the argmax of `predictions` over its last axis.
  """
  y_true = pred.label_ids
  y_pred = pred.predictions.argmax(-1)
  scores = {'accuracy': accuracy_score(y_true, y_pred)}
  for metric_name, scorer in (('f1', f1_score),
                              ('precision', precision_score),
                              ('recall', recall_score)):
    scores[metric_name] = scorer(y_true, y_pred, average='weighted')
  return scores

In [15]:
def set_args(cfg):
  """Build the Hugging Face ``TrainingArguments`` for one fine-tuning run.

  ``cfg.batch_size`` is treated as the effective batch size: it is divided by
  ``cfg.gradient_accumulation_steps`` to obtain the per-device batch size.

  Args:
    cfg: configuration object. Reads ``batch_size``, ``gradient_accumulation_steps``,
      ``num_epochs``, ``lr``, ``weight_decay``, ``warmup_steps``, ``seed`` and
      ``output_dir`` (read after ``set_output_dir(cfg)`` has been called).

  Returns:
    Tuple ``(training_args, output_dir)`` where ``output_dir`` is
    ``training_args.output_dir``.

  Raises:
    ValueError: if the per-device batch size would be smaller than 1.
  """
  if cfg.gradient_accumulation_steps < 1 or cfg.batch_size < cfg.gradient_accumulation_steps:
    raise ValueError(
        f'batch_size ({cfg.batch_size}) must be >= gradient_accumulation_steps '
        f'({cfg.gradient_accumulation_steps}), and both must be >= 1')
  # Batch size actually fed to the device per step.
  per_device_batch_size = cfg.batch_size // cfg.gradient_accumulation_steps

  # NOTE(review): set_output_dir is defined elsewhere; presumably it refreshes cfg.output_dir.
  set_output_dir(cfg)
  training_args = TrainingArguments(output_dir=MODELDIR+'/'+cfg.output_dir, # where checkpoints are written
                                    label_names=['labels'],        # target label column (default: labels)
                                    num_train_epochs=cfg.num_epochs,
                                    gradient_accumulation_steps=cfg.gradient_accumulation_steps,
                                    per_device_train_batch_size=per_device_batch_size,
                                    per_device_eval_batch_size=per_device_batch_size,
                                    learning_rate=cfg.lr,          # default: 5e-5
                                    lr_scheduler_type='linear',    # learning-rate decay schedule (default: linear)
                                    weight_decay=cfg.weight_decay, # strength of weight decay
                                    warmup_steps=cfg.warmup_steps, # warm-up steps of the LR scheduler
                                    seed=cfg.seed,
                                    fp16=True, # Automatic Mixed Precision (AMP)
                                    # Evaluate, save and log once per epoch. For step-based
                                    # scheduling, switch these to 'steps' and pass
                                    # eval_steps / save_steps / logging_steps.
                                    evaluation_strategy='epoch',
                                    save_strategy='epoch',
                                    logging_strategy='epoch',
                                    save_total_limit=1,          # number of checkpoints kept in output_dir
                                    metric_for_best_model='f1',  # criterion for early stopping (from compute_metrics)
                                    load_best_model_at_end=True, # must be True when early stopping is used
                                    disable_tqdm=False,
                                    push_to_hub=False,
                                    log_level='error',
                                    report_to=['none'],
                                    )
  return training_args, training_args.output_dir

# Training

In [16]:
# Fine-tune the CustomModelPooler variant.
# With requires_grad_layer_slit=-2 the logged output lists encoder layers 10 and 11 as trainable.
model = CustomModelPooler(
    Config,
    requires_grad_layer=True,
    requires_grad_pooler=True,
    requires_grad_layer_slit=-2,
)
training_args, save_path = set_args(Config)

# Early stopping uses the patience configured in Config.
early_stop_cb = EarlyStoppingCallback(
    early_stopping_patience=Config.early_stopping_patience,
)
# Pad every batch to the length of its longest sequence.
pad_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='longest')

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=trn_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=pad_collator,
    callbacks=[early_stop_cb],
    compute_metrics=compute_metrics,
)

trn_result = trainer.train()
val_result = trainer.evaluate(eval_dataset=val_dataset)
display(pd.DataFrame(val_result, index=[0]))

# Save the trained model and remember where this variant was written.
trainer.save_model(save_path)
save_path_1 = save_path

torch.cuda.empty_cache()
gc.collect();

********************
model name: jarvisx17/japanese-sentiment-analysis, device: cuda
model hidden_size: 768, num_hidden_layers: 12
=> num_classes: 3
=> requires_grad is True list
   encoder.layer.10.attention.self.query.weight
   encoder.layer.10.attention.self.query.bias
   encoder.layer.10.attention.self.key.weight
   encoder.layer.10.attention.self.key.bias
   encoder.layer.10.attention.self.value.weight
   encoder.layer.10.attention.self.value.bias
   encoder.layer.10.attention.output.dense.weight
   encoder.layer.10.attention.output.dense.bias
   encoder.layer.10.attention.output.LayerNorm.weight
   encoder.layer.10.attention.output.LayerNorm.bias
   encoder.layer.10.intermediate.dense.weight
   encoder.layer.10.intermediate.dense.bias
   encoder.layer.10.output.dense.weight
   encoder.layer.10.output.dense.bias
   encoder.layer.10.output.LayerNorm.weight
   encoder.layer.10.output.LayerNorm.bias
   encoder.layer.11.attention.self.query.weight
   encoder.layer.11.attention.self.qu

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.9689,0.796381,0.666,0.574176,0.678792,0.666
2,0.6844,0.588844,0.746333,0.729831,0.738964,0.746333
3,0.5966,0.553072,0.763333,0.756831,0.757134,0.763333
4,0.5657,0.542115,0.774333,0.765981,0.768606,0.774333
5,0.5492,0.51024,0.784667,0.776557,0.780313,0.784667
6,0.5315,0.495036,0.788667,0.78399,0.784228,0.788667
7,0.5257,0.495073,0.794333,0.789749,0.790626,0.794333
8,0.5066,0.501105,0.793667,0.787726,0.789657,0.793667
9,0.5002,0.483902,0.8,0.79636,0.796196,0.8
10,0.4952,0.495991,0.800333,0.79757,0.797628,0.800333


Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
0,0.47919,0.806333,0.805723,0.80541,0.806333,7.4079,404.97,3.24,14.0


In [17]:
# Fine-tune the CustomModelLastHiddenLayer variant.
# With requires_grad_layer_slit=-2 the logged output lists encoder layers 10 and 11 as trainable.
model = CustomModelLastHiddenLayer(
    Config,
    requires_grad_layer=True,
    requires_grad_pooler=True,
    requires_grad_layer_slit=-2,
)
training_args, save_path = set_args(Config)

# Early stopping uses the patience configured in Config.
early_stop_cb = EarlyStoppingCallback(
    early_stopping_patience=Config.early_stopping_patience,
)
# Pad every batch to the length of its longest sequence.
pad_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='longest')

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=trn_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=pad_collator,
    callbacks=[early_stop_cb],
    compute_metrics=compute_metrics,
)

trn_result = trainer.train()
val_result = trainer.evaluate(eval_dataset=val_dataset)
display(pd.DataFrame(val_result, index=[0]))

# Save the trained model and remember where this variant was written.
trainer.save_model(save_path)
save_path_2 = save_path

torch.cuda.empty_cache()
gc.collect();

********************
model name: jarvisx17/japanese-sentiment-analysis, device: cuda
model hidden_size: 768, num_hidden_layers: 12
=> num_classes: 3
=> requires_grad is True list
   encoder.layer.10.attention.self.query.weight
   encoder.layer.10.attention.self.query.bias
   encoder.layer.10.attention.self.key.weight
   encoder.layer.10.attention.self.key.bias
   encoder.layer.10.attention.self.value.weight
   encoder.layer.10.attention.self.value.bias
   encoder.layer.10.attention.output.dense.weight
   encoder.layer.10.attention.output.dense.bias
   encoder.layer.10.attention.output.LayerNorm.weight
   encoder.layer.10.attention.output.LayerNorm.bias
   encoder.layer.10.intermediate.dense.weight
   encoder.layer.10.intermediate.dense.bias
   encoder.layer.10.output.dense.weight
   encoder.layer.10.output.dense.bias
   encoder.layer.10.output.LayerNorm.weight
   encoder.layer.10.output.LayerNorm.bias
   encoder.layer.11.attention.self.query.weight
   encoder.layer.11.attention.self.qu

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.3869,0.90802,0.694333,0.66731,0.689318,0.694333
2,0.6961,0.57984,0.745333,0.729889,0.737126,0.745333
3,0.5903,0.548411,0.761667,0.753151,0.755059,0.761667
4,0.5615,0.537364,0.777333,0.767793,0.772806,0.777333
5,0.5461,0.510351,0.787,0.779764,0.782623,0.787
6,0.5275,0.494195,0.791,0.785818,0.786612,0.791
7,0.5231,0.489676,0.794667,0.790497,0.790915,0.794667
8,0.507,0.492244,0.796,0.790073,0.791936,0.796
9,0.4999,0.480021,0.8,0.796319,0.796218,0.8
10,0.4924,0.495486,0.797333,0.794509,0.794917,0.797333


Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
0,0.480015,0.810333,0.808658,0.808009,0.810333,7.4034,405.218,3.242,17.0


In [18]:
# Fine-tune the CustomModelMultiHiddenLayer variant (use_multi_layer=-4).
# With requires_grad_layer_slit=-2 the logged output lists encoder layers 10 and 11 as trainable.
model = CustomModelMultiHiddenLayer(
    Config,
    use_multi_layer=-4,
    requires_grad_layer=True,
    requires_grad_pooler=True,
    requires_grad_layer_slit=-2,
)
training_args, save_path = set_args(Config)

# Early stopping uses the patience configured in Config.
early_stop_cb = EarlyStoppingCallback(
    early_stopping_patience=Config.early_stopping_patience,
)
# Pad every batch to the length of its longest sequence.
pad_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='longest')

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=trn_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=pad_collator,
    callbacks=[early_stop_cb],
    compute_metrics=compute_metrics,
)

trn_result = trainer.train()
val_result = trainer.evaluate(eval_dataset=val_dataset)
display(pd.DataFrame(val_result, index=[0]))

# Save the trained model and remember where this variant was written.
trainer.save_model(save_path)
save_path_3 = save_path

torch.cuda.empty_cache()
gc.collect();

********************
model name: jarvisx17/japanese-sentiment-analysis, device: cuda
model hidden_size: 768, num_hidden_layers: 12
=> num_classes: 3
=> requires_grad is True list
   encoder.layer.10.attention.self.query.weight
   encoder.layer.10.attention.self.query.bias
   encoder.layer.10.attention.self.key.weight
   encoder.layer.10.attention.self.key.bias
   encoder.layer.10.attention.self.value.weight
   encoder.layer.10.attention.self.value.bias
   encoder.layer.10.attention.output.dense.weight
   encoder.layer.10.attention.output.dense.bias
   encoder.layer.10.attention.output.LayerNorm.weight
   encoder.layer.10.attention.output.LayerNorm.bias
   encoder.layer.10.intermediate.dense.weight
   encoder.layer.10.intermediate.dense.bias
   encoder.layer.10.output.dense.weight
   encoder.layer.10.output.dense.bias
   encoder.layer.10.output.LayerNorm.weight
   encoder.layer.10.output.LayerNorm.bias
   encoder.layer.11.attention.self.query.weight
   encoder.layer.11.attention.self.qu

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.0205,0.752971,0.713333,0.679213,0.708823,0.713333
2,0.662,0.583991,0.748333,0.730099,0.742794,0.748333
3,0.5951,0.553902,0.766667,0.756462,0.760486,0.766667
4,0.5659,0.547183,0.773333,0.761913,0.769502,0.773333
5,0.5533,0.515359,0.784,0.77627,0.779625,0.784
6,0.5307,0.497857,0.790333,0.784277,0.786037,0.790333
7,0.5273,0.493129,0.795333,0.790788,0.79144,0.795333
8,0.512,0.496085,0.792,0.785697,0.787868,0.792
9,0.5055,0.478333,0.8,0.797489,0.796726,0.8
10,0.499,0.487472,0.804333,0.802069,0.801984,0.804333


Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
0,0.478021,0.811667,0.81022,0.809533,0.811667,7.436,403.444,3.228,17.0


In [19]:
# Fine-tune the CustomModelLastHiddenLayerMeanPooling variant.
# With requires_grad_layer_slit=-2 the logged output lists encoder layers 10 and 11 as trainable.
model = CustomModelLastHiddenLayerMeanPooling(
    Config,
    requires_grad_layer=True,
    requires_grad_pooler=True,
    requires_grad_layer_slit=-2,
)
training_args, save_path = set_args(Config)

# Early stopping uses the patience configured in Config.
early_stop_cb = EarlyStoppingCallback(
    early_stopping_patience=Config.early_stopping_patience,
)
# Pad every batch to the length of its longest sequence.
pad_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='longest')

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=trn_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=pad_collator,
    callbacks=[early_stop_cb],
    compute_metrics=compute_metrics,
)

trn_result = trainer.train()
val_result = trainer.evaluate(eval_dataset=val_dataset)
display(pd.DataFrame(val_result, index=[0]))

# Save the trained model and remember where this variant was written.
trainer.save_model(save_path)
save_path_4 = save_path

torch.cuda.empty_cache()
gc.collect();

********************
model name: jarvisx17/japanese-sentiment-analysis, device: cuda
model hidden_size: 768, num_hidden_layers: 12
=> num_classes: 3
=> requires_grad is True list
   encoder.layer.10.attention.self.query.weight
   encoder.layer.10.attention.self.query.bias
   encoder.layer.10.attention.self.key.weight
   encoder.layer.10.attention.self.key.bias
   encoder.layer.10.attention.self.value.weight
   encoder.layer.10.attention.self.value.bias
   encoder.layer.10.attention.output.dense.weight
   encoder.layer.10.attention.output.dense.bias
   encoder.layer.10.attention.output.LayerNorm.weight
   encoder.layer.10.attention.output.LayerNorm.bias
   encoder.layer.10.intermediate.dense.weight
   encoder.layer.10.intermediate.dense.bias
   encoder.layer.10.output.dense.weight
   encoder.layer.10.output.dense.bias
   encoder.layer.10.output.LayerNorm.weight
   encoder.layer.10.output.LayerNorm.bias
   encoder.layer.11.attention.self.query.weight
   encoder.layer.11.attention.self.qu

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.2082,0.846504,0.714,0.683148,0.708157,0.714
2,0.6703,0.565059,0.754667,0.740097,0.748683,0.754667
3,0.5763,0.530186,0.776667,0.768999,0.771022,0.776667
4,0.5431,0.522558,0.782667,0.774631,0.777958,0.782667
5,0.5321,0.501566,0.79,0.783734,0.785616,0.79
6,0.5201,0.48803,0.796667,0.793393,0.792952,0.796667
7,0.513,0.488709,0.793667,0.789214,0.789784,0.793667
8,0.4957,0.495782,0.794667,0.788791,0.790969,0.794667
9,0.4874,0.484047,0.798667,0.795484,0.795217,0.798667
10,0.4808,0.501845,0.797,0.793585,0.793885,0.797


Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
0,0.476218,0.802,0.801521,0.801229,0.802,7.4628,401.995,3.216,14.0


In [20]:
# Fine-tune the CustomModelLastHiddenLayerMaxPooling variant.
# With requires_grad_layer_slit=-2 the logged output lists encoder layers 10 and 11 as trainable.
model = CustomModelLastHiddenLayerMaxPooling(
    Config,
    requires_grad_layer=True,
    requires_grad_pooler=True,
    requires_grad_layer_slit=-2,
)
training_args, save_path = set_args(Config)

# Early stopping uses the patience configured in Config.
early_stop_cb = EarlyStoppingCallback(
    early_stopping_patience=Config.early_stopping_patience,
)
# Pad every batch to the length of its longest sequence.
pad_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='longest')

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=trn_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=pad_collator,
    callbacks=[early_stop_cb],
    compute_metrics=compute_metrics,
)

trn_result = trainer.train()
val_result = trainer.evaluate(eval_dataset=val_dataset)
display(pd.DataFrame(val_result, index=[0]))

# Save the trained model and remember where this variant was written.
trainer.save_model(save_path)
save_path_5 = save_path

torch.cuda.empty_cache()
gc.collect();

********************
model name: jarvisx17/japanese-sentiment-analysis, device: cuda
model hidden_size: 768, num_hidden_layers: 12
=> num_classes: 3
=> requires_grad is True list
   encoder.layer.10.attention.self.query.weight
   encoder.layer.10.attention.self.query.bias
   encoder.layer.10.attention.self.key.weight
   encoder.layer.10.attention.self.key.bias
   encoder.layer.10.attention.self.value.weight
   encoder.layer.10.attention.self.value.bias
   encoder.layer.10.attention.output.dense.weight
   encoder.layer.10.attention.output.dense.bias
   encoder.layer.10.attention.output.LayerNorm.weight
   encoder.layer.10.attention.output.LayerNorm.bias
   encoder.layer.10.intermediate.dense.weight
   encoder.layer.10.intermediate.dense.bias
   encoder.layer.10.output.dense.weight
   encoder.layer.10.output.dense.bias
   encoder.layer.10.output.LayerNorm.weight
   encoder.layer.10.output.LayerNorm.bias
   encoder.layer.11.attention.self.query.weight
   encoder.layer.11.attention.self.qu

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.2683,0.936162,0.650333,0.636977,0.64161,0.650333
2,0.7333,0.601542,0.731,0.708197,0.724717,0.731
3,0.6139,0.560194,0.757667,0.742857,0.752795,0.757667
4,0.5714,0.540149,0.771,0.759356,0.766618,0.771
5,0.5518,0.516131,0.781,0.773057,0.776054,0.781
6,0.5353,0.503443,0.787667,0.781459,0.783193,0.787667
7,0.5306,0.498205,0.787,0.781409,0.7826,0.787
8,0.5102,0.50166,0.788667,0.780333,0.785557,0.788667
9,0.5007,0.483214,0.799667,0.796819,0.796331,0.799667
10,0.498,0.490943,0.796667,0.793659,0.793401,0.796667


Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
0,0.483214,0.799667,0.796819,0.796331,0.799667,7.4174,404.457,3.236,12.0


In [21]:
# Fine-tune the CustomModelLastHiddenLayerMeanMaxPooling variant.
# With requires_grad_layer_slit=-2 the logged output lists encoder layers 10 and 11 as trainable.
model = CustomModelLastHiddenLayerMeanMaxPooling(
    Config,
    requires_grad_layer=True,
    requires_grad_pooler=True,
    requires_grad_layer_slit=-2,
)
training_args, save_path = set_args(Config)

# Early stopping uses the patience configured in Config.
early_stop_cb = EarlyStoppingCallback(
    early_stopping_patience=Config.early_stopping_patience,
)
# Pad every batch to the length of its longest sequence.
pad_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding='longest')

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=trn_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=pad_collator,
    callbacks=[early_stop_cb],
    compute_metrics=compute_metrics,
)

trn_result = trainer.train()
val_result = trainer.evaluate(eval_dataset=val_dataset)
display(pd.DataFrame(val_result, index=[0]))

# Save the trained model and remember where this variant was written.
trainer.save_model(save_path)
save_path_6 = save_path

torch.cuda.empty_cache()
gc.collect();

********************
model name: jarvisx17/japanese-sentiment-analysis, device: cuda
model hidden_size: 768, num_hidden_layers: 12
=> num_classes: 3
=> requires_grad is True list
   encoder.layer.10.attention.self.query.weight
   encoder.layer.10.attention.self.query.bias
   encoder.layer.10.attention.self.key.weight
   encoder.layer.10.attention.self.key.bias
   encoder.layer.10.attention.self.value.weight
   encoder.layer.10.attention.self.value.bias
   encoder.layer.10.attention.output.dense.weight
   encoder.layer.10.attention.output.dense.bias
   encoder.layer.10.attention.output.LayerNorm.weight
   encoder.layer.10.attention.output.LayerNorm.bias
   encoder.layer.10.intermediate.dense.weight
   encoder.layer.10.intermediate.dense.bias
   encoder.layer.10.output.dense.weight
   encoder.layer.10.output.dense.bias
   encoder.layer.10.output.LayerNorm.weight
   encoder.layer.10.output.LayerNorm.bias
   encoder.layer.11.attention.self.query.weight
   encoder.layer.11.attention.self.qu

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.1314,0.83798,0.649333,0.535221,0.664287,0.649333
2,0.6871,0.575958,0.75,0.733245,0.744749,0.75
3,0.5955,0.541353,0.771333,0.76362,0.765418,0.771333
4,0.5595,0.530109,0.785,0.776529,0.780903,0.785
5,0.5455,0.505849,0.792667,0.787427,0.788368,0.792667
6,0.531,0.494763,0.795667,0.791432,0.791608,0.795667
7,0.5238,0.493043,0.793333,0.789836,0.789785,0.793333
8,0.5078,0.494541,0.795333,0.78984,0.791482,0.795333
9,0.4998,0.481815,0.797,0.794776,0.793984,0.797
10,0.4961,0.495787,0.794333,0.7928,0.793278,0.794333


Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
0,0.491104,0.807,0.804326,0.80405,0.807,7.4435,403.035,3.224,23.0


## Export to Google Drive

In [22]:
def COPY_File2Zip(Original_File_PATH, Copy_To_Directory, DELETE_ZIP=False, File_Name=None):
  """Zip a file or folder and copy the archive into another directory.

  The archive is first written to the current working directory as
  ``{File_Name}.zip`` and then copied to ``Copy_To_Directory``. The work is done
  with ``shutil``/``os`` instead of shell commands, so paths containing spaces
  are handled and a failing step raises instead of being silently ignored.

  Args:
    Original_File_PATH: file or directory to archive.
    Copy_To_Directory: existing directory that receives the copy of the archive.
    DELETE_ZIP: when True, remove the working-directory archive after copying.
    File_Name: archive name without the ``.zip`` extension. Defaults to the last
      path component of ``Original_File_PATH``.

  Raises:
    FileNotFoundError: if the source path or the destination directory is missing.
  """
  import shutil  # local import: only this helper needs it

  if not os.path.exists(Original_File_PATH):
    raise FileNotFoundError(f"Original_File_PATH does not exist: {Original_File_PATH}")
  if not os.path.isdir(Copy_To_Directory):
    raise FileNotFoundError(f"Copy_To_Directory does not exist: {Copy_To_Directory}")

  if File_Name is None:
    # normpath drops a trailing "/" so "a/b/" still yields "b" rather than "".
    File_Name = os.path.basename(os.path.normpath(Original_File_PATH))
  print(f"Original_File_PATH: {Original_File_PATH}\n ==>> Copy_To_Directory: {Copy_To_Directory}")

  print(f"*** CREATE {File_Name}.zip ", end="")
  # Keep the entry names `zip -r` would produce: the path as given, minus any leading "/".
  if os.path.isabs(Original_File_PATH):
    root_dir = os.path.abspath(os.sep)
    base_dir = os.path.relpath(os.path.normpath(Original_File_PATH), root_dir)
  else:
    root_dir = os.getcwd()
    base_dir = os.path.normpath(Original_File_PATH)
  # Build the archive in the current working directory and copy from that same
  # location (the working directory is no longer assumed to be /content).
  zip_path = shutil.make_archive(os.path.join(os.getcwd(), File_Name), "zip",
                                 root_dir=root_dir, base_dir=base_dir)

  print("*** COPY TO ", end="")
  shutil.copy(zip_path, os.path.join(Copy_To_Directory, f"{File_Name}.zip"))

  if DELETE_ZIP:
    print("*** DELETE Original_ZIP ", end="")
    os.remove(zip_path)
  print("*** DONE!")

In [23]:
# Toggle: set to False to skip copying the trained models to Google Drive.
EXPORT_TO_DRIVE = True
if EXPORT_TO_DRIVE:
  Copy_To_Directory = "/content/drive/MyDrive"
  if not os.path.isdir(Copy_To_Directory):
    # Without this check a failed copy would go unnoticed and the zip would still be deleted.
    raise FileNotFoundError(
        f"{Copy_To_Directory} not found - mount Google Drive before exporting")
  dt_now = datetime.now(pytz.timezone('Asia/Tokyo'))
  # Zero-padded timestamp: unambiguous (20230409, not 202349) and sorts chronologically.
  dttime = dt_now.strftime('%Y%m%d_%H_%M_%S')
  File_Name = f"{dttime}_CDLE_NLP_TextClassification"
  COPY_File2Zip(MODELDIR, Copy_To_Directory, DELETE_ZIP=True, File_Name=File_Name)

Original_File_PATH: /content/CDLE_NLP/model
 ==>> Copy_To_Directory: /content/drive/MyDrive
*** CREATE 202349_23_23_42_CDLE_NLP_TextClassification.zip *** COPY TO *** DELETE Original_ZIP *** DONE!


# inference

## Load the model

In [24]:
# Directory of the saved model used for inference. save_path_3 is assigned by the
# CustomModelMultiHiddenLayer training cell; to evaluate a model saved in another
# session, assign its directory instead:
# model_path = '**SAVE PATH**'
model_path = save_path_3

In [25]:
# Load the tokenizer for inference, pointing LoadTokenizer at the saved model directory.
new_tokenizer = LoadTokenizer(Config, pretrained=False, model_path=model_path)

********************
model path: /content/CDLE_NLP/model/202349-22-36-47-jarvisx17/japanese-sentiment-analysis-finetuned-emotion
tokenizer語彙数: 32768, tokenizer最大長: 1000000000000000019884624838656
tokenizer input_names: ['input_ids', 'token_type_ids', 'attention_mask']
********************


In [26]:
# Rebuild the architecture that matches the weights stored under model_path.
# Exactly one line must be active, and it has to be the class that was trained
# for that save path (save_path_3 -> CustomModelMultiHiddenLayer).
# new_model = CustomModelPooler(Config, pretrained=False, model_path=model_path)
# new_model = CustomModelLastHiddenLayer(Config, pretrained=False, model_path=model_path)
new_model = CustomModelMultiHiddenLayer(Config, use_multi_layer=-4, pretrained=False, model_path=model_path)
# new_model = CustomModelLastHiddenLayerMeanPooling(Config, pretrained=False, model_path=model_path)
# new_model = CustomModelLastHiddenLayerMaxPooling(Config, pretrained=False, model_path=model_path)
# new_model = CustomModelLastHiddenLayerMeanMaxPooling(Config, pretrained=False, model_path=model_path)

# map_location='cpu' lets the checkpoint load on machines without the GPU it was saved
# from and avoids staging a second copy of the weights on the GPU; load_state_dict then
# copies the values into new_model's existing parameters.
state_dict = torch.load(f"{model_path}/pytorch_model.bin", map_location='cpu')
new_model.load_state_dict(state_dict)

********************
model path: /content/CDLE_NLP/model/202349-22-36-47-jarvisx17/japanese-sentiment-analysis-finetuned-emotion, device: cuda
model hidden_size: 768, num_hidden_layers: 12
=> num_classes: 3
=> requires_grad is ALL False
********************


<All keys matched successfully>

## Run inference

In [27]:
# True labels of the test split, kept aside for scoring.
y_test = test_df['labels']

# Tokenize the test split with the tokenizer loaded from the saved model;
# the listed columns are passed to create_dataset as columns to remove.
remove_columns = ['text','source']
test_dataset = create_dataset(
    test_df[['text','source']],
    new_tokenizer,
    cfg=Config,
    remove_columns=remove_columns,
)

Running tokenizer on dataset:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [28]:
inference_args = TrainingArguments(output_dir='/',         # output location (not meaningful for inference)
                                   label_names=['labels'], # target label column (default: labels)
                                   # fp16=True,  # mixed precision is left disabled for inference
                                   disable_tqdm=False,
                                   push_to_hub=False,
                                   report_to=['none'],
                                   )
# Use new_tokenizer (loaded from the saved model and used to build test_dataset) rather
# than the training-time `tokenizer`, so padding matches the tokenization and the cell
# also works in an inference-only session.
inference_trainer = Trainer(model=new_model,
                            args=inference_args,
                            tokenizer=new_tokenizer,
                            data_collator=DataCollatorWithPadding(
                                tokenizer=new_tokenizer, padding="longest"
                            ),
                            )

# Collected in a list, one entry per model; the scoring cell reads entry 0.
test_predictions = []
pred = inference_trainer.predict(test_dataset).predictions.astype(float)
test_predictions.append(pred.squeeze())

torch.cuda.empty_cache(); gc.collect();

In [29]:
def metrics(y_true, y_prob):
  """Score class predictions against the true labels.

  Args:
    y_true: true class labels.
    y_prob: per-class scores; the argmax over the last axis is the predicted class.

  Returns:
    dict with the keys 'accuracy', 'f1', 'precision' and 'recall'; the last
    three use class-weighted averaging.
  """
  y_hat = y_prob.argmax(-1)

  scores = {'accuracy': accuracy_score(y_true, y_hat)}
  weighted_scorers = (('f1', f1_score),
                      ('precision', precision_score),
                      ('recall', recall_score))
  for key, scorer in weighted_scorers:
    scores[key] = scorer(y_true, y_hat, average='weighted')
  return scores

In [30]:
# Score the first (and only) set of test predictions against the true labels
# and show the metrics as a one-row table.
result = metrics(y_test, test_predictions[0])
display(pd.DataFrame(result, index=[0]))

Unnamed: 0,accuracy,f1,precision,recall
0,0.788667,0.788657,0.788654,0.788667


# footer

In [31]:
# # Terminate the Colab runtime
# from google.colab import runtime
# runtime.unassign()