# Data Preparation

In [1]:
import os
import csv
import random
import numpy as np
import pandas as pd

In [2]:
# Experiment configuration: which data files are read and where results go.
corpus_list = ['cejc', 'mpdd']                           # corpus directory names
situation_list = ['apology', 'request', 'thanksgiving']  # situation directory names
sen_type_list = ['query', 'res']                         # sentence types (part of each file name)
src_type = 'translated'            # file-name prefix of the source-side (input) files
ver_name = '100_culturize_all_both'  # run name; also used as the wandb project name
context_len = 0                    # directory level in the data path and in save_dir

# Root directory for the checkpoints and prediction files of this run.
save_dir = 'outputs/context/{}/{}/'.format(ver_name, context_len)

In [3]:
def get_data_as_list(path):
    """Read the first column of a CSV file into a list of strings.

    Args:
        path: Path of a UTF-8 encoded CSV file; a leading byte-order mark
            is tolerated.

    Returns:
        list[str]: The first field of every non-empty row, in file order.
    """
    # newline='' is what the csv module asks for, so that line breaks inside
    # quoted fields reach the parser unchanged.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        # csv.reader yields [] for a blank line; indexing such a row would
        # raise IndexError, so blank lines are skipped.
        return [row[0] for row in csv.reader(f) if row]

def get_df(corpus_list, situation_list, sen_type_list, src_type, context_len, train_type,
           data_root='/nfs/nas-7.1/yamashita/LAB/dialogue_data/data'):
    """Collect parallel input/target sentences into one DataFrame.

    For every (corpus, situation, sen_type) combination two files are read
    from ``{data_root}/{corpus}/{situation}/{context_len}/``:

    * ``rewrited_{sen_type}_{train_type}``   -> ``target_text``
    * ``{src_type}_{sen_type}_{train_type}`` -> ``input_text``

    Args:
        corpus_list: Corpus directory names to iterate over.
        situation_list: Situation directory names to iterate over.
        sen_type_list: Sentence types used in the file names.
        src_type: File-name prefix of the input-side files.
        context_len: Directory level below the situation directory.
        train_type: Split suffix of the file names (the notebook uses
            'train', 'val' and 'test').
        data_root: Directory that contains the per-corpus folders.

    Returns:
        pandas.DataFrame: String columns ``prefix``, ``input_text`` and
        ``target_text``; ``prefix`` holds "<corpus> <situation> <sen_type>".

    Raises:
        ValueError: If an input file and its target file yield a different
            number of rows, since the pairs could no longer be aligned.
    """
    prefix = []
    input_text = []
    target_text = []
    for corpus in corpus_list:
        for situation in situation_list:
            for sen_type in sen_type_list:
                split_dir = f'{data_root}/{corpus}/{situation}/{context_len}'
                target_path = f'{split_dir}/rewrited_{sen_type}_{train_type}'
                input_path = f'{split_dir}/{src_type}_{sen_type}_{train_type}'
                targets = get_data_as_list(target_path)
                # Read once and reuse the list for the prefix count.
                inputs = get_data_as_list(input_path)
                if len(inputs) != len(targets):
                    # Unequal lengths would shift every following pair.
                    raise ValueError(
                        f'Row count mismatch: {input_path} has {len(inputs)} rows '
                        f'but {target_path} has {len(targets)} rows'
                    )
                target_text += targets
                input_text += inputs
                prefix += [f'{corpus} {situation} {sen_type}'] * len(inputs)
    df = pd.DataFrame(
        {'prefix': prefix, 'input_text': input_text, 'target_text': target_text},
        columns=['prefix', 'input_text', 'target_text'],
    ).astype(str)
    return df

# Finetune

In [4]:
import logging
import pandas as pd
from simpletransformers.t5 import T5Model, T5Args

# Emit INFO-level log records, but keep the "transformers" logger at WARNING.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Load the training and validation splits; the "prefix" column of both is
# replaced by an empty string.
splits = {}
for train_type in ('train', 'val'):
    splits[train_type] = get_df(
        corpus_list, situation_list, sen_type_list, src_type, context_len, train_type
    ).assign(prefix="")
train_df = splits['train']
eval_df = splits['val']

# Preview the first five rows of each split.
display(train_df.head(5))
display(eval_df.head(5))

Unnamed: 0,prefix,input_text,target_text
0,,query: 啊。。爸爸+對不起。,啊，爸爸對不起...
1,,query: 啊。。對不起，我不知道,啊，不好意思。
2,,query: 抱歉。而+角國。,還要一份角煮。
3,,query: 還有，它在那邊。。我有一個推薦。。媽媽+對不起。,啊你看那邊。有他們的招牌誒。抱歉，媽媽。
4,,query: 啊。。對不起，我不知道。我可以嗎？,啊，不好意思。


Unnamed: 0,prefix,input_text,target_text
0,,query: 是啊。。對不起，我不知道,那個，不好意思。
1,,query: 對不起，我暫停了一下。,不好意思，讓您久等了。
2,,query: 你是認真的嗎？。你是認真的嗎？。抱歉。我讓你去做吧。,開玩笑的吧？你是認真的嗎？不好意思，借我弄一下行嗎？
3,,query: 是啊。。抱歉,嗯...抱歉誒。
4,,query: 啊。。我明白了。抱歉,啊，是這樣啊。辛苦你了。


In [5]:
# Fine-tuning configuration for the mT5 model.
model_args = T5Args()

# Sequence handling and generation.
model_args.max_seq_length = 128
model_args.length_penalty = 20
model_args.num_return_sequences = 1
model_args.preprocess_inputs = False

# Optimisation.
model_args.train_batch_size = 2
model_args.eval_batch_size = 2
model_args.num_train_epochs = 20
model_args.learning_rate = 3e-5
model_args.fp16 = False
model_args.use_multiprocessing = False

# Evaluation during training, with early stopping on eval_loss.
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 500
model_args.use_early_stopping = True
model_args.early_stopping_metric = 'eval_loss'
model_args.early_stopping_metric_minimize = True
model_args.early_stopping_patience = 3

# Checkpointing and output locations (all below save_dir).
# save_eval_checkpoints is assigned exactly once: no checkpoint per evaluation.
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = True
model_args.save_steps = -1
model_args.best_model_dir = save_dir+'best_model/'
model_args.output_dir = save_dir+'ckpt/'
model_args.overwrite_output_dir = True

# Always rebuild features from the DataFrames instead of using cached ones.
model_args.no_cache = True
model_args.reprocess_input_data = True

# Weights & Biases project name for this run.
model_args.wandb_project = ver_name

model = T5Model("mt5", "google/mt5-base", args=model_args)

# Train the model
os.environ['WANDB_CONSOLE'] = 'off'
model.train_model(train_df, eval_data=eval_df)

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=2636.0), HTML(value='')))

INFO:simpletransformers.t5.t5_model: Training started



Using Adafactor for T5


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=20.0, style=ProgressStyle(description_width='…

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: natsukinateyamashita (use `wandb login --relogin` to force relogin)


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 20', max=1318.0, style=ProgressStyle(d…

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1607370141920/work/torch/csrc/utils/python_arg_parser.cpp:882.)
  exp_avg_sq_row.mul_(beta2t).add_(1.0 - beta2t, update.mean(dim=-1))
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 3 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 4 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 5 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 6 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 7 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 8 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 9 of 20', max=1318.0, style=ProgressStyle(d…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3





INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 10 of 20', max=1318.0, style=ProgressStyle(…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 11 of 20', max=1318.0, style=ProgressStyle(…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 2
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 12 of 20', max=1318.0, style=ProgressStyle(…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 2
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 13 of 20', max=1318.0, style=ProgressStyle(…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 2
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3





INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 14 of 20', max=1318.0, style=ProgressStyle(…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 3
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))







INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 15 of 20', max=1318.0, style=ProgressStyle(…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 1
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3





INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 16 of 20', max=1318.0, style=ProgressStyle(…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 2
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3
INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: No improvement in eval_loss
INFO:simpletransformers.t5.t5_model: Current step: 3
INFO:simpletransformers.t5.t5_model: Early stopping patience: 3





INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, description='Running Epoch 17 of 20', max=1318.0, style=ProgressStyle(…

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=330.0), HTML(value='')))






INFO:simpletransformers.t5.t5_model: Patience of 3 steps reached
INFO:simpletransformers.t5.t5_model: Training terminated.






INFO:simpletransformers.t5.t5_model: Training of google/mt5-base model complete. Saved to outputs/context/100_culturize_all_both/0/ckpt/.


(22500,
 {'global_step': [500,
   1000,
   1318,
   1500,
   2000,
   2500,
   2636,
   3000,
   3500,
   3954,
   4000,
   4500,
   5000,
   5272,
   5500,
   6000,
   6500,
   6590,
   7000,
   7500,
   7908,
   8000,
   8500,
   9000,
   9226,
   9500,
   10000,
   10500,
   10544,
   11000,
   11500,
   11862,
   12000,
   12500,
   13000,
   13180,
   13500,
   14000,
   14498,
   14500,
   15000,
   15500,
   15816,
   16000,
   16500,
   17000,
   17134,
   17500,
   18000,
   18452,
   18500,
   19000,
   19500,
   19770,
   20000,
   20500,
   21000,
   21088,
   21500,
   22000,
   22406,
   22500],
  'eval_loss': [9.201855061270974,
   6.376867352109967,
   5.281393578558257,
   4.9027706724224664,
   4.210173011187351,
   3.7315418489051586,
   3.6083257173046923,
   3.455683136708809,
   3.253776863714059,
   3.1951164137684938,
   3.174012767997655,
   3.1350189398861295,
   3.039152871410955,
   3.015262727778066,
   2.984450937423742,
   2.950635526103504,
   2.89313683

# Test

In [6]:

import logging
import sacrebleu
import pandas as pd
from simpletransformers.t5 import T5Model, T5Args


# Emit INFO-level log records, but keep the "transformers" logger at WARNING.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Generation settings used when predicting with the fine-tuned model.
model_args = T5Args()
for option, value in (("max_length", 128), ("length_penalty", 20), ("num_beams", 10)):
    setattr(model_args, option, value)

# Load the model stored in the best_model/ directory below save_dir.
model = T5Model("mt5", f"{save_dir}best_model/", args=model_args)

In [7]:
# Load the test split (this re-binds the name eval_df).
train_type = 'test'
eval_df = get_df(corpus_list, situation_list, sen_type_list, src_type, context_len, train_type)

# get_df stores "<corpus> <situation> <sen_type>" in the prefix column, so a
# substring match on the corpus name selects the rows of that corpus.
mpdd_rows = eval_df.loc[eval_df["prefix"].str.contains("mpdd")]
cejc_rows = eval_df.loc[eval_df["prefix"].str.contains("cejc")]

to_ja_truth, to_ja_input, to_ja_prefix = (
    mpdd_rows[column].tolist() for column in ("target_text", "input_text", "prefix")
)
to_zh_truth, to_zh_input, to_zh_prefix = (
    cejc_rows[column].tolist() for column in ("target_text", "input_text", "prefix")
)

# Inputs are sent with an empty prefix, i.e. only the ": " separator is
# prepended. NOTE(review): presumably this mirrors the blanked prefix column
# used for fine-tuning - confirm.
to_ja_input = [f": {input_text}" for input_text in to_ja_input]
to_zh_input = [f": {input_text}" for input_text in to_zh_input]
# Alternative that keeps the corpus/situation/sentence-type prefix:
# to_ja_input = [prefix + ": " + input_text for prefix, input_text in zip(to_ja_prefix, to_ja_input)]
# to_zh_input = [prefix + ": " + input_text for prefix, input_text in zip(to_zh_prefix, to_zh_input)]
to_ja_input[:10]

[': query: 今日はお仕事の邪魔をして申し訳ありません。',
 ': query: あなたが傷ついた時はあなたの世話をするべきなのに、代わりに私の世話をしなければならない、ごめんなさい...',
 ': query: すみません、大した読者ではありません。',
 ': query: ごめん、先に片付けろって意味だったんだけど、持ってるとストレスがたまるから。',
 ': query: お兄ちゃん、次兄ちゃん、遅くなってごめんね。',
 ': query: 皆さん、本当にごめんなさい! カップルはここの結婚式場が綺麗でロマンチックだとは思っていませんでした。 ということで、二人だけで結婚式をするために、ロマンチックな素敵な場所に行ってきました！（笑）。 カップルがいなくなってしまいましたが、私たちの結婚式はまだまだ続きます。',
 ': query: ちょっと意地悪をしていたことが判明しました！ 弟さん、ごめんなさい! あけましておめでとうございます。',
 ': query: 9時過ぎにお腹を空かせてしまってごめんなさい。',
 ': query: 早いですね。 遅れているのは私だけでしょうか。 お待たせしました。',
 ': query: ごめんなさい 彼と話してみるわ']

In [8]:
# Predict: generate outputs for the mpdd-sourced inputs first, then for the
# cejc-sourced inputs.
to_ja_preds, to_zh_preds = (
    model.predict(batch) for batch in (to_ja_input, to_zh_input)
)

# Optional corpus-level BLEU (disabled). sacrebleu expects the references as
# a list of reference streams, hence the extra brackets.
# NOTE(review): the default tokenizer may not suit Chinese/Japanese text -
# check sacrebleu's `tokenize` option before trusting the scores.
# to_ja_bleu = sacrebleu.corpus_bleu(to_ja_preds, [to_ja_truth])
# print("--------------------------")
# print("to_ja_bleu: ", to_ja_bleu.score)
# to_zh_bleu = sacrebleu.corpus_bleu(to_zh_preds, [to_zh_truth])
# print("--------------------------")
# print("to_zh_bleu: ", to_zh_bleu.score)

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=20.0, style=ProgressStyle(descri…






HBox(children=(FloatProgress(value=0.0, description='Decoding outputs', max=160.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=22.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Decoding outputs', max=174.0, style=ProgressStyle(descrip…




In [9]:
# Pair every prediction with its reference. The frames are built row-wise
# (one row of predictions, one row of references) and transposed on export,
# so each CSV has one line per sentence with a run-name column and a
# 'truth' column.
column_labels = [f'{ver_name}', 'truth']
r_ja_df = pd.DataFrame([to_ja_preds, to_ja_truth], index=column_labels)
r_zh_df = pd.DataFrame([to_zh_preds, to_zh_truth], index=column_labels)

# utf_8_sig writes a byte-order mark at the start of each file.
for result_frame, file_name in ((r_ja_df, 'ja_preds_truth.csv'), (r_zh_df, 'zh_preds_truth.csv')):
    result_frame.T.to_csv(save_dir+file_name, encoding='utf_8_sig')