In [116]:
from transformers import (
    AutoModelForSeq2SeqLM, AutoTokenizer, Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq,
    BartForConditionalGeneration, BartTokenizer, pipeline
)

from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset

import numpy as np
import pandas as pd

import mecab_ko as Mecab
from nltk.translate.bleu_score import sentence_bleu

In [117]:
model = BartForConditionalGeneration.from_pretrained("heegyu/kobart-text-style-transfer")
tokenizer = BartTokenizer.from_pretrained("heegyu/kobart-text-style-transfer")

class TextStyleTransferDataset(Dataset):
    def __init__(self, dataframe, tokenizer):
        self.df = dataframe
        self.tokenizer = tokenizer
    
    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        row = self.df.iloc[index, :]
        text1 = row[0]
        text2 = row[1]
        target_style = row.index[1]
        target_style_name = style_map[target_style]
    
        encoder_text = f"{target_style_name} 말투로 변환: {text1}"
        decoder_text = f"{text2}{self.tokenizer.eos_token}"
        model_inputs = self.tokenizer(encoder_text, max_length=64, truncation=True)
        with self.tokenizer.as_target_tokenizer():
            labels = tokenizer(decoder_text, max_length=64, truncation=True)
        model_inputs['labels'] = labels['input_ids']
        del model_inputs['token_type_ids']
        
        return model_inputs

def transfer_text_style(model, text, target_style, **kwargs):
    input = f"{target_style} 말투로 변환: {text}"
    out = model(input, max_length=64, **kwargs)
    print(text, target_style, out[0]['generated_text'], sep=" -> ")
    
model = pipeline('text2text-generation', model='heegyu/kobart-text-style-transfer')
styles = ['문어체','구어체','안드로이드','아재','채팅', '초등학생','이모티콘','enfp','신사','할아버지','할머니','중학생', '왕','나루토','선비','소심한','번역기']
style_map = {
    'formal': '문어체',
    'informal': '구어체',
    'android': '안드로이드',
    'azae': '아재',
    'chat': '채팅',
    'choding': '초등학생',
    'emoticon': '이모티콘',
    'enfp': 'enfp',
    'gentle': '신사',
    'halbae': '할아버지',
    'halmae': '할머니',
    'joongding': '중학생',
    'king': '왕',
    'naruto': '나루토',
    'seonbi': '선비',
    'sosim': '소심한',
    'translator': '번역기'
}

text = "같다 전공 친구."
for style in styles: transfer_text_style(model, text, style)

loading configuration file https://huggingface.co/heegyu/kobart-text-style-transfer/resolve/main/config.json from cache at /home/dodant/.cache/huggingface/transformers/779bf78efd6eb3e3487551bc1dde4b5ecae50902202a7c4ab92da7c04f204fc7.0e9e5b476887b939765fc5f36a4b627f02eb5e26ab7be309d95f3d7c30234f37
Model config BartConfig {
  "_name_or_path": "gogamza/kobart-base-v2",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "Heewon Jeon(madjakarta@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerd

같다 전공 친구. -> 문어체 -> 같은 학과 친구에요.
같다 전공 친구. -> 구어체 -> 나는 같은 전공 친구야.
같다 전공 친구. -> 안드로이드 -> 안드로이드. 전공. 친구.
같다 전공 친구. -> 아재 -> 아 맞~ 전공 친구여~
같다 전공 친구. -> 채팅 -> ᄋᄋ 난 전공 친구임
같다 전공 친구. -> 초등학생 -> ᄋᄋ 같은 전공 친구임
같다 전공 친구. -> 이모티콘 -> 난 전공 전공 친구야 ( ́`)
같다 전공 친구. -> enfp -> 난 다 전공 친구야 ᄒᄒ
같다 전공 친구. -> 신사 -> 같네 전공 친구입니다.
같다 전공 친구. -> 할아버지 -> 나는... 전공이 같구먼...
같다 전공 친구. -> 할머니 -> 염병 전공이여
같다 전공 친구. -> 중학생 -> ᄋ 전공이 같음
같다 전공 친구. -> 왕 -> 그렇소. 전공이 같소.
같다 전공 친구. -> 나루토 -> 난 전공 전공이라니깐!
같다 전공 친구. -> 선비 -> 소생은 전공이 같소!
같다 전공 친구. -> 소심한 -> 난 전공 전공 친구야..
같다 전공 친구. -> 번역기 -> 나는 같은 전공이 같다.


In [134]:
model_name = "gogamza/kobart-base-v2"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

df = pd.read_csv("../MY_DATA/shots/gloss2text/0-shot.csv")
style_map = {'gloss': '글로스', 'spoken': '구어체'}

loading configuration file https://huggingface.co/gogamza/kobart-base-v2/resolve/main/config.json from cache at /home/dodant/.cache/huggingface/transformers/54a37e9385f90886428b084042f151c1a699203416d41765d94aac4cddb5fd5c.d098ef3866c1da94bdfaa5c1f24ecb7c5c16b37423b79263fbd3668d2ae61f91
Model config BartConfig {
  "_name_or_path": "gogamza/kobart-base-v2",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartModel"
  ],
  "attention_dropout": 0.0,
  "author": "Heewon Jeon(madjakarta@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers":

In [135]:
# df = pd.read_csv("../MY_DATA/shots/gloss2text/0-shot.csv")
# style_map = {'gloss': '글로스', 'spoken': '구어체'}

# df = pd.read_csv("../smilestyle_dataset.tsv", sep="\t")
# style_map = {
#     'formal': '문어체',
#     'informal': '구어체',
#     'android': '안드로이드',
#     'azae': '아재',
#     'chat': '채팅',
#     'choding': '초등학생',
#     'emoticon': '이모티콘',
#     'enfp': 'enfp',
#     'gentle': '신사',
#     'halbae': '할아버지',
#     'halmae': '할머니',
#     'joongding': '중학생',
#     'king': '왕',
#     'naruto': '나루토',
#     'seonbi': '선비',
#     'sosim': '소심한',
#     'translator': '번역기'
# }

dataset = TextStyleTransferDataset(df, tokenizer)
out = dataset[0]
print(out)
print(out['input_ids'])
print(out['labels'])
print(tokenizer.decode(out['input_ids']))
print(tokenizer.decode(out['labels']))
# out = dataset[1]
# print(out['input_ids'])
# print(out['labels'])
# print(tokenizer.decode(out['input_ids']))
# print(tokenizer.decode(out['labels']))

{'input_ids': [14112, 11763, 12687, 14070, 13282, 10338, 14296, 13716, 257, 15015, 16687, 15851, 27583, 23925], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': [15015, 14044, 15597, 15851, 14150, 15851, 14129, 11820, 12244, 262, 1]}
[14112, 11763, 12687, 14070, 13282, 10338, 14296, 13716, 257, 15015, 16687, 15851, 27583, 23925]
[15015, 14044, 15597, 15851, 14150, 15851, 14129, 11820, 12244, 262, 1]
구어체 말투로 변환: 조선 시대 언제 까지?
조선 시대는 언제부터 언제까지였죠?</s>


In [136]:
df_train, df_test = train_test_split(df, test_size=0.01, random_state=42)
train_dataset, test_dataset = TextStyleTransferDataset(df_train, tokenizer), TextStyleTransferDataset(df_test, tokenizer)
# print(len(df_train), len(df_test))

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
data_collator = DataCollatorForSeq2Seq(model=model, tokenizer=tokenizer)

loading configuration file https://huggingface.co/gogamza/kobart-base-v2/resolve/main/config.json from cache at /home/dodant/.cache/huggingface/transformers/54a37e9385f90886428b084042f151c1a699203416d41765d94aac4cddb5fd5c.d098ef3866c1da94bdfaa5c1f24ecb7c5c16b37423b79263fbd3668d2ae61f91
Model config BartConfig {
  "_name_or_path": "gogamza/kobart-base-v2",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartModel"
  ],
  "attention_dropout": 0.0,
  "author": "Heewon Jeon(madjakarta@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers":

In [142]:
model_pth = "../model_chp/bart-gloss2text3"

# from datasets import load_metric
# 
# bleu_metric = load_metric("bleu")
# 
# def compute_metrics(p):
#     decoded_preds = [tokenizer.decode(pred, skip_special_tokens=True, clean_up_tokenization_spaces=True) for pred in p.predictions]
#     decoded_labels = [tokenizer.decode(label, skip_special_tokens=True, clean_up_tokenization_spaces=True) for label in p.label_ids]
#     
#     # BLEU 점수 계산
#     bleu_score = bleu_metric.compute(predictions=decoded_preds, references=decoded_labels)
#     return {"bleu": bleu_score["score"]}


training_args = Seq2SeqTrainingArguments(
    output_dir=model_pth, #The output directory
    overwrite_output_dir=True, #overwrite the content of the output directory
    num_train_epochs=30, # number of training epochs
    per_device_train_batch_size=32, # batch size for training
    per_device_eval_batch_size=32,  # batch size for evaluation
    eval_steps=500, # Number of update steps between two evaluations.
    save_steps=1000, # after # steps model is saved 
    warmup_steps=300,# number of warmup steps for learning rate scheduler
    prediction_loss_only=True,
    evaluation_strategy="steps",
    save_total_limit=3
)
print(training_args)
# trainer = Seq2SeqTrainer(
#     model=model,
#     args=training_args,
#     data_collator=data_collator,
#     train_dataset=train_dataset,
#     eval_dataset=test_dataset,
#     # compute_metrics=compute_metrics,
# )
# 
# trainer.train()
# trainer.save_model(model_pth)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Seq2SeqTrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
bf16=False,
bf16_full_eval=False,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_steps=500,
evaluation_strategy=IntervalStrategy.STEPS,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
generation_max_length=None,
generation_num_beams=None,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=5e-05,
length_column_name=length,
load_best_model_at_end=False,
local_rank=-1,
log_level=-1,
log_level_replica=-1,
log_on_each_node=T

In [138]:
nlg = pipeline('text2text-generation', model=model_pth, tokenizer=tokenizer)

def generate_text(pipe, text, target_style, num_return_sequences=5, max_length=60):
    target_style_name = style_map[target_style]
    text = f"{target_style_name} 말투로 변환: {text}"
    out = pipe(text, num_return_sequences=num_return_sequences, max_length=max_length)
    return [x['generated_text'] for x in out]

loading configuration file ../model_chp/bart-gloss2text3/config.json
Model config BartConfig {
  "_name_or_path": "../model_chp/bart-gloss2text3",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "author": "Heewon Jeon(madjakarta@gmail.com)",
  "bos_token_id": 1,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": false,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 1,
  "extra_pos_embeddings": 2,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 1,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "NEGAT

In [139]:
target_styles = df.columns
print(target_styles)
src_text = """
어쩌다 마주친 그대 모습에 내 마음을 빼앗겨 버렸네
"""

print("입력 문장:", src_text)
print(generate_text(nlg, src_text, 'gloss', num_return_sequences=1, max_length=1000)[0])
print(generate_text(nlg, src_text, 'spoken', num_return_sequences=1, max_length=1000)[0])

Index(['gloss', 'spoken'], dtype='object')
입력 문장: 
어쩌다 마주친 그대 모습에 내 마음을 빼앗겨 버렸네
어쩌다 마주친 그대 모습에 내 마음을 빼앗겼다.
어쩌다 마주친 그대 모습에 내 마음을 빼앗겼다.


In [140]:
mecab = Mecab.Tagger('-Owakati')

df = pd.read_csv("../MY_DATA/gloss_from_book.csv")
SCORE1, SCORE2, SCORE3, SCORE4, SCORET = [], [], [], [], []
for i in range(len(df)):
    input_text = df.iloc[i]['gloss']
    gt_text = df.iloc[i]['spoken']
    output_text = generate_text(nlg, input_text, 'spoken', num_return_sequences=1, max_length=1000)[0]
    gtext = mecab.parse(gt_text).split()
    outputtext = mecab.parse(output_text).split()
    score1 = sentence_bleu([gtext], outputtext, weights=(1, 0, 0, 0))
    score2 = sentence_bleu([gtext], outputtext, weights=(0, 1, 0, 0))
    score3 = sentence_bleu([gtext], outputtext, weights=(0, 0, 1, 0))
    score4 = sentence_bleu([gtext], outputtext, weights=(0, 0, 0, 1))
    scoret = sentence_bleu([gtext], outputtext)
    print(f'{i}/{len(df)} {input_text} -> {output_text}')
    print(gt_text)
    print(f"BLEU score: {score1:.3f} {score2:.3f} {score3:.3f} {score4:.3f} {scoret:.3f}")
    SCORE1.append(score1)
    SCORE2.append(score2)
    SCORE3.append(score3)
    SCORE4.append(score4)
    SCORET.append(scoret)
BLEU1_AVG, BLEU2_AVG, BLEU3_AVG, BLEU4_AVG, BLEUT_AVG = np.mean(SCORE1), np.mean(SCORE2), np.mean(SCORE3), np.mean(SCORE4), np.mean(SCORET)
print(f"BLEU score: {BLEU1_AVG:.3f} {BLEU2_AVG:.3f} {BLEU3_AVG:.3f} {BLEU4_AVG:.3f} {BLEUT_AVG:.3f}")

0/128 안녕 ? 만나다 반갑다 . 나 농인 . -> 안녕하세요 만나서 반갑습니다.
 안녕하십니까? 만나서 반갑습니다. 저는 농인입니다.
BLEU score: 0.413 0.337 0.236 0.189 0.281
1/128 안녕 ? 만나다 반갑다 . 나 청인 . -> 안녕하세요 만나서 반갑다
 안녕하십니까? 만나서 반갑습니다. 저는 청인입니다.
BLEU score: 0.263 0.184 0.074 0.000 0.000


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


2/128 당신 이름 무엇 ? -> 당신의 이름이 뭐에요?
 당신의 이름은 무엇입니까?
BLEU score: 0.500 0.286 0.167 0.000 0.000
3/128 당신 농인 ? -> 당신은 농인인가요?
 당신은 농인입니까?
BLEU score: 0.800 0.500 0.333 0.000 0.000
4/128 아니오 . 나 청인 . -> 나는 청인입니다.
 아니오. 나는 청인입니다.
BLEU score: 0.670 0.670 0.670 0.670 0.670
5/128 고맙다 . 미안하다 . 괜찮다 . -> 고맙다
 고맙습니다. 미안합니다. 괜찮습니다.
BLEU score: 0.015 0.000 0.000 0.000 0.000
6/128 수고 다음 또 만나다 -> 수고 다음에 또 만나자
 수고하셨습니다. 다음에 또 뵙겠습니다.
BLEU score: 0.245 0.147 0.092 0.000 0.000


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


7/128 나이 몇 ? 나 20 . -> 나이 몇 몇이죠?
 나이가 어떻게 되세요? 나는 20살입니다.
BLEU score: 0.123 0.000 0.000 0.000 0.000
8/128 나 같다 . 나 20 . -> 나 같아 같아
 저랑 동갑이군요. 나도 20살입니다.
BLEU score: 0.049 0.000 0.000 0.000 0.000
9/128 우리 둘 친하게 지내다 괜찮다 . -> 우리 둘 친하게 지내니까 괜찮아
 우리 친하게 지냅시다.
BLEU score: 0.375 0.143 0.000 0.000 0.000
10/128 괜찮다 OK 좋다 . -> 괜찮아 OK, 좋아.
 좋아요.
BLEU score: 0.286 0.000 0.000 0.000 0.000
11/128 휴대폰 번호 무엇 ? -> 휴대폰 번 뭐에요?
 휴대폰 번호가 어떻게 되나요?
BLEU score: 0.282 0.000 0.000 0.000 0.000
12/128 나 휴대폰 번호 010-2456-2642 . -> 내 휴대폰 번호 010-2456-2642.
 내 휴대폰 번호는 010-2456-2642입니다.
BLEU score: 0.801 0.601 0.458 0.267 0.492
13/128 앞으로 자주 문자메시지를 주고받다 연락하다 OK . -> 앞으로 자주 문자메시지를 주고받기로 했는데 연락할게.
 앞으로 자주 연락 나눠요.
BLEU score: 0.357 0.154 0.083 0.000 0.000
14/128 당신 1살 위 . -> 당신은 1살 위야
 저보다 한 살 더 많으시네요.
BLEU score: 0.101 0.000 0.000 0.000 0.000
15/128 나 2살 아래 . -> 나는 2살 아래야
 제가 두 살 더 적습니다.
BLEU score: 0.119 0.000 0.000 0.000 0.000
16/128 그 신발 돈 얼마 ? -> 그 신발 돈 얼마야?
 그 신발은 얼마입니까?
BLEU score: 0.667 0.200 0.000 0.000 0.000
17/

In [None]:
BLEU score: 0.387 0.158 0.084 0.039 0.056
BLEU score: 0.394 0.148 0.077 0.040 0.060
BLEU score: 0.389 0.139 0.071 0.036 0.050

BLEU score: 0.418 0.089 0.036 0.015 0.028

In [141]:
mecab = Mecab.Tagger('-Owakati')

df = pd.read_csv("../MY_DATA/gloss_from_book.csv")
SCORE1, SCORE2, SCORE3, SCORE4, SCORET = [], [], [], [], []
for i in range(len(df)):
    input_text = df.iloc[i]['gloss']
    gt_text = df.iloc[i]['spoken']
    gtext = mecab.parse(input_text).split()
    outputtext = mecab.parse(gt_text).split()
    score1 = sentence_bleu([gtext], outputtext, weights=(1, 0, 0, 0))
    score2 = sentence_bleu([gtext], outputtext, weights=(0, 1, 0, 0))
    score3 = sentence_bleu([gtext], outputtext, weights=(0, 0, 1, 0))
    score4 = sentence_bleu([gtext], outputtext, weights=(0, 0, 0, 1))
    scoret = sentence_bleu([gtext], outputtext)
    print(f'{i}/{len(df)} {input_text} -> {gt_text}')
    print(f"BLEU score: {score1:.3f} {score2:.3f} {score3:.3f} {score4:.3f} {scoret:.3f}")
    SCORE1.append(score1)
    SCORE2.append(score2)
    SCORE3.append(score3)
    SCORE4.append(score4)
    SCORET.append(scoret)
BLEU1_AVG, BLEU2_AVG, BLEU3_AVG, BLEU4_AVG, BLEUT_AVG = np.mean(SCORE1), np.mean(SCORE2), np.mean(SCORE3), np.mean(SCORE4), np.mean(SCORET)
print(f"BLEU score: {BLEU1_AVG:.3f} {BLEU2_AVG:.3f} {BLEU3_AVG:.3f} {BLEU4_AVG:.3f} {BLEUT_AVG:.3f}")

0/128 안녕 ? 만나다 반갑다 . 나 농인 . ->  안녕하십니까? 만나서 반갑습니다. 저는 농인입니다.
BLEU score: 0.500 0.077 0.000 0.000 0.000
1/128 안녕 ? 만나다 반갑다 . 나 청인 . ->  안녕하십니까? 만나서 반갑습니다. 저는 청인입니다.
BLEU score: 0.500 0.077 0.000 0.000 0.000
2/128 당신 이름 무엇 ? ->  당신의 이름은 무엇입니까?
BLEU score: 0.571 0.000 0.000 0.000 0.000
3/128 당신 농인 ? ->  당신은 농인입니까?
BLEU score: 0.600 0.000 0.000 0.000 0.000
4/128 아니오 . 나 청인 . ->  아니오. 나는 청인입니다.
BLEU score: 0.714 0.333 0.200 0.000 0.000
5/128 고맙다 . 미안하다 . 괜찮다 . ->  고맙습니다. 미안합니다. 괜찮습니다.
BLEU score: 0.597 0.224 0.000 0.000 0.000
6/128 수고 다음 또 만나다 ->  수고하셨습니다. 다음에 또 뵙겠습니다.
BLEU score: 0.250 0.000 0.000 0.000 0.000
7/128 나이 몇 ? 나 20 . ->  나이가 어떻게 되세요? 나는 20살입니다.
BLEU score: 0.417 0.091 0.000 0.000 0.000
8/128 나 같다 . 나 20 . ->  저랑 동갑이군요. 나도 20살입니다.
BLEU score: 0.333 0.091 0.000 0.000 0.000
9/128 우리 둘 친하게 지내다 괜찮다 . ->  우리 친하게 지냅시다.
BLEU score: 0.359 0.112 0.000 0.000 0.000
10/128 괜찮다 OK 좋다 . ->  좋아요.
BLEU score: 0.245 0.000 0.000 0.000 0.000
11/128 휴대폰 번호 무엇 ? ->  휴대폰 번호가 어떻게 되나요?
BLEU score: 0.42

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
