In [1]:
import glob
import csv
import os
import pandas as pd
import sacrebleu
# import pyter
import numpy as np
# NOTE(review): `plt` is bound to the top-level `matplotlib` package here, not to
# `matplotlib.pyplot` as the alias usually suggests. Only `plt.style` is used on
# the line below, which works on the package object.
import matplotlib as plt
import matplotlib.style
# Select matplotlib's 'dark_background' style sheet for figures drawn later.
plt.style.use('dark_background')

In [2]:
from bert_score import score
def calc_bert_score(cands, refs, lang="others", device=1):
    """Compute corpus-averaged BERTScore for paired sentences.

    Args:
        cands (List[str]): Candidate (hypothesis) sentences.
        refs (List[str]): Reference sentences, aligned one-to-one with ``cands``.
        lang (str): Language code forwarded to ``bert_score.score``. Defaults to
            ``"others"``, the value this notebook previously hard-coded.
        device: Device forwarded to ``bert_score.score``. Defaults to ``1``, the
            previously hard-coded value; override it with any value accepted by
            that argument when the default device is not available.

    Returns:
        Tuple[float, float, float]: Mean precision, mean recall and mean F1 over
        all sentence pairs.
    """
    precision, recall, f1 = score(cands, refs, lang=lang, verbose=True, device=device)
    # Each result is tensor-like (exposes ``.numpy()``); reduce each to its mean.
    precision_mean, recall_mean, f1_mean = (
        np.mean(values.numpy().tolist()) for values in (precision, recall, f1)
    )
    return precision_mean, recall_mean, f1_mean

In [None]:
def get_scores(hyp,ref):
    """Score hypotheses against references with BLEU, self-BLEU, TER and BERTScore.

    Args:
        hyp (List[str]): Hypothesis sentences.
        ref (List[str]): Reference sentences, aligned one-to-one with ``hyp``.

    Returns:
        list: ``[corpus BLEU, self-BLEU, mean TER, BERTScore precision,
        BERTScore recall, BERTScore F1]``. Self-BLEU is NaN when fewer than two
        hypotheses are given, because there is no pair to compare.
    """
    # The notebook-level ``import pyter`` is commented out, so import it here;
    # without this the TER computation below fails with NameError.
    import pyter

    corpus_bleu = sacrebleu.corpus_bleu(hyp, [ref])

    # Self-BLEU: average sentence BLEU of every hypothesis scored against every
    # *other* hypothesis (all ordered pairs i != j).
    n_hyp = len(hyp)
    pair_total = 0.0
    for i, candidate in enumerate(hyp):
        for j, other in enumerate(hyp):
            if i != j:
                pair_total += sacrebleu.sentence_bleu(candidate, [other]).score
    n_pairs = n_hyp * (n_hyp - 1)
    # Guard the 0- and 1-hypothesis cases, which used to raise ZeroDivisionError.
    self_bleu = pair_total / n_pairs if n_pairs else float('nan')

    # TER per sentence pair on whitespace-separated tokens, then averaged.
    # NOTE(review): whitespace splitting gives very coarse tokens for unsegmented
    # Japanese/Chinese text -- confirm this is the intended granularity.
    ter_scores = [pyter.ter(h.split(), r.split()) for h, r in zip(hyp, ref)]
    corpus_ter = np.mean(np.array(ter_scores))

    P, R, F1 = calc_bert_score(hyp, ref)
    return [corpus_bleu.score, self_bleu, corpus_ter, P, R, F1]

In [None]:
def get_data_as_list(path):
    """Read the first column of a CSV file into a list of strings.

    Args:
        path (str): CSV file path. The file is decoded as ``utf-8-sig``, so a
            leading byte-order mark is dropped.

    Returns:
        List[str]: First field of every row, in file order.

    Raises:
        IndexError: If a row has no fields (for example a blank line).
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted fields
    # are altered by universal-newline translation.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        return [row[0] for row in csv.reader(f)]

def get_df(corpus_list, situation_list, sen_type_list, src_type, context_len, train_type,
           data_root='/nfs/nas-7.1/yamashita/LAB/dialogue_data/data'):
    """Collect parallel source/target sentences into one DataFrame.

    For every (corpus, situation, sentence type) combination two files are read
    from ``{data_root}/{corpus}/{situation}/{context_len}/``:
    ``rewrited_{sen_type}_{train_type}`` (targets) and
    ``{src_type}_{sen_type}_{train_type}`` (inputs).

    Args:
        corpus_list (List[str]): Corpus directory names.
        situation_list (List[str]): Situation directory names.
        sen_type_list (List[str]): Sentence types used in the file names.
        src_type (str): File-name prefix of the input files.
        context_len: Context-length directory name.
        train_type (str): Split suffix of the file names.
        data_root (str): Root directory of the dataset. Defaults to the location
            that was previously hard-coded.

    Returns:
        pandas.DataFrame: String columns ``prefix`` (``"corpus situation
        sen_type"``), ``input_text`` and ``target_text``.
    """
    target_text = []
    input_text = []
    prefix = []
    for corpus in corpus_list:
        for situation in situation_list:
            for sen_type in sen_type_list:
                split_dir = f'{data_root}/{corpus}/{situation}/{context_len}'
                target_text += get_data_as_list(f'{split_dir}/rewrited_{sen_type}_{train_type}')
                # Read the input file once and reuse it for both the text and
                # the number of prefix labels (it used to be read twice).
                source_rows = get_data_as_list(f'{split_dir}/{src_type}_{sen_type}_{train_type}')
                input_text += source_rows
                prefix += [f'{corpus} {situation} {sen_type}'] * len(source_rows)
    # NOTE(review): inputs and targets are paired purely by position; their
    # lengths are not checked against each other.
    df = pd.DataFrame([prefix,input_text,target_text], index=['prefix','input_text','target_text']).astype(str).T
    return df

def get_score_df(path, lang):
    """Build a table of metrics for every prediction file plus a DeepL baseline.

    Args:
        path (str): Glob pattern matching prediction CSVs. Each file is read with
            its first column as index; the next two columns are taken as
            predictions and references respectively.
        lang (str): ``'ja'`` or ``'zh'``; selects which corpus rows form the
            DeepL baseline.

    Returns:
        pandas.DataFrame: One row per prediction file and a final ``'DeepL'``
        row, with columns ``bleu``, ``selfbleu``, ``ter``, ``bertscore_p``,
        ``bertscore_r`` and ``bertscore_f1``.

    Raises:
        ValueError: If ``lang`` is not ``'ja'`` or ``'zh'``.
    """
    # Substring of ``prefix`` that selects the baseline rows for each language.
    # NOTE(review): 'ja' -> 'mpdd' and 'zh' -> 'cejc' is kept exactly as before;
    # presumably each corpus is evaluated in its translation target language.
    corpus_for_lang = {'ja': 'mpdd', 'zh': 'cejc'}
    # Validate before the expensive scoring; an unsupported value used to fail
    # with UnboundLocalError only after every file had been scored.
    if lang not in corpus_for_lang:
        raise ValueError(f"lang must be 'ja' or 'zh', got {lang!r}")

    path_list = glob.glob(path)
    print(path_list)

    name_list = []
    score_list = []
    for pred_path in path_list:
        # Row label = 3rd and 4th '/'-separated path components, so the pattern
        # is assumed to have the form '<a>/<b>/<model>/<context>/<file>'.
        name_list.append("_".join(pred_path.split('/')[2:4]))

        pred_df = pd.read_csv(pred_path, index_col=0).astype(str)
        preds = pred_df.iloc[:,0].to_list()
        truth = pred_df.iloc[:,1].to_list()
        score_list.append(get_scores(preds, truth))

    # DeepL baseline: the pre-translated test inputs scored against the targets.
    corpus_list = ['cejc','mpdd']
    situation_list = ['apology','request','thanksgiving']
    sen_type_list = ['query','res']
    context_len = 0
    src_type = 'translated'
    train_type = 'test'

    eval_df = get_df(corpus_list, situation_list, sen_type_list, src_type, context_len, train_type)
    # 'query: ' is a literal marker, so disable regex interpretation explicitly.
    eval_df['input_text'] = eval_df['input_text'].str.replace('query: ', '', regex=False)

    baseline = eval_df.loc[eval_df["prefix"].str.contains(corpus_for_lang[lang])]
    name_list.append('DeepL')
    score_list.append(get_scores(baseline["input_text"].tolist(), baseline["target_text"].tolist()))

    return pd.DataFrame(
        score_list,
        index=name_list,
        columns=['bleu','selfbleu','ter','bertscore_p','bertscore_r','bertscore_f1'],
    )

In [None]:
# Score every prediction file below ``dir_path`` once per target language.
dir_path = 'outputs/context/'

_score_tables = {}
for lang in ('ja', 'zh'):
    # Two wildcard levels: model directory, then context-length directory.
    path = os.path.join(dir_path, f'*/*/{lang}_preds_truth.csv')
    _score_tables[lang] = get_score_df(path, lang)

ja_score_df = _score_tables['ja']
zh_score_df = _score_tables['zh']

In [None]:
# Show both score tables ordered by row label. ``display`` is needed because a
# bare expression is only rendered when it is the last statement of a cell;
# previously these two results were computed and silently discarded.
_summary_metrics = ['ter','bleu','selfbleu','bertscore_f1']
display(ja_score_df[_summary_metrics].sort_index(ascending=True))
display(zh_score_df[_summary_metrics].sort_index(ascending=True))

# Persist the complete tables (all metric columns) as BOM-prefixed UTF-8 CSV.
os.makedirs('for_thesis/generation/scores/',exist_ok=True)
ja_score_df.to_csv('for_thesis/generation/scores/ja_score_table.csv',encoding='utf_8_sig')
zh_score_df.to_csv('for_thesis/generation/scores/zh_score_table.csv',encoding='utf_8_sig')

In [None]:
# One bar-chart panel per metric for every system, ordered by BERTScore F1.
_headline_metrics = ['ter','bleu','selfbleu','bertscore_f1']
_bar_style = dict(rot=70, subplots=True, figsize=(20,6))
ja_score_df[_headline_metrics].sort_values('bertscore_f1', ascending=True).plot.bar(**_bar_style)
zh_score_df[_headline_metrics].sort_values('bertscore_f1', ascending=True).plot.bar(**_bar_style)

# Translation x:context

In [None]:
# ja_score_df[['ter','bleu','selfbleu','bertscore_f1']].sort_values('bertscore_f1', ascending=True).plot.bar(rot=70,subplots=True,figsize=(20,6))
# Rows whose label does not contain "prefix" and either starts with "0" or is
# the DeepL baseline; charted and tabulated in ascending BLEU order.
_metric_cols = ['ter','bleu','selfbleu','bertscore_f1']
for _scores in (ja_score_df, zh_score_df):
    _no_prefix = _scores.filter(regex='^(?!.*prefix).*$', axis=0)
    _family = _no_prefix.filter(regex='^(0.*|DeepL)$', axis=0)
    _view = _family[_metric_cols].sort_values('bleu', ascending=True)
    _view.plot.bar(rot=70,subplots=True,figsize=(20,6))
    display(_view)

# Translation x:prefix

In [None]:
# Rows whose label does not contain "rel" and either starts with "0" or is the
# DeepL baseline; charted and tabulated in ascending BERTScore-F1 order.
_metric_cols = ['ter','bleu','selfbleu','bertscore_f1']
for _scores in (ja_score_df, zh_score_df):
    _no_rel = _scores.filter(regex='^(?!.*rel).*$', axis=0)
    _family = _no_rel.filter(regex='^(0.*|DeepL)$', axis=0)
    _view = _family[_metric_cols].sort_values('bertscore_f1', ascending=True)
    _view.plot.bar(rot=70,subplots=True,figsize=(20,6))
    display(_view)

# Translation x:prefix relation

In [None]:
# ja_score_df.filter(regex='^(0.*|DeepL)$', axis=0)[['ter','bleu','selfbleu','bertscore_f1']].sort_values('bertscore_f1', ascending=True).plot.bar(rot=70,subplots=True,figsize=(20,6))
# Tables only (no chart is drawn): every row whose label starts with "0" plus
# the DeepL baseline, in ascending BERTScore-F1 order.
_metric_cols = ['ter','bleu','selfbleu','bertscore_f1']
for _scores in (ja_score_df, zh_score_df):
    _family = _scores.filter(regex='^(0.*|DeepL)$', axis=0)
    display(_family[_metric_cols].sort_values('bertscore_f1', ascending=True))

# StyleTransfer x:context

In [None]:
# ja_score_df[['ter','bleu','selfbleu','bertscore_f1']].sort_values('bertscore_f1', ascending=True).plot.bar(rot=70,subplots=True,figsize=(20,6))
# Rows whose label does not contain "prefix" and either starts with "1" or is
# the DeepL baseline; charted and tabulated in ascending BERTScore-F1 order.
_metric_cols = ['ter','bleu','selfbleu','bertscore_f1']
for _scores in (ja_score_df, zh_score_df):
    _no_prefix = _scores.filter(regex='^(?!.*prefix).*$', axis=0)
    _family = _no_prefix.filter(regex='^(1.*|DeepL)$', axis=0)
    _view = _family[_metric_cols].sort_values('bertscore_f1', ascending=True)
    _view.plot.bar(rot=70,subplots=True,figsize=(20,6))
    display(_view)

# StyleTransfer x:prefix

In [None]:
# Rows whose label does not contain "rel" and either starts with "1" or is the
# DeepL baseline; charted and tabulated in ascending BERTScore-F1 order.
_metric_cols = ['ter','bleu','selfbleu','bertscore_f1']
for _scores in (ja_score_df, zh_score_df):
    _no_rel = _scores.filter(regex='^(?!.*rel).*$', axis=0)
    _family = _no_rel.filter(regex='^(1.*|DeepL)$', axis=0)
    _view = _family[_metric_cols].sort_values('bertscore_f1', ascending=True)
    _view.plot.bar(rot=70,subplots=True,figsize=(20,6))
    display(_view)

# StyleTransfer x:prefix relation

In [None]:
# ja_score_df.filter(regex='^(1.*|DeepL)$', axis=0)[['ter','bleu','selfbleu','bertscore_f1']].sort_values('bleu', ascending=True).plot.bar(rot=70,subplots=True,figsize=(20,6))
# Tables only (no chart is drawn): every row whose label starts with "1" plus
# the DeepL baseline, in ascending BERTScore-F1 order.
_metric_cols = ['ter','bleu','selfbleu','bertscore_f1']
for _scores in (ja_score_df, zh_score_df):
    _family = _scores.filter(regex='^(1.*|DeepL)$', axis=0)
    display(_family[_metric_cols].sort_values('bertscore_f1', ascending=True))

# StyleTransfer input:000_output

In [None]:
# ja_score_df.filter(regex='^([0|3].*|DeepL)$', axis=0)[['ter','bleu','selfbleu','bertscore_f1']].sort_values('bertscore_f1', ascending=True).plot.bar(rot=70,subplots=True,figsize=(20,6))
_metric_cols = ['ter','bleu','selfbleu','bertscore_f1']


def _select_0_and_3(score_df):
    """Return the rows labelled 0.. or 3.. plus DeepL, sorted by BERTScore F1."""
    # [03] is the intended character class; the former [0|3] additionally
    # matched labels starting with a literal '|'.
    picked = score_df.filter(regex='^([03].*|DeepL)$', axis=0)
    return picked[_metric_cols].sort_values('bertscore_f1', ascending=True)


def _family_means(selected):
    """Return (mean of 0.. rows, mean of 3.. rows, both means side by side)."""
    mean_0 = selected.filter(regex='^([0].*)$', axis=0).mean()
    mean_3 = selected.filter(regex='^([3].*)$', axis=0).mean()
    side_by_side = pd.concat([mean_0, mean_3], axis=1)
    side_by_side = side_by_side.set_axis(['mt5 TL','mt5 TL+ST'], axis=1)
    return mean_0, mean_3, side_by_side


# Compute each selection once and reuse it for the table and the averages.
_ja_selected = _select_0_and_3(ja_score_df)
_zh_selected = _select_0_and_3(zh_score_df)
display(_ja_selected)
display(_zh_selected)

# Per-family metric averages; the DeepL row belongs to neither family.
ja_0, ja_3, ja_mean = _family_means(_ja_selected)
display(ja_mean.T)
zh_0, zh_3, zh_mean = _family_means(_zh_selected)
display(zh_mean.T)

In [None]:
# Pairwise correlations between the four metrics: first over all systems, then
# over the non-"prefix" rows of the "0" family and of the "1" family (each
# including the DeepL baseline). Japanese tables come first, then Chinese.
_metric_cols = ['ter','bleu','selfbleu','bertscore_f1']
for _scores in (ja_score_df, zh_score_df):
    display(_scores[_metric_cols].corr())
    _no_prefix = _scores.filter(regex='^(?!.*prefix).*$', axis=0)
    for _family_regex in ('^(0.*|DeepL)$', '^(1.*|DeepL)$'):
        display(_no_prefix.filter(regex=_family_regex, axis=0)[_metric_cols].corr())

# Save the all-systems correlation matrices as BOM-prefixed UTF-8 CSV.
ja_score_df[_metric_cols].corr().to_csv('for_thesis/generation/scores/ja_score_correlation.csv', encoding='utf-8-sig')
zh_score_df[_metric_cols].corr().to_csv('for_thesis/generation/scores/zh_score_correlation.csv', encoding='utf-8-sig')

# アウトプット

In [None]:
def get_output(path,lang):
    """Join the outputs of every system into one table keyed by reference text.

    Args:
        path (str): Glob pattern matching prediction CSVs. Each file must contain
            a ``truth`` column; its other columns are carried into the result.
        lang (str): ``'ja'`` or ``'zh'``; selects which corpus rows supply the
            ``DeepL`` and ``Original`` columns.

    Returns:
        pandas.DataFrame: Outer join on ``truth`` of all prediction files,
        extended with ``prefix``, ``DeepL`` and ``Original`` columns.

    Raises:
        ValueError: If ``lang`` is not ``'ja'`` or ``'zh'``.
    """
    # Substring of ``prefix`` that selects the corpus rows for each language
    # (mapping kept exactly as before).
    corpus_for_lang = {'ja': 'mpdd', 'zh': 'cejc'}
    # Fail early and clearly; an unsupported value used to surface later as
    # UnboundLocalError.
    if lang not in corpus_for_lang:
        raise ValueError(f"lang must be 'ja' or 'zh', got {lang!r}")

    path_list = glob.glob(path)
    print(path_list)

    # NOTE(review): when the pattern matches no file, ``df`` stays an empty frame
    # without a 'truth' column for the merges below to join on.
    df = pd.DataFrame()
    for i, pred_path in enumerate(path_list):
        # Keep one row per reference so the join cannot multiply rows.
        model_df = pd.read_csv(pred_path, index_col=0).astype(str).drop_duplicates(subset=['truth'])
        if i == 0:
            df = model_df
        else:
            df = pd.merge(df, model_df, on='truth', how='outer')

    corpus_list = ['cejc','mpdd']
    situation_list = ['apology','request','thanksgiving']
    sen_type_list = ['query','res']
    context_len = 0
    train_type = 'test'

    def _source_column(src_type, column_name):
        """Load the test split for ``src_type`` with its input column renamed."""
        eval_df = get_df(corpus_list, situation_list, sen_type_list, src_type, context_len, train_type)
        subset = eval_df.loc[eval_df["prefix"].str.contains(corpus_for_lang[lang])]
        # rename() returns a new frame; renaming the .loc slice in place, as was
        # done before, triggers pandas' SettingWithCopyWarning.
        renamed = subset.rename(columns={'target_text': 'truth', 'input_text': column_name})
        return renamed.drop_duplicates()

    df = pd.merge(df, _source_column('translated', 'DeepL'), on='truth', how='outer')
    # Both frames carry 'prefix' at this point, so it is part of the join key.
    df = pd.merge(df, _source_column('original', 'Original'), on=['truth','prefix'], how='outer')

    return df
    

In [None]:
import re


# Settings for collecting system outputs: root directory of the prediction
# files, the context-length sub-directory to read, and a tag for output names.
dir_path = 'outputs/context/'
context_len = 1
# Active tag; the commented lines below are alternative values.
save_fname = 'all_both'
# save_fname = 'all_res'
# save_fname = 'apology_all'
# save_fname = 'request_all'
# save_fname = 'thanksgiving_all'
lang = 'ja'
# Matches <dir_path>/<any model dir>/<context_len>/ja_preds_truth.csv.
path = os.path.join(dir_path, f'*/{context_len}/{lang}_preds_truth.csv')
ja_df = get_output(path,lang)
# display(ja_df)
# print(len(df.drop_duplicates()))


In [None]:
# Same collection for Chinese; reuses dir_path and context_len set earlier.
lang = 'zh'
# Matches <dir_path>/<any model dir>/<context_len>/zh_preds_truth.csv.
path = os.path.join(dir_path, f'*/{context_len}/{lang}_preds_truth.csv')
zh_df = get_output(path,lang)
# display(zh_df)
# print(len(df.drop_duplicates()))

In [None]:
pd.set_option('display.max_rows', 400)

# Columns written to the side-by-side spreadsheets, in left-to-right order.
_export_columns = [
    'prefix',
    'Original',
    '000_translate_all_both',
    '000_translate_all_both_prefix',
    '000_translate_all_both_prefix_rel',
    'DeepL',
    'truth',
]
ja_df[_export_columns].to_excel(dir_path+f'ja_{save_fname}_{context_len}.xlsx')
zh_df[_export_columns].to_excel(dir_path+f'zh_{save_fname}_{context_len}.xlsx')
# ja_df[['prefix','Original','000_translate_all_both','000_translate_all_both_prefix','000_translate_all_both_prefix_rel','300_culturizefromT5train_all_both','300_culturizefromT5train_all_both_prefix','300_culturizefromT5train_all_both_prefix','DeepL','100_culturize_all_both','100_culturize_all_both_prefix','100_culturize_all_both_prefix_rel','truth']].to_excel(dir_path+f'ja_{save_fname}_{context_len}.xlsx')
# zh_df[['prefix','Original','000_translate_all_both','000_translate_all_both_prefix','000_translate_all_both_prefix_rel','300_culturizefromT5train_all_both','300_culturizefromT5train_all_both_prefix','300_culturizefromT5train_all_both_prefix','DeepL','100_culturize_all_both','100_culturize_all_both_prefix','100_culturize_all_both_prefix_rel','truth']].to_excel(dir_path+f'zh_{save_fname}_{context_len}.xlsx')

In [None]:
# Narrow both output tables to the columns scored in the following cells.
_kept_columns = [
    'prefix',
    'Original',
    '000_translate_all_both',
    '000_translate_all_both_prefix',
    '000_translate_all_both_prefix_rel',
    'DeepL',
    'truth',
]
ja_df = ja_df[_kept_columns]
zh_df = zh_df[_kept_columns]

In [None]:
# Per-sentence BERTScore F1 of each Japanese system column against ``truth``.
column_list=['Original','000_translate_all_both','000_translate_all_both_prefix','000_translate_all_both_prefix_rel']
name_list=[]
score_list=[]
for column in column_list:
    truth_list = ja_df['truth'].to_list()
    # Cast to str and drop the literal 'query: ' marker before scoring.
    cleaned = [str(sen).replace('query: ','') for sen in ja_df[column].to_list()]
    # One calc_bert_score call per sentence pair; index 2 of its result is F1.
    sen_scores = [
        calc_bert_score([sen], [truth_list[pos]])[2]
        for pos, sen in enumerate(cleaned)
    ]
    score_list.append(sen_scores)
    name_list.append(column+' score')

In [None]:
# One column per "<system> score" label, one row per sentence.
# NOTE(review): this rebinds ``ja_score_df``, which earlier cells use for the
# corpus-level metric table; re-running those cells afterwards sees this frame.
ja_score_df = pd.DataFrame(dict(zip(name_list, score_list)))
# ja_score_df

In [None]:
# Per-sentence BERTScore F1 of each Chinese system column against ``truth``.
column_list=['Original','000_translate_all_both','000_translate_all_both_prefix','000_translate_all_both_prefix_rel']
name_list=[]
score_list=[]
for column in column_list:
    sen_list = zh_df[column].to_list()
    truth_list = zh_df['truth'].to_list()
    sen_scores=[]
    for i,sen in enumerate(sen_list):
        # Cast before .replace, as the Japanese cell does: cells left empty by
        # the outer merges are NaN floats, which have no .replace method.
        sen = str(sen)
        sen = sen.replace('query: ','')
        _,_,f1 = calc_bert_score([sen], [truth_list[i]])
        sen_scores.append(f1)
    score_list.append(sen_scores)
    name_list.append(column+' score')

In [None]:
# One column per "<system> score" label, one row per sentence.
# NOTE(review): this rebinds ``zh_score_df``, which earlier cells use for the
# corpus-level metric table; re-running those cells afterwards sees this frame.
zh_score_df = pd.DataFrame(dict(zip(name_list, score_list)))
# zh_score_df

In [None]:
# Write the per-sentence score tables next to the prediction files; the file
# names embed the save_fname tag and the context length.
ja_score_df.to_excel(dir_path+f'ja_score_{save_fname}_{context_len}.xlsx')
zh_score_df.to_excel(dir_path+f'zh_score_{save_fname}_{context_len}.xlsx')

## for Thesis

In [6]:
# Single-pair BERTScore example (Japanese): one hypothesis scored against one
# reference; precision and recall are discarded, only F1 is kept.
hyp = ['お願い。なんでも言ってくれていいから。']
ref = ['同級生のためにもお願いします。 何かリクエストがあれば、ぜひ聞いてみてくださいね。']

_, _, F1 = calc_bert_score(hyp, ref)

calculating scores...
computing bert embedding.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


computing greedy matching.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


done in 0.03 seconds, 33.20 sentences/sec


In [7]:
# Echo the F1 value computed by the preceding cell.
F1

0.7529513239860535

In [8]:
# Single-pair BERTScore example (Japanese): one hypothesis scored against a
# different reference; only F1 is kept.
hyp = ['お願い。なんでも言ってくれていいから。']
ref = ['お願いしたいことがあるなら教えてください。']
_, _, F1 = calc_bert_score(hyp, ref)

calculating scores...
computing bert embedding.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


computing greedy matching.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


done in 0.03 seconds, 33.42 sentences/sec


In [9]:
# Echo the F1 value computed by the preceding cell.
F1

0.81302410364151

In [35]:
# Single-pair BERTScore example (Japanese): one hypothesis scored against one
# reference; only F1 is kept.
hyp = ['お願い。なんでも言ってくれていいから。']
ref = ['私にできることなら、なんでもするから言ってよ。お願い。']
_, _, F1 = calc_bert_score(hyp, ref)

calculating scores...
computing bert embedding.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


computing greedy matching.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


done in 0.03 seconds, 35.51 sentences/sec


In [36]:
# Echo the F1 value computed by the preceding cell.
F1

0.8270285129547119

In [10]:
# Single-pair BERTScore example (Chinese): one hypothesis scored against one
# reference; precision and recall are discarded, only F1 is kept.
hyp = ['啊，其實，我也有話想說']
ref = ['啊。让我看看。抱歉。我也有。']

_, _, F1 = calc_bert_score(hyp, ref)

calculating scores...
computing bert embedding.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


computing greedy matching.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


done in 0.03 seconds, 33.79 sentences/sec


In [11]:
# Echo the F1 value computed by the preceding cell.
F1

0.7478231191635132

In [12]:
# Single-pair BERTScore example (Chinese): one hypothesis scored against a
# different reference; only F1 is kept.
hyp = ['啊，其實，我也有話想說']
ref = ['啊,不好意思']

_, _, F1 = calc_bert_score(hyp, ref)

calculating scores...
computing bert embedding.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


computing greedy matching.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


done in 0.03 seconds, 33.75 sentences/sec


In [13]:
# Echo the F1 value computed by the preceding cell.
F1

0.7479795813560486

In [31]:
# Single-pair BERTScore example (Chinese): one hypothesis scored against one
# reference; only F1 is kept.
hyp = ['啊，其實，我也有話想說']
ref = ['啊！我有一個事情想要講']

_, _, F1 = calc_bert_score(hyp, ref)

calculating scores...
computing bert embedding.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


computing greedy matching.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


done in 0.03 seconds, 35.20 sentences/sec


In [32]:
# Echo the F1 value computed by the preceding cell.
F1

0.792472243309021