# Enhancing Machine Translation of News: Japanese to English Translation

## If the dataframe is already built

Post-processing was done on individual sentences; see Helsinki_single.pdf.

In [1]:
import pandas as pd
import evaluate

# Raise the column display width so long sentences are not cut off when frames are shown
pd.set_option('display.max_colwidth', 1000)

# Directory passed as cache_dir to every evaluate.load() call in the metric helpers
cache_dir = 'D:\\.cache'                                # Kept on D: because the main disk lacked space
# Load the already-built dataframe; the first row of the CSV is used as the column names.
# Later cells read the 'jp', 'en', 'google_translation' and 'prediction ...' columns from it.
test = pd.read_csv("./data.csv", header=0)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def calculate_bleu_score(predictions, references, max_order=4):
    """
    Score predicted translations against references with the BLEU metric

    Parameters:
        predictions: List of predicted translations
        references: List of reference translations
        max_order: Highest n-gram order taken into account, default is 4

    Returns:
        Result of the BLEU metric's compute() call for the given inputs
    """
    metric = evaluate.load("bleu", cache_dir=cache_dir)
    scoring_args = dict(
        predictions=predictions,
        references=references,
        max_order=max_order,
    )
    return metric.compute(**scoring_args)

def calculate_rouge_score(predictions, references):
    """
    Score predicted translations against references with the ROUGE metric

    Parameters:
        predictions: List of predicted translations
        references: List of reference translations

    Returns:
        Result of the ROUGE metric's compute() call for the given inputs
    """
    metric = evaluate.load("rouge", cache_dir=cache_dir)
    result = metric.compute(
        predictions=predictions,
        references=references,
    )
    return result

def calculate_chrf_score(predictions, references):
    """
    Score predicted translations against references with the chrF metric

    Parameters:
        predictions: List of predicted translations
        references: List of reference translations

    Returns:
        Result of the chrF metric's compute() call for the given inputs
    """
    scoring_args = {"predictions": predictions, "references": references}
    metric = evaluate.load("chrf", cache_dir=cache_dir)
    return metric.compute(**scoring_args)

def calculate_bleurt_score(predictions, references):
    """
    Score predicted translations against references with the Bleurt metric
    Bleurt is usable for judging translations, but no WMT23 Bleurt scores were found,
    so these values could not be compared against that benchmark

    Parameters:
        predictions: List of predicted translations
        references: List of reference translations

    Returns:
        Result of the Bleurt metric's compute() call for the given inputs
    """
    metric = evaluate.load("bleurt", cache_dir=cache_dir)
    result = metric.compute(
        predictions=predictions,
        references=references,
    )
    return result

def calculate_comet_score(sources, predictions, references):
    """
    Score predicted translations with the COMET metric, which also takes the source text

    Parameters:
        sources: List of source sentences
        predictions: List of predicted translations
        references: List of reference translations

    Returns:
        Result of the COMET metric's compute() call for the given inputs
    """
    metric = evaluate.load("comet", cache_dir=cache_dir)
    scoring_args = dict(
        sources=sources,
        predictions=predictions,
        references=references,
    )
    return metric.compute(**scoring_args)

In [3]:
def calculate(pred, ref, dataframe, src='jp'):
    """
    Compute and print the BLEU, ROUGE, chrF, Bleurt and COMET scores for one pair of columns

    Parameters:
        pred: Name of the dataframe column holding the predicted translations
        ref: Name of the dataframe column holding the reference translations
        dataframe: pandas DataFrame containing the pred, ref and src columns
        src: Name of the column holding the source sentences (only COMET uses it), default is 'jp'

    Returns:
        None, the scores are printed

    Raises:
        KeyError: If pred, ref or src is not a column of the dataframe
        ValueError: If the dataframe has no rows
    """
    # Check every column before any metric runs, so a missing column fails immediately
    # rather than after the earlier metrics have already been computed.
    missing = [col for col in (pred, ref, src) if col not in dataframe.columns]
    if missing:
        raise KeyError(f"Column(s) not found in dataframe: {missing}")
    if len(dataframe) == 0:
        raise ValueError("Cannot compute translation metrics on an empty dataframe")

    # Convert each column to a list once and reuse it for all metrics
    predictions = dataframe[pred].tolist()
    references = dataframe[ref].tolist()
    sources = dataframe[src].tolist()

    bleu_test = calculate_bleu_score(predictions, references)
    rouge_test = calculate_rouge_score(predictions, references)
    chrf_test = calculate_chrf_score(predictions, references)
    bleurt_test = calculate_bleurt_score(predictions, references)
    comet_test = calculate_comet_score(sources, predictions, references)

    print(f"BLEU score: {bleu_test}")
    print(f"ROUGE score: {rouge_test}")
    print(f"CHRF score: {chrf_test}")
    # Bleurt yields one score per sentence; report their mean
    bleurt_scores = bleurt_test['scores']
    print(f"Bleurt score: {sum(bleurt_scores) / len(bleurt_scores)}")
    print(f"COMET score: {comet_test['mean_score']}")

In [4]:
# Score the 'prediction English Deletion + Japanese Deletion' column against the 'en' column of `test`
calculate('prediction English Deletion + Japanese Deletion', 'en', test)

Downloading builder script: 100%|██████████| 6.27k/6.27k [00:00<?, ?B/s]


ImportError: To be able to use evaluate-metric/rouge, you need to install the following dependencies['rouge_score'] using 'pip install rouge_score' for instance'

In [None]:
# Same predictions, but scored against the 'google_translation' column instead of 'en'
calculate('prediction English Deletion + Japanese Deletion', 'google_translation', test)

In [None]:
# Score the 'prediction MeCab' column against the 'en' column of `test`
calculate('prediction MeCab', 'en', test)

In [None]:
# Score the 'prediction KyTea' column against the 'en' column of `test`
calculate('prediction KyTea', 'en', test)

In [None]:
# Score the 'prediction spaCy' column against the 'en' column of `test`
calculate('prediction spaCy', 'en', test)