# Enhancing Machine Translation of News: Japanese to English Translation

## If the dataframe has already been built

Postprocessing was done on individual sentences; see Helsinki_single.pdf.

In [6]:
import pandas as pd
import evaluate
import torch
import logging
import tensorflow as tf



#pd.set_option('display.max_colwidth', 1000)

# Keep TensorFlow's Python logger quiet: only ERROR and above are emitted.
logging.getLogger('tensorflow').setLevel(logging.ERROR)
tf.get_logger().setLevel(logging.ERROR)

# Directory handed to evaluate.load() as its cache (the main disk lacked space).
cache_dir = './.cache'
# Load the already-built dataframe; the first CSV row is used as the header.
test = pd.read_csv("./data.csv", header=0)



In [7]:
def calculate_bleu_score(predictions, references, max_order=4):
    """
    Compute the BLEU score for predictions and references, print it and return it

    Parameters:
        predictions: List of predicted translations
        references: List of reference translations
        max_order: Maximum order of n-grams to consider, default is 4

    Returns:
        The result of the metric's compute() call (the same object that is printed)
    """
    bleu = evaluate.load("bleu", cache_dir=cache_dir)
    result = bleu.compute(predictions=predictions, references=references, max_order=max_order)
    print("BLEU Score: ", result)
    # Drop the metric object and release cached CUDA memory before the next metric is loaded.
    del bleu
    torch.cuda.empty_cache()
    # Previously the function only printed; returning the result matches the documented contract.
    return result

def calculate_rouge_score(predictions, references):
    """
    Compute the ROUGE score for predictions and references, print it and return it

    Parameters:
        predictions: List of predicted translations
        references: List of reference translations

    Returns:
        The result of the metric's compute() call (the same object that is printed)
    """
    rouge = evaluate.load("rouge", cache_dir=cache_dir)
    result = rouge.compute(predictions=predictions, references=references)
    print("ROUGE Score: ", result)
    # Drop the metric object and release cached CUDA memory before the next metric is loaded.
    del rouge
    torch.cuda.empty_cache()
    # Previously the function only printed; returning the result matches the documented contract.
    return result

def calculate_chrf_score(predictions, references):
    """
    Compute the chrF score for predictions and references, print it and return it

    Parameters:
        predictions: List of predicted translations
        references: List of reference translations

    Returns:
        The result of the metric's compute() call (the same object that is printed)
    """
    chrf = evaluate.load("chrf", cache_dir=cache_dir)
    result = chrf.compute(predictions=predictions, references=references)
    print("chrF Score: ", result)
    # Drop the metric object and release cached CUDA memory before the next metric is loaded.
    del chrf
    torch.cuda.empty_cache()
    # Previously the function only printed; returning the result matches the documented contract.
    return result

# def calculate_bleurt_score(predictions, references):
#     """
#     Compute Bleurt score for predictions and references
#     Could be used for evaluating translations, but we could not find published BLEURT scores for WMT23,
#     so there was nothing to compare against

#     Parameters:
#         predictions: List of predicted translations
#         references: List of reference translations

#     Returns:
#         Bleurt score for the predictions and references
#     """
#     bleurt = evaluate.load("bleurt", cache_dir=cache_dir)
#     tmp = bleurt.compute(predictions=predictions, references=references)
#     print("Bleurt Score: ", sum(tmp['scores'])/len(tmp['scores']))
#     del bleurt
#     torch.cuda.empty_cache()

def calculate_comet_score(sources, predictions, references):
    """
    Compute the COMET score for predictions and references, print it and return it

    Parameters:
        sources: List of source sentences (the untranslated text)
        predictions: List of predicted translations
        references: List of reference translations

    Returns:
        The 'mean_score' entry of the metric's compute() result (the same value that is printed)
    """
    comet = evaluate.load("comet", cache_dir=cache_dir)
    result = comet.compute(sources=sources, predictions=predictions, references=references)
    # Only the 'mean_score' entry of the result is reported.
    mean_score = result['mean_score']
    print("COMET Score: ", mean_score)
    # Drop the metric object and release cached CUDA memory once scoring is done.
    del comet
    torch.cuda.empty_cache()
    # Previously the function only printed; returning the score matches the documented contract.
    return mean_score

In [8]:
def calculate(pred, ref, dataframe, src='jp'):
    """
    Run BLEU, ROUGE, chrF and COMET for one prediction/reference column pair

    Parameters:
        pred: Name of the column holding the predicted translations
        ref: Name of the column holding the reference translations
        dataframe: DataFrame containing the pred, ref and src columns
        src: Name of the column holding the source sentences (passed to COMET), default is 'jp'

    Returns:
        None, the individual metric functions print their scores

    Raises:
        KeyError: If any of the pred, ref or src columns is missing from the dataframe
    """
    # Fail before any metric is computed: a missing source column used to be
    # noticed only after BLEU, ROUGE and chrF had already run.
    missing = [column for column in (pred, ref, src) if column not in dataframe.columns]
    if missing:
        raise KeyError(f"Columns not found in dataframe: {missing}")

    predlist = dataframe[pred].tolist()
    reflist = dataframe[ref].tolist()
    srclist = dataframe[src].tolist()

    calculate_bleu_score(predlist, reflist)
    calculate_rouge_score(predlist, reflist)
    calculate_chrf_score(predlist, reflist)
    calculate_comet_score(srclist, predlist, reflist)

In [9]:
# Score the 'English Deletion + Japanese Deletion' predictions against the 'en' column.
calculate(
    pred='prediction English Deletion + Japanese Deletion',
    ref='en',
    dataframe=test[['en', 'prediction English Deletion + Japanese Deletion', 'jp']],
)

BLEU Score:  {'bleu': 0.14586079058462165, 'precisions': [0.48201215240298184, 0.20499866662715932, 0.1080200815499506, 0.060545017007709], 'brevity_penalty': 0.9148303846402471, 'length_ratio': 0.9182596457817399, 'translation_length': 2191007, 'reference_length': 2386043}
ROUGE Score:  {'rouge1': 0.4734976076698739, 'rouge2': 0.22614609751264425, 'rougeL': 0.40810550727201805, 'rougeLsum': 0.4081434172624776}
chrF Score:  {'score': 41.2073568777114, 'char_order': 6, 'word_order': 0, 'beta': 2}


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.2.2. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/371e9839ca4e213dde891b066cf3080f75ec7e72/checkpoints/model.ckpt`
c:\Users\Victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\core\saving.py:188: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


COMET Score:  0.7523743284170223


In [10]:
# Score the same predictions, this time using the 'google_translation' column as reference.
calculate(
    pred='prediction English Deletion + Japanese Deletion',
    ref='google_translation',
    dataframe=test[['google_translation', 'prediction English Deletion + Japanese Deletion', 'jp']],
)

BLEU Score:  {'bleu': 0.23152776773729306, 'precisions': [0.5862030564028321, 0.31369866156129833, 0.19142070445916734, 0.12136650491711941], 'brevity_penalty': 0.9056094789904402, 'length_ratio': 0.9097963283544532, 'translation_length': 2191007, 'reference_length': 2408239}
ROUGE Score:  {'rouge1': 0.5770739241937017, 'rouge2': 0.3331429586871332, 'rougeL': 0.5140948535860443, 'rougeLsum': 0.5140795066992838}
chrF Score:  {'score': 49.45209930396326, 'char_order': 6, 'word_order': 0, 'beta': 2}


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.2.2. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/371e9839ca4e213dde891b066cf3080f75ec7e72/checkpoints/model.ckpt`
c:\Users\Victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\core\saving.py:188: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


COMET Score:  0.7910001221535337


In [11]:
# Score the 'prediction MeCab' column against the 'en' column.
calculate(
    pred='prediction MeCab',
    ref='en',
    dataframe=test[['en', 'prediction MeCab', 'jp']],
)

BLEU Score:  {'bleu': 0.14204834677325318, 'precisions': [0.4774083902131296, 0.20079139975771124, 0.10518026077708952, 0.05879114643053754], 'brevity_penalty': 0.9103656276481146, 'length_ratio': 0.9141528463653003, 'translation_length': 2181208, 'reference_length': 2386043}
ROUGE Score:  {'rouge1': 0.46796373840770955, 'rouge2': 0.22241075371415892, 'rougeL': 0.4032370404210559, 'rougeLsum': 0.40325738411501777}
chrF Score:  {'score': 40.29398965594634, 'char_order': 6, 'word_order': 0, 'beta': 2}


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.2.2. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/371e9839ca4e213dde891b066cf3080f75ec7e72/checkpoints/model.ckpt`
c:\Users\Victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\core\saving.py:188: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


COMET Score:  0.7460105917712674


In [12]:
# Score the 'prediction KyTea' column against the 'en' column.
calculate(
    pred='prediction KyTea',
    ref='en',
    dataframe=test[['en', 'prediction KyTea', 'jp']],
)

BLEU Score:  {'bleu': 0.04019690172550246, 'precisions': [0.26394726997074075, 0.059946763343188575, 0.020400841686560138, 0.008087966068931012], 'brevity_penalty': 1.0, 'length_ratio': 1.045207902791358, 'translation_length': 2493911, 'reference_length': 2386043}
ROUGE Score:  {'rouge1': 0.24614102782146252, 'rouge2': 0.06751660022976004, 'rougeL': 0.20688100717050617, 'rougeLsum': 0.20688953657200027}
chrF Score:  {'score': 22.671869670334274, 'char_order': 6, 'word_order': 0, 'beta': 2}


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.2.2. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/371e9839ca4e213dde891b066cf3080f75ec7e72/checkpoints/model.ckpt`
c:\Users\Victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\core\saving.py:188: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


COMET Score:  0.5191643061788593


In [13]:
# Score the 'prediction spaCy' column against the 'en' column.
calculate(
    pred='prediction spaCy',
    ref='en',
    dataframe=test[['en', 'prediction spaCy', 'jp']],
)

BLEU Score:  {'bleu': 0.1421073405503944, 'precisions': [0.47759661442845114, 0.20084755807586754, 0.1051730217095653, 0.058708298705924256], 'brevity_penalty': 0.9109270557074493, 'length_ratio': 0.9146683441999998, 'translation_length': 2182438, 'reference_length': 2386043}
ROUGE Score:  {'rouge1': 0.4679353191547481, 'rouge2': 0.2223107373180696, 'rougeL': 0.4030778823275719, 'rougeLsum': 0.403097589105214}
chrF Score:  {'score': 40.31739191434267, 'char_order': 6, 'word_order': 0, 'beta': 2}


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.2.2. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../../.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/371e9839ca4e213dde891b066cf3080f75ec7e72/checkpoints/model.ckpt`
c:\Users\Victo\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\core\saving.py:188: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


COMET Score:  0.7459628520108207
