# TER SCORING

In [None]:
import pandas as pd
import sacrebleu
from statistics import mean

def compute_ter_score(src_text_list, tgt_op_list, sys_op_list, filename):
    """Score each translation with a word-overlap error rate and write a CSV.

    For every sentence pair, the number of edits is approximated as the
    reference word count minus the number of *distinct* words shared by the
    reference and the system output; the score is that edit count divided by
    the reference word count.

    NOTE: this is a bag-of-words approximation.  Word order is ignored and a
    word repeated in the reference is matched at most once, so the values
    differ from the shift-aware edit distance of standard TER.

    The CSV has one row per sentence pair, followed by one summary row that
    carries only ``AVERAGE_TER_SCORE`` (the mean of the per-sentence scores).
    When there are no sentence pairs, only the header row is written.

    Args:
        src_text_list: Source sentences, parallel to ``tgt_op_list``.
        tgt_op_list: Reference translations; each is split on whitespace.
        sys_op_list: System translations, parallel to ``tgt_op_list``.
        filename: Path of the CSV file to write.

    Raises:
        IndexError: If ``src_text_list`` or ``sys_op_list`` is shorter than
            ``tgt_op_list``.
    """
    columns = [
        "INPUT",
        "REFERENCE",
        "MACHINE_TRANSLATION",
        "NUM_OF_EDITS",
        "NUM_OF_REF",
        "TER_SCORE",
        "AVERAGE_TER_SCORE",
    ]
    # Rows are collected in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    ter_scores_list = []

    for index, target_op in enumerate(tgt_op_list):
        system_op = sys_op_list[index]
        source_text = src_text_list[index]

        reference_words = target_op.split()
        candidate_words = system_op.split()

        correct_candidate_words = len(set(candidate_words) & set(reference_words))
        total_reference_words = len(reference_words)
        num_of_edits = total_reference_words - correct_candidate_words

        if total_reference_words:
            ter_score = num_of_edits / total_reference_words
        else:
            # Empty reference: avoid dividing by zero.  Any system output is
            # counted as fully wrong, an empty output as a perfect match.
            ter_score = 1.0 if candidate_words else 0.0

        ter_scores_list.append(ter_score)
        rows.append((
            source_text,
            target_op,
            system_op,
            num_of_edits,
            total_reference_words,
            ter_score,
            None,  # AVERAGE_TER_SCORE is only filled in on the summary row
        ))

    if ter_scores_list:
        # Summary row: every per-sentence column is left blank.
        rows.append((None,) * (len(columns) - 1) + (mean(ter_scores_list),))

    # dtype=object keeps the integer counts from being upcast to float by the
    # blank cells, so they are written as "2" rather than "2.0".
    csv_frame = pd.DataFrame(rows, columns=columns, dtype=object)
    csv_frame.to_csv(filename, index=False)

def _load_translation_columns(json_path):
    """Read a result JSON and return (frame, sources, references, system outputs)."""
    frame = pd.read_json(json_path)
    sources = frame['Source Text'].tolist()
    references = frame['Target Output'].tolist()
    hypotheses = frame['System Output'].tolist()
    return frame, sources, references, hypotheses


# --- Training data: Tagalog -> Ilokano ---
dict_tl_il_result, src_text_list, tgt_op_list, sys_op_list = _load_translation_columns(
    '../src/json data/Tagalog to Ilokano/Hybrid Translator/dict_tl-il_op_ex.json'
)
compute_ter_score(
    src_text_list,
    tgt_op_list,
    sys_op_list,
    '../src/scores/Tagalog to Ilokano/train/train_ter_tl-il.csv',
)

# --- Training data: Ilokano -> Tagalog ---
dict_il_tl_result, src_text_list, tgt_op_list, sys_op_list = _load_translation_columns(
    '../src/json data/Ilokano to Tagalog/Hybrid Translator/dict_il-tl_op_ex.json'
)
compute_ter_score(
    src_text_list,
    tgt_op_list,
    sys_op_list,
    '../src/scores/Ilokano to Tagalog/train/train_ter_il-tl.csv',
)

# --- Testing data: Tagalog -> Ilokano ---
# NOTE(review): this input is named "*_ter.json" while the Ilokano -> Tagalog
# test input below is "*_test.json" -- confirm the file name is intended.
dict_tl_il_result, src_text_list, tgt_op_list, sys_op_list = _load_translation_columns(
    '../src/json data/Tagalog to Ilokano/Hybrid Translator/dict_tl-il_ter.json'
)
compute_ter_score(
    src_text_list,
    tgt_op_list,
    sys_op_list,
    '../src/scores/Tagalog to Ilokano/test/test_ter_tl-il.csv',
)

# --- Testing data: Ilokano -> Tagalog ---
dict_il_tl_result, src_text_list, tgt_op_list, sys_op_list = _load_translation_columns(
    '../src/json data/Ilokano to Tagalog/Hybrid Translator/dict_il-tl_test.json'
)
compute_ter_score(
    src_text_list,
    tgt_op_list,
    sys_op_list,
    '../src/scores/Ilokano to Tagalog/test/test_ter_il-tl.csv',
)