# ROUGE SCORE (Recall, Precision, F1 Score)

# In [None]:
import pandas as pd

def compute_rouge_score(src_text_list, tgt_op_list, sys_op_list, filename):
    """Score system outputs against references and write the results to a CSV.

    For every index of ``tgt_op_list`` the reference and the system output at
    that index are split on whitespace. The overlap is the number of
    *distinct* tokens the two share (a set intersection), while the totals
    count every token, duplicates included. From those:

    * recall    = overlap / number of reference tokens
    * precision = overlap / number of candidate tokens
    * F1        = harmonic mean of precision and recall (0 if either is 0)

    The CSV holds one row per sentence pair followed by one final row that
    only fills the ``AVERAGE_*`` columns.

    Args:
        src_text_list: Source sentences, indexed in parallel with
            ``tgt_op_list``; only copied into the ``INPUT`` column.
        tgt_op_list: Reference translations (strings).
        sys_op_list: System translations (strings), indexed in parallel with
            ``tgt_op_list``.
        filename: Destination path handed to ``DataFrame.to_csv``.

    Returns:
        None. The result is written to ``filename``.

    Raises:
        IndexError: If ``src_text_list`` or ``sys_op_list`` is shorter than
            ``tgt_op_list``.
    """
    score_columns = [
        "INPUT",
        "REFERENCE",
        "MACHINE_TRANSLATION",
        "CANDIDATE_IN_REF",
        "TOTAL_IN_REF",
        "RECALL",
        "REF_IN_CANDIDATE",
        "TOTAL_IN_CANDIDATE",
        "PRECISION",
        "F1_SCORE",
    ]
    average_columns = ["AVERAGE_RECALL", "AVERAGE_PRECISION", "AVERAGE_F1"]

    # Rows are collected in a plain list and turned into a frame once at the
    # end: DataFrame.append no longer exists in pandas 2.x, and growing a
    # frame row by row copies it on every iteration.
    rows = []
    recall_sum = 0
    precision_sum = 0
    f1_sum = 0

    for index, target_op in enumerate(tgt_op_list):
        source_text = src_text_list[index]
        system_op = sys_op_list[index]

        reference_words = target_op.split()
        candidate_words = system_op.split()

        # Distinct tokens present in both the candidate and the reference.
        # The same overlap feeds both recall and precision.
        shared_words = len(set(candidate_words) & set(reference_words))
        total_reference_words = len(reference_words)
        total_candidate_words = len(candidate_words)

        # An empty reference or candidate has no tokens to match, so score it
        # as 0 instead of dividing by zero.
        if total_reference_words:
            recall = shared_words / total_reference_words
        else:
            recall = 0.0
        if total_candidate_words:
            precision = shared_words / total_candidate_words
        else:
            precision = 0.0

        if precision == 0 or recall == 0:
            f1_score = 0
        else:
            f1_score = 2 * (precision * recall) / (precision + recall)

        recall_sum += recall
        precision_sum += precision
        f1_sum += f1_score

        rows.append({
            "INPUT": source_text,
            "REFERENCE": target_op,
            "MACHINE_TRANSLATION": system_op,
            "CANDIDATE_IN_REF": shared_words,
            "TOTAL_IN_REF": total_reference_words,
            "RECALL": recall,
            "REF_IN_CANDIDATE": shared_words,
            "TOTAL_IN_CANDIDATE": total_candidate_words,
            "PRECISION": precision,
            "F1_SCORE": f1_score,
        })

    # Average over the pairs that were actually scored; with nothing scored
    # the averages are reported as 0 rather than raising.
    pair_count = len(rows)
    if pair_count:
        average_recall = recall_sum / pair_count
        average_precision = precision_sum / pair_count
        average_f1 = f1_sum / pair_count
    else:
        average_recall = average_precision = average_f1 = 0.0

    rows.append({
        "AVERAGE_RECALL": average_recall,
        "AVERAGE_PRECISION": average_precision,
        "AVERAGE_F1": average_f1,
    })

    # Keys missing from a row become NaN (written as empty CSV cells).
    # dtype=object stops that NaN padding from upcasting the integer count
    # columns to float.
    csv_frame = pd.DataFrame(
        rows, columns=score_columns + average_columns, dtype=object
    )
    csv_frame.to_csv(filename, index=False)

# Each stanza below loads one result file of the hybrid translator, pulls the
# source / reference / system columns out as Python lists, and writes the
# per-sentence ROUGE scores for that split and direction to a CSV file.

# --- Training data: Tagalog -> Ilokano ---
dict_tl_il_result = pd.read_json(
    '../src/json data/Tagalog to Ilokano/Hybrid Translator/dict_tl-il_op_ex.json'
)
src_text_list, tgt_op_list, sys_op_list = (
    dict_tl_il_result[column].tolist()
    for column in ('Source Text', 'Target Output', 'System Output')
)
compute_rouge_score(
    src_text_list,
    tgt_op_list,
    sys_op_list,
    '../src/scores/Tagalog to Ilokano/train/train_rouge_tl-il.csv',
)

# --- Training data: Ilokano -> Tagalog ---
dict_il_tl_result = pd.read_json(
    '../src/json data/Ilokano to Tagalog/Hybrid Translator/dict_il-tl_op_ex.json'
)
src_text_list, tgt_op_list, sys_op_list = (
    dict_il_tl_result[column].tolist()
    for column in ('Source Text', 'Target Output', 'System Output')
)
compute_rouge_score(
    src_text_list,
    tgt_op_list,
    sys_op_list,
    '../src/scores/Ilokano to Tagalog/train/train_rouge_il-tl.csv',
)

# --- Testing data: Tagalog -> Ilokano ---
dict_tl_il_result = pd.read_json(
    '../src/json data/Tagalog to Ilokano/Hybrid Translator/dict_tl-il_test.json'
)
src_text_list, tgt_op_list, sys_op_list = (
    dict_tl_il_result[column].tolist()
    for column in ('Source Text', 'Target Output', 'System Output')
)
compute_rouge_score(
    src_text_list,
    tgt_op_list,
    sys_op_list,
    '../src/scores/Tagalog to Ilokano/test/test_rouge_tl-il.csv',
)

# --- Testing data: Ilokano -> Tagalog ---
dict_il_tl_result = pd.read_json(
    '../src/json data/Ilokano to Tagalog/Hybrid Translator/dict_il-tl_test.json'
)
src_text_list, tgt_op_list, sys_op_list = (
    dict_il_tl_result[column].tolist()
    for column in ('Source Text', 'Target Output', 'System Output')
)
compute_rouge_score(
    src_text_list,
    tgt_op_list,
    sys_op_list,
    '../src/scores/Ilokano to Tagalog/test/test_rouge_il-tl.csv',
)