# ILOKANO TO TAGALOG


# In [1]:

import json
import csv
import pandas as pd
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu, modified_precision
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu
from functools import reduce
import math


def calculate_brevity_penalty(reference, candidate):
    """Return the BLEU brevity penalty for one reference/candidate pair.

    Implements the standard BLEU definition: a candidate that is at least
    as long as the reference is not penalized (1.0), while a shorter
    candidate is scaled by ``exp(1 - r / c)``, where ``r`` and ``c`` are the
    reference and candidate lengths. An empty candidate scores 0.0.

    Args:
        reference: Sized collection of reference tokens.
        candidate: Sized collection of candidate (system output) tokens.

    Returns:
        A float in the range [0.0, 1.0].
    """
    r = len(reference)
    c = len(candidate)
    if c == 0:
        # Nothing was produced; also avoids dividing by zero below.
        return 0.0
    if c >= r:
        # Over-long output is already punished by n-gram precision, so the
        # brevity penalty must not reduce the score a second time.
        return 1.0
    return math.exp(1 - r / c)

def brevity_penalty_per_sentence(data):
    """Compute one brevity penalty per example.

    Args:
        data: Iterable of mappings, each providing the strings
            ``'Target Output'`` (reference) and ``'System Output'``
            (candidate). Both are tokenized on whitespace.

    Returns:
        A list with the result of ``calculate_brevity_penalty`` for every
        example, in input order.
    """
    return [
        calculate_brevity_penalty(
            entry['Target Output'].split(),
            entry['System Output'].split(),
        )
        for entry in data
    ]

def sentence_geometric_avg_precision(data):
    """Compute the geometric mean of the smoothed n-gram precisions per example.

    ``sentence_bleu`` (default uniform 1- to 4-gram weights) returns NLTK's
    brevity penalty multiplied by the geometric mean of the modified n-gram
    precisions. To report the precision term on its own, NLTK's brevity
    penalty is divided back out of the score. No additional root is taken,
    because the score already contains the geometric mean.

    Args:
        data: Iterable of mappings, each providing the strings
            ``'Target Output'`` (reference) and ``'System Output'``
            (candidate). Both are tokenized on whitespace.

    Returns:
        A list of floats, one per example, in input order. An example with
        an empty candidate yields 0.0.
    """
    # Local import keeps this block self-contained; the file already imports
    # other names from this same NLTK module.
    from nltk.translate.bleu_score import brevity_penalty

    # The smoothing object holds only configuration, so one instance can
    # serve every sentence.
    smoothing = SmoothingFunction()
    avg_precisions = []
    for example in data:
        reference = example['Target Output'].split()
        candidate = example['System Output'].split()
        if not candidate:
            # No tokens means no n-gram matches; skip NLTK entirely.
            avg_precisions.append(0.0)
            continue
        score = sentence_bleu([reference], candidate, smoothing_function=smoothing.method4)
        # With a single reference the "closest reference length" is simply
        # the length of that reference.
        nltk_penalty = brevity_penalty(len(reference), len(candidate))
        avg_precisions.append(score / nltk_penalty if nltk_penalty > 0 else 0.0)
    return avg_precisions


def bleu_score_per_sentence(data, penalties, avg_precisions):
    """Combine per-sentence penalties and precisions into BLEU scores.

    Args:
        data: Iterable of examples; only its length drives the iteration.
        penalties: Indexable sequence of brevity penalties.
        avg_precisions: Indexable sequence of average precisions.

    Returns:
        A list where element ``i`` is ``penalties[i] * avg_precisions[i]``,
        with one element per item in ``data``.
    """
    return [
        penalties[idx] * avg_precisions[idx]
        for idx, _ in enumerate(data)
    ]

def average_bleu(bleu_scores):
    """Return the arithmetic mean of the given BLEU scores.

    Args:
        bleu_scores: Sized collection of numeric scores.

    Returns:
        The sum of the scores divided by their count.

    Raises:
        ZeroDivisionError: If ``bleu_scores`` is empty.
    """
    total = sum(bleu_scores)
    count = len(bleu_scores)
    return total / count

# Load the test examples. The encoding is given explicitly so that reading
# the translated text does not depend on the platform's default code page.
with open('../src/json data/Ilokano to Tagalog/Hybrid Translator/dict_il-tl_test.json', "r", encoding='utf-8') as f:
    data = json.load(f)

# Per-sentence components, their product, and the corpus-level mean.
penalties = brevity_penalty_per_sentence(data)
avg_precisions = sentence_geometric_avg_precision(data)
bleu_scores = bleu_score_per_sentence(data, penalties, avg_precisions)
average_bleu_score = average_bleu(bleu_scores)

# Write one row per example, followed by a two-row footer with the average.
# newline='' lets the csv module control line endings itself.
with open('../src/scores/Ilokano to Tagalog/test/new_test_bleu_il-tl.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Source Text", "Brevity Penalty", "Geometric ave precision", "BLEU Score"])
    for example, penalty, avg_precision, bleu_score in zip(data, penalties, avg_precisions, bleu_scores):
        writer.writerow([example['Source Text'], penalty, avg_precision, bleu_score])
    writer.writerow(['Average BLEU Score'])
    writer.writerow([average_bleu_score])
       

    
    

