# Get data

In [None]:
# Directory the dataset files are read from.
data_dir = "data"

# One initially empty list per dataset name; the keys double as file stems.
data = {name: [] for name in ("sample", "finetuned", "vanilla")}

In [None]:
import pickle
import os

# Replace every placeholder in `data` with the object unpickled from
# "<data_dir>/<dataset>.pickle".
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
for dataset in list(data):
    pickle_path = os.path.join(data_dir, "{}.pickle".format(dataset))
    with open(pickle_path, 'rb') as handle:
        data[dataset] = pickle.load(handle)

In [None]:
# Number of entries in the loaded "finetuned" collection (shown as cell output).
len(data['finetuned'])

## Cosine similarity

In [None]:
# Imported here so the cell also works when the notebook is run top to bottom;
# without it this call raises NameError because nothing earlier imports cosine_sim.
from cosine_similarity import cosine_sim

# Similarity between the first sample record and the first finetuned record.
# Both texts are passed together and entry [0, 1] of the result is read -
# presumably a pairwise similarity matrix; confirm against cosine_similarity.
cosine_sim([data["sample"][0], data["finetuned"][0]])[0,1]

In [None]:
from cosine_similarity import cosine_sim

# For each sample k, look at the ten finetuned records at positions
# k*10 .. k*10+9, keep the highest cosine similarity (never below 0),
# and average those best values over all samples.
avg = 0
for k, rec1 in enumerate(data["sample"]):
    similarities = (
        cosine_sim([rec1, data["finetuned"][k * 10 + offset]])[0, 1]
        for offset in range(10)
    )
    avg += max([0, *similarities])

avg /= len(data["sample"])
print("avg: ", avg)

## Language check

In [None]:
import language_check
# Checker created for 'en-US'; spell checking stays enabled because
# tool.disable_spellchecking() is not called.
tool = language_check.LanguageTool('en-US')

# Check the first finetuned record and drop matches raised by WHITESPACE_RULE.
results = tool.check(data["finetuned"][0])
results_filtered = list(
    filter(lambda match: match.ruleId != 'WHITESPACE_RULE', results)
)

# Last expression of the cell: the remaining matches are displayed.
results_filtered

In [None]:
import language_check
# Checker created for 'en-US'; spell checking stays enabled because
# tool.disable_spellchecking() is not called.
tool = language_check.LanguageTool('en-US')

dataset = "finetuned"

# Total number of matches over all records, ignoring WHITESPACE_RULE.
avg = 0
for rec in data[dataset]:
    avg += sum(
        1 for match in tool.check(rec) if match.ruleId != 'WHITESPACE_RULE'
    )

# Mean number of reported issues per record.
print(avg/len(data[dataset]))

## Readability
* textstat.smog_index(test_data)
* textstat.flesch_kincaid_grade(test_data)
* textstat.coleman_liau_index(test_data)
* textstat.automated_readability_index(test_data)
* textstat.dale_chall_readability_score(test_data)
* textstat.difficult_words(test_data)
* textstat.linsear_write_formula(test_data)
* textstat.gunning_fog(test_data)
* textstat.text_standard(test_data)

In [None]:
import numpy as np
import textstat
from scipy import stats

# Flesch reading-ease score of every record in the chosen dataset,
# summarised as mean, median and mode.
dataset = "finetuned"
ret = [textstat.flesch_reading_ease(rec) for rec in data[dataset]]

print(np.mean(ret), np.median(ret), stats.mode(ret))

In [None]:
import numpy as np
import textstat
from scipy import stats

# SMOG index of every record in the chosen dataset,
# summarised as mean, median and mode.
dataset = "sample"
ret = [textstat.smog_index(rec) for rec in data[dataset]]

print(np.mean(ret), np.median(ret), stats.mode(ret))

In [None]:
import numpy as np
import textstat
from scipy import stats

# Gunning fog score of every record in the chosen dataset,
# summarised as mean, median and mode.
dataset = "finetuned"
ret = [textstat.gunning_fog(rec) for rec in data[dataset]]

print(np.mean(ret), np.median(ret), stats.mode(ret))

In [None]:
import numpy as np
import textstat
from scipy import stats

# Dale-Chall readability score of every record in the chosen dataset,
# summarised as mean, median and mode.
dataset = "sample"
ret = [textstat.dale_chall_readability_score(rec) for rec in data[dataset]]

print(np.mean(ret), np.median(ret), stats.mode(ret))

## Translation

In [None]:
import nltk
import nltk.translate.bleu_score as bleu
from nltk.translate.bleu_score import SmoothingFunction

import nltk.translate.gleu_score as gleu
import nltk.translate.meteor_score as meteor

def wer_count(hyp, ref, print_matrix=False):
    """Return the edit (Levenshtein) distance between ``hyp`` and ``ref``.

    The distance is the minimum number of single-element deletions,
    insertions and substitutions that turn ``hyp`` into ``ref``. Elements are
    compared with ``!=``, so the inputs may be strings (character level) or
    sequences of tokens (word level).

    Args:
        hyp: Hypothesis sequence.
        ref: Reference sequence.
        print_matrix: When true, print the dynamic-programming table row by
            row before returning.

    Returns:
        The edit distance as an ``int``. Empty inputs are supported: the
        distance to an empty sequence is the length of the other one.
    """
    n_hyp = len(hyp)
    n_ref = len(ref)

    # dist[i][j] is the distance between hyp[:i] and ref[:j]. The table needs
    # one extra row and column for the empty prefixes; without them the first
    # element of each sequence would never be compared.
    dist = [[0] * (n_ref + 1) for _ in range(n_hyp + 1)]
    for i in range(n_hyp + 1):
        dist[i][0] = i
    for j in range(n_ref + 1):
        dist[0][j] = j

    for i in range(1, n_hyp + 1):
        for j in range(1, n_ref + 1):
            deletion = dist[i - 1][j] + 1
            insertion = dist[i][j - 1] + 1
            sub = 1 if hyp[i - 1] != ref[j - 1] else 0
            substitution = dist[i - 1][j - 1] + sub
            dist[i][j] = min(deletion, insertion, substitution)

    if print_matrix:
        for row in dist:
            print(row)

    return dist[n_hyp][n_ref]

def bleu_score(recipe, refer):
    """Return the sentence-level BLEU of ``recipe`` against ``refer``.

    NLTK's ``sentence_bleu`` operates on token sequences. A raw string would
    be iterated character by character, so string inputs are split on
    whitespace first; inputs that are already tokenized are passed unchanged.

    Args:
        recipe: Hypothesis, either a string or a sequence of tokens.
        refer: References - a single string, or an iterable whose items are
            strings or token sequences.

    Returns:
        The BLEU score computed with ``SmoothingFunction().method4``.
    """
    # A lone string is treated as one reference rather than one per character.
    if isinstance(refer, str):
        refer = [refer]

    hyp = recipe.split() if isinstance(recipe, str) else recipe
    refs = [r.split() if isinstance(r, str) else r for r in refer]

    smoothie = SmoothingFunction().method4
    return bleu.sentence_bleu(refs, hyp, smoothing_function=smoothie)

def gleu_score(recipe, refer):
    """Return the sentence-level GLEU of ``recipe`` against ``refer``.

    NLTK's ``sentence_gleu`` operates on token sequences. A raw string would
    be iterated character by character, so string inputs are split on
    whitespace first; inputs that are already tokenized are passed unchanged.

    Args:
        recipe: Hypothesis, either a string or a sequence of tokens.
        refer: References - a single string, or an iterable whose items are
            strings or token sequences.

    Returns:
        The GLEU score reported by ``gleu.sentence_gleu``.
    """
    # A lone string is treated as one reference rather than one per character.
    if isinstance(refer, str):
        refer = [refer]

    hyp = recipe.split() if isinstance(recipe, str) else recipe
    refs = [r.split() if isinstance(r, str) else r for r in refer]

    return gleu.sentence_gleu(refs, hyp)

from jiwer import wer, mer

def wer_score(recipe, refer):
    """Return the lowest word error rate of ``recipe`` over ``refer``.

    Every item of ``refer`` is scored with ``wer(item, recipe)`` and the
    smallest value is returned. The minimum is seeded with 99999, which is
    therefore the result when ``refer`` is empty.
    """
    scores = (wer(candidate, recipe) for candidate in refer)
    return min([99999, *scores])

In [None]:
from tqdm import tqdm

# BLEU for each sample: sample k is passed as the hypothesis and the
# finetuned records in the slice [k*10, k*10+10) as its references.
ret = [
    bleu_score(rec1, data["finetuned"][k * 10:k * 10 + 10])
    for k, rec1 in enumerate(tqdm(data["sample"]))
]

# Last expression of the cell: the mean score is displayed.
np.mean(ret)

In [None]:
from tqdm import tqdm

# GLEU for each sample: sample k is passed as the hypothesis and the
# vanilla records in the slice [k*10, k*10+10) as its references.
ret = [
    gleu_score(rec1, data["vanilla"][k * 10:k * 10 + 10])
    for k, rec1 in enumerate(tqdm(data["sample"]))
]

# Last expression of the cell: the mean score is displayed.
np.mean(ret)

In [None]:
from tqdm import tqdm

# WER for each sample: sample k is scored against the vanilla records in the
# slice [k*10, k*10+10), and wer_score keeps the lowest value.
ret = [
    wer_score(rec1, data["vanilla"][k * 10:k * 10 + 10])
    for k, rec1 in enumerate(tqdm(data["sample"]))
]

# Last expression of the cell: the mean score is displayed.
np.mean(ret)