In [None]:
import pandas as pd
import numpy as np

import pyarrow.parquet as pa
from nltk.translate.bleu_score import sentence_bleu
import tensorflow as tf
import tensorflow_text

In [None]:
# Read the English-Spanish parallel corpus and keep only rows
# 350000..352999, the slice used for BLEU evaluation further down.
table = pa.read_table('./data/to_spanish.parquet')
df_spanish = table.to_pandas().iloc[350000:353000]
len(df_spanish)

In [None]:
# Read the English-Swedish parallel corpus and keep only rows
# 350000..352999, the slice used for BLEU evaluation further down.
table = pa.read_table('./data/to_swedish.parquet')
df_swedish = table.to_pandas().iloc[350000:353000]
len(df_swedish)

In [None]:
# Split each 'translation' record into per-language sentence arrays.
# Naming: `en_*_ds` holds the English side, `sp_en_ds` / `sv_en_ds` hold the
# Spanish / Swedish side of the same rows.
en_es_ds, sp_en_ds = (
    np.array([pair[lang] for pair in df_spanish['translation']])
    for lang in ('en', 'es')
)
en_sv_ds, sv_en_ds = (
    np.array([pair[lang] for pair in df_swedish['translation']])
    for lang in ('en', 'sv')
)

In [None]:
def bleu_score_base(source, target, model):
  """Return the mean unigram BLEU score of ``model`` over paired sentences.

  Each source sentence is translated on its own via ``model.translate([src])``.
  The first element of the result is converted with ``.numpy().decode()`` and
  split on single spaces. Empty strings, ``'[UNK]'`` and lone apostrophes are
  removed from the hypothesis before scoring.

  Args:
    source: iterable of source-language sentences (strings).
    target: iterable of reference translations, paired with ``source`` by
      position (``zip`` stops at the shorter of the two).
    model: object exposing ``translate(list_of_str)``.

  Returns:
    ``np.mean`` of the per-sentence scores.
  """
  ignored_tokens = ("", "[UNK]", "'")
  scores = []
  for src, label in zip(source, target):
    output = model.translate([src])
    sentence = output[0].numpy().decode()
    hypothesis = [word for word in sentence.split(" ") if word not in ignored_tokens]

    # Empty strings produced by repeated spaces are not real tokens; drop them
    # so they do not inflate the reference length.
    reference = [word for word in label.split(" ") if word]

    # sentence_bleu expects a *list of references*, each a list of tokens.
    # Passing the bare token list makes every word a separate "reference"
    # that is compared character by character.
    scores.append(
        sentence_bleu(references=[reference], hypothesis=hypothesis, weights=[1, 0, 0, 0])
    )

  return np.mean(scores)

In [None]:
def bleu_score_tflite(source, target, interpreter):
  """Return the mean unigram BLEU score of a TFLite interpreter over paired sentences.

  Tensors are allocated once. Then, for every source sentence, the sentence is
  written to the first input tensor as ``np.array([src])``, the interpreter is
  invoked, and the first element of the first output tensor is decoded as
  UTF-8. The decoded text is split on single spaces; empty strings,
  ``'[UNK]'`` and lone apostrophes are removed from the hypothesis before
  scoring.

  Args:
    source: iterable of source-language sentences (strings).
    target: iterable of reference translations, paired with ``source`` by
      position (``zip`` stops at the shorter of the two).
    interpreter: object exposing ``allocate_tensors``, ``get_input_details``,
      ``get_output_details``, ``set_tensor``, ``invoke`` and ``get_tensor``.

  Returns:
    ``np.mean`` of the per-sentence scores.
  """
  interpreter.allocate_tensors()
  # The tensor indices do not change between invocations; look them up once.
  input_index = interpreter.get_input_details()[0]['index']
  output_index = interpreter.get_output_details()[0]['index']

  ignored_tokens = ("", "[UNK]", "'")
  scores = []
  for src, label in zip(source, target):
    interpreter.set_tensor(input_index, np.array([src]))
    interpreter.invoke()

    output = interpreter.get_tensor(output_index)
    sentence = output[0].decode('utf-8')
    hypothesis = [word for word in sentence.split(" ") if word not in ignored_tokens]

    # Empty strings produced by repeated spaces are not real tokens; drop them
    # so they do not inflate the reference length.
    reference = [word for word in label.split(" ") if word]

    # sentence_bleu expects a *list of references*, each a list of tokens.
    # Passing the bare token list makes every word a separate "reference"
    # that is compared character by character.
    scores.append(
        sentence_bleu(references=[reference], hypothesis=hypothesis, weights=[1, 0, 0, 0])
    )

  return np.mean(scores)

In [None]:
# English -> Spanish, SavedModel: English sentences in, Spanish references.
en_es_model = tf.saved_model.load('./models/en_es_translation')
bleu_score_base(source=en_es_ds, target=sp_en_ds, model=en_es_model)

In [None]:
# Spanish -> English, SavedModel: Spanish sentences in, English references.
es_en_model = tf.saved_model.load('./models/es_en_translation')
bleu_score_base(source=sp_en_ds, target=en_es_ds, model=es_en_model)

In [None]:
# English -> Spanish, TFLite: English sentences in, Spanish references.
en_es_tflite = tf.lite.Interpreter(model_path='tflite_models/en_es_translation.tflite')
bleu_score_tflite(source=en_es_ds, target=sp_en_ds, interpreter=en_es_tflite)

In [None]:
# Spanish -> English, TFLite: Spanish sentences in, English references.
es_en_tflite = tf.lite.Interpreter(model_path='tflite_models/es_en_translation.tflite')
bleu_score_tflite(source=sp_en_ds, target=en_es_ds, interpreter=es_en_tflite)

In [None]:
# English -> Swedish, SavedModel: English sentences in, Swedish references.
en_sv_model = tf.saved_model.load('./models/en_sv_translation')
bleu_score_base(source=en_sv_ds, target=sv_en_ds, model=en_sv_model)

In [None]:
# Swedish -> English, SavedModel: Swedish sentences in, English references.
sv_en_model = tf.saved_model.load('./models/sv_en_translation')
bleu_score_base(source=sv_en_ds, target=en_sv_ds, model=sv_en_model)

In [None]:
# English -> Swedish, TFLite: English sentences in, Swedish references.
en_sv_tflite = tf.lite.Interpreter(model_path='tflite_models/en_sv_translation.tflite')
bleu_score_tflite(source=en_sv_ds, target=sv_en_ds, interpreter=en_sv_tflite)

In [None]:
# Swedish -> English, TFLite: Swedish sentences in, English references.
# The interpreter gets its own name so it does not overwrite `en_sv_tflite`,
# and it is the TFLite interpreter (not the SavedModel `sv_en_model`) that is
# handed to the TFLite scoring function.
sv_en_tflite = tf.lite.Interpreter(model_path='tflite_models/sv_en_translation.tflite')
bleu_score_tflite(sv_en_ds, en_sv_ds, sv_en_tflite)