# Description

In this section, we evaluate the previously built English-to-Vietnamese translation model on the IWSLT-15 dataset using the BLEU score.

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured identically on every visible GPU,
        # so enable it for all of them rather than only the first one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set before the GPUs are initialized; report and continue.
        print(e)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import regex as re
from sklearn.model_selection import train_test_split
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset
import tensorflow_text as tf_text

from read_file_utils import *
from model_utils import *


2024-09-09 17:33:54.537523: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-09 17:33:54.537546: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-09 17:33:54.538064: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  from pandas.core import (


In [2]:
# English source sentences and their Vietnamese reference translations (test files).
PATH_FILE_TEST_EN = "data/processed_data/en_sent_test.txt"
PATH_FILE_TEST_VI = "data/processed_data/vi_sent_test.txt"

# Location of the exported translator, loaded later with tf.saved_model.load.
PATH_MODEL_TRANSLATOR = "translator"

# 1. Load dataset

In [12]:
# Optional cap on the number of sentence pairs to evaluate; None keeps the whole corpus.
# Set a small value (e.g. 200) for a quick smoke run instead of editing the code below.
MAX_EVAL_SENTENCES = None

list_en_sentence = read_text_file(PATH_FILE_TEST_EN)
list_vi_sentence = read_text_file(PATH_FILE_TEST_VI)

# Check alignment on the full files first, so truncation can never hide a mismatch.
assert len(list_en_sentence) == len(list_vi_sentence), (
    f"Mismatched corpus sizes: {len(list_en_sentence)} English vs "
    f"{len(list_vi_sentence)} Vietnamese sentences"
)

if MAX_EVAL_SENTENCES is not None:
    list_en_sentence = list_en_sentence[:MAX_EVAL_SENTENCES]
    list_vi_sentence = list_vi_sentence[:MAX_EVAL_SENTENCES]

print(f"Number of sentence: {len(list_en_sentence)}")

Number of sentence: 602183


In [13]:
# Show one randomly chosen sentence pair; both lists are indexed with the same position.
idx = np.random.randint(len(list_en_sentence))
en_sentence, vi_sentence = list_en_sentence[idx], list_vi_sentence[idx]

print(f"English: {en_sentence}")
print(f"Vietname: {vi_sentence}")

English: passing through , we could see the beauty of the green grass , the grapevines , and the many coloured flowers of nisan just blossoming .
Vietname: trên đường đi tôi thấy vẻ đẹp của cỏ xanh , những ruộng nho và những đoá hoa xuân muôn màu đang khoe sắc .


# 2. Load model

In [6]:
# Restore the exported translator; it is called directly on a string tensor further below.
translator = tf.saved_model.load(export_dir=PATH_MODEL_TRANSLATOR)

In [7]:
def calculate_bleu_score(list_true_sentence, pred_sentence):
    """Compute a smoothed sentence-level BLEU score for one prediction.

    Args:
        list_true_sentence: Sequence of reference translations. Each item is
            either a raw string (split on whitespace) or an already tokenized
            sequence, which is passed through unchanged.
        pred_sentence: Candidate translation, either a raw string (split on
            whitespace) or an already tokenized sequence.

    Returns:
        The value returned by NLTK's ``sentence_bleu`` using ``method1`` smoothing.
    """
    # Tokenize into fresh objects so the caller's reference sequence is left untouched
    # (this also allows immutable inputs such as tuples).
    hypothesis = pred_sentence.split() if isinstance(pred_sentence, str) else pred_sentence
    references = [
        reference.split() if isinstance(reference, str) else reference
        for reference in list_true_sentence
    ]

    smoothie = SmoothingFunction().method1  # Smoothing function for cases with no n-gram overlaps
    return sentence_bleu(references, hypothesis, smoothing_function=smoothie)

In [26]:
# Translate one randomly chosen, lower-cased sentence pair and score it against its reference.
idx = np.random.randint(len(list_en_sentence))

en_sentence = list_en_sentence[idx].lower()
vi_sentence = list_vi_sentence[idx].lower()

print(f"English: {en_sentence}")
print(f"Vietname: {vi_sentence}")

print("-"*100)

# The translator returns (text, tokens, attention weights); the text is decoded from UTF-8 bytes.
translated_text, translated_tokens, attention_weights = translator(tf.constant(en_sentence))
translated_text = translated_text.numpy().decode('utf-8')
print(f"Translated text: {translated_text}")

# A single reference is available per sentence, so it is wrapped in a one-element list.
score = calculate_bleu_score([vi_sentence], translated_text)
print("BLEU score:", score)

English: younger people are at a higher risk of contracting chlamydia .
Vietname: người trẻ tuổi có nguy cơ nhiễm chlamydia cao hơn .
----------------------------------------------------------------------------------------------------
Translated text: những người trẻ tuổi có nguy cơ mắc chlamydia cao hơn .
BLEU score: 0.6340466277046861


# 3. Evaluation

We compute a sentence-level BLEU score for every sentence pair in the corpus and report the average over all sentences.

In [27]:
# Sentence-level BLEU score of every pair, in corpus order.
list_bleu_scores = []

for idx, (en_sentence, vi_sentence) in enumerate(zip(list_en_sentence, list_vi_sentence)):
    # Lightweight progress indicator: one line per 1,000 sentences.
    if idx % 1_000 == 0:
        print(f"idx = {idx}")

    # Lower-case both sides, as the single-sentence check does, so that the
    # corpus-level number is computed under the same normalization.
    en_sentence = en_sentence.lower()
    vi_sentence = vi_sentence.lower()

    translated_text, translated_tokens, attention_weights = translator(tf.constant(en_sentence))
    translated_text = translated_text.numpy().decode('utf-8')
    score = calculate_bleu_score([vi_sentence], translated_text)
    list_bleu_scores.append(score)

# An empty corpus has no defined average; report NaN instead of dividing by zero.
if list_bleu_scores:
    average_bleu_score = sum(list_bleu_scores) / len(list_bleu_scores)
else:
    average_bleu_score = float("nan")

print(f"Average BLEU score: {average_bleu_score}")

idx = 0
idx = 1000
idx = 2000
idx = 3000
idx = 4000
idx = 5000
idx = 6000
idx = 7000
idx = 8000
idx = 9000
idx = 10000
idx = 11000
idx = 12000
idx = 13000
idx = 14000
idx = 15000
idx = 16000
idx = 17000
idx = 18000
idx = 19000
idx = 20000
idx = 21000
idx = 22000
idx = 23000
idx = 24000
idx = 25000
idx = 26000
idx = 27000
idx = 28000
idx = 29000
idx = 30000
idx = 31000
idx = 32000
idx = 33000
idx = 34000
idx = 35000
idx = 36000
idx = 37000
idx = 38000
idx = 39000
idx = 40000
idx = 41000
idx = 42000
idx = 43000
idx = 44000
idx = 45000
idx = 46000
idx = 47000
idx = 48000
idx = 49000
idx = 50000
idx = 51000
idx = 52000
idx = 53000
idx = 54000
idx = 55000
idx = 56000
idx = 57000
idx = 58000
idx = 59000
idx = 60000
idx = 61000
idx = 62000
idx = 63000
idx = 64000
idx = 65000
idx = 66000
idx = 67000
idx = 68000
idx = 69000
idx = 70000
idx = 71000
idx = 72000
idx = 73000
idx = 74000
idx = 75000
idx = 76000
idx = 77000
idx = 78000
idx = 79000
idx = 80000
idx = 81000
idx = 82000
idx = 83000
idx =