In [1]:
import glob
import os
import re
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu

In [2]:
# Working directory at execution time; the reference and MT folders are resolved against it.
cwd = os.getcwd()

In [3]:
# Both folders below are expected to sit in the same directory as the notebook.

# Folder with the reference translations.
ref_folder_name = "LibriTranslatedNoCommas"

# Folder with the translations of the Jasper output text.
mt_folder_name = "LibriTranslatedJasper"

# Datasets that get scored.
datasets = [
    "dev-clean",
    "test-clean",
]

In [4]:
def process_line(line):
    """Split one transcript line into its audio id and its text.

    Every character that is neither a word character nor whitespace is
    removed first, and newline characters are dropped. Because this runs on
    the whole line, punctuation inside the id is removed too
    (``'12-34-0000'`` becomes ``'12340000'``).

    Args:
        line: Raw line of the form ``'<audio id> <text>'``.

    Returns:
        Tuple ``(line_audio, line_text)``. ``line_text`` is an empty string
        when nothing follows the id.
    """
    line = re.sub(r'[^\w\s]', '', line)
    line = line.replace('\n', '')
    # partition() always yields three parts, so a line that holds only an id
    # (empty transcript) gives an empty text instead of a ValueError on unpacking.
    line_audio, _, line_text = line.partition(' ')
    return line_audio, line_text

def preprocess_mt_and_refs(dataset_name):
    """Collect tokenized machine translations and their references for one dataset.

    Every ``*.trans_jasper.txt`` file found two directory levels below
    ``<cwd>/<mt_folder_name>/<dataset_name>`` is paired with the
    ``*.trans.txt`` file at the same relative location under
    ``<cwd>/<ref_folder_name>/<dataset_name>``. The two files are read line by
    line, and each pair of lines must carry the same audio id.

    Args:
        dataset_name: Name of the dataset sub-folder, e.g. ``'dev-clean'``.

    Returns:
        Tuple ``(references, mt_predictions)``. ``mt_predictions`` holds one
        token list per line; ``references`` holds, for the same line, a list
        containing the single reference token list.

    Raises:
        AssertionError: If a file pair differs in its number of non-blank
            lines, or if the audio ids of a line pair differ.
        FileNotFoundError: If the reference file for a translation file is missing.
    """
    mt_suffix = '.trans_jasper.txt'
    mt_root = os.path.join(cwd, mt_folder_name, dataset_name)
    ref_root = os.path.join(cwd, ref_folder_name, dataset_name)

    # Escape the root so glob metacharacters in the directory names are taken
    # literally. Without recursive=True each '**' matches one directory level.
    mts = glob.glob(os.path.join(glob.escape(mt_root), '**', '**', '*' + mt_suffix))

    mt_predictions, references = [], []
    for mt_txt in tqdm(mts, position=0, leave=False):
        # Location relative to the dataset root, minus the MT suffix. relpath
        # stays correct even when the dataset name also occurs higher up in cwd.
        rel_stem = os.path.relpath(mt_txt, mt_root)[:-len(mt_suffix)]
        ref_txt = os.path.join(ref_root, rel_stem + '.trans.txt')

        # Explicit encoding: do not depend on the platform default.
        # Blank lines carry no utterance and are skipped.
        with open(mt_txt, 'r', encoding='utf-8') as mt_file:
            mt_lines = [row for row in mt_file if row.strip()]
        with open(ref_txt, 'r', encoding='utf-8') as ref_file:
            ref_lines = [row for row in ref_file if row.strip()]

        # zip() would silently drop the surplus lines of the longer file.
        assert len(mt_lines) == len(ref_lines), (
            f'{mt_txt} has {len(mt_lines)} lines but {ref_txt} has {len(ref_lines)}'
        )

        for mt, ref in zip(mt_lines, ref_lines):
            mt_audio, mt_text = process_line(mt)
            ref_audio, ref_text = process_line(ref)

            assert mt_audio == ref_audio, (
                f'audio id mismatch in {mt_txt}: {mt_audio} != {ref_audio}'
            )
            # split() without an argument collapses runs of whitespace, so the
            # gaps left by removed punctuation do not turn into empty tokens.
            mt_predictions.append(mt_text.split())
            references.append([ref_text.split()])
    return references, mt_predictions

In [5]:
# Corpus-level BLEU of the collected translations against their references,
# reported once per dataset.
for dataset in datasets:
    references, mt_predictions = preprocess_mt_and_refs(dataset)
    if not mt_predictions:
        # Nothing was collected (for instance a misplaced folder), so there is nothing to score.
        print(f"Dataset: {dataset}, no translation files found")
        continue
    score = corpus_bleu(references, mt_predictions)
    print(f"Dataset: {dataset}, BLEU score: {score:.3f}")

                                      

Dataset: dev-clean, BLUE score: 0.849




Dataset: test-clean, BLUE score: 0.851
