# Testing the Decoder

In [1]:
import sys
import os

# Resolve the parent of the current working directory and make it importable,
# so that packages located one level above the notebook can be imported.
parent_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Guard the append: re-running this cell must not pile up duplicate entries
# in sys.path.
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

In [2]:
import LM.lm as lm
import TM.tm as tm
import Decoder.decoder as decoder
import utils as utils

## Preparing the Data

### Load the Training Data

In [3]:
# Direction flag handed to utils.load_data together with the JSON corpus path.
# With 'forward', the printed output shows English sentences in f_corpus and
# their French counterparts in e_corpus.
direction = 'forward'
f_corpus, e_corpus = utils.load_data('../data/data.json', direction)
# Show both corpora, one per line.
print(f_corpus, e_corpus, sep='\n')

['the girl is in france', 'paris is a city in france', 'the girl is beautiful', 'paris is a beautiful city', 'the girl is in church', 'a church is in paris', 'the church is beautiful', 'france has a beautiful church']
['la fille est en france', 'paris est une ville en france', 'la fille est belle', 'paris est une belle ville', 'la fille est a la eglise', 'une eglise est en paris', 'la eglise est belle', 'la france a une belle eglise']


## The Translation Models

### IBM Model 1

In [4]:
# Build the IBM Model 1 translation model from the two corpora and fit it.
ibm1 = tm.IBMModel1(f_corpus, e_corpus)
ibm1.preprocess()
# NOTE(review): the two positional arguments look like an iteration cap and a
# convergence tolerance -- confirm against TM.tm.IBMModel1.train.
ibm1.train(10 ** 8, 1e-7)

# Dump both learned structures in full.
print('ibm1.translation_table: ', ibm1.translation_table)
print('ibm1.translation_tuple: ', ibm1.translation_tuple)

# Optional, more readable listing (left disabled):
# for f_e_pair, prob in sorted(ibm1.translation_tuple.items()):
#     print(f_e_pair, prob)

ibm1.translation_table:  defaultdict(<function IBMModel1.train.<locals>.<lambda> at 0x0000025BC3651C60>, {'the': defaultdict(<function IBMModel1.train.<locals>.<lambda>.<locals>.<lambda> at 0x0000025BC3653EB0>, {'la': 1.0, 'fille': 7.514982547997355e-130, 'est': 0.0, 'en': 0.0, 'france': 0.0, 'belle': 0.0, 'a': 1.0848840330713814e-130, 'eglise': 1e-323, 'paris': 0.0, 'ville': 0.0, 'une': 0.0}), 'girl': defaultdict(<function IBMModel1.train.<locals>.<lambda>.<locals>.<lambda> at 0x0000025BE38717E0>, {'la': 9.146385095349588e-131, 'fille': 0.8738487367397165, 'est': 0.0, 'en': 0.0, 'france': 0.0, 'belle': 0.0, 'a': 0.1261512632602836, 'eglise': 0.0, 'paris': 0.0, 'ville': 0.0, 'une': 0.0}), 'is': defaultdict(<function IBMModel1.train.<locals>.<lambda>.<locals>.<lambda> at 0x0000025BE3871750>, {'la': 1.5e-323, 'fille': 1e-323, 'est': 1.0, 'en': 1e-323, 'france': 0.0, 'paris': 1e-323, 'une': 1e-323, 'ville': 5e-324, 'belle': 1e-323, 'a': 1e-323, 'eglise': 5.909669534380058e-253}), 'in': de

## The Language Models

In [5]:
# Unigram language model built from e_corpus (the French sentences in the
# corpus printed earlier in this notebook).
unigram_lm = lm.Unigram(e_corpus)
# NOTE(review): preprocess() is called before train(); presumably that order
# is required -- confirm against LM.lm.
unigram_lm.preprocess()
unigram_lm.train()

In [6]:
# Bigram language model built from e_corpus (the French sentences in the
# corpus printed earlier in this notebook).
bigram_lm = lm.Bigram(e_corpus)
# NOTE(review): preprocess() is called before train(); presumably that order
# is required -- confirm against LM.lm.
bigram_lm.preprocess()
bigram_lm.train()

In [7]:
# Trigram language model built from e_corpus (the French sentences in the
# corpus printed earlier in this notebook).
trigram_lm = lm.Trigram(e_corpus)
# NOTE(review): preprocess() is called before train(); presumably that order
# is required -- confirm against LM.lm.
trigram_lm.preprocess()
trigram_lm.train()

## The Decoder (Translation)

In [8]:
# Decoder constructed from the trained IBM Model 1 alone (no language model
# argument is passed).
translator = decoder.Decoder(ibm1)
# Bare last expression: the returned translation is shown as the cell output.
translator.translate("the girl is in france")

'la fille est en france'

### With Unigram

In [9]:
# Decoder constructed from the trained IBM Model 1 plus the unigram language
# model; rebinds `translator`.
translator = decoder.Decoder(ibm1, unigram_lm)
# Bare last expression: the returned translation is shown as the cell output.
translator.translate("the girl is in france")

'la fille est en france'

### With Bigram

In [10]:
# Decoder constructed from the trained IBM Model 1 plus the bigram language
# model; rebinds `translator`.
translator = decoder.Decoder(ibm1, bigram_lm)
# Bare last expression: the returned translation is shown as the cell output.
translator.translate("the girl is in france")

'la fille est en france'

### With Trigram

In [11]:
# Decoder constructed from the trained IBM Model 1 plus the trigram language
# model; rebinds `translator`.
translator = decoder.Decoder(ibm1, trigram_lm)
# Bare last expression: the returned translation is shown as the cell output.
translator.translate("the girl is in france")

'la fille est en france'