# Testing the Translation Model

In [8]:
import sys
import os

parent_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(parent_directory)

In [9]:
import nltk
import TM.tm as tm
import utils as utils

## Preparing the Data

### Load the Training Data

In [10]:
f_corpus, e_corpus = utils.load_data("../data/small.json", 100)
print('f_corpus len', len(f_corpus))
print('e_corpus len', len(e_corpus))
print('f_corpus', f_corpus)
print('e_corpus', e_corpus)

f_corpus len 100
e_corpus len 100
f_corpus ['the date is not a fetish', 'the results speak for themselves', 'it should mean that everyone is given an equal opportunity', 'europe has done a very great deal of work in this field which is to be welcomed', 'we simply have to build it stage by stage', 'it is an issue of proportionality', 'we are talking about rights plurality and freedom', 'that attempt did not succeed but it does represent a precedent for that kind of gathering', 'this green paper is important seeing as it concerns a matter that needs to be dealt with', 'mr audy asked the following question are your samples of sufficient size', 'it is up to you to consider and choose the outcome you want to see', 'what can we expect from china', 'we must pay by giving financial support to developing countries', 'i will however refer your request to the conference of presidents which will meet later this afternoon', 'it says that the act also complies with international standards and pursue

## The Translation Models

### IBM Model1

In [11]:
ibm1 = tm.IBMModel1(f_corpus, e_corpus)
ibm1.preprocess()
ibm1.train()
ibm1.print_ds()

f_corpus:  [['the', 'date', 'is', 'not', 'a', 'fetish'], ['the', 'results', 'speak', 'for', 'themselves'], ['it', 'should', 'mean', 'that', 'everyone', 'is', 'given', 'an', 'equal', 'opportunity'], ['europe', 'has', 'done', 'a', 'very', 'great', 'deal', 'of', 'work', 'in', 'this', 'field', 'which', 'is', 'to', 'be', 'welcomed'], ['we', 'simply', 'have', 'to', 'build', 'it', 'stage', 'by', 'stage'], ['it', 'is', 'an', 'issue', 'of', 'proportionality'], ['we', 'are', 'talking', 'about', 'rights', 'plurality', 'and', 'freedom'], ['that', 'attempt', 'did', 'not', 'succeed', 'but', 'it', 'does', 'represent', 'a', 'precedent', 'for', 'that', 'kind', 'of', 'gathering'], ['this', 'green', 'paper', 'is', 'important', 'seeing', 'as', 'it', 'concerns', 'a', 'matter', 'that', 'needs', 'to', 'be', 'dealt', 'with'], ['mr', 'audy', 'asked', 'the', 'following', 'question', 'are', 'your', 'samples', 'of', 'sufficient', 'size'], ['it', 'is', 'up', 'to', 'you', 'to', 'consider', 'and', 'choose', 'the', '

#### Test the Model

In [12]:
f_sent = "this is yousef"
f_sent_tokens = nltk.word_tokenize(f_sent.lower())

e_sent_tokens = []
for f_sent_token in f_sent_tokens:
    translation_probs = ibm1.translation_table[f_sent_token]
    if translation_probs:
        best_e_sent_token = max(translation_probs, key=translation_probs.get)
        e_sent_tokens.append(best_e_sent_token)
    else:
        e_sent_tokens.append(f_sent_token)

print('e_sent_tokens before removing the None tokens', e_sent_tokens)
e_sent_tokens = [token for token in e_sent_tokens if token is not None]
print('e_sent_tokens after removing the None tokens', e_sent_tokens)
e_sent_sent = ' '.join(e_sent_tokens)
print(e_sent_sent)

e_sent_tokens before removing the None tokens ['this', 'be', 'yousef']
e_sent_tokens after removing the None tokens ['this', 'be', 'yousef']
this be yousef


## Saving the Models

In [15]:
# save the language models
utils.save_model(ibm1, "models/ibm1_model.pkl")