# Load Model From Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Initialize Paths of Models and Parameters

In [2]:
path= "/content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/"

model_list=["2908","1000"]

# Install and Import Modules

In [3]:
import tensorflow as tf
from tensorflow import keras
from keras.utils.vis_utils import plot_model
import pickle
from tensorflow.keras.layers import Input, Embedding,Dense,  LSTM
from tensorflow.keras import layers , activations , models , preprocessing , utils
from tensorflow.keras.models import load_model
import numpy as np
from nltk.translate.bleu_score import sentence_bleu
import pandas as pd

!pip install -q "tensorflow-text==2.8.*"
import tensorflow as tf
import tensorflow_text as text

!pip install fasttext
import fasttext.util

[K     |████████████████████████████████| 4.9 MB 5.1 MB/s 
[?25hLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fasttext
  Downloading fasttext-0.9.2.tar.gz (68 kB)
[K     |████████████████████████████████| 68 kB 3.1 MB/s 
[?25hCollecting pybind11>=2.2
  Using cached pybind11-2.10.0-py3-none-any.whl (213 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (setup.py) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.2-cp37-cp37m-linux_x86_64.whl size=3165333 sha256=2a6feb8d43874fcdc7d52d4c6682950a5fbcd14148647820d6977a531559e16a
  Stored in directory: /root/.cache/pip/wheels/4e/ca/bf/b020d2be95f7641801a6597a29c8f4f19e38f9c02a345bab9b
Successfully built fasttext
Installing collected packages: pybind11, fasttext
Successfully installed fasttext-0.9.2 pybind11-2.10.0


# Download Sentences for Test

In [4]:
!wget http://www.manythings.org/anki/hin-eng.zip -O hin-eng.zip
!unzip hin-eng.zip

--2022-10-09 14:41:43--  http://www.manythings.org/anki/hin-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 131711 (129K) [application/zip]
Saving to: ‘hin-eng.zip’


2022-10-09 14:41:44 (2.20 MB/s) - ‘hin-eng.zip’ saved [131711/131711]

Archive:  hin-eng.zip
  inflating: hin.txt                 
  inflating: _about.txt              


# Def. Get Summary Statistics for every model 

In [5]:
def get_model_statistics_summary(model_path,path_encoder_parameters,path_encoder_dictionary,path_decoder_parameters,path_decoder_dictionary, encoderPath, decoderPath ):

 
    reconstructed_model = keras.models.load_model(model_path)
    plot_model(reconstructed_model, to_file='modelsummary.png', show_shapes=True, show_layer_names=True)
    reconstructed_model.summary()


    ## Load Dictionaries and Parameters 
    path_encoder_parameters= path_encoder_parameters
    path_encoder_dictionary= path_encoder_dictionary
    path_decoder_parameters= path_decoder_parameters
    path_decoder_dictionary= path_decoder_dictionary

    # loading
    with open(path_encoder_parameters, 'rb') as handle:
        encoder_parameters = pickle.load(handle)

    # loading
    with open(path_encoder_dictionary, 'rb') as handle:
        encoder_dictionary = pickle.load(handle)

    # loading
    with open(path_decoder_parameters, 'rb') as handle:
        decoder_parameters= pickle.load(handle)

    # loading
    with open(path_decoder_dictionary, 'rb') as handle:
        decoder_dictionary = pickle.load(handle)    

    print(encoder_parameters)
    # encoder_dictionary
    print(decoder_parameters)
    # decoder_dictionary

    encoder_inputs = reconstructed_model.input[0]  # input_1
    encoder_outputs, state_h_enc, state_c_enc = reconstructed_model.layers[4].output  # lstm_1
    encoder_states = [state_h_enc, state_c_enc]
    encoder_model = keras.Model(encoder_inputs, encoder_states)
    latent_dim = 256  # Note: may be need to save in drive as well


    num_decoder_tokens =decoder_parameters['num_decoder_tokens']
    max_output_length= decoder_parameters['max_decoder_seq_length']
    max_input_length= encoder_parameters['max_encoder_seq_length']

    encoder_word_dict=encoder_dictionary
    decoder_word_dict= decoder_dictionary


    decoder_inputs = Input(shape=( max_output_length , ))
    decoder_embedding = Embedding( num_decoder_tokens, 256 , mask_zero=True) (decoder_inputs)

    decoder_lstm = LSTM( 256 , return_state=True , return_sequences=True , recurrent_dropout=0.2 , dropout=0.2)
    decoder_dense = Dense( num_decoder_tokens , activation=tf.keras.activations.softmax ) 


    def str_to_tokens( sentence : str ):
        words = sentence.lower().split()
        tokens_list = list()
        for word in words:
                # print("word ", word, eng_word_dict.get(word,1) )
                my_word=  encoder_word_dict.get(word,1)
                tokens_list.append(my_word) 
    
        return preprocessing.sequence.pad_sequences( [tokens_list] , maxlen=max_input_length , padding='post')


    def make_inference_models():
        
            encoder_model = tf.keras.models.Model(encoder_inputs, encoder_states)
            
            decoder_state_input_h = tf.keras.layers.Input(shape=( 256,))
            decoder_state_input_c = tf.keras.layers.Input(shape=( 256 ,))
            
            decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
            
            decoder_outputs, state_h, state_c = decoder_lstm(
                decoder_embedding , initial_state=decoder_states_inputs)
            decoder_states = [state_h, state_c]
            decoder_outputs = decoder_dense(decoder_outputs)
            decoder_model = tf.keras.models.Model(
                [decoder_inputs] + decoder_states_inputs,
                [decoder_outputs] + decoder_states)
            
            return encoder_model , decoder_model


    enc_model , dec_model = make_inference_models()


    # Test Previous Model


    encoderPath= encoderPath
    decoderPath= decoderPath

    # loading

    enc_model =  load_model(encoderPath)
    dec_model  =  load_model(decoderPath)

    def translate_sentence(sentence):
        for epoch in range(1 ):
            states_values = enc_model.predict( str_to_tokens(sentence ) )
            empty_target_seq = np.zeros( ( 1 , 1 ) )
            empty_target_seq[0, 0] = decoder_word_dict['start']
            stop_condition = False
            decoded_translation = ''
            while not stop_condition :
                dec_outputs , h , c = dec_model.predict([ empty_target_seq ] + states_values )
                sampled_word_index = np.argmax( dec_outputs[0, -1, :] )
                sampled_word = None
                for word , index in decoder_word_dict.items() :
                    if sampled_word_index == index :
                        decoded_translation += ' {}'.format( word )
                        sampled_word = word
                
                if sampled_word == 'end' or len(decoded_translation.split()) > max_output_length:
                    stop_condition = True
                    
                empty_target_seq = np.zeros( ( 1 , 1 ) )  
                empty_target_seq[ 0 , 0 ] = sampled_word_index
                states_values = [ h , c ] 

            print("Decoded Traslation ", decoded_translation )
        return  decoded_translation


    ## Get sentences to test the model

    lines = pd.read_table( 'hin.txt' , names=[ 'eng' , 'hindi' ] )
    lines.reset_index( level=0 , inplace=True )
    lines.rename( columns={ 'index' : 'eng' , 'eng' : 'hindi' , 'hindi' : 'c' } , inplace=True )
    lines = lines.drop( 'c' , 1 )  

    sample_sentences= lines[-10:]
    sample_sentences

    # Reference Token 

    reference_tokens=[]

    for line in sample_sentences['eng']:
        print( line.split() ) 
        reference_tokens.append( line.split() )

    df = pd.DataFrame(      columns=['reference', 'candidate', 'bleu_score'],  )

    df["reference"]= reference_tokens



    # Candidate Tokens 
    candidate_tokens=[]


    for line in sample_sentences['hindi']:
    
        result= translate_sentence(line)
        temp =result.split()
        temp= temp[:-1]
        candidate_tokens.append(temp)
        

    df["candidate"]= candidate_tokens


    ## Calculate BLEU score

    scores=[]
    for reference, candidate in zip(df['reference'], df['candidate']):
    
        result= sentence_bleu([reference], candidate)
        scores.append(result)
    
    df["bleu_score"]= scores    ## BLEU score calculated


    ## Calcualte ROUGE score
    scores=[]
    for reference, candidate in zip(df['reference'], df['candidate']):
        temp =['captain', 'of', 'the', 'delta', 'flight']
        references =tf.ragged.constant([reference])
        hypotheses= tf.ragged.constant([candidate])

        result= text.metrics.rouge_l(hypotheses, references)
        
        result_str= " F-measure: "+str(result.f_measure.numpy()[0]) +"  Precision: "+str(result.p_measure.numpy()[0])+"  Recall: "+str(result.r_measure.numpy()[0])
        column=["f_measure", "p_measure", "r_measure"]
        data= [[result.f_measure.numpy()[0] ,result.p_measure.numpy()[0] , result.r_measure.numpy()[0] ]]
        metric= pd.DataFrame(data=data, columns=column)
        resultObj= {"f_measure": result.f_measure.numpy()[0] , "p_measure": result.p_measure.numpy()[0],  "r_measure":result.r_measure.numpy()[0] }  
        scores.append(resultObj)
    
    

    df["rouge_score"]= scores  ## ROUGE score calculated



    rouge_metric= pd.DataFrame.from_records(df['rouge_score'])

    average_f_measure = rouge_metric['f_measure'].mean()
    average_p_measure = rouge_metric['p_measure'].mean()
    average_r_measure = rouge_metric['r_measure'].mean()
    # average_cosine= df['cosine_similarity'].mean()
    average_bleu= df['bleu_score'].mean()

    ## return BLEU and ROUGE score to the list 
    return [average_f_measure, average_p_measure,average_r_measure, average_bleu]

        

    

# Get Summary Statistics Table

In [7]:

stat=[]
for item in model_list:

    model_path= path+item+"/model.h5" 
    path_encoder_parameters= path+item+"/parameters/encoder_parameters.pickle" 
    path_encoder_dictionary= path+item+"/dictionaries/encoder_dictionary.pickle" 
    path_decoder_parameters= path+item+"/parameters/decoder_parameters.pickle" 
    path_decoder_dictionary= path+item+"/dictionaries/decoder_dictionary.pickle" 
    encoderPath= path+item+"/enc_model.h5" 
    decoderPath= path+item+"/dec_model.h5" 
    print(model_path, path_encoder_parameters,path_encoder_dictionary,path_decoder_parameters,path_decoder_dictionary,encoderPath,decoderPath)
    result= get_model_statistics_summary(model_path, path_encoder_parameters,path_encoder_dictionary,path_decoder_parameters,path_decoder_dictionary,encoderPath,decoderPath)
    stat.append(result)

table =pd.DataFrame(columns=["average_f_measure", "average_p_measure","average_r_measure",  "average_bleu"], data=stat)
table

/content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/2908/model.h5 /content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/2908/parameters/encoder_parameters.pickle /content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/2908/dictionaries/encoder_dictionary.pickle /content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/2908/parameters/decoder_parameters.pickle /content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/2908/dictionaries/decoder_dictionary.pickle /content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/2908/enc_model.h5 /content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/2908/dec_model.h5
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected t



['Mother', 'Teresa', 'was', 'a', 'Catholic', 'nun', 'who', 'lived', 'and', 'worked', 'in', 'Calcutta,', 'India.']
['George', 'Washington', 'was', 'the', 'first', 'president', 'of', 'the', 'Unites', 'States', 'of', 'America.']
['From', 'my', 'point', 'of', 'view,', 'Australia', 'is', 'one', 'of', 'the', 'best', 'countries', 'in', 'the', 'world.']
['In', '1951,', 'Sister', 'Teresa', 'was', 'sent', 'to', 'Calcutta,', 'then', 'the', 'largest', 'city', 'in', 'India.']
['Mother', 'Teresa', 'used', 'the', 'prize', 'money', 'for', 'her', 'work', 'in', 'India', 'and', 'around', 'the', 'world.']
['If', 'you', 'go', 'to', 'that', 'supermarket,', 'you', 'can', 'buy', 'most', 'things', 'you', 'use', 'in', 'your', 'daily', 'life.']
['The', 'passengers', 'who', 'were', 'injured', 'in', 'the', 'accident', 'were', 'taken', 'to', 'the', 'nearest', 'hospital.']
['Democracy', 'is', 'the', 'worst', 'form', 'of', 'government,', 'except', 'all', 'the', 'others', 'that', 'have', 'been', 'tried.']
['If', 'my',



Decoded Traslation   a man was thrown for a long were wrong end
Decoded Traslation   could you a doctor in end
Decoded Traslation   my father has just cleared the room of the table end
Decoded Traslation   the man woke up for a strange man end
Decoded Traslation   whoever wins the rule to the rumor say that we have a house end
Decoded Traslation   it was not to do that he went to the job end
Decoded Traslation   did the police arrest his job end
Decoded Traslation   the doctor advised that he went to the job to the party end
Decoded Traslation   my father is afraid of her grandmother end
Decoded Traslation   i don't think if i had a fight with my mother time end
/content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/1000/model.h5 /content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/1000/parameters/encoder_parameters.pickle /content/drive/My Drive/Machine Learning/GitHub Projects/machine-translation/models/1000/dictionaries/enc

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 12)]         0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 9)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 12, 256)      309760      ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 9, 256)       236032      ['input_2[0][0]']                
                                                                                              



['Mother', 'Teresa', 'was', 'a', 'Catholic', 'nun', 'who', 'lived', 'and', 'worked', 'in', 'Calcutta,', 'India.']
['George', 'Washington', 'was', 'the', 'first', 'president', 'of', 'the', 'Unites', 'States', 'of', 'America.']
['From', 'my', 'point', 'of', 'view,', 'Australia', 'is', 'one', 'of', 'the', 'best', 'countries', 'in', 'the', 'world.']
['In', '1951,', 'Sister', 'Teresa', 'was', 'sent', 'to', 'Calcutta,', 'then', 'the', 'largest', 'city', 'in', 'India.']
['Mother', 'Teresa', 'used', 'the', 'prize', 'money', 'for', 'her', 'work', 'in', 'India', 'and', 'around', 'the', 'world.']
['If', 'you', 'go', 'to', 'that', 'supermarket,', 'you', 'can', 'buy', 'most', 'things', 'you', 'use', 'in', 'your', 'daily', 'life.']
['The', 'passengers', 'who', 'were', 'injured', 'in', 'the', 'accident', 'were', 'taken', 'to', 'the', 'nearest', 'hospital.']
['Democracy', 'is', 'the', 'worst', 'form', 'of', 'government,', 'except', 'all', 'the', 'others', 'that', 'have', 'been', 'tried.']
['If', 'my',



Decoded Traslation   she isn't my cousin end
Decoded Traslation   she is my wealthy woman end
Decoded Traslation   my family loved my woman end
Decoded Traslation   she is his man but the job end
Decoded Traslation   that guy annoys me end
Decoded Traslation   he is not my little toy end
Decoded Traslation   she is poor but the sharp end
Decoded Traslation   she isn't my cousin end
Decoded Traslation   he tends to be very beer end
Decoded Traslation   she isn't my cousin end


Unnamed: 0,average_f_measure,average_p_measure,average_r_measure,average_bleu
0,0.118541,0.154241,0.098203,4.1541230000000004e-156
1,0.04619,0.084286,0.032063,1.071106e-232
