### Load Model From Drive

In [3]:
# Mount Google Drive inside the Colab runtime at /content/drive so the saved
# models and pickles referenced later in the notebook can be read from it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.utils.vis_utils import plot_model
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from tensorflow import keras
from tensorflow.keras import layers , activations , models , preprocessing , utils
from tensorflow.keras.layers import Input, Embedding,Dense,  LSTM
from tensorflow.keras.models import load_model

In [4]:
!pip install -q "tensorflow-text==2.8.*"
import tensorflow as tf
import tensorflow_text as text

[K     |████████████████████████████████| 4.9 MB 5.3 MB/s 
[?25h

In [5]:
!pip install fasttext
import fasttext.util

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fasttext
  Downloading fasttext-0.9.2.tar.gz (68 kB)
[K     |████████████████████████████████| 68 kB 3.9 MB/s 
[?25hCollecting pybind11>=2.2
  Using cached pybind11-2.10.0-py3-none-any.whl (213 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (setup.py) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.2-cp37-cp37m-linux_x86_64.whl size=3165348 sha256=2381b29fc7e5573879bcc64de62b2b7f25e5d8dc5c3266335b4ee167722a6f8f
  Stored in directory: /root/.cache/pip/wheels/4e/ca/bf/b020d2be95f7641801a6597a29c8f4f19e38f9c02a345bab9b
Successfully built fasttext
Installing collected packages: pybind11, fasttext
Successfully installed fasttext-0.9.2 pybind11-2.10.0


# Path of Saved Models

In [6]:
## Move to root directory
!ls
%cd drive/MyDrive/Machine\ Learning/GitHub\ Projects/machine-translation
!ls

## Move to models directory
%cd models/
!ls

drive  sample_data
/content/drive/MyDrive/Machine Learning/GitHub Projects/machine-translation
gitCommands.ipynb  models  notes.txt  README.md  test  train
/content/drive/MyDrive/Machine Learning/GitHub Projects/machine-translation/models
1000  2908  _about.txt	hin-eng.zip  hin.txt  modelsummary.png


In [7]:
# Saved checkpoints, relative to the current (models) directory.
# model_2909 is an empty string and is not used in this cell.
model_2908 = "2908/model.h5"
model_1000 = "1000/model.h5"
model_2909 = ""

# Restore the full training model, write its architecture diagram to disk and
# print the layer summary.
reconstructed_model = keras.models.load_model(model_2908)
plot_model(
    reconstructed_model,
    to_file='modelsummary.png',
    show_shapes=True,
    show_layer_names=True,
)
reconstructed_model.summary()


## Load Dictionaries and Parameters
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # pickle.load can execute arbitrary code: only point this at trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


path_encoder_parameters = '2908/parameters/encoder_parameters.pickle'
path_encoder_dictionary = '2908/dictionaries/encoder_dictionary.pickle'
path_decoder_parameters = '2908/parameters/decoder_parameters.pickle'
path_decoder_dictionary = '2908/dictionaries/decoder_dictionary.pickle'

encoder_parameters = _load_pickle(path_encoder_parameters)
encoder_dictionary = _load_pickle(path_encoder_dictionary)
decoder_parameters = _load_pickle(path_decoder_parameters)
decoder_dictionary = _load_pickle(path_decoder_dictionary)

# Show only the (small) parameter objects; the dictionaries are not printed.
print(encoder_parameters)
print(decoder_parameters)

# Rebuild an encoder-only model from the loaded training model: its first input
# feeds the model, and the two state tensors of layer 4 are the outputs.
encoder_inputs = reconstructed_model.input[0]  # input_1
# NOTE(review): the layer is picked by position (index 4) and the inline comment
# calls it "lstm_1" -- confirm against reconstructed_model.summary() that index 4
# really is the encoder LSTM.
encoder_outputs, state_h_enc, state_c_enc = reconstructed_model.layers[4].output  # lstm_1
encoder_states = [state_h_enc, state_c_enc]
encoder_model = keras.Model(encoder_inputs, encoder_states)
# latent_dim is defined here, but the layers below use the literal 256 instead.
latent_dim = 256  # Note: may be need to save in drive as well


# Vocabulary size and sequence lengths come from the pickled parameter objects.
num_decoder_tokens =decoder_parameters['num_decoder_tokens']
max_output_length= decoder_parameters['max_decoder_seq_length']
max_input_length= encoder_parameters['max_encoder_seq_length']

# Aliases used by str_to_tokens and by the decoding loop.
encoder_word_dict=encoder_dictionary
decoder_word_dict= decoder_dictionary


# NOTE(review): the decoder layers below are new layer instances, not layers
# taken from reconstructed_model, so they presumably do not carry the trained
# weights -- verify. The decoding loop later in this cell uses enc_model/dec_model
# loaded from disk and feeds (1, 1) arrays, not (max_output_length,) sequences.
decoder_inputs = Input(shape=( max_output_length , ))
decoder_embedding = Embedding( num_decoder_tokens, 256 , mask_zero=True) (decoder_inputs)

decoder_lstm = LSTM( 256 , return_state=True , return_sequences=True , recurrent_dropout=0.2 , dropout=0.2)
decoder_dense = Dense( num_decoder_tokens , activation=tf.keras.activations.softmax ) 


def str_to_tokens( sentence : str ):
    """Encode a sentence as one padded row of encoder vocabulary ids.

    The sentence is lower-cased and split on whitespace. Each word is looked up
    in ``encoder_word_dict``; words that are missing map to id 1. The single
    resulting sequence is handed to ``pad_sequences`` with
    ``maxlen=max_input_length`` and ``padding='post'``, and that result is returned.
    """
    token_ids = [encoder_word_dict.get(token, 1) for token in sentence.lower().split()]
    return preprocessing.sequence.pad_sequences(
        [token_ids], maxlen=max_input_length, padding='post')


def make_inference_models():
    """Assemble the encoder and decoder models used for step-by-step decoding.

    The encoder maps ``encoder_inputs`` to ``encoder_states``. The decoder takes
    ``decoder_inputs`` plus two 256-wide state inputs, runs ``decoder_lstm`` on
    ``decoder_embedding`` starting from those states, applies ``decoder_dense``,
    and outputs the dense result followed by the two new LSTM states.

    Returns:
        tuple: ``(encoder_model, decoder_model)``.
    """
    inference_encoder = tf.keras.models.Model(encoder_inputs, encoder_states)

    # Two state placeholders, created in the same order as before (h first, then c).
    state_inputs = [tf.keras.layers.Input(shape=(256,)) for _ in range(2)]

    sequence_output, next_h, next_c = decoder_lstm(
        decoder_embedding, initial_state=state_inputs)
    token_scores = decoder_dense(sequence_output)

    inference_decoder = tf.keras.models.Model(
        [decoder_inputs] + state_inputs,
        [token_scores] + [next_h, next_c],
    )
    return inference_encoder, inference_decoder


# NOTE(review): the models built here are replaced a few lines below by the
# encoder/decoder loaded from disk.
enc_model , dec_model = make_inference_models()


# Test Previous Model

encoderPath="2908/enc_model.h5" 
decoderPath="2908/dec_model.h5"

# Load the saved inference encoder and decoder.
enc_model = load_model(encoderPath)
dec_model = load_model(decoderPath)

# Reverse vocabulary, built once, so each decoding step is a dictionary lookup
# instead of a scan over every (word, index) pair.
index_to_word = {index: word for word, index in decoder_word_dict.items()}

# Greedy decoding of one sample sentence.
states_values = enc_model.predict( str_to_tokens("मुझे खाने से प्यार है" ) )
empty_target_seq = np.zeros( ( 1 , 1 ) )
empty_target_seq[0, 0] = decoder_word_dict['start']
decoded_words = []

# The step count is bounded explicitly. The previous `while` loop checked the
# number of decoded words, which never grows when the predicted index is missing
# from decoder_word_dict, so it could spin forever.
for _ in range(max_output_length + 1):
    dec_outputs , h , c = dec_model.predict([ empty_target_seq ] + states_values )
    sampled_word_index = int(np.argmax( dec_outputs[0, -1, :] ))
    sampled_word = index_to_word.get(sampled_word_index)
    if sampled_word is not None:
        decoded_words.append(sampled_word)
    if sampled_word == 'end':
        break

    # Feed the predicted token and the updated states into the next step.
    empty_target_seq = np.zeros( ( 1 , 1 ) )
    empty_target_seq[ 0 , 0 ] = sampled_word_index
    states_values = [ h , c ]

# Same format as before: every word is prefixed with a single space.
decoded_translation = ''.join(' {}'.format(word) for word in decoded_words)
print("Decoded Traslation ", decoded_translation )

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 21)]         0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 20)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 21, 256)      770816      ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 20, 256)      612864      ['input_2[0][0]']                
                                                                                              



Decoded Traslation   i'm not a doctor end


## Copy of Previous Cell

In [None]:
%cd ..
%cd ..
%cd ..
%cd ..
%cd ..
%cd ..
%cd ..
!ls ## Need to change in the future: currently text files are downloaded on my Drive

!ls
%cd content 
!ls
## Move to root directory
!ls
%cd drive/MyDrive/Machine\ Learning/GitHub\ Projects/machine-translation
!ls

## Move to models directory
%cd models/
!ls

In [None]:
!wget http://www.manythings.org/anki/hin-eng.zip -O hin-eng.zip
!unzip hin-eng.zip

In [None]:

# Saved checkpoints, relative to the current (models) directory.
# model_2909 is an empty string and is not used in this cell.
model_2908 = "2908/model.h5"
model_1000 = "1000/model.h5"
model_2909 = ""

# Restore the training model, save its diagram and print the layer summary.
reconstructed_model = keras.models.load_model(model_2908)
plot_model(
    reconstructed_model,
    show_layer_names=True,
    show_shapes=True,
    to_file='modelsummary.png',
)
reconstructed_model.summary()


## Load Dictionaries and Parameters
path_encoder_parameters = '2908/parameters/encoder_parameters.pickle'
path_encoder_dictionary = '2908/dictionaries/encoder_dictionary.pickle'
path_decoder_parameters = '2908/parameters/decoder_parameters.pickle'
path_decoder_dictionary = '2908/dictionaries/decoder_dictionary.pickle'

# Unpickle the four artefacts in the same order as before, keyed by their path.
# pickle.load can execute arbitrary code: only point this at trusted files.
_artifacts = {}
for _artifact_path in (
    path_encoder_parameters,
    path_encoder_dictionary,
    path_decoder_parameters,
    path_decoder_dictionary,
):
    with open(_artifact_path, 'rb') as handle:
        _artifacts[_artifact_path] = pickle.load(handle)

encoder_parameters = _artifacts[path_encoder_parameters]
encoder_dictionary = _artifacts[path_encoder_dictionary]
decoder_parameters = _artifacts[path_decoder_parameters]
decoder_dictionary = _artifacts[path_decoder_dictionary]

# Show only the (small) parameter objects; the dictionaries are not printed.
print(encoder_parameters)
print(decoder_parameters)

# Rebuild an encoder-only model from the loaded training model: its first input
# feeds the model, and the two state tensors of layer 4 are the outputs.
encoder_inputs = reconstructed_model.input[0]  # input_1
# NOTE(review): the layer is picked by position (index 4) and the inline comment
# calls it "lstm_1" -- confirm against reconstructed_model.summary() that index 4
# really is the encoder LSTM.
encoder_outputs, state_h_enc, state_c_enc = reconstructed_model.layers[4].output  # lstm_1
encoder_states = [state_h_enc, state_c_enc]
encoder_model = keras.Model(encoder_inputs, encoder_states)
# latent_dim is defined here, but the layers below use the literal 256 instead.
latent_dim = 256  # Note: may be need to save in drive as well


# Vocabulary size and sequence lengths come from the pickled parameter objects.
num_decoder_tokens =decoder_parameters['num_decoder_tokens']
max_output_length= decoder_parameters['max_decoder_seq_length']
max_input_length= encoder_parameters['max_encoder_seq_length']

# Aliases used by str_to_tokens and translate_sentence.
encoder_word_dict=encoder_dictionary
decoder_word_dict= decoder_dictionary


# NOTE(review): the decoder layers below are new layer instances, not layers
# taken from reconstructed_model, so they presumably do not carry the trained
# weights -- verify. translate_sentence later in this cell uses enc_model/dec_model
# loaded from disk and feeds (1, 1) arrays, not (max_output_length,) sequences.
decoder_inputs = Input(shape=( max_output_length , ))
decoder_embedding = Embedding( num_decoder_tokens, 256 , mask_zero=True) (decoder_inputs)

decoder_lstm = LSTM( 256 , return_state=True , return_sequences=True , recurrent_dropout=0.2 , dropout=0.2)
decoder_dense = Dense( num_decoder_tokens , activation=tf.keras.activations.softmax ) 


def str_to_tokens( sentence : str ):
    """Turn a sentence into a single padded sequence of encoder token ids.

    Lower-cases the sentence, splits it on whitespace and maps each word through
    ``encoder_word_dict`` (id 1 for words that are not in it). The sequence is
    then passed to ``pad_sequences`` with ``maxlen=max_input_length`` and
    ``padding='post'``, whose result is returned.
    """
    lowered_words = sentence.lower().split()
    token_ids = list(map(lambda token: encoder_word_dict.get(token, 1), lowered_words))
    return preprocessing.sequence.pad_sequences(
        [token_ids], maxlen=max_input_length, padding='post')


def make_inference_models():
    """Assemble the encoder and decoder models used for step-by-step decoding.

    The encoder maps ``encoder_inputs`` to ``encoder_states``. The decoder takes
    ``decoder_inputs`` plus two 256-wide state inputs, runs ``decoder_lstm`` on
    ``decoder_embedding`` starting from those states, applies ``decoder_dense``,
    and outputs the dense result followed by the two new LSTM states.

    Returns:
        tuple: ``(encoder_model, decoder_model)``.
    """
    inference_encoder = tf.keras.models.Model(encoder_inputs, encoder_states)

    # Two state placeholders, created in the same order as before (h first, then c).
    state_inputs = [tf.keras.layers.Input(shape=(256,)) for _ in range(2)]

    sequence_output, next_h, next_c = decoder_lstm(
        decoder_embedding, initial_state=state_inputs)
    token_scores = decoder_dense(sequence_output)

    inference_decoder = tf.keras.models.Model(
        [decoder_inputs] + state_inputs,
        [token_scores] + [next_h, next_c],
    )
    return inference_encoder, inference_decoder


# Build inference models from the layers defined above.
# NOTE(review): both names are reassigned below to models loaded from disk, so
# the models built by this call are not the ones used for translation.
enc_model , dec_model = make_inference_models()


# Test Previous Model


# Saved inference encoder/decoder, relative to the current (models) directory.
encoderPath="2908/enc_model.h5" 
decoderPath="2908/dec_model.h5"

# loading

enc_model =  load_model(encoderPath)
dec_model  =  load_model(decoderPath)

def translate_sentence(sentence):
    """Greedily decode ``sentence`` with the loaded encoder/decoder models.

    The sentence is tokenised with ``str_to_tokens`` and encoded by
    ``enc_model``. Starting from the ``'start'`` token, ``dec_model`` is then
    called one step at a time and the highest-scoring token of each step is fed
    back in, together with the returned states, as the next input.

    Decoding stops when the word ``'end'`` is produced or after
    ``max_output_length + 1`` steps. The step count is bounded explicitly
    because the previous ``while`` loop tested the number of decoded words, which
    never grows when the predicted index is missing from ``decoder_word_dict``,
    so it could loop forever.

    Args:
        sentence: Source sentence to translate.

    Returns:
        str: The decoded words, each prefixed with a single space (so the string
        starts with a space), including the trailing ``'end'`` when it was
        produced. Empty string if no decoded index maps to a word.
    """
    # Reverse vocabulary built once per call, replacing a scan over every
    # (word, index) pair at every decoding step. Assumes indices are unique.
    index_to_word = {index: word for word, index in decoder_word_dict.items()}

    states_values = enc_model.predict(str_to_tokens(sentence))
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = decoder_word_dict['start']

    decoded_words = []
    for _ in range(max_output_length + 1):
        dec_outputs, h, c = dec_model.predict([target_seq] + states_values)
        sampled_word_index = int(np.argmax(dec_outputs[0, -1, :]))
        sampled_word = index_to_word.get(sampled_word_index)
        if sampled_word is not None:
            decoded_words.append(sampled_word)
        if sampled_word == 'end':
            break

        # Feed the predicted token and the updated states into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_word_index
        states_values = [h, c]

    decoded_translation = ''.join(' {}'.format(word) for word in decoded_words)
    print("Decoded Traslation ", decoded_translation )
    return decoded_translation



# Load the sentence pairs. Only two column names are supplied, and the
# reset_index/rename steps below move the index read from the file into an
# 'eng' column, shift the remaining names by one, and drop the last column ('c').
# `drop(columns=...)` replaces the positional axis argument `drop('c', 1)`, which
# pandas 2.x no longer accepts; the chain also avoids the in-place mutations.
lines = (
    pd.read_table('hin.txt', names=['eng', 'hindi'])
    .reset_index(level=0)
    .rename(columns={'index': 'eng', 'eng': 'hindi', 'hindi': 'c'})
    .drop(columns='c')
)

# Evaluate on the last 10 sentence pairs of the file.
sample_sentences = lines[-10:]

# Reference tokens: whitespace-split English sentences of the sample.
# NOTE(review): references keep their original casing and punctuation, while
# str_to_tokens lower-cases the source side -- confirm the decoder vocabulary
# uses the same normalisation before comparing tokens.
reference_tokens = [sentence.split() for sentence in sample_sentences['eng']]
for tokens in reference_tokens:
    print(tokens)

# Result table; the candidate and bleu_score columns are filled in further down.
df = pd.DataFrame(columns=['reference', 'candidate', 'bleu_score'])
df["reference"] = reference_tokens

df

# Candidate tokens: model translations of the Hindi side of the sample.
candidate_tokens = []

for line in sample_sentences['hindi']:
    tokens = translate_sentence(line).split()
    # Strip the end-of-sequence marker only when it is actually there. Dropping
    # the last token unconditionally removed a real word whenever decoding
    # stopped on the length limit instead of on 'end'.
    if tokens and tokens[-1] == 'end':
        tokens = tokens[:-1]
    candidate_tokens.append(tokens)

df["candidate"] = candidate_tokens

df

# Sentence-level BLEU of each candidate against its single reference.
# The default 4-gram BLEU is about 0 for short sentences with no higher-order
# n-gram overlap (NLTK warns about this), so a smoothing function is applied to
# keep the scores comparable.
smoothing = SmoothingFunction().method1
scores = [
    sentence_bleu([reference], candidate, smoothing_function=smoothing)
    for reference, candidate in zip(df['reference'], df['candidate'])
]

df["bleu_score"] = scores

df

