In [None]:
# Colab environment setup: install logging dependency, verify a GPU is attached,
# and fetch the course's language_models helper package.
# NOTE(review): the captured output reports that this magic has no effect on current Colab.
%tensorflow_version 2.x
import tensorflow as tf
!pip install tensorboardX
# NOTE(review): the captured output shows this install failing (no matching distribution);
# the package is actually obtained through the git clone at the end of this cell.
!pip install language_models
# Abort early when no GPU is visible to TensorFlow.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

# Original upstream repository, kept for reference:
# !git clone https://github.com/pbloem/language-models.git
# Clones into ./language_models so that `from language_models import util` resolves.
!git clone https://github.com/GuyKabiri/language_models

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.
Collecting tensorboardX
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m961.1 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorboardX
Successfully installed tensorboardX-2.6.2.2
[31mERROR: Could not find a version that satisfies the requirement language_models (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for language_models[0m[31m
[0mFound GPU at: /device:GPU:0
Cloning into 'language_models'...
remote: Enumerating objects: 106, done.[K
remote: Total 106 (delta 0), reused 0 (delta 0), pack-reused 106[K
Receiving objects: 100% (106/106), 19.38 MiB | 20.33 MiB/s, done.
Resolving deltas: 100% (55/55), done.


In [None]:
!pwd
##!git clone https://github.com/GuyKabiri/language_models

/content


In [None]:
import keras

import keras.backend as K
from keras.datasets import imdb
from keras.layers import  LSTM, Embedding, TimeDistributed, Input, Dense
from keras.models import Model
from tensorflow.python.client import device_lib

from tqdm import tqdm
import os, random

from argparse import ArgumentParser

import numpy as np

from tensorboardX import SummaryWriter

from language_models import util

CHECK = 5

In [None]:
def generate_seq(model : Model, seed, size, temperature=1.0):
    """
    Sample a token sequence from the language model, continuing a given seed.

    :param model: The complete RNN language model. Only ``model.predict`` is used.
    :param seed: 1-D numpy array holding the first few tokens of the sequence to start generating from
    :param size: The total size of the sequence to generate (seed tokens included)
    :param temperature: This controls how much we follow the probabilities provided by the network. For t=1.0 we just
        sample directly according to the probabilities. Lower temperatures make the high-probability words more likely
        (providing more likely, but slightly boring sentences) and higher temperatures make the lower probabilities more
        likely (resulting in weirder sentences). For temperature=0.0, the generation is _greedy_, i.e. the word with the
        highest probability is always chosen.
    :return: A list of ``size`` integers representing a sampled sentence
    :raises ValueError: If ``size`` is smaller than the length of ``seed``
    """

    ls = seed.shape[0]
    if size < ls:
        raise ValueError('size ({}) must be at least the seed length ({})'.format(size, ls))

    # Due to the way Keras RNNs work, we feed the model a complete sequence each time. At first it's just the seed,
    # zero-padded to the right length. With each iteration we sample and set the next token.
    tokens = np.concatenate([seed, np.zeros(size - ls)])

    for i in range(ls, size):

        probs = model.predict(tokens[None, :])

        # The output at position i-1 is the prediction for the token at position i.
        next_token = util.sample_logits(probs[0, i - 1, :], temperature=temperature)

        # Extract a plain Python scalar first: assigning a size-1 array into a single
        # array slot is deprecated in recent NumPy releases and emits a warning.
        tokens[i] = np.asarray(next_token).item()

    return [int(t) for t in tokens]

In [None]:
def sparse_loss(y_true, y_pred):
    """
    Loss used to compile the language models.

    Delegates to the Keras backend sparse categorical cross-entropy with
    ``from_logits=True``, i.e. ``y_pred`` is treated as unnormalised scores
    rather than probabilities.

    :param y_true: Integer class targets.
    :param y_pred: Model outputs (logits).
    :return: Whatever the backend function returns for these inputs.
    """
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return cross_entropy

In [None]:
class Args:
  """Plain container for the experiment's hyper-parameters and paths."""

  # --- task and corpus ---
  # Which corpus to load: 'wikisimple' or 'file'.
  task = 'wikisimple'
  # Data file. Should contain one sentence per line.
  data = './data'
  # Word list size.
  top_words = 10000
  # Sentence max length.
  max_length = None
  # Character cap for the corpus - not relevant in our exercise.
  limit = None

  # --- model ---
  # Size of the word embeddings on the input layer.
  embedding_size = 300
  # Number of units in each LSTM layer.
  lstm_capacity = 256
  # Number of extra LSTM layers.
  extra = None

  # --- optimisation ---
  # Number of epochs.
  epochs = 20
  # Learning rate.
  lr = 0.001
  # Batch size.
  batch = 128

  # --- logging and reproducibility ---
  # Output every n epochs.
  out_every = 1
  # Tensorboard directory.
  tb_dir = './runs/words'
  # RNG seed. Negative for random (seed is printed for reproducibility).
  seed = -1

options = Args()

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, TimeDistributed, Dense
from tensorflow.keras.models import Model

def create_custom_model(extra_layers, is_backward):
    """
    Build, compile and summarise a word-level LSTM language model.

    The network is: token ids -> Embedding -> one or more LSTM layers (all
    returning full sequences) -> per-timestep Dense layer producing one logit
    per vocabulary word. Sizes come from the module-level ``numwords`` and
    ``options``; the loss is ``sparse_loss``.

    :param extra_layers: Number of LSTM layers to stack on top of the first
        one. ``None`` (or 0) means a single LSTM layer.
    :param is_backward: Forwarded as ``go_backwards`` to every LSTM layer.
    :return: The compiled Keras model.
    """
    token_ids = Input(shape=(None,))

    # input_length is left at its default: sequences are variable-length.
    hidden = Embedding(numwords, options.embedding_size)(token_ids)

    # NOTE(review): per the Keras documentation, go_backwards=True makes the layer
    # process the sequence in reverse AND return the reversed sequence. The
    # per-timestep outputs are therefore not in the same order as the targets used
    # for the forward models, and stacking two such layers reverses twice. This may
    # let the model see tokens it is asked to predict - confirm before comparing
    # backward and forward results.
    n_lstm_layers = 1 + (extra_layers or 0)
    for _ in range(n_lstm_layers):
        hidden = LSTM(options.lstm_capacity, return_sequences=True, go_backwards=is_backward)(hidden)

    # Linear activation: the model emits logits, matching from_logits=True in the loss.
    logits = TimeDistributed(Dense(numwords, activation='linear'))(hidden)

    model = Model(token_ids, logits)

    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
    opt = tf.keras.optimizers.Adam(learning_rate=options.lr)
    model.compile(opt, sparse_loss)

    model.summary()

    return model



In [None]:
# Seed NumPy's global RNG. A negative options.seed means "pick one at random";
# the chosen value is printed so the run can be repeated.
if options.seed < 0:
    seed = random.randint(0, 1000000)
    print('random seed: ', seed)
else:
    seed = options.seed
np.random.seed(seed)

# Resolve which corpus file to read for the selected task.
if options.task == 'wikisimple':
    corpus_path = util.DIR + '/datasets/wikisimple.txt'
elif options.task == 'file':
    # Args exposes the user-supplied corpus path as `data` (there is no `data_dir`).
    corpus_path = options.data
else:
    raise Exception('Task {} not recognized.'.format(options.task))

x, w2i, i2w = util.load_words(corpus_path, vocab_size=options.top_words, limit=options.limit)

# Finding the length of the longest sequence
x_max_len = max([len(sentence) for sentence in x])

numwords = len(i2w)
print('max sequence length ', x_max_len)
print(numwords, 'distinct words')

# From here on `x` is a list of padded batches rather than a list of sentences.
x = util.batch_pad(x, options.batch, add_eos=True)

def decode(seq):
    """
    Turn a sequence of token ids into a readable string.

    Each id is looked up in the module-level ``i2w`` table and the resulting
    words are joined with single spaces.

    :param seq: Iterable of token ids.
    :return: The space-separated sentence.
    """
    words = [i2w[token_id] for token_id in seq]
    return ' '.join(words)

# Each element of x is one batch; its first dimension is the number of sentences in it.
print('Finished data loading. ', sum(batch.shape[0] for batch in x), ' sentences loaded')


random seed:  192316
raw data read
max sequence length  132
10000 distinct words
max length per batch:  [15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 41, 42, 42, 43, 44, 45,

In [None]:
import torch
# Tensorboard writer for this run. NOTE(review): nothing visible in this notebook
# writes to it yet.
tbw = SummaryWriter(log_dir=options.tb_dir)

#- Since we have a variable batch size, we make our own training loop, and train with
#  model.train_on_batch(...). It's a little more verbose, but it gives us more control.

def train_model(model, title):
  """
  Train ``model`` for ``options.epochs`` epochs on the global ``train_data``.

  For every batch the input is the batch with a start symbol (1) prepended and
  the target is the batch with a pad symbol (0) appended, so the output at each
  position is trained to predict the token that follows it in the input.

  The loss of the last batch of each epoch is printed after that epoch.

  :param model: Compiled Keras model exposing ``train_on_batch``.
  :param title: Label for the run. Currently unused.
  :return: Loss of the last batch trained, or ``None`` if no batch was trained
      (zero epochs or empty training data).
  """
  # Pre-initialised so the print/return below are safe when nothing is trained.
  loss = None
  for _ in range(options.epochs):
    for batch in tqdm(train_data):
      n, _ = batch.shape

      batch_shifted = np.concatenate([np.ones((n, 1)), batch], axis=1)  # prepend start symbol
      batch_out = np.concatenate([batch, np.zeros((n, 1))], axis=1)     # append pad symbol

      # Targets get a trailing axis of size 1, as passed to the sparse loss.
      loss = model.train_on_batch(batch_shifted, batch_out[:, :, None])
    print(loss)
  return loss

**Q1: Splitting the dataset into train, validation, and test sets**

In [None]:
import torch
# Split the list of batches 80% / 10% / remainder. The test split takes whatever is
# left over, so the three sizes always add up to len(x) regardless of its value.
n_total = len(x)
n_train = int(n_total * 0.8)
n_valid = int(n_total * 0.1)
n_test = n_total - n_train - n_valid

# Derive the split's generator seed from NumPy's global RNG (seeded during data
# loading) so that the split is repeatable for a given seed.
split_rng = torch.Generator().manual_seed(int(np.random.randint(0, 2**31 - 1)))
train_data, valid_data, test_data = torch.utils.data.random_split(
    x, [n_train, n_valid, n_test], generator=split_rng)




****Q2: calculate perplexity****

In [None]:
def calculate_perplexity(cEntropy, base=np.e):
    """
    Convert an average cross-entropy into a perplexity.

    Perplexity is ``base ** cross_entropy`` where ``base`` must match the
    logarithm used to compute the cross-entropy. The Keras cross-entropy loss
    used in this notebook is measured in nats (natural logarithm), so the
    default base is e. Pass ``base=2`` for a cross-entropy measured in bits.

    :param cEntropy: Average cross-entropy per predicted token.
    :param base: Base of the logarithm the cross-entropy was computed with.
    :return: The perplexity (1.0 for a cross-entropy of 0).
    """
    return base ** cEntropy


**Q3: To train the model backwards, all we need to do is pass `is_backward=True` to `create_custom_model`, which sets `go_backwards` on every LSTM layer.**

**Q4: No extra layers, forward direction**

In [None]:
model1 = create_custom_model(None, False)





Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 300)         3000000   
                                                                 
 lstm (LSTM)                 (None, None, 256)         570368    
                                                                 
 time_distributed (TimeDist  (None, None, 10000)       2570000   
 ributed)                                                        
                                                                 
Total params: 6140368 (23.42 MB)
Trainable params: 6140368 (23.42 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


****Q4: one extra layer, forward direction****

In [None]:
model2 = create_custom_model(1, False)



Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding_1 (Embedding)     (None, None, 300)         3000000   
                                                                 
 lstm_1 (LSTM)               (None, None, 256)         570368    
                                                                 
 lstm_2 (LSTM)               (None, None, 256)         525312    
                                                                 
 time_distributed_1 (TimeDi  (None, None, 10000)       2570000   
 stributed)                                                      
                                                                 
Total params: 6665680 (25.43 MB)
Trainable params: 6665680 (25.43 MB)
Non-trainable params: 0 (0.00 Byte)
___________________

****Q4: No extra layers, backward ****

In [None]:
model3 = create_custom_model(None, True)



Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding_2 (Embedding)     (None, None, 300)         3000000   
                                                                 
 lstm_3 (LSTM)               (None, None, 256)         570368    
                                                                 
 time_distributed_2 (TimeDi  (None, None, 10000)       2570000   
 stributed)                                                      
                                                                 
Total params: 6140368 (23.42 MB)
Trainable params: 6140368 (23.42 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


****Q4: One extra layer, backward****

In [None]:
model4 = create_custom_model(1, True)



Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding_3 (Embedding)     (None, None, 300)         3000000   
                                                                 
 lstm_4 (LSTM)               (None, None, 256)         570368    
                                                                 
 lstm_5 (LSTM)               (None, None, 256)         525312    
                                                                 
 time_distributed_3 (TimeDi  (None, None, 10000)       2570000   
 stributed)                                                      
                                                                 
Total params: 6665680 (25.43 MB)
Trainable params: 6665680 (25.43 MB)
Non-trainable params: 0 (0.00 Byte)
___________________

**Q5: Function that computes the probability of a sentence**

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer

def sentence_probability(model, sentence):
    """
    Compute (and print) the probability the model assigns to a sentence.

    The sentence is split on whitespace, each word is mapped to its id through
    the module-level ``w2i`` (unknown words map to ``'<UNK>'``) and the start
    symbol (1) is prepended. As in training and in ``generate_seq``, the model
    output at position ``j`` is the prediction for the token at position
    ``j + 1`` of the input, so the sentence probability is the product over
    ``j`` of ``P(indices[j + 1] | output at position j)``.

    :param model: Trained model exposing ``predict`` and returning logits.
    :param sentence: Whitespace-separated sentence.
    :return: The probability of the sentence (1.0 for an empty sentence).
    """
    word_ids = [w2i[word] if word in w2i else w2i['<UNK>'] for word in sentence.split()]

    # Prepend the start symbol index (1).
    indices = np.array([1] + word_ids)

    predictions = model.predict(indices[None, :])

    # Numerically stable log-softmax over the vocabulary axis, in float64.
    logits = np.asarray(predictions[0], dtype=np.float64)
    shifted = logits - logits.max(axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))

    # Output position j scores the token that follows it: indices[j + 1].
    positions = np.arange(len(indices) - 1)
    token_log_probs = log_probs[positions, indices[1:]]

    # Sum in log space and exponentiate once to limit underflow.
    probability = np.exp(token_log_probs.sum())

    print("The probability of '{}': {}".format(sentence, probability))

    return probability




In [None]:
train_model(model1, 'model1 - train')


100%|██████████| 186/186 [00:38<00:00,  4.88it/s]


6.251443386077881


100%|██████████| 186/186 [00:07<00:00, 25.98it/s]


5.88588285446167


100%|██████████| 186/186 [00:07<00:00, 26.43it/s]


5.6782684326171875


100%|██████████| 186/186 [00:07<00:00, 25.70it/s]


5.519617557525635


100%|██████████| 186/186 [00:07<00:00, 26.00it/s]


5.388356685638428


100%|██████████| 186/186 [00:07<00:00, 25.38it/s]


5.267702579498291


100%|██████████| 186/186 [00:07<00:00, 25.59it/s]


5.16424036026001


100%|██████████| 186/186 [00:07<00:00, 25.42it/s]


5.069571018218994


100%|██████████| 186/186 [00:07<00:00, 24.77it/s]


4.9825921058654785


100%|██████████| 186/186 [00:07<00:00, 24.41it/s]


4.900674343109131


100%|██████████| 186/186 [00:07<00:00, 25.57it/s]


4.820597171783447


100%|██████████| 186/186 [00:07<00:00, 24.04it/s]


4.745187282562256


100%|██████████| 186/186 [00:07<00:00, 24.86it/s]


4.673551559448242


100%|██████████| 186/186 [00:07<00:00, 24.99it/s]


4.601725101470947


100%|██████████| 186/186 [00:07<00:00, 24.83it/s]


4.53444242477417


100%|██████████| 186/186 [00:07<00:00, 24.67it/s]


4.469528675079346


100%|██████████| 186/186 [00:07<00:00, 25.47it/s]


4.411933898925781


100%|██████████| 186/186 [00:07<00:00, 25.38it/s]


4.357798099517822


100%|██████████| 186/186 [00:07<00:00, 24.94it/s]


4.305532932281494


100%|██████████| 186/186 [00:07<00:00, 24.19it/s]

4.247199058532715





4.247199058532715

In [None]:
train_model(model2, 'model2 - train')

100%|██████████| 186/186 [00:27<00:00,  6.69it/s]


6.5347418785095215


100%|██████████| 186/186 [00:08<00:00, 22.68it/s]


6.514420509338379


100%|██████████| 186/186 [00:08<00:00, 22.67it/s]


6.466787338256836


100%|██████████| 186/186 [00:08<00:00, 22.06it/s]


6.19122838973999


100%|██████████| 186/186 [00:08<00:00, 21.91it/s]


6.03316068649292


100%|██████████| 186/186 [00:08<00:00, 22.31it/s]


5.949459552764893


100%|██████████| 186/186 [00:08<00:00, 22.31it/s]


5.889650344848633


100%|██████████| 186/186 [00:08<00:00, 23.09it/s]


5.832962989807129


100%|██████████| 186/186 [00:08<00:00, 22.40it/s]


5.773442268371582


100%|██████████| 186/186 [00:08<00:00, 22.94it/s]


5.717691421508789


100%|██████████| 186/186 [00:08<00:00, 22.33it/s]


5.661379814147949


100%|██████████| 186/186 [00:08<00:00, 22.84it/s]


5.59221887588501


100%|██████████| 186/186 [00:08<00:00, 22.63it/s]


5.522917747497559


100%|██████████| 186/186 [00:08<00:00, 22.38it/s]


5.455658435821533


100%|██████████| 186/186 [00:08<00:00, 21.69it/s]


5.391873836517334


100%|██████████| 186/186 [00:08<00:00, 21.71it/s]


5.325836181640625


100%|██████████| 186/186 [00:08<00:00, 22.79it/s]


5.258880615234375


100%|██████████| 186/186 [00:08<00:00, 22.91it/s]


5.199428081512451


100%|██████████| 186/186 [00:08<00:00, 22.32it/s]


5.1444902420043945


100%|██████████| 186/186 [00:08<00:00, 22.53it/s]

5.098410606384277





5.098410606384277

In [None]:
train_model(model3, 'model3 - train')

100%|██████████| 186/186 [00:24<00:00,  7.62it/s]


6.379228591918945


100%|██████████| 186/186 [00:07<00:00, 25.10it/s]


6.282315254211426


100%|██████████| 186/186 [00:07<00:00, 23.97it/s]


6.220735549926758


100%|██████████| 186/186 [00:07<00:00, 24.62it/s]


6.195218086242676


100%|██████████| 186/186 [00:07<00:00, 23.80it/s]


6.160033702850342


100%|██████████| 186/186 [00:07<00:00, 24.45it/s]


6.116074562072754


100%|██████████| 186/186 [00:07<00:00, 24.67it/s]


6.060958385467529


100%|██████████| 186/186 [00:07<00:00, 24.69it/s]


6.002007484436035


100%|██████████| 186/186 [00:07<00:00, 24.36it/s]


5.945425033569336


100%|██████████| 186/186 [00:07<00:00, 24.69it/s]


5.887324810028076


100%|██████████| 186/186 [00:07<00:00, 25.67it/s]


5.81635856628418


100%|██████████| 186/186 [00:07<00:00, 24.80it/s]


5.739992141723633


100%|██████████| 186/186 [00:07<00:00, 24.50it/s]


5.666952133178711


100%|██████████| 186/186 [00:07<00:00, 24.87it/s]


5.585628032684326


100%|██████████| 186/186 [00:07<00:00, 24.31it/s]


5.518144607543945


100%|██████████| 186/186 [00:07<00:00, 24.70it/s]


5.441370487213135


100%|██████████| 186/186 [00:07<00:00, 24.51it/s]


5.365584373474121


100%|██████████| 186/186 [00:07<00:00, 24.26it/s]


5.301890850067139


100%|██████████| 186/186 [00:07<00:00, 25.17it/s]


5.22584867477417


100%|██████████| 186/186 [00:07<00:00, 24.10it/s]

5.147737503051758





5.147737503051758

In [None]:
train_model(model4, 'model4 - train')

100%|██████████| 186/186 [00:27<00:00,  6.83it/s]


6.362420082092285


100%|██████████| 186/186 [00:08<00:00, 22.03it/s]


6.239293575286865


100%|██████████| 186/186 [00:08<00:00, 22.80it/s]


5.600415229797363


100%|██████████| 186/186 [00:08<00:00, 22.47it/s]


5.026319980621338


100%|██████████| 186/186 [00:08<00:00, 21.89it/s]


4.532773971557617


100%|██████████| 186/186 [00:08<00:00, 22.00it/s]


4.133639335632324


100%|██████████| 186/186 [00:08<00:00, 22.94it/s]


3.850419044494629


100%|██████████| 186/186 [00:08<00:00, 22.51it/s]


3.559657335281372


100%|██████████| 186/186 [00:09<00:00, 20.36it/s]


3.1758532524108887


100%|██████████| 186/186 [00:08<00:00, 21.46it/s]


2.814033269882202


100%|██████████| 186/186 [00:08<00:00, 22.18it/s]


2.5398175716400146


100%|██████████| 186/186 [00:08<00:00, 22.42it/s]


2.315072536468506


100%|██████████| 186/186 [00:08<00:00, 22.21it/s]


2.114975690841675


100%|██████████| 186/186 [00:08<00:00, 22.95it/s]


1.9208892583847046


100%|██████████| 186/186 [00:08<00:00, 22.47it/s]


1.7473849058151245


100%|██████████| 186/186 [00:08<00:00, 21.96it/s]


1.5838741064071655


100%|██████████| 186/186 [00:08<00:00, 21.81it/s]


1.4298782348632812


100%|██████████| 186/186 [00:08<00:00, 22.32it/s]


1.302207350730896


100%|██████████| 186/186 [00:08<00:00, 22.09it/s]


1.1728485822677612


100%|██████████| 186/186 [00:08<00:00, 22.56it/s]

1.0640184879302979





1.0640184879302979

**Q6: Generating sentences at different temperatures**

In [None]:
import numpy as np

def generate_sentence(model, seed_words=("i", "love"), size=7, temperatures=(0.1, 1, 10)):
    """
    Generate one sentence per temperature, all continuing the same seed words.

    :param model: Trained language model passed on to ``generate_seq``.
    :param seed_words: Words the sentence starts with. Words missing from the
        vocabulary are replaced by ``'<UNK>'``.
    :param size: Total sequence length handed to ``generate_seq``; it counts
        the start symbol and the seed words as well as the generated tokens.
    :param temperatures: Sampling temperatures to try, in order.
    :return: List of generated sentences, one per temperature.
    """
    seed_ids = [w2i[word] if word in w2i else w2i['<UNK>'] for word in seed_words]
    # Prepend the start symbol (1), as is done for the training inputs.
    seed = np.array([1] + seed_ids)

    all_sentences = []
    for temp in temperatures:
        generated_sequence = generate_seq(model, seed, size, temperature=temp)
        # Drop the leading start symbol before decoding.
        generated_sentence = decode(generated_sequence[1:])
        print('For temperature =', temp, ':', generated_sentence)
        all_sentences.append(generated_sentence)
    return all_sentences




In [None]:

sentence1 = generate_sentence(model1)


For temperature = 0.1 : i love lrb born september 13


  tokens[i] = next_token


For temperature = 1 : i love ferry may known as
For temperature = 10 : i love olympiad lighting naval announces


In [None]:

sentence2 = generate_sentence(model2)

For temperature = 0.1 : i love is a <UNK> of


  tokens[i] = next_token


For temperature = 1 : i love is a book and
For temperature = 10 : i love argentina fold showed bobby


In [None]:

sentences3 = generate_sentence(model3)



  tokens[i] = next_token


For temperature = 0.1 : i love <UNK> <UNK> <UNK> <UNK>
For temperature = 1 : i love commonly and as and
For temperature = 10 : i love rivers mardan sint 1992


In [None]:

sentence4 = generate_sentence(model4)

For temperature = 0.1 : i love <PAD> <PAD> <PAD> <PAD>


  tokens[i] = next_token


For temperature = 1 : i love <PAD> <PAD> <PAD> <PAD>
For temperature = 10 : i love marine orbit powers socialist


****Q7****

In [None]:
from scipy.special import logsumexp

def predict_next_word(model, num_model, word=None):
    """
    Sample and print a next word for a single input word.

    :param model: Trained language model exposing ``predict`` and returning logits.
    :param num_model: Label of the model, used only in the printed message.
    :param word: Word to continue from. When omitted, or when it is not in the
        vocabulary, the user is prompted until a known word is entered.
    """
    while word is None or word not in w2i:
        if word is not None:
            print("Word not found in vocabulary. Please enter another word.")
        word = input("Please enter a word: ")

    # Model input: start symbol (1) followed by the chosen word.
    tokens = np.array([1, w2i[word]])
    # Position 1 is the output produced after the word itself has been read.
    logits = model.predict(tokens[None, :])[0][1]

    # Normalise in float64 so the probabilities sum to 1 closely enough for np.random.choice.
    log_probs = np.asarray(logits).astype('float64')
    log_probs = log_probs - logsumexp(log_probs)

    # No `size` argument: a scalar is returned, so int() is not applied to an array
    # (which is deprecated in recent NumPy releases and emits a warning).
    next_token = int(np.random.choice(len(log_probs), p=np.exp(log_probs)))
    print("The next word for model", num_model, ":", i2w[next_token])





In [None]:
# Sample a next word from each of the four trained models (each call prompts for a word).
predict_next_word(model1,1)
predict_next_word(model2,2)
predict_next_word(model3,3)
predict_next_word(model4,4)

Please enter a word: love


  next_token = int(np.random.choice(len(pred), 1, p=np.exp(pred)))


The next word for model 1 : ''
Please enter a word: love
The next word for model 2 : khan
Please enter a word: ove
Word not found in vocabulary. Please enter another word.
Please enter a word: love
The next word for model 3 : 1998
Please enter a word: love
The next word for model 4 : majority


**Q8: Perplexity**

In [None]:
def perplexityCalc(model, title, data_loader):
    """
    Evaluate ``model`` on every batch of ``data_loader`` and report its perplexity.

    Batches are prepared exactly as in training (start symbol prepended to the
    input, pad symbol appended to the target). Each batch loss is weighted by
    the number of target positions in that batch, so the average is taken over
    positions rather than over batches; batches differ in length, and an
    unweighted mean of batch means would over-weight the short ones.

    NOTE(review): padding positions are part of the targets and therefore count
    towards the loss - confirm whether they should be masked out.

    :param model: Compiled Keras model exposing ``test_on_batch``.
    :param title: Label used in the printed message.
    :param data_loader: Iterable of 2-D batches of token ids.
    :return: The perplexity computed by ``calculate_perplexity``.
    :raises ValueError: If ``data_loader`` yields no target positions.
    """
    total_loss = 0.0
    total_targets = 0

    for batch in data_loader:
        n_rows = batch.shape[0]

        # Prepare the batch
        shifted_batch = np.concatenate([np.ones((n_rows, 1)), batch], axis=1)
        output_batch = np.concatenate([batch, np.zeros((n_rows, 1))], axis=1)

        # Compute the loss; assumes test_on_batch returns the mean over all
        # target positions of the batch (no extra metrics compiled) - TODO confirm.
        batch_loss = model.test_on_batch(shifted_batch, output_batch[:, :, None])

        n_targets = output_batch.size
        total_loss += float(batch_loss) * n_targets
        total_targets += n_targets

    if total_targets == 0:
        raise ValueError('Cannot compute perplexity for {}: no data.'.format(title))

    average_loss = total_loss / total_targets
    perplexity_score = calculate_perplexity(average_loss)

    print("Perplexity for", title, "is", perplexity_score)
    return perplexity_score

In [None]:
perplexityCalc(model1, 'model1 - train', train_data)
perplexityCalc(model1, 'model1 - validation', valid_data)
perplexityCalc(model1, 'model1 - test', test_data)

Perplexity for model1 - train is 16.847341698937555
Perplexity for model1 - validation is 32.544042806305384
Perplexity for model1 - test is 34.362245875111334


34.362245875111334

In [None]:
perplexityCalc(model2, 'model2 - train', train_data)
perplexityCalc(model2, 'model2 - validation', valid_data)
perplexityCalc(model2, 'model2 - test', test_data)

Perplexity for model2 - train is 30.431955304834997
Perplexity for model2 - validation is 37.83264270302552
Perplexity for model2 - test is 39.362068402874264


39.362068402874264

In [None]:
perplexityCalc(model3, 'model3 - train', train_data)
perplexityCalc(model3, 'model3 - validation', valid_data)
perplexityCalc(model3, 'model3 - test', test_data)

Perplexity for model3 - train is 29.0170034623388
Perplexity for model3 - validation is 54.09547233464761
Perplexity for model3 - test is 56.201167398603474


56.201167398603474

In [None]:
perplexityCalc(model4, 'model4 - train', train_data)
perplexityCalc(model4, 'model4 - validation', valid_data)
perplexityCalc(model4, 'model4 - test', test_data)

Perplexity for model4 - train is 2.0725369214509572
Perplexity for model4 - validation is 2.284337814627195
Perplexity for model4 - test is 2.349796069730992


2.349796069730992

****Q9****

In [None]:
def calculate_probabilities(model,sentences):
  """
  Print the model's probability for each given sentence and for a fixed reference sentence.

  Every sentence is passed to ``sentence_probability`` (which prints its
  result), followed by the hand-written sentence "i love cupcakes".

  :param model: Trained language model.
  :param sentences: Sentences to score, e.g. the ones generated by the model.
  """
  for text in [*sentences, "i love cupcakes"]:
    sentence_probability(model, text)


In [None]:
calculate_probabilities(model1,sentence1)

The probability of 'i love lrb born september 13': 1.5662308187623152e-25
The probability of 'i love ferry may known as': 7.116009774993139e-26
The probability of 'i love olympiad lighting naval announces': 2.384795684075354e-31
The probability of 'i love cupcakes': 3.581439954173683e-14


In [None]:
calculate_probabilities(model2,sentence2)

The probability of 'i love is a <UNK> of': 5.48605364818028e-23
The probability of 'i love is a book and': 1.6230837776825606e-25
The probability of 'i love argentina fold showed bobby': 3.203958999889377e-29
The probability of 'i love cupcakes': 2.3230926781721414e-15


In [None]:
calculate_probabilities(model3,sentences3)

The probability of 'i love <UNK> <UNK> <UNK> <UNK>': 6.80331290105277e-19
The probability of 'i love commonly and as and': 2.2844599288468124e-23
The probability of 'i love rivers mardan sint 1992': 7.656385558777207e-33
The probability of 'i love cupcakes': 9.825106559297666e-18


In [None]:
calculate_probabilities(model4,sentence4)

The probability of 'i love <PAD> <PAD> <PAD> <PAD>': 5.861934964304939e-20
The probability of 'i love <PAD> <PAD> <PAD> <PAD>': 5.861934964304939e-20
The probability of 'i love marine orbit powers socialist': 8.44317830295672e-36
The probability of 'i love cupcakes': 2.1848752325769792e-23
