In [26]:
import re

import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch._tensor import Tensor
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence

import os
import pandas as pd
import math

# Deep Learning for NLP - Lab Assignment 2

## Loading Data


In [2]:
DATA_PATH = "../data/google" # Folder path for the train/test/dev .tsv files

train_data_raw = pd.read_csv(os.path.join(DATA_PATH, "train.tsv"), sep='\t', header=None)[0].tolist()[0:5000]
test_data_raw = pd.read_csv(os.path.join(DATA_PATH, "test.tsv"), sep='\t', header=None)[0].tolist()[0:1000]
dev_data_raw = pd.read_csv(os.path.join(DATA_PATH, "dev.tsv"), sep='\t', header=None)[0].tolist()[0:1000]

In [3]:
def clean_str(string: str, tolower:bool=True) -> str:
    """
    Tokenization/string cleaning.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " ( ", string)
    string = re.sub(r"\)", " ) ", string)
    string = re.sub(r"\?", " ? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    if tolower:
        string = string.lower()
    return string.strip()


train_data, test_data, dev_data = [], [], []

for sentence in train_data_raw:
    train_data.append(clean_str(sentence))
for sentence in dev_data_raw:
    dev_data.append(clean_str(sentence))
for sentence in test_data_raw:
    test_data.append(clean_str(sentence))

## Creating vocabulary


In [4]:
class WordDict:
    """Word dictionnary class.
    """
    # constructor, words must be a set containing all words
    def __init__(self, words:set) -> None:
        """Initialize a word dictionnary

        Args:
            words (set): set of all words in a dataset
        """
        assert type(words) == set
        self.word_to_idx = {word: idx for idx, word in enumerate(words)}
        self.idx_to_word = {idx: word for word, idx in self.word_to_idx.items()}

    def word_to_id(self, word:str) -> int:
        """Return the integer associated with a word.

        Args:
            word (str): word

        Returns:
            int: index of the word in the vocabulary
        """
        return self.word_to_idx[word]
    
    def id_to_word(self, idx:int) -> str:
        """Return the word associated with an integer.

        Args:
            idx (int): integer

        Returns:
            str: word at that index in the word dictionnary
        """
        return self.idx_to_word[idx]
    
    def __len__(self) -> int:
        """Compute length of the dictionnary

        Returns:
            int: length of the dictionnary of words
        """
        return len(self.word_to_idx)

In [5]:
train_words = set()

for sentence in train_data:
    train_words.update(sentence.split(" "))

train_words.update(["<bos>", "<eos>", "<unk>", "<pad>"])

word_dict = WordDict(train_words)

print("Number of words :", len(word_dict))
print(list(word_dict.idx_to_word.items())[0:5]) # Excerpt of the idx_to_word dictionnary

Number of words : 8159
[(0, 'delight'), (1, 'rosa'), (2, 'adhd'), (3, 'correlated'), (4, 'harvard')]


## Neural N-Gram model


In [6]:
class NGramDataset(Dataset):
    """Dataset for training the N-gram model, based on the Pytorch Dataset class.
    """
    def __init__(self, sentences:list[str], vocab:WordDict, context_size:int) -> None:
        """N-gram Dataset to use with the DataLoader feature of torch during training

        Args:
            sentences (list[str]): List of sentences
            vocab (WordDict): _description_
            context_size (int): _description_
        """
        super().__init__()
        self.data = []
        self.vocab = vocab
        self.context_size = context_size

        for sentence in sentences:
            tokens = ["<bos>"] * context_size + sentence.split(" ") + ["<eos>"] # Pad the sentence and extract words. We pad the sentence with context_size <bos> tokens for generating the first word.
            indices = [vocab.word_to_id(word) if word in vocab.word_to_idx.keys() else vocab.word_to_id('<unk>') for word in tokens] # Tokenize the sentence using the WordDict
            for i in range(context_size, len(indices)):
                self.data.append((torch.tensor(indices[i-context_size:i]),
                                              torch.tensor(indices[i]))) # Append the training data with a tuple ([word_1, word_2 ,...], word_n)

    def __len__(self) -> int:
        """Compute len of dataset (necessary)

        Returns:
            int: length of the dataset
        """
        return len(self.data)
    
    def __getitem__(self, idx:int) -> tuple[Tensor, Tensor]:
        """Return a tuple of tensor, containing encodings of the n-grams and next word to predict

        Args:
            idx (int): index of the item in the dataset

        Returns:
            tuple[Tensor, Tensor]: Tuple with n-gram data and next word in the sentence
        """
        return self.data[idx]

In [7]:
# Define the context size (e.g., 2 for bigrams)
CONTEXT_SIZE = 5  # For n-grams, context_size = n - 1

# Dataset
ngram_train_dataset = NGramDataset(train_data, word_dict, CONTEXT_SIZE)
ngram_dev_dataset = NGramDataset(dev_data, word_dict, CONTEXT_SIZE)
ngram_test_dataset = NGramDataset(test_data, word_dict, CONTEXT_SIZE)

print("Train_dataset length :", len(ngram_train_dataset))
print("Training item example :", ngram_train_dataset[123]) # Example of item in the training data
print("Training sentence example :", " ".join([word_dict.id_to_word(id) for id in ngram_train_dataset[123][0].tolist()]), "; Target word example :", word_dict.id_to_word(ngram_train_dataset[123][1].item()))

Train_dataset length : 74083
Training item example : (tensor([ 463, 3226, 2409,  398, 2303]), tensor(5439))
Training sentence example : youtube and outrage drama is ; Target word example : super


In [8]:
class NeuralNGramModel(nn.Module):
    """Class for the Neural N-gram module, based on the Pytorch based class.
    """
    def __init__(self, vocab_size:int, embed_size:int, context_size:int, hidden_size:int) -> None:
        """Initialize the Neural N-gram model

        Args:
            vocab_size (int): Number of words in the dictionnary
            embed_size (int): Embedding size
            context_size (int): Number 
            hidden_size (int): _description_
        """
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embed_size)
        self.fc1 = nn.Linear(embed_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, vocab_size)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x:Tensor) -> Tensor:
        """Forward pass of the model

        Args:
            x (Tensor): input of the model (batched), size (batch_size, context_size)

        Returns:
            Tensor: output of the model, in the form of a (vocab_size, x.shape(1)) tensor
        """

        embeds = self.embeddings(x) # (batch_size, context_size, embed_size)
        embeds = embeds.sum(dim=1) # (batch_size, embed_size)

        hidden = self.dropout(F.relu(self.fc1(embeds))) # (batch_size, hidden_size)
        output = self.fc2(hidden) # (batch_size, vocab_size)
        
        return output


In [9]:
# Model and training parameters
NGRAM_EMBED_SIZE = 64
NGRAM_HIDDEN_SIZE = 128
NGRAM_BATCH_SIZE = 32
NGRAM_EPOCHS = 5

# DataLoader
ngram_train_loader = DataLoader(ngram_train_dataset, batch_size=NGRAM_BATCH_SIZE, shuffle=True)
ngram_dev_loader = DataLoader(ngram_dev_dataset, batch_size=NGRAM_BATCH_SIZE)

# Initialize Model, Loss, Optimizer
vocab_size = len(word_dict)
ngram_model = NeuralNGramModel(vocab_size, NGRAM_EMBED_SIZE, CONTEXT_SIZE, NGRAM_HIDDEN_SIZE)
ngram_criterion = nn.CrossEntropyLoss(ignore_index=word_dict.word_to_id('<pad>')) # We can see the problem as a multi-label classification problem, classyfying among all possibles words in the vocab
ngram_optimizer = optim.Adam(ngram_model.parameters())

# Training Loop

for epoch in range(NGRAM_EPOCHS):
    ngram_model.train()
    ngram_train_loss = 0

    # Train loop
    for context, target in ngram_train_loader:

        # Forward pass
        output = ngram_model(context)
        loss = ngram_criterion(output, target)

        # Backpropagation
        ngram_optimizer.zero_grad()
        loss.backward()
        ngram_optimizer.step()

        ngram_train_loss += loss.item()
    
    # Dev loop
    ngram_model.eval()
    ngram_dev_loss = 0
    with torch.no_grad():
        for context, target in ngram_dev_loader:

            # Forward pass
            output = ngram_model(context)
            loss = ngram_criterion(output, target)
            ngram_dev_loss += loss.item()

    ngram_train_loss /= len(ngram_train_loader)
    ngram_dev_loss /= len(ngram_dev_loader)

    print(f"Epoch [{epoch+1}/{NGRAM_EPOCHS}], "
            f"Train Loss: {ngram_train_loss:.2f}, "
            f"Dev Loss: {ngram_dev_loss:.2f}")

Epoch [1/5], Train Loss: 6.72, Dev Loss: 6.58
Epoch [2/5], Train Loss: 6.27, Dev Loss: 6.68
Epoch [3/5], Train Loss: 6.04, Dev Loss: 6.85
Epoch [4/5], Train Loss: 5.83, Dev Loss: 7.10
Epoch [5/5], Train Loss: 5.64, Dev Loss: 7.44


## Sentence Generation


In [10]:
# Generation example

ngram_model.eval()

context = ["<bos>"] * CONTEXT_SIZE
context_indices = torch.tensor([word_dict.word_to_id(word) for word in context], dtype = torch.long).unsqueeze(0)


generated_sequence = context[:]
while generated_sequence[-1] != "<eos>":
    context_indices = torch.tensor([word_dict.word_to_id(word) for word in generated_sequence[-(CONTEXT_SIZE-1):]], dtype = torch.long).unsqueeze(0)
    with torch.no_grad():
        output = ngram_model(context_indices)
        probabilities = torch.softmax(output, dim=-1)

    predicted_index = torch.argmax(probabilities, dim=-1).item()
    predicted_word = word_dict.id_to_word(predicted_index)
    generated_sequence.append(predicted_word)

print("Deterministic generated sentence :", " ".join(generated_sequence[CONTEXT_SIZE:-1]))

Deterministic generated sentence : i love


In [11]:
# Generation example

ngram_model.eval()

context = ["<bos>"] * CONTEXT_SIZE
context_indices = torch.tensor([word_dict.word_to_id(word) for word in context], dtype = torch.long).unsqueeze(0)

for _ in range(10):
    generated_sequence = context[:]
    while generated_sequence[-1] != "<eos>":
        context_indices = torch.tensor([word_dict.word_to_id(word) for word in generated_sequence[-(CONTEXT_SIZE-1):]], dtype = torch.long).unsqueeze(0)
        with torch.no_grad():
            output = ngram_model(context_indices)
            probabilities = torch.softmax(output, dim=-1)

        predicted_index = torch.multinomial(probabilities.squeeze(), num_samples=1).item()
        predicted_word = word_dict.id_to_word(predicted_index)
        generated_sequence.append(predicted_word)

    print("Random generated sequence :", " ".join(generated_sequence[CONTEXT_SIZE:-1]))

Random generated sequence : how served it something feels it have him 2
Random generated sequence : like mrs option for if to shoe the fun was well trip high
Random generated sequence : used gonna
Random generated sequence : should recoiled ! do have so s can back
Random generated sequence : phrased times tsa door said ? i
Random generated sequence : eat emotional matter my i ) t but appreciate for y started would that even this have ever one i think hes do to did n't the ruined have me so mess are place before what for people as explain find 've lucky
Random generated sequence : hello friend then go how same him it i here like it but on his best calling
Random generated sequence : where needs you this from , please is actually possible going become were mother woman but word xd like so , there is this partner section great the ?
Random generated sequence : , congrats sounds i need to time never attractive pay in if
Random generated sequence : reddit once man people welcome was the ? t

## LSTM-based Autoregressive Model


In [42]:
class LSTMDataset(Dataset):
    """Dataset for training the LSTM model, based on the Pytorch Dataset class.
    """
    def __init__(self, sentences:list[str], vocab:WordDict) -> None:
        """_summary_

        Args:
            sentences (list[str]): List od sentences.
            vocab (WordDict): _description_
        """
        super().__init__()
        self.data = []
        self.vocab = vocab

        for sentence in sentences:
            tokens = ["<bos>"] + sentence.split(" ") + ["<eos>"]
            indices = [vocab.word_to_id(word) if word in vocab.word_to_idx.keys() else vocab.word_to_id('<unk>') for word in tokens] # Tokenize the sentence using the WordDict
            
            self.data.append((torch.tensor(indices),
                              torch.tensor(indices[1:]+[self.vocab.word_to_id('<pad>')])))
            
    def __len__(self) -> int:
        """Compute len of dataset (necessary)

        Returns:
            int: length of the dataset
        """
        return len(self.data)
    
    def __getitem__(self, idx:int) -> tuple[Tensor, Tensor]:
        """Return a tuple of tensor, containing encodings of the n-grams and next word to predict

        Args:
            idx (int): index of the item in the dataset

        Returns:
            tuple[Tensor, Tensor]: Tuple with sentence and shifted sentence for generation
        """
        return self.data[idx]


def collate_fn(batch:list[tuple]) -> tuple[Tensor, Tensor]:
    """Custom collate function for dynamically padding sentences

    Args:
        batch (list[tuple]): batch of a sentence Dataset

    Returns:
        tuple[Tensor, Tensor]: Dynamicallt padded sentences
    """
    sentences, shifted_sentences = zip(*batch)

    padded_sentences = pad_sequence(sentences, batch_first=True, padding_value=word_dict.word_to_id('<pad>'))
    padded_shifted_sentences = pad_sequence(shifted_sentences, batch_first=True, padding_value=word_dict.word_to_id('<pad>'))

    return padded_sentences, padded_shifted_sentences

In [43]:
# Dataset
lstm_train_dataset = LSTMDataset(train_data, word_dict)
lstm_dev_dataset = LSTMDataset(dev_data, word_dict)

print("Train_dataset length :", len(lstm_train_dataset))
print("Training item example :", lstm_train_dataset[123]) # Example of item in the training data
print("Training sentence example :", " ".join([word_dict.id_to_word(id) for id in lstm_train_dataset[123][0].tolist()]), " ; Target sentence example :", " ".join([word_dict.id_to_word(id) for id in lstm_train_dataset[123][1].tolist()]))

Train_dataset length : 5000
Training item example : (tensor([7151, 7733, 6294, 6591,  152, 5815,  474, 3748, 3621, 2914]), tensor([7733, 6294, 6591,  152, 5815,  474, 3748, 3621, 2914, 1685]))
Training sentence example : <bos> three words , no subtlety dude stop seriously <eos>  ; Target sentence example : three words , no subtlety dude stop seriously <eos> <pad>


In [14]:
class LSTMModel(nn.Module):
    """Class for the LSTM module, based on the Pytorch based class.
    """
    def __init__(self, vocab_size:int, embed_size:int, hidden_size:int, dropout_prob:float=0.3) -> None:
        """Initialize the LSTM

        Args:
            vocab_size (int): Number of words in the dictionnary
            embed_size (int): Embedding size
            hidden_size (int): Size of the hidden layer
        """
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

        self.dropout_prob = dropout_prob

    def variational_dropout(self, input:Tensor) -> Tensor:
        if self.training: # Apply variational dropout only during training
            mask = (torch.rand_like(input) > self.dropout_prob).float().to(input.device)
            mask = mask.div_(1.0 - self.dropout_prob)
            return input * mask
        return input # If not in training, return input

    def forward(self, x:Tensor) -> Tensor:
        """Forward pass of the model

        Args:
            x (Tensor): input of the model (batched), size (batch_size, context_size)

        Returns:
            Tensor: output of the model, in the form of a (vocab_size, x.shape(1)) tensor
        """

        embeds = self.embeddings(x) # (batch_size, seq_len, embed_size)
        embeds = self.variational_dropout(embeds) # (batch_size, seq_len, embed_size)

        output, _ = self.lstm(embeds) # (batch_size, seq_len, hidden_size)
        output = self.variational_dropout(output) # (batch_size, seq_len, hidden_size)

        output = self.fc(output) # (batch_size, seq_len, vocab_size)

        
        return output


In [15]:
# Model and training parameters
LSTM_EMBED_SIZE = 64
LSTM_HIDDEN_SIZE = 128
LSTM_BATCH_SIZE = 32
LSTM_EPOCHS = 10

# DataLoader
lstm_train_loader = DataLoader(lstm_train_dataset, batch_size=LSTM_BATCH_SIZE, collate_fn=collate_fn, shuffle=True)
lstm_dev_loader = DataLoader(lstm_dev_dataset, batch_size=LSTM_BATCH_SIZE, collate_fn=collate_fn)

# Initialize Model, Loss, Optimizer
vocab_size = len(word_dict)
lstm_model = LSTMModel(vocab_size, LSTM_EMBED_SIZE, LSTM_HIDDEN_SIZE)
lstm_criterion = nn.CrossEntropyLoss(ignore_index=word_dict.word_to_id('<pad>')) # We can see the problem as a multi-label classification problem, classyfying among all possibles words in the vocab
lstm_optimizer = optim.Adam(lstm_model.parameters())

# Training Loop

for epoch in range(LSTM_EPOCHS):
    lstm_model.train()
    lstm_train_loss = 0

    # Train loop
    for context, target in lstm_train_loader:

        # Forward pass
        output = lstm_model(context)

        # Reshape for loss calculation
        output = output.view(-1, vocab_size) # (batch_size *  seq_len, vocab_size)
        target = target.view(-1) # (batch_size * seq_len)

        # Compute loss
        loss = lstm_criterion(output, target)

        # Backpropagation
        lstm_optimizer.zero_grad()
        loss.backward()
        lstm_optimizer.step()

        lstm_train_loss += loss.item()
    
    # Dev loop
    lstm_model.eval()
    lstm_dev_loss = 0
    with torch.no_grad():
        for context, target in lstm_dev_loader:

            # Forward pass
            output = lstm_model(context)

            # Reshape for loss calculation
            output = output.view(-1, vocab_size) # (batch_size *  seq_len, vocab_size)
            target = target.view(-1) # (batch_size * seq_len)

            # Compute loss
            loss = lstm_criterion(output, target)

    lstm_train_loss /= len(lstm_train_loader)
    lstm_dev_loss /= len(lstm_dev_loader)

    print(f"Epoch [{epoch+1}/{LSTM_EPOCHS}], "
            f"Train Loss: {lstm_train_loss:.2f}, "
            f"Dev Loss: {lstm_dev_loss:.2f}")

Epoch [1/10], Train Loss: 6.98, Dev Loss: 0.00
Epoch [2/10], Train Loss: 6.31, Dev Loss: 0.00
Epoch [3/10], Train Loss: 6.17, Dev Loss: 0.00
Epoch [4/10], Train Loss: 6.03, Dev Loss: 0.00
Epoch [5/10], Train Loss: 5.91, Dev Loss: 0.00
Epoch [6/10], Train Loss: 5.81, Dev Loss: 0.00
Epoch [7/10], Train Loss: 5.72, Dev Loss: 0.00
Epoch [8/10], Train Loss: 5.63, Dev Loss: 0.00
Epoch [9/10], Train Loss: 5.56, Dev Loss: 0.00
Epoch [10/10], Train Loss: 5.49, Dev Loss: 0.00


## Sentence Generation


In [16]:
# Generation example

lstm_model.eval()

context = ["<bos>"]
context_indices = torch.tensor([word_dict.word_to_id(word) for word in context], dtype = torch.long).unsqueeze(0)

MAX_LEN = 100

generated_sequence = context[:]
while generated_sequence[-1] != "<eos>" and len(generated_sequence) < MAX_LEN:
    context_indices = torch.tensor([word_dict.word_to_id(word) for word in generated_sequence], dtype = torch.long).unsqueeze(0)
    with torch.no_grad():
        output = lstm_model(context_indices)[:, -1]
        probabilities = torch.softmax(output, dim=-1)

    predicted_index = torch.argmax(probabilities, dim=-1).item()
    predicted_word = word_dict.id_to_word(predicted_index)
    generated_sequence.append(predicted_word)

print("Deterministic generated sentence :", " ".join(generated_sequence[1:-1]))

Deterministic generated sentence : i m not a lot of the same of the same


In [17]:
# Generation example

ngram_model.eval()

context = ["<bos>"]
context_indices = torch.tensor([word_dict.word_to_id(word) for word in context], dtype = torch.long).unsqueeze(0)

MAX_LEN = 100

for _ in range(10):
    generated_sequence = context[:]
    while generated_sequence[-1] != "<eos>" and len(generated_sequence) < MAX_LEN:
        context_indices = torch.tensor([word_dict.word_to_id(word) for word in generated_sequence], dtype = torch.long).unsqueeze(0)
        with torch.no_grad():
            output = lstm_model(context_indices)[:, -1]
            probabilities = torch.softmax(output, dim=-1)

        predicted_index = torch.multinomial(probabilities.squeeze(), num_samples=1).item()
        predicted_word = word_dict.id_to_word(predicted_index)
        generated_sequence.append(predicted_word)

    print("Random generated sentence :", " ".join(generated_sequence[1:-1]))

Random generated sentence : name cheese has to have 2 team spun thing or people


Random generated sentence : this for the name , are name
Random generated sentence : thanks i ve n't knowing
Random generated sentence : they can the wholesome for ? it is a lot , oh i would mean good personalities happen
Random generated sentence : ditto ( for r handle in off you does
Random generated sentence : they believe play real btw to a over of half deja stealing
Random generated sentence : that happened solar aside
Random generated sentence : no own name is a place he tiring if they 're a page
Random generated sentence : i am hi ( what means i visibly shit believe from our position never expected that and odd for him
Random generated sentence : why starter laugh someone brings nothing say my 's people


## Perplexity


In [None]:
class Perplexity:
    """Perplexity computation
    """
    def __init__(self) -> None:
        """Init method.
        """
        self.reset()
    def reset(self) -> None:
        """Rset method.
        """
        self.log_sum = 0
        self.total_words = 0

    def add_sentence(self, log_probs) -> None:
        """Compute values for one sentence and store it in the class.
        """
        self.log_sum += log_probs.sum().item()
        self.total_words += log_probs.size(0)

    def compute_perplexity(self) -> float:
        """Compute full Perplexity

        Returns:
            float: Final perplexity
        """
        return math.exp(-self.log_sum / self.total_words)

In [None]:
ngram_model.eval()
perplexity_object = Perplexity()

# Dataset & DataLoader
ngram_test_dataset = NGramDataset(test_data, word_dict, CONTEXT_SIZE)
ngram_test_loader = DataLoader(ngram_test_dataset, batch_size=NGRAM_BATCH_SIZE)

with torch.no_grad():
    for context, target in ngram_test_loader:
        output = ngram_model(context)
        log_probs = torch.log_softmax(output, dim=1)
        for i in range(len(target)):
            perplexity_object.add_sentence(log_probs[i][target[i]].unsqueeze(0))

perplexity = perplexity_object.compute_perplexity()

print(f"N-Gram Model Perplexity : {perplexity:.2f}")

N-Gram Model Perplexity : 1890.58


In [50]:
lstm_model.eval()
perplexity_object.reset()

# Dataset & DataLoader
lstm_test_dataset = LSTMDataset(test_data, word_dict)
lstm_test_loader = DataLoader(lstm_test_dataset, batch_size=LSTM_BATCH_SIZE, collate_fn=collate_fn)

with torch.no_grad():
    for context, target in lstm_test_loader:
        output = lstm_model(context)
        log_probs = torch.log_softmax(output, dim=2)

        log_probs = log_probs.view(-1, vocab_size)
        target = target.view(-1)

        mask = (target != word_dict.word_to_id('<pad>'))
        log_probs = log_probs[torch.arange(len(target))[mask], target[mask]]

        perplexity_object.add_sentence(log_probs)

perplexity = perplexity_object.compute_perplexity()

print(f"LSTM Model Perplexity : {perplexity:.2f}")

LSTM Model Perplexity : 435.29
