In [1]:
import torch

import pickle

import numpy as np

from tqdm import tqdm

from torch import nn

# Encodes categorical labels into numerical format (used for label preprocessing)
from sklearn.preprocessing import LabelEncoder

# Calculates the accuracy of a classification model (used for model evaluation)
from sklearn.metrics import accuracy_score

# Defines a custom dataset class for PyTorch (used for handling data)
import torch.utils.data

# Creates a DataLoader for efficient batch processing in PyTorch (used for data loading)
from torch.utils.data import DataLoader

# Splits a dataset into training and validation sets (used for data splitting)
from torch.utils.data import random_split

# Represents a multi-dimensional matrix in PyTorch (used for tensor manipulation)
from torch import Tensor

# Implements a linear layer in a neural network (used for defining neural network architecture)
from torch.nn import Linear

# Applies rectified linear unit (ReLU) activation function (used for introducing non-linearity)
from torch.nn import ReLU

# Applies sigmoid activation function (used for binary classification output)
from torch.nn import Sigmoid

# Base class for all neural network modules in PyTorch (used for creating custom models)
from torch.nn import Module

# Stochastic Gradient Descent optimizer (used for model optimization during training)
from torch.optim import SGD

# Binary Cross Entropy Loss function (used for binary classification problems)
from torch.nn import BCELoss

# Initializes weights using Kaiming uniform initialization (used for weight initialization)
from torch.nn.init import kaiming_uniform_

# Initializes weights using Xavier (Glorot) uniform initialization (used for weight initialization)
from torch.nn.init import xavier_uniform_

from torch.nn.utils.rnn import pad_sequence

In [None]:

with open('./pickles/word_sequences.pkl', 'rb') as file:
    word_sequences = pickle.load(file)

with open('./pickles/char_sequences_without_tashkeel.pkl', 'rb') as file:
    char_sequences = pickle.load(file)

with open('./pickles/tashkeel_sequences.pkl', 'rb') as file:
    labels = pickle.load(file)

with open('./pickles/val_word_sequences.pkl', 'rb') as file:
    val_word_sequences = pickle.load(file)

with open('./pickles/val_char_sequences_without_tashkeel.pkl', 'rb') as file:
    val_char_sequences = pickle.load(file)

with open('./pickles/val_tashkeel_sequences.pkl', 'rb') as file:
    val_labels = pickle.load(file)

with open('./pickles/sentence_diacritics_appearance_sequences.pickle', 'rb') as file:
    test_sentences_diacritics_sequences = pickle.load(file)

with open('./pickles/val_sentence_diacritics_appearance_sequences.pickle', 'rb') as file:
    val_sentences_diacritics_sequences = pickle.load(file)

with open('./pickles/segment_sequences.pickle', 'rb') as file:
    train_segment_sequences = pickle.load(file)

with open('./pickles/val_segment_sequences.pickle', 'rb') as file:
    val_segment_sequences = pickle.load(file)

In [None]:
print(len(word_sequences))
print(len(char_sequences[1]))
print(len(labels[1]))
print(len(test_sentences_diacritics_sequences[0]))

# Utility functions

In [None]:
def concatenate_characters(characters):
    # Create a tensor of zeros with the same shape as the last subsequence
    zeros_tensor = torch.zeros_like(characters[:, 0:1, :])

    # Concatenate it to the original tensor along the second dimension
    padded_x = torch.cat((characters, zeros_tensor), dim=1)

    # Now, padded_x will have zeros padded to the last subsequence

    temp1 = padded_x[:, :-1, :]
    temp2 = padded_x[:, 1:, :]

    # Concatenate along the last dimension
    concatenated_characters = torch.cat((temp1, temp2), dim=-1)

    return concatenated_characters

def concatenate_characters2(characters):
    # Create a tensor of zeros with the same shape as the last subsequence
    zeros_tensor = torch.zeros_like(characters[:, 0:1])

    # Concatenate it to the original tensor along the second dimension
    padded_x = torch.cat((characters, zeros_tensor), dim=1)

    # Now, padded_x will have zeros padded to the last subsequence

    temp1 = padded_x[:, :-1]
    temp2 = padded_x[:, 1:]

    # Concatenate along the last dimension
    concatenated_characters = torch.cat((temp1, temp2), dim=-1)

    return concatenated_characters

def concatenate_tensors_elementwise(tensor1, tensor2):
    result = torch.cat((tensor1, tensor2), dim=-1)
    return result

def concatenate_tensors_feature3(tensor1, tensor2):
    concatenated_tensor = torch.cat((tensor1, tensor2.unsqueeze(2)), dim=2)
    return concatenated_tensor

def pad_list(list, max_len, val):
    for i in range(len(list)):
        if len(list[i]) < max_len:
            for j in range(max_len - len(list[i])):
                list[i].append(val)

    return list

In [None]:
# dataset definition
# A custom Dataset class must implement three functions: __init__, __len__, and __getitem__.
class Dataset(torch.utils.data.Dataset):
    # load the dataset
    # The __init__ function is run once when instantiating the Dataset object
    def __init__(self, char_sequences, labels, word_sequences, diacritics_sequence, segment_sequences):
        
        self.x = torch.tensor(char_sequences)

        self.y = torch.tensor(labels)

        self.word = torch.tensor(word_sequences)

        self.diacritics_sequence = torch.tensor(diacritics_sequence)

        self.segment_sequences = torch.tensor(segment_sequences)

    # number of rows in the dataset
    # The __len__ function returns the number of samples in our dataset.
    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx], self.word[idx], self.diacritics_sequence[idx], self.segment_sequences[idx]

    # get indexes for train and test rows
    def get_splits(self, n_test=0.33):
        # determine sizes
        test_size = round(n_test * len(self.x))
        train_size = len(self.x) - test_size
        # calculate the split
        return random_split(self, [train_size, test_size])


# # prepare the dataset
# def prepare_data():
#     # load the dataset
#     dataset = CSVDataset()
#     # calculate split
#     train, test = dataset.get_splits()
#     # prepare data loaders
#     # The Dataset retrieves our dataset’s features and labels one sample at a time.
#     # While training a model, we typically want to pass samples in “minibatches”,
#     # reshuffle the data at every epoch to reduce model overfitting,
#     train_dl = DataLoader(train, batch_size=32, shuffle=True)
#     test_dl = DataLoader(test, batch_size=1024, shuffle=False)
#     return dataset.encoding_mapping, train_dl, test_dl

In [19]:
# dataset definition
# A custom Dataset class must implement three functions: __init__, __len__, and __getitem__.
class Dataset2(torch.utils.data.Dataset):
    # load the dataset
    # The __init__ function is run once when instantiating the Dataset object
    def __init__(self, words, labels):
        
        self.x = torch.tensor(words)

        self.y = torch.tensor(labels)


    # number of rows in the dataset
    # The __len__ function returns the number of samples in our dataset.
    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]
    
    # get indexes for train and test rows
    def get_splits(self, n_test=0.33):
        # determine sizes
        test_size = round(n_test * len(self.x))
        train_size = len(self.x) - test_size
        # calculate the split
        return random_split(self, [train_size, test_size])


In [None]:
#convert labels to numpy array
print(len(labels[3]))
print(len(char_sequences[3]))
train_ds = Dataset(char_sequences, labels, word_sequences, test_sentences_diacritics_sequences, train_segment_sequences)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)

dg = iter(train_dl)
X1, Y1, z1, d1, s1 = next(dg)
X2, Y2, z2, d2, s2 = next(dg)
print(Y1.shape, X1.shape, z1.shape, d1.shape, s1.shape, Y2.shape, X2.shape, z2.shape, d2.shape, s2.shape)
print(X1[0][:], "\n", Y1[0][:])

MODEL

In [None]:
class Char_model(nn.Module):
  def __init__(self, vocab_size=42, embedding_dim=50, hidden_size=50, n_classes=17):
    """
    The constructor of our NER model
    Inputs:
    - vacab_size: the number of unique words
    - embedding_dim: the embedding dimension
    - n_classes: the number of final classes (tags)

    embedding_dim here: 50 for char embedding + 50 for following char embedding + 1 for feature3 = 101
    """

    super(Char_model, self).__init__()

    input_len = 2*embedding_dim + 15 + 1
    ####################### TODO: Create the layers of your model #######################################
    # (1) Create the embedding layer
    self.embedding_char = nn.Embedding(vocab_size, embedding_dim)
    self.embedding_diacritics = nn.Embedding(14, 15)

    # (2) Create an LSTM layer with hidden size = hidden_size and batch_first = True
    self.lstm = nn.LSTM(input_len, hidden_size, batch_first=True)
    # batch_first makes the input and output tensors to be of shape (batch_size, seq_length, hidden_size)

    # (3) Create a linear layer
    self.linear = nn.Linear(hidden_size, n_classes)

    #####################################################################################################

  def forward(self, sentences, diacritics_list, segments, h_0=None, c_0=None):
    """
    This function does the forward pass of our model
    Inputs:
    - sentences: tensor of shape (batch_size, max_length)

    Returns:
    - final_output: tensor of shape (batch_size, max_length, n_classes)
    """

    final_output = None
    #############################################################
    sentences_embedded = self.embedding_char(sentences) 
    diacritics_embedded = self.embedding_diacritics(diacritics_list)
    
    sentences_embedded = concatenate_characters(sentences_embedded) #feature 1: concatenate characters
    sentences_embedded = concatenate_tensors_elementwise(sentences_embedded, diacritics_embedded) #feature 2: concatenate diacritics seen before
    sentences_embedded = concatenate_tensors_feature3(sentences_embedded, segments) #feature 3: concatenate segment for each character

    #check if h_0 and c_0 are provided or not
    if h_0 is None or c_0 is None:
      final_output, (h_0, c_0) = self.lstm(sentences_embedded)
    else:
      final_output, _ = self.lstm(sentences_embedded, (h_0, c_0)) 
      
    final_output = self.linear(final_output)  


    ############################################################
    return final_output

In [21]:
class Word_model(nn.Module):
  def __init__(self, vocab_size=2093761, embedding_dim=50, hidden_size=50, n_classes=17):
    """
    The constructor of our NER model
    Inputs:
    - vacab_size: the number of unique words
    - embedding_dim: the embedding dimension
    - n_classes: the number of final classes (tags)
    """
    super(Word_model, self).__init__()
    ####################### TODO: Create the layers of your model #######################################
    # (1) Create the embedding layer
    self.embedding = nn.Embedding(vocab_size, embedding_dim)

    # (2) Create an LSTM layer with hidden size = hidden_size and batch_first = True
    self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
    # batch_first makes the input and output tensors to be of shape (batch_size, seq_length, hidden_size)

    # (3) Create a linear layer
    self.linear = nn.Linear(hidden_size, vocab_size)
    #####################################################################################################

  def forward(self, sentences):
    """
    This function does the forward pass of our model
    Inputs:
    - sentences: tensor of shape (batch_size, max_length)

    Returns:
    - final_output: tensor of shape (batch_size, max_length, n_classes)
    """

    final_output = None
    ######################### TODO: implement the forward pass ####################################
    embedded = self.embedding(sentences) 
    rnn_output, h = self.rnn(embedded)  
    final_output = self.linear(rnn_output) 

    ###############################################################################################
    return final_output, h

In [22]:
lstm_model = Char_model()
word_model = Word_model()
# lstm_model.load_state_dict(torch.load('lstm_model_weights.pth'))
# word_model.load_state_dict(torch.load('word_model_weights.pth'))
print(lstm_model)
print(word_model)

Char_model(
  (embedding_char): Embedding(42, 50)
  (embedding_diacritics): Embedding(14, 15)
  (lstm): LSTM(116, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=17, bias=True)
)
Word_model(
  (embedding): Embedding(2093761, 50)
  (rnn): RNN(50, 50, batch_first=True)
  (linear): Linear(in_features=50, out_features=2093761, bias=True)
)


# Training

In [None]:
def train(lstm_model, context_model, train_dataset, batch_size=128, epochs=30, learning_rate=0.012):
  """
  This function implements the training logic
  Inputs:
  - model: the model ot be trained
  - train_dataset: the training set of type NERDataset
  - batch_size: integer represents the number of examples per step
  - epochs: integer represents the total number of epochs (full training pass)
  - learning_rate: the learning rate to be used by the optimizer
  """

  # (1) create the dataloader of the training set (make the shuffle=True)
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

  # (2) make the criterion cross entropy loss
  criterion = nn.CrossEntropyLoss()

  # (3) create the optimizer (Adam)
  optimizer = torch.optim.Adam(list(lstm_model.parameters()) + list(context_model.parameters()), lr=learning_rate)
  # GPU configuration
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  if use_cuda:
    lstm_model = lstm_model.cuda()
    context_model = context_model.cuda()
    criterion = criterion.cuda()
  # device="cpu"
  for epoch_num in range(epochs):
    total_acc_train = 0
    total_loss_train = 0

    for train_input, train_label, train_context, train_diacritic, train_segments in tqdm(train_dataloader):
      

      # (4) move the train input to the device
      train_label = train_label.long().to(device)

      # (5) move the train label to the device
      train_input = train_input.long().to(device)

      train_context = train_context.long().to(device)

      train_diacritic = train_diacritic.long().to(device)

      train_segments = train_segments.long().to(device)

      # (6) do the forward pass
      # context, h_0 = context_model(train_context)
      # c_0 = torch.zeros(context.shape[0], 1, context.shape[2])
      # h_0 = torch.transpose(h_0, 0, 1)
      #h_0 = h_0.permute(1, 0, 2)
      #print(h_0.shape)
      # print(train_input.shape)
      # print(train_diacritic.shape)
      output = lstm_model(train_input, train_diacritic, train_segments)
      
      # (7) loss calculation (you need to think in this part how to calculate the loss correctly)
      batch_loss = criterion(output.reshape(-1, 17), train_label.reshape(-1))
  
      # (8) append the batch loss to the total_loss_train
      total_loss_train += batch_loss.item()
      
      # (9) calculate the batch accuracy (just add the number of correct predictions)
      acc = (output.argmax(dim=2) == train_label).sum().item()
      total_acc_train += acc

      # (10) zero your gradients
      optimizer.zero_grad()
      
      # (11) do the backward pass
      batch_loss.backward()

      # (12) update the weights with your optimizer
      optimizer.step()
      
    # epoch loss
    epoch_loss = total_loss_train / len(train_dataset)

    # (13) calculate the accuracy
    epoch_acc = total_acc_train / (len(train_dataset) * len(train_dataset[0][0]))
    # ba2sem 3la 3adad el kalemat fy kol el gomal 
    # kol gomla asln fyha 104 kelma, fa badrab dh fy 3adad el gomal bs

    print(
        f'Epochs: {epoch_num + 1} | Train Loss: {epoch_loss} \
        | Train Accuracy: {epoch_acc}\n')

  ##############################################################################################################
  

In [32]:
#train the word model 
def train_word_model(word_model, train_dataset,batch_size=128, epochs=30, learning_rate=0.012):
  """
  This function implements the training logic
  Inputs:
  - model: the model ot be trained
  - train_dataset: the training set of type NERDataset
  - batch_size: integer represents the number of examples per step
  - epochs: integer represents the total number of epochs (full training pass)
  - learning_rate: the learning rate to be used by the optimizer
  """

  # (1) create the dataloader of the training set (make the shuffle=True)
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
  
  # (2) make the criterion cross entropy loss
  criterion = nn.CrossEntropyLoss()

  # (3) create the optimizer (Adam)
  optimizer = torch.optim.Adam(word_model.parameters(), lr=learning_rate)
  # GPU configuration
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  #clear the cache
  if use_cuda:
    word_model = word_model.cuda()
    criterion = criterion.cuda()
  # device="cpu"
  for epoch_num in range(epochs):
    total_acc_train = 0
    total_loss_train = 0
    # the word model only needs the word sequences and the labels
    for train_input, train_label in tqdm(train_dataloader):

      # (4) move the train input to the device
      train_label = train_label.long().to(device)

      # (5) move the train label to the device
      train_input = train_input.long().to(device)

      # (6) do the forward pass
      output, h = word_model(train_input)
      
      # (7) loss calculation (you need to think in this part how to calculate the loss correctly)
      batch_loss = criterion(output.reshape(-1, 2093761), train_label.reshape(-1))
  
      # (8) append the batch loss to the total_loss_train
      total_loss_train += batch_loss.item()
      
      # (9) calculate the batch accuracy (just add the number of correct predictions)
      acc = (output.argmax(dim=2) == train_label).sum().item()
      total_acc_train += acc

      # (10) zero your gradients
      optimizer.zero_grad()
      
      # (11) do the backward pass
      batch_loss.backward()

      # (12) update the weights with your optimizer
      optimizer.step()

    # epoch loss
    epoch_loss = total_loss_train / len(train_dataset)

    # (13) calculate the accuracy
    epoch_acc = total_acc_train / (len(train_dataset) * len(train_dataset[0][0]))
    # ba2sem 3la 3adad el kalemat fy kol el gomal
    # kol gomla asln fyha 104 kelma, fa badrab dh fy 3adad el gomal bs

    print(
        f'Epochs: {epoch_num + 1} | Train Loss: {epoch_loss} \
        | Train Accuracy: {epoch_acc}\n')
    
  ##############################################################################################################
    
      

    

In [33]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# train the model
#the train dataset is the word sequences and the labels the gold labels are the
with open('./Dataset/word_level/words.txt', 'r', encoding='utf-8') as file:
    words = file.readlines()
with open('./Dataset/word_level/labels.txt', 'r', encoding='utf-8') as file:
    labels = file.readlines()

# remove the \n from the end of each word
words = [word[:-1] for word in words]
labels = [label[:-1] for label in labels]

# create the tokenizer
tokenizer = Tokenizer()
# fit the tokenizer on the documents
tokenizer.fit_on_texts(words)

# encode the words
encoded_words = tokenizer.texts_to_sequences(words)
#tokenize the labels
label_tokenizer = Tokenizer()
label_tokenizer.fit_on_texts(labels)
encoded_labels = label_tokenizer.texts_to_sequences(labels)
# pad sequences pad_sequences is used to ensure that all sequences in a list have the same length
max_length = max([len(seq) for seq in encoded_words])
encoded_words = pad_sequences(encoded_words, maxlen=max_length, padding='post')
encoded_labels = pad_sequences(encoded_labels, maxlen=max_length, padding='post')
# split the dataset into train and test
train_dataset = Dataset2(encoded_words, encoded_labels)
train_dataset, test_dataset = train_dataset.get_splits()

#clear the memory of torch
torch.cuda.empty_cache()

# train the model
train_word_model(word_model, train_dataset, batch_size=64, epochs=30, learning_rate=0.012)



100%|██████████| 1118/1118 [01:49<00:00, 10.18it/s]


Epochs: 1 | Train Loss: 0.1521331397427538         | Train Accuracy: 0.03820284423811055



100%|██████████| 1118/1118 [01:50<00:00, 10.07it/s]


Epochs: 2 | Train Loss: 0.11295279734015953         | Train Accuracy: 0.05541649769971893



100%|██████████| 1118/1118 [01:50<00:00, 10.11it/s]


Epochs: 3 | Train Loss: 0.0977635446283642         | Train Accuracy: 0.08802595332317201



100%|██████████| 1118/1118 [01:49<00:00, 10.22it/s]


Epochs: 4 | Train Loss: 0.08852977041093787         | Train Accuracy: 0.1485184511906926



100%|██████████| 1118/1118 [01:49<00:00, 10.23it/s]


Epochs: 5 | Train Loss: 0.08282096668705516         | Train Accuracy: 0.19059471704445347



100%|██████████| 1118/1118 [01:49<00:00, 10.23it/s]


Epochs: 6 | Train Loss: 0.07954062211048937         | Train Accuracy: 0.20986394082194845



100%|██████████| 1118/1118 [01:49<00:00, 10.23it/s]


Epochs: 7 | Train Loss: 0.07757790415744398         | Train Accuracy: 0.21779256918322543



100%|██████████| 1118/1118 [01:49<00:00, 10.22it/s]


Epochs: 8 | Train Loss: 0.07625484499225145         | Train Accuracy: 0.21984813949911206



100%|██████████| 1118/1118 [01:49<00:00, 10.24it/s]


Epochs: 9 | Train Loss: 0.07528097667253748         | Train Accuracy: 0.22201557758729182



100%|██████████| 1118/1118 [01:49<00:00, 10.23it/s]


Epochs: 10 | Train Loss: 0.07448219254148467         | Train Accuracy: 0.2236376602855425



100%|██████████| 1118/1118 [01:49<00:00, 10.20it/s]


Epochs: 11 | Train Loss: 0.07394138493769344         | Train Accuracy: 0.22395928013088529



100%|██████████| 1118/1118 [01:49<00:00, 10.21it/s]


Epochs: 12 | Train Loss: 0.07354775917084481         | Train Accuracy: 0.22531567686993972



100%|██████████| 1118/1118 [01:49<00:00, 10.22it/s]


Epochs: 13 | Train Loss: 0.07323270958717792         | Train Accuracy: 0.22591696614601542



100%|██████████| 1118/1118 [01:49<00:00, 10.20it/s]


Epochs: 14 | Train Loss: 0.07280980013244752         | Train Accuracy: 0.22563729671528254



 27%|██▋       | 307/1118 [00:30<01:19, 10.17it/s]


KeyboardInterrupt: 

In [None]:
torch.cuda.empty_cache()

train_dataset = Dataset(char_sequences, labels, word_sequences, test_sentences_diacritics_sequences, train_segment_sequences)
train(lstm_model, word_model, train_dataset)

In [None]:
torch.save(lstm_model.state_dict(), '30e3_lstm_model_weights.pth')
torch.save(word_model.state_dict(), '30e3_word_model_weights.pth')
tensor2 = tensor1 = torch.tensor([
    [[1, 2, 3], [4, 5, 6], [66, 55, 77]], 
    [[7, 8, 9], [10, 11, 12], [13, 14, 15]]
])

x = torch.tensor([
    [[1, 2, 3], [4, 5, 6], [66, 55, 77]], 
    [[7, 8, 9], [10, 11, 12], [13, 14, 15]]
])

# Get the shape of the input tensor
batch_size, sequence_length, feature_size = x.shape

# Create a tensor of zeros with the same shape as the last subsequence
zeros_tensor = torch.zeros_like(x[:, 0:1, :])

# Concatenate it to the original tensor along the second dimension
padded_x = torch.cat((x, zeros_tensor), dim=1)

# Now, padded_x will have zeros padded to the last subsequence
print(padded_x)

In [None]:
tensor1 = torch.tensor([
    [[1, 2, 3], [4, 5, 6], [66, 55, 77]], 
    [[7, 8, 9], [10, 11, 12], [13, 14, 15]]
])

# Concatenate tensor1 with itself along the last dimension
result = torch.cat((tensor1, tensor1), dim=-1)

print(result)

In [None]:
# List of strings
list_of_strings = ["hello", "world", "deep", "learning"]

# Convert strings to lists of character indices
list_of_lists = [list(map(ord, s)) for s in list_of_strings]

# Pad sequences to the same length
padded_sequences = pad_sequence([torch.tensor(seq) for seq in list_of_lists], batch_first=True, padding_value=0)

print(padded_sequences)

In [None]:
# Assuming you have two tensors A and B
tensor_A = torch.randn(128, 7183, 50)
tensor_B = torch.randint(0, 2, (128, 7183), dtype=torch.float32)  # Example tensor, adjust as needed

# Concatenate along the last dimension
concatenated_tensor = torch.cat((tensor_A, tensor_B.unsqueeze(2)), dim=2)

# Print the shape of the concatenated tensor
print(concatenated_tensor.shape)

# Evaluation

In [None]:
def evaluate(model, test_dataset, batch_size=256):
  """
  This function takes a NER model and evaluates its performance (accuracy) on a test data
  Inputs:
  - model: a NER model
  - test_dataset: dataset of type NERDataset
  """
  ########################### TODO: Replace the Nones in the following code ##########################

  # (1) create the test data loader
  test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

  # GPU Configuration
  use_cuda = torch.cuda.is_available()
  device = torch.device("cuda" if use_cuda else "cpu")
  if use_cuda:
    model = model.cuda()

  total_acc_test = 0
  
  # (2) disable gradients
  with torch.no_grad():
    # 3mlna disable 3lshan e7na bn-predict (aw evaluate y3ny) b2a dlw2ty, msh bn-train

    for test_input, test_label, test_context, test_diacritics, test_segments in tqdm(test_dataloader):
      # (3) move the test input to the device
      test_label = test_label.to(device)

      # (4) move the test label to the device
      test_input = test_input.to(device)
      # brdo the comments should be reversed 
      test_context = test_context.long().to(device)
      test_diacritics = test_diacritics.long().to(device)
      test_segments = test_segments.long().to(device)
      # (5) do the forward pass
      output = model(test_input, test_diacritics, test_segments)
      print(test_input.shape)
      print(test_label.shape)
      print(output.shape)
      # accuracy calculation (just add the correct predicted items to total_acc_test)
      acc = (output.argmax(dim=2) == test_label).sum().item()
      total_acc_test += acc
    
    # (6) calculate the over all accuracy
    total_acc_test /= (len(test_dataset) * len(test_dataset[0][0]))
  ##################################################################################################

  
  print(f'\nTest Accuracy: {total_acc_test}')

In [None]:
test_dataset = Dataset(val_char_sequences, val_labels, val_word_sequences, val_sentences_diacritics_sequences, val_segment_sequences)
evaluate(lstm_model, test_dataset)

In [None]:
with open('./pickles/test_segment_sequences.pickle', 'rb') as file:
    test_segment_sequences = pickle.load(file)

with open('./pickles/test_sentence_diacritics_appearance_sequences.pickle', 'rb') as file:
    test_sentences_diacritics_sequences = pickle.load(file)

with open('./pickles/test_char_sequences_without_tashkeel.pkl', 'rb') as file:
    test_char_sequences = pickle.load(file)

print(len(test_segment_sequences[0]))
print(len(test_sentences_diacritics_sequences[0]))
print(len(test_char_sequences[0]))


In [None]:

torch.cuda.empty_cache()

lstm_model.load_state_dict(torch.load('30e3_lstm_model_weights.pth'))


output = lstm_model(torch.tensor(test_char_sequences), torch.tensor(test_sentences_diacritics_sequences), torch.tensor(test_segment_sequences))

print(output.shape)


# Extracting max Probabilities


In [11]:
outputs = pickle.load(open("./pickles/test_output.pkl", "rb"))

print(outputs[0][0])

softmax_output = nn.functional.softmax(outputs, dim=-1)
print(softmax_output[0][0])

# Find the index of the maximum value along the last axis
max_arg = torch.argmax(softmax_output, dim=-1)

# Add a new dimension at the end to make the shape (2000, 1904, 1)
new_tensor = torch.unsqueeze(max_arg, dim=-1)

print(new_tensor[0][0])

tensor([-2.7436, -4.6608,  2.0320, 12.3649, 11.4669,  6.2128,  5.5692, -1.1379,
        -0.8895, -1.8278, -4.4135, -2.7686, -8.9944, -3.0970, -9.0805, -8.3650,
        -7.4706], grad_fn=<SelectBackward0>)
tensor([1.9456e-07, 2.8602e-08, 2.3070e-05, 7.0889e-01, 2.8879e-01, 1.5093e-03,
        7.9291e-04, 9.6914e-07, 1.2424e-06, 4.8612e-07, 3.6629e-08, 1.8975e-07,
        3.7527e-10, 1.3663e-07, 3.4432e-10, 7.0423e-10, 1.7224e-09],
       grad_fn=<SelectBackward0>)
tensor([3])
