#<h1><center>**Natural Language Processing - XPOS MULTEXT East POS Project**</center></h1>
#<h1><center>**2023./2024.**</center></h1>
#<h2><center>*Grgur Živković, Mia Mužinić*</center></h1>


---


#<h1><center>**Model Testing**</center></h1>


***Torchinfo*** is a Python library used for providing detailed information about PyTorch models, including layer dimensions, parameter counts, and computational complexity. It offers a convenient way to inspect the structure of PyTorch models, which can be particularly useful for debugging, optimizing, and understanding neural network architectures.

In [None]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [None]:
# Importing required libraries
import ast
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report

import torchinfo

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Loading prepared data
file_path_train = '/content/drive/MyDrive/NLP2024/hr500k-train.csv'
file_path_val = '/content/drive/MyDrive/NLP2024/hr500k-dev.csv'
file_path_test = '/content/drive/MyDrive/NLP2024/hr500k-test.csv'

data_train = pd.read_csv(file_path_train)
data_val = pd.read_csv(file_path_val)
data_test = pd.read_csv(file_path_test)

# Displaying the first few rows for verification
data_test.head()

Unnamed: 0,tokens,xpos
0,"['Beograd', 'i', 'Priština', 'postigli', 'dogo...","['Npmsn', 'Cc', 'Npfsn', 'Vmp-pm', 'Ncmsan', '..."
1,"['Pregovarački', 'timovi', 'Beograda', 'i', 'P...","['Agpmpny', 'Ncmpn', 'Npmsg', 'Cc', 'Npfsg', '..."
2,"['Neki', 'tvrde', 'kako', 'su', 'sporazumi', '...","['Pi-mpn', 'Vmr3p', 'Cs', 'Var3p', 'Ncmpn', 'N..."
3,"['Dok', 'vlasti', 'u', 'Beogradu', 'pokušavaju...","['Cs', 'Ncfpn', 'Sl', 'Npmsl', 'Vmr3p', 'Vmn',..."
4,"['Nakon', 'završetka', 'razgovora', 'u', 'Brux...","['Sg', 'Ncmsg', 'Ncmpg', 'Sl', 'Npmsl', 'Z', '..."


In [None]:
# Convert string representations of token sequences to actual lists of tokens for training data
X_train = [ast.literal_eval(sentence) for sentence in data_train['tokens'].tolist()]

# Convert string representations of token sequences to actual lists of tokens for validation data
X_valid = [ast.literal_eval(sentence) for sentence in data_val['tokens'].tolist()]

# Convert string representations of token sequences to actual lists of tokens for test data
X_test = [ast.literal_eval(sentence) for sentence in data_test['tokens'].tolist()]

# Convert string representations of XPOS tag sequences to actual lists of tags for training data
Y_train = [ast.literal_eval(sentence) for sentence in data_train['xpos'].tolist()]

# Convert string representations of XPOS tag sequences to actual lists of tags for validation data
Y_valid = [ast.literal_eval(sentence) for sentence in data_val['xpos'].tolist()]

# Convert string representations of XPOS tag sequences to actual lists of tags for test data
Y_test = [ast.literal_eval(sentence) for sentence in data_test['xpos'].tolist()]

In [None]:
# Define the RNN model
class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNClassifier, self).__init__()
        # Initialize the hidden size attribute
        self.hidden_size = hidden_size
        # Define the embedding layer to convert input indices into dense vectors
        self.embedding = nn.Embedding(input_size, embedding_dim)
        # Define the RNN layer
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        # Define the fully connected layer for classification
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Pass input through the embedding layer
        embedded = self.embedding(x)
        # Pass embedded input through the RNN layer
        output, hidden = self.rnn(embedded)
        # Pass RNN output through the fully connected layer
        output = self.fc(output)
        # Return the final output
        return output


In [None]:
# Convert words to integer indices
word_to_idx = {}  # Initialize an empty dictionary to store word-to-index mappings
idx_counter = 0    # Initialize a counter for index values
for split in [X_train, X_valid, X_test]:  # Iterate over the data splits
    for sentence in split:                # Iterate over sentences in each split
        for word in sentence:             # Iterate over words in each sentence
            if word not in word_to_idx:   # Check if the word is not already mapped to an index
                word_to_idx[word] = idx_counter  # Map the word to the current index
                idx_counter += 1          # Increment the index counter

# Convert XPOS tags to integer indices
xpos_to_idx = {}    # Initialize an empty dictionary to store XPOS tag-to-index mappings
idx_counter = 0    # Re-initialize the counter for index values
for tag_split in [Y_train, Y_valid, Y_test]:  # Iterate over the XPOS tag splits
    for tags in tag_split:                    # Iterate over lists of XPOS tags
        for tag in tags:                      # Iterate over individual XPOS tags
            if tag not in xpos_to_idx:         # Check if the XPOS tag is not already mapped to an index
                xpos_to_idx[tag] = idx_counter  # Map the XPOS tag to the current index
                idx_counter += 1              # Increment the index counter

In [None]:
# Define a PyTorch Dataset
class CustomDataset(Dataset):
    def __init__(self, sentences, xpos_tags, word_to_idx, xpos_to_idx):
        """
        Initialize the CustomDataset class.

        Args:
        - sentences (list of lists): List of sentences where each sentence is a list of words.
        - xpos_tags (list of lists): List of lists containing XPOS tags for each sentence.
        - word_to_idx (dict): Dictionary mapping words to integer indices.
        - xpos_to_idx (dict): Dictionary mapping XPOS tags to integer indices.
        """
        self.sentences = sentences   # Store the list of sentences
        self.xpos_tags = xpos_tags   # Store the list of lists containing XPOS tags
        self.word_to_idx = word_to_idx  # Store the word-to-index mapping
        self.xpos_to_idx = xpos_to_idx    # Store the XPOS tag-to-index mapping

    def __len__(self):
        """
        Return the total number of samples in the dataset.
        """
        return len(self.sentences)

    def __getitem__(self, idx):
        """
        Retrieve a sample from the dataset at the given index.

        Args:
        - idx (int): Index of the sample to retrieve.

        Returns:
        - word_indices (list): List of integer indices representing words in the sentence.
        - xpos_indices (list): List of integer indices representing XPOS tags in the sentence.
        """
        # Convert words in the sentence to integer indices using word_to_idx mapping
        word_indices = [self.word_to_idx[word] for word in self.sentences[idx]]
        # Convert XPOS tags in the sentence to integer indices using xpos_to_idx mapping
        xpos_indices = [self.xpos_to_idx[tag] for tag in self.xpos_tags[idx]]
        return word_indices, xpos_indices

In [None]:
# Create the dataset
dataset_test = CustomDataset(X_test, Y_test, word_to_idx, xpos_to_idx)

In [None]:
def collate_fn(batch):
    # Separate word indices and XPOS tag indices
    word_indices, xpos_indices = zip(*batch)

    # Pad sequences to the same length within each batch
    padded_word_indices = pad_sequence([torch.tensor(seq) for seq in word_indices], batch_first=True)
    padded_xpos_indices = pad_sequence([torch.tensor(seq) for seq in xpos_indices], batch_first=True)

    return padded_word_indices, padded_xpos_indices

In [None]:
batch_size = 32
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

In [None]:
# Define hyperparameters
vocab_size = len(word_to_idx)
input_size = vocab_size
hidden_size = 128
output_size = len(xpos_to_idx)
embedding_dim = 100

# For GPU usage
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

criterion = nn.CrossEntropyLoss()

In [None]:
# Import the model
model = torch.load('/content/drive/MyDrive/NLP2024/entire_rnn_model.h5',map_location=torch.device('cpu'))

In [None]:
print(model)

RNNClassifier(
  (embedding): Embedding(73456, 100)
  (rnn): RNN(100, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=756, bias=True)
)


In [None]:
# Model summary
torchinfo.summary(model)

Layer (type:depth-idx)                   Param #
RNNClassifier                            --
├─Embedding: 1-1                         7,345,600
├─RNN: 1-2                               29,440
├─Linear: 1-3                            97,524
Total params: 7,472,564
Trainable params: 7,472,564
Non-trainable params: 0

In [None]:
# Evaluation mode for the model
model.eval()

RNNClassifier(
  (embedding): Embedding(73456, 100)
  (rnn): RNN(100, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=756, bias=True)
)

In [None]:
correct_test = 0
total_test = 0

with torch.no_grad():  # Disable gradient calculation for efficiency
    for inputs, targets in dataloader_test:
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        outputs = model(inputs)

        # Calculate predictions and accuracy
        _, predicted = torch.max(outputs, 2)
        correct_test += (predicted == targets).sum().item()
        total_test += targets.numel()

# Calculate overall metrics
accuracy = 100 * correct_test / total_test

In [None]:
print(f"Model accuracy on test data: {accuracy:.2f}")

Model accuracy on test data: 91.08


In [None]:
# Empty lists to store true and predicted labels
true_labels = []
predicted_labels = []

with torch.no_grad():
    for inputs, targets in dataloader_test:
        inputs = inputs.to(device)

        # Forward pass
        outputs = model(inputs)

        # Calculate predictions
        _, predicted = torch.max(outputs, 2)

        # Flatten the targets and predicted tensors to compute metrics
        true_labels.extend(targets.view(-1).cpu().numpy())
        predicted_labels.extend(predicted.view(-1).cpu().numpy())

In [None]:
# Calculate weighted and unweighted average F1 score
f1 = f1_score(true_labels, predicted_labels, average='macro')
weighted_f1 = f1_score(true_labels, predicted_labels, average='weighted')

print(f"Unweighted F1 Score: {f1:.2f}")
print(f"Weighted F1 Score: {weighted_f1:.2f}")


Unweighted F1 Score: 0.46
Weighted F1 Score: 0.91


In [None]:
# Calculate classification report
report = classification_report(true_labels, predicted_labels)

print("Classification Report:")
print(report)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     87137
           1       0.13      0.17      0.15        24
           2       0.43      0.47      0.45        76
           3       0.68      0.68      0.68       286
           4       0.77      0.78      0.77       836
           5       0.83      0.79      0.81      1775
           6       0.84      0.85      0.84       156
           7       0.78      0.69      0.73       140
           8       0.82      0.80      0.81       621
           9       0.93      0.92      0.93      1332
          10       0.96      0.99      0.98      1699
          11       0.18      0.13      0.15        46
          12       0.68      0.69      0.68      1129
          13       0.69      0.74      0.71      1183
          14       0.73      0.77      0.75       681
          15       1.00      1.00      1.00      6493
          16       0.64      0.44      0.52        16
    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


***The F1 score*** is a measure of a model's accuracy, usually in binary classification problems, but it can also be extended to multiclass classification settings. It combines precision and recall into a single metric, providing a balanced assessment of a model's performance.

*The F1 score* ranges from 0 to 1, where a score of 1 indicates perfect precision and recall, while a score of 0 indicates poor performance.

*The F1 score* is important because it provides a single value that summarizes a model's performance in terms of both precision and recall. It is especially useful in situations where the class distribution is imbalanced or when both false positives and false negatives are important. Additionally, the weighted average F1 score takes class imbalance into account by weighting the score by the number of true instances in each class, making it particularly useful in multiclass classification problems.

In [None]:
# Select an example sentence index for display
example_idx = 0

# Retrieve an example sentence and its true labels
example_sentence = X_test[example_idx]
true_labels = Y_test[example_idx]

# Convert the example sentence to word indices
word_indices = [word_to_idx[word] for word in example_sentence]

# Convert the true labels to their corresponding integer indices
true_indices = [xpos_to_idx[tag] for tag in true_labels]

# Convert the word indices and true label indices to PyTorch tensors
word_tensor = torch.tensor(word_indices).unsqueeze(0).to(device)

# Forward pass through the model to get predictions
with torch.no_grad():
    output = model(word_tensor)
    _, predicted_indices = torch.max(output, 2)

# Convert predicted indices back to XPOS tags
predicted_labels = [list(xpos_to_idx.keys())[list(xpos_to_idx.values()).index(idx)] for idx in predicted_indices.squeeze().cpu().numpy()]

# Print the example sentence, true labels, and predicted labels
print("Sentence:", example_sentence)
print("True labels:", true_labels)
print("Predicted labels:", predicted_labels)

Sentence: ['Beograd', 'i', 'Priština', 'postigli', 'dogovor', 'o', 'slobodi', 'kretanja']
True labels: ['Npmsn', 'Cc', 'Npfsn', 'Vmp-pm', 'Ncmsan', 'Sl', 'Ncfsl', 'Ncnsg']
Predicted labels: ['Npmsn', 'Cc', 'Npfsn', 'Vmp-pm', 'Ncmsan', 'Sl', 'Ncfsl', 'Ncnsg']
