In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
pip install pandas numpy torch



In [2]:
pip install torch torchvision torchaudio



In [3]:
import torch

if torch.cuda.is_available():
    print("CUDA is available. You have a CUDA-enabled GPU.")
else:
    print("CUDA is not available. You do not have a CUDA-enabled GPU.")


CUDA is available. You have a CUDA-enabled GPU.


Data is imported using pandas, preprocessed, and converted into indices for training a part-of-speech tagging model.
A Bidirectional LSTM model architecture is defined using PyTorch's neural network module.
Training data is prepared, a custom dataset class is defined, and data loaders are set up for model training.
The model is trained for 10 epochs using Adam optimizer and NLLLoss criterion.
The trained model's state dictionary is saved to a file named "bilstm.h5".

In [6]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader

# Read data
train_data = pd.read_csv('/content/drive/MyDrive/PMF/3.semestar/OPJ/PROJEKT/train_data.csv', dtype={'form': str, 'upos': str})
dev_data = pd.read_csv('/content/drive/MyDrive/PMF/3.semestar/OPJ/PROJEKT/dev_data.csv', dtype={'form': str, 'upos': str})
test_data = pd.read_csv('/content/drive/MyDrive/PMF/3.semestar/OPJ/PROJEKT/test_data.csv', dtype={'form': str, 'upos': str})

# Drop rows with NaN values in the 'form' column
train_data = train_data.dropna(subset=['form'])

# Extract words and tags
words = train_data['form'].values.tolist()
tags = train_data['upos'].values.tolist()

# Create word-to-index and tag-to-index dictionaries
word2idx = {word: i + 1 for i, word in enumerate(set(words))}
word2idx['PAD'] = 0

tag2idx = {tag: i + 1 for i, tag in enumerate(set(tags))}
tag2idx['PAD'] = 0

# Convert words and tags to indices
X_train = [word2idx[word] for word in words]
y_train = [tag2idx[tag] for tag in tags]

# Define custom dataset
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx]), torch.tensor(self.y[idx])

# Define the model
class BiLSTMTagger(nn.Module):
    def __init__(self, vocab_size, embedding_dim, lstm_units, num_tags):
        super(BiLSTMTagger, self).__init__()
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, lstm_units, bidirectional=True)
        self.hidden2tag = nn.Linear(lstm_units * 2, num_tags)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = nn.functional.log_softmax(tag_space, dim=1)
        return tag_scores

# Set parameters
max_word_len = max(len(word) for word in words)
vocab_size = len(word2idx)
num_tags = len(tag2idx)
embedding_dim = 128
lstm_units = 64

# Initialize model, criterion, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Check if GPU is available
model = BiLSTMTagger(vocab_size, embedding_dim, lstm_units, num_tags).to(device)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters())

# Prepare data loaders
train_dataset = CustomDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Train the model
for epoch in range(10):
    model.train()  # Set the model to training mode
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)  # Move data to GPU
        model.zero_grad()
        tag_scores = model(inputs)
        loss = criterion(tag_scores, targets)
        loss.backward()
        optimizer.step()

    # Evaluate on dev set
    model.eval()  # Set the model to evaluation mode


# Save the model
torch.save(model.state_dict(), "bilstm.h5")


A custom dataset class (CustomDataset) is defined for handling data during evaluation.
A trained BiLSTM model is loaded from a saved state dictionary file named "bilstm.h5".
Evaluation data (dev and test) is read from CSV files using pandas.
Words and tags are extracted from the evaluation data and converted into indices.
Data loaders are set up for evaluation using instances of CustomDataset and DataLoader.
A function to evaluate the F1 score is defined, which makes predictions using the loaded model.
F1 scores are calculated for both dev and test data using the evaluate function and sklearn's f1_score metric.
The F1 scores for both dev and test data are printed.

In [7]:
pip install scikit-learn



In [9]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report

# Define custom dataset
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx]), torch.tensor(self.y[idx])

# Load the saved model
model = BiLSTMTagger(vocab_size, embedding_dim, lstm_units, num_tags)
model.load_state_dict(torch.load("bilstm.h5"))
model.eval()

# Read data
dev_data = pd.read_csv('/content/drive/MyDrive/PMF/3.semestar/OPJ/PROJEKT/dev_data.csv', dtype={'form': str, 'upos': str})
test_data = pd.read_csv('/content/drive/MyDrive/PMF/3.semestar/OPJ/PROJEKT/test_data.csv', dtype={'form': str, 'upos': str})

# Extract words and tags
dev_words = dev_data['form'].values.tolist()
dev_tags = dev_data['upos'].values.tolist()
test_words = test_data['form'].values.tolist()
test_tags = test_data['upos'].values.tolist()

# Convert words and tags to indices
X_dev = [word2idx.get(word, word2idx['PAD']) for word in dev_words]
y_dev = [tag2idx[tag] for tag in dev_tags]
X_test = [word2idx.get(word, word2idx['PAD']) for word in test_words]
y_test = [tag2idx[tag] for tag in test_tags]

# Prepare data loaders
dev_dataset = CustomDataset(X_dev, y_dev)
test_dataset = CustomDataset(X_test, y_test)
dev_loader = DataLoader(dev_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

# Function to evaluate F1 score
def evaluate(loader):
    predictions = []
    targets = []
    for inputs, labels in loader:
        with torch.no_grad():
            tag_scores = model(inputs)
            predicted_tags = torch.argmax(tag_scores, dim=1)
            predictions.extend(predicted_tags.tolist())
            targets.extend(labels.tolist())
    return predictions, targets

# Calculate F1 score and print classification report for dev data
dev_predictions, dev_targets = evaluate(dev_loader)
print("Classification Report for Dev Data:")
print(classification_report(dev_targets, dev_predictions))

# Calculate F1 score and print classification report for test data
test_predictions, test_targets = evaluate(test_loader)
print("Classification Report for Test Data:")
print(classification_report(test_targets, test_predictions))


Classification Report for Dev Data:
              precision    recall  f1-score   support

           1       0.98      1.00      0.99      3164
           2       0.99      0.99      0.99      4349
           3       0.96      0.54      0.69      2214
           4       0.94      0.70      0.80      5382
           5       0.98      0.82      0.90       857
           6       0.88      0.89      0.89      2581
           7       0.97      0.91      0.94      2015
           8       0.55      0.55      0.55        20
           9       0.74      0.99      0.84     11105
          10       0.93      0.63      0.75       843
          11       0.88      0.98      0.93      2266
          12       1.00      1.00      1.00      6197
          13       0.80      0.33      0.47       409
          14       0.98      0.79      0.88      4906
          15       0.98      0.92      0.95      1991
          16       0.88      0.90      0.89      1432
          17       1.00      0.94      0.97  

A function (decode_predictions) is defined to convert model predictions into human-readable tag labels.
Examples of correct and incorrect predictions for both dev and test data are printed.
For each data set, sentences along with their true and predicted tags are displayed.
Correct predictions are limited to 2 examples, followed by 2 examples of incorrect predictions for each data set.
Loop iterations are controlled to print only the required number of examples (2 correct and 2 wrong predictions).
The iteration stops once both correct and wrong prediction examples are printed for both dev and test data sets.

In [None]:
# Function to decode predictions
def decode_predictions(predictions):
    decoded_tags = []
    for pred in predictions:
        predicted_tag_index = torch.argmax(pred, dim=-1).item()
        predicted_tag = list(tag2idx.keys())[predicted_tag_index]
        decoded_tags.append(predicted_tag)
    return decoded_tags

# Print 2 correct and 2 wrong predictions for dev data
num_correct = 0
num_wrong = 0
print("\nExamples of Predictions for Dev Data:")
for i, (inputs, _) in enumerate(dev_loader):
    with torch.no_grad():
        tag_scores = model(inputs)
        decoded_predictions = decode_predictions(tag_scores)
    for j, pred_tag in enumerate(decoded_predictions):
        true_tag = list(tag2idx.keys())[dev_targets[i*32 + j]]
        if true_tag == pred_tag:
            if num_correct < 2:
                print("Example", i*32 + j + 1)
                print("Sentence:", dev_words[i*32 + j])
                print("True Tag:", true_tag)
                print("Predicted Tag:", pred_tag)
                print()
                num_correct += 1
        else:
            if num_wrong < 2:
                print("Example", i*32 + j + 1)
                print("Sentence:", dev_words[i*32 + j])
                print("True Tag:", true_tag)
                print("Predicted Tag:", pred_tag)
                print()
                num_wrong += 1
        if num_correct == 2 and num_wrong == 2:
            break
    if num_correct == 2 and num_wrong == 2:
        break

# Print 2 correct and 2 wrong predictions for test data
num_correct = 0
num_wrong = 0
print("\nExamples of Predictions for Test Data:")
for i, (inputs, _) in enumerate(test_loader):
    with torch.no_grad():
        tag_scores = model(inputs)
        decoded_predictions = decode_predictions(tag_scores)
    for j, pred_tag in enumerate(decoded_predictions):
        true_tag = list(tag2idx.keys())[test_targets[i*32 + j]]
        if true_tag == pred_tag:
            if num_correct < 2:
                print("Example", i*32 + j + 1)
                print("Sentence:", test_words[i*32 + j])
                print("True Tag:", true_tag)
                print("Predicted Tag:", pred_tag)
                print()
                num_correct += 1
        else:
            if num_wrong < 2:
                print("Example", i*32 + j + 1)
                print("Sentence:", test_words[i*32 + j])
                print("True Tag:", true_tag)
                print("Predicted Tag:", pred_tag)
                print()
                num_wrong += 1
        if num_correct == 2 and num_wrong == 2:
            break
    if num_correct == 2 and num_wrong == 2:
        break



Examples of Predictions for Dev Data:
Example 1
Sentence: Proces
True Tag: SCONJ
Predicted Tag: SCONJ

Example 2
Sentence: privatizacije
True Tag: SCONJ
Predicted Tag: SCONJ

Example 6
Sentence: povećalom
True Tag: SCONJ
Predicted Tag: CCONJ

Example 14
Sentence: učestalih
True Tag: PAD
Predicted Tag: CCONJ


Examples of Predictions for Test Data:
Example 1
Sentence: Beograd
True Tag: DET
Predicted Tag: DET

Example 2
Sentence: i
True Tag: PART
Predicted Tag: PART

Example 9
Sentence: Pregovarački
True Tag: PAD
Predicted Tag: CCONJ

Example 19
Sentence: oko
True Tag: PUNCT
Predicted Tag: ADP

