In [None]:
pip install torchtext

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1->torchtext)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1->torchtext)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.1->torchtext)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.2.1->torchtext)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.2.1->torchtext)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.2.1->torchtext)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset

In [None]:
from torch.utils.data import Dataset
class DependencyParsingDataset(Dataset):
    def __init__(self, file_path, pos_vocab, dep_vocab):
        self.pos_vocab = pos_vocab
        self.dep_vocab = dep_vocab
        self.data = []
        with open(file_path, 'r') as file:
            for line in file:
                if line.strip() and not line.startswith("#"):  # Skip empty lines and comments
                    parts = line.strip().split()
                    if len(parts) > 7:  # Ensure there are enough parts
                        word_index = int(parts[0])  # ID of the word
                        pos_tag = self.pos_vocab[parts[3]]  # Convert POS tag to index
                        head_index = int(parts[6])  # ID of the head word
                        dep_rel = self.dep_vocab[parts[7]]  # Convert dependency label to index
                        self.data.append((word_index, pos_tag, head_index, dep_rel))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
      word_index, pos_index, head_index, deprel_index = self.data[idx]
      return (torch.tensor([word_index], dtype=torch.long).unsqueeze(0),  # Adding sequence length dimension
            torch.tensor([pos_index], dtype=torch.long).unsqueeze(0),
            torch.tensor([head_index], dtype=torch.long).unsqueeze(0),
            torch.tensor([deprel_index], dtype=torch.long).unsqueeze(0))



In [None]:
def build_vocab(file_path, column_index):
    vocab = {}
    with open(file_path, 'r') as file:
        for line in file:
            if line.strip() and not line.startswith("#"):
                parts = line.strip().split()
                if parts[column_index] not in vocab:
                    vocab[parts[column_index]] = len(vocab)
    return vocab


In [None]:
import torch.nn as nn

class BiLSTMDependencyParser(nn.Module):
    def __init__(self, word_vocab_size, pos_vocab_size, dep_vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.word_embeddings = nn.Embedding(word_vocab_size, embedding_dim)
        self.pos_embeddings = nn.Embedding(pos_vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim * 2, hidden_dim, bidirectional=True)
        self.head_predictor = nn.Linear(hidden_dim * 2, 1)
        self.dep_predictor = nn.Linear(hidden_dim * 2, dep_vocab_size)




    def forward(self, words, pos):
      # Generate embeddings
      word_embeds = self.word_embeddings(words)  # [batch_size, embedding_dim]
      pos_embeds = self.pos_embeddings(pos)      # [batch_size, embedding_dim]
      if word_embeds.dim() == 2:
        word_embeds = word_embeds.unsqueeze(1)  # Add sequence length dimension
      if pos_embeds.dim() == 2:
        pos_embeds = pos_embeds.unsqueeze(1)
      # Concatenate embeddings along the feature dimension (last dimension)
      try:
        embeddings = torch.cat([word_embeds, pos_embeds], dim=2)
      except Exception as e:
        print("Error during concatenation:", e)
        return None, None  # Early exit on error
      # LSTM and predictors
      lstm_out, _ = self.lstm(embeddings)
      head_logits = self.head_predictor(lstm_out.squeeze(1))
      dep_logits = self.dep_predictor(lstm_out.squeeze(1))
      return head_logits, dep_logits


In [None]:
from torch.utils.data import DataLoader

# Assuming the dataset and vocab building setup is already provided
pos_vocab = build_vocab('te_mtg-ud-train.conllu', 3)
dep_vocab = build_vocab('te_mtg-ud-train.conllu', 7)
train_dataset = DependencyParsingDataset('te_mtg-ud-train.conllu', pos_vocab, dep_vocab)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn

# Assume these are the sizes of your vocabularies
word_vocab_size = 10000
pos_vocab_size = 50
dep_vocab_size = 45
embedding_dim = 100
hidden_dim = 256

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BiLSTMDependencyParser(word_vocab_size, pos_vocab_size, dep_vocab_size, embedding_dim, hidden_dim)
model.to(device)

# Using Mean Squared Error Loss for head prediction and CrossEntropyLoss for dependency prediction
head_loss_function = nn.MSELoss()
dep_loss_function = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
def train(model, iterator, optimizer, head_loss_function, dep_loss_function, device):
    model.train()
    total_loss = 0

    for batch in iterator:
        # Ensure batch is a tuple of tensors
        words, pos, heads, deprels = batch
        words = words.to(device).squeeze(1)  # Adjust dimensions if necessary
        pos = pos.to(device).squeeze(1)
        heads = heads.to(device).squeeze(1)
        deprels = deprels.to(device).squeeze(1)

        optimizer.zero_grad()

        head_logits, dep_logits = model(words, pos)

        # Assuming head_logits and dep_logits are correctly shaped
        head_loss = head_loss_function(head_logits.squeeze(), heads.float())  # Adjust loss computation as needed
        dep_loss = dep_loss_function(dep_logits.view(-1, dep_vocab_size), deprels.view(-1))

        loss = head_loss + dep_loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(iterator)
# Run Training
num_epochs = 5
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, head_loss_function, dep_loss_function, device)
    print(f'Epoch {epoch+1}: Train Loss = {train_loss:.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1: Train Loss = 6.7607
Epoch 2: Train Loss = 6.0963
Epoch 3: Train Loss = 6.0599
Epoch 4: Train Loss = 6.0525
Epoch 5: Train Loss = 6.0150


In [None]:
def evaluate(model, iterator, head_loss_function, dep_loss_function, device):
    model.eval()  # Set the model to evaluation mode
    total_head_loss = 0
    total_dep_loss = 0
    correct_heads = 0
    correct_labels = 0
    total_tokens = 0

    with torch.no_grad():  # No gradients needed
        for batch in iterator:
            words, pos, heads, deprels = batch
            words = words.to(device).squeeze()
            pos = pos.to(device).squeeze()
            heads = heads.to(device).squeeze()
            deprels = deprels.to(device).squeeze()

            head_logits, dep_logits = model(words, pos)

            head_loss = head_loss_function(head_logits.squeeze(), heads.float())
            dep_loss = dep_loss_function(dep_logits.view(-1, dep_logits.size(-1)), deprels)

            total_head_loss += head_loss.item()
            total_dep_loss += dep_loss.item()

            # Convert logits to predictions
            head_preds = head_logits.round().int()  # Assuming head_logits are regression outputs
            dep_preds = dep_logits.argmax(dim=1, keepdim=True).squeeze()

            # Calculate correct predictions for UAS and LAS
            correct_heads += (head_preds == heads).sum().item()
            correct_labels += ((head_preds == heads) & (dep_preds == deprels)).sum().item()
            total_tokens += words.size(0)

    uas = correct_heads / total_tokens
    las = correct_labels / total_tokens
    return total_head_loss / len(iterator), total_dep_loss / len(iterator), uas, las


In [None]:
pos_vocab = build_vocab('te_mtg-ud-test.conllu', 3)
dep_vocab = build_vocab('te_mtg-ud-test.conllu', 7)
valid_dataset = DependencyParsingDataset('te_mtg-ud-test.conllu', pos_vocab, dep_vocab)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
pos_vocab = build_vocab('te_mtg-ud-dev.conllu', 3)
dep_vocab = build_vocab('te_mtg-ud-dev.conllu', 7)
dev_dataset = DependencyParsingDataset('te_mtg-ud-dev.conllu', pos_vocab, dep_vocab)
dev_loader = DataLoader(dev_dataset, batch_size=32, shuffle=False)

In [None]:
valid_head_loss, valid_dep_loss, valid_uas, valid_las = evaluate(model, valid_loader, head_loss_function, dep_loss_function, device)
print(f'Validation Head Loss: {valid_head_loss:.4f}')
print(f'Validation Dependency Loss: {valid_dep_loss:.4f}')
print(f'Validation UAS: {valid_uas:.4f}')
print(f'Validation LAS: {valid_las:.4f}')



Validation Head Loss: 5.3024
Validation Dependency Loss: 3.8710
Validation UAS: 6.7143
Validation LAS: 2.4827


In [None]:
dev_head_loss, dev_dep_loss, dev_uas, dev_las = evaluate(model, dev_loader, head_loss_function, dep_loss_function, device)
print(f'Validation Head Loss: {dev_head_loss:.4f}')
print(f'Validation Dependency Loss: {dev_dep_loss:.4f}')
print(f'Validation UAS: {dev_uas:.4f}')
print(f'Validation LAS: {dev_las:.4f}')

Validation Head Loss: 4.7559
Validation Dependency Loss: 5.3484
Validation UAS: 6.4305
Validation LAS: 1.1934
