## Importing the necessary libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import f1_score
import json
import matplotlib.pyplot as plt
import numpy as np
import gensim.downloader as gensim_api

# Fetch the three 300-dimensional pre-trained embedding sets through the gensim
# downloader, one module-level name per embedding.
word2vec = gensim_api.load(name="word2vec-google-news-300")
glove = gensim_api.load(name="glove-wiki-gigaword-300")
fasttext = gensim_api.load(name="fasttext-wiki-news-subwords-300")

ModuleNotFoundError: No module named 'torchtext'

In [None]:
# Load word embeddings
import gensim.downloader as gensim_api
# Same three downloads as the first cell; ask for the loaded model object
# (not the on-disk path) explicitly.
word2vec = gensim_api.load(
    "word2vec-google-news-300",
    return_path=False,
)
glove = gensim_api.load(
    "glove-wiki-gigaword-300",
    return_path=False,
)
fasttext = gensim_api.load(
    "fasttext-wiki-news-subwords-300",
    return_path=False,
)

In [None]:


# Define the BiLSTM-CRF model
class BiLSTM_CRF(nn.Module):
    """Sequence tagger: embedding -> bidirectional LSTM -> linear -> CRF.

    Args:
        embedding_dim: Width of the embedding vectors. Only used when
            ``pretrained_embedding`` is None; otherwise the width of the
            supplied matrix is used.
        hidden_dim: Combined hidden size of the BiLSTM. Each direction gets
            ``hidden_dim // 2`` units.
        vocab_size: Number of rows of the embedding table (ignored when
            ``pretrained_embedding`` is given).
        tag_to_idx: Mapping from tag to index; only its length is used here.
        pretrained_embedding: Optional 2-D float tensor handed to
            ``nn.Embedding.from_pretrained``.

    Raises:
        ValueError: If ``hidden_dim`` is smaller than 2 (no units would be
            left for each LSTM direction).
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tag_to_idx, pretrained_embedding=None):
        super().__init__()
        if hidden_dim < 2:
            raise ValueError("hidden_dim must be at least 2 for a bidirectional LSTM")

        if pretrained_embedding is not None:
            self.embedding = nn.Embedding.from_pretrained(pretrained_embedding)
        else:
            self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # Take the input width from the embedding layer itself so a pretrained
        # matrix whose width differs from `embedding_dim` still lines up.
        lstm_input_dim = self.embedding.embedding_dim
        units_per_direction = hidden_dim // 2
        num_tags = len(tag_to_idx)

        self.lstm = nn.LSTM(
            lstm_input_dim,
            units_per_direction,
            num_layers=1,
            bidirectional=True,
            batch_first=True,
        )
        # The BiLSTM emits 2 * units_per_direction features per token, which is
        # hidden_dim - 1 for an odd hidden_dim; size the projection from that
        # instead of from hidden_dim.
        self.hidden2tag = nn.Linear(2 * units_per_direction, num_tags)
        self.crf = CRF(num_tags)

    def forward(self, sentence, tags=None):
        """Return the CRF loss when ``tags`` is given, else the decoded tags.

        Args:
            sentence: Tensor of token indices fed to the embedding layer
                (batch-first, since the LSTM is built with batch_first=True).
            tags: Optional gold tag indices aligned with ``sentence``.

        Returns:
            ``self.crf(emissions, tags)`` if ``tags`` is not None, otherwise
            ``self.crf.decode(emissions)``.
        """
        embeds = self.embedding(sentence)
        lstm_out, _ = self.lstm(embeds)
        emissions = self.hidden2tag(lstm_out)

        if tags is not None:
            return self.crf(emissions, tags)
        return self.crf.decode(emissions)

# Define the CRF layer
class CRF(nn.Module):
    """Linear-chain conditional random field over per-token tag scores.

    ``transitions[i, j]`` is the learned score of moving from tag ``i`` to
    tag ``j``. No start/end transition scores and no padding mask are used:
    every position of every sequence is treated as a real token.

    Args:
        num_tags: Number of distinct tags.
    """

    def __init__(self, num_tags):
        super().__init__()
        self.num_tags = num_tags
        self.transitions = nn.Parameter(torch.randn(num_tags, num_tags))

    def _check_emissions(self, emissions):
        """Validate that emissions is (batch, seq_len >= 1, num_tags)."""
        if emissions.dim() != 3 or emissions.size(2) != self.num_tags:
            raise ValueError(
                f"emissions must have shape (batch, seq_len, {self.num_tags}), "
                f"got {tuple(emissions.shape)}"
            )
        if emissions.size(1) == 0:
            raise ValueError("emissions must contain at least one time step")

    def _score_sequence(self, emissions, tags):
        """Score of the given tag paths: emission scores plus transition scores."""
        emission_score = emissions.gather(2, tags.unsqueeze(-1)).squeeze(-1).sum(dim=1)
        # Pairs (tag at t, tag at t + 1); empty for single-token sequences.
        transition_score = self.transitions[tags[:, :-1], tags[:, 1:]].sum(dim=1)
        return emission_score + transition_score

    def _log_partition(self, emissions):
        """Log-sum-exp of the scores of all tag paths (forward algorithm)."""
        alpha = emissions[:, 0]
        for step in range(1, emissions.size(1)):
            # (batch, from, 1) + (1, from, to) -> reduce over the `from` axis.
            scores = alpha.unsqueeze(2) + self.transitions.unsqueeze(0)
            alpha = torch.logsumexp(scores, dim=1) + emissions[:, step]
        return torch.logsumexp(alpha, dim=1)

    def forward(self, emissions, tags):
        """Compute the CRF loss (mean negative log-likelihood over the batch).

        Args:
            emissions: Float tensor of shape (batch, seq_len, num_tags).
            tags: Long tensor of shape (batch, seq_len) with gold tag indices.

        Returns:
            Scalar tensor: mean over the batch of
            ``log Z(emissions) - score(emissions, tags)``.

        Raises:
            ValueError: If the shapes of ``emissions`` and ``tags`` are
                inconsistent or the sequence is empty.
        """
        self._check_emissions(emissions)
        if tags.shape != emissions.shape[:2]:
            raise ValueError(
                f"tags must have shape {tuple(emissions.shape[:2])}, got {tuple(tags.shape)}"
            )
        gold_score = self._score_sequence(emissions, tags)
        log_partition = self._log_partition(emissions)
        return (log_partition - gold_score).mean()

    @torch.no_grad()
    def decode(self, emissions):
        """Viterbi decoding of the highest-scoring tag path per sequence.

        Args:
            emissions: Float tensor of shape (batch, seq_len, num_tags).

        Returns:
            A list with one entry per sequence; each entry is a list of
            ``seq_len`` integer tag indices.

        Raises:
            ValueError: If ``emissions`` has the wrong shape or is empty.
        """
        self._check_emissions(emissions)
        best_score = emissions[:, 0]
        backpointers = []
        for step in range(1, emissions.size(1)):
            candidates = best_score.unsqueeze(2) + self.transitions.unsqueeze(0)
            # For every target tag keep the best previous tag and its score.
            step_best, step_prev = candidates.max(dim=1)
            best_score = step_best + emissions[:, step]
            backpointers.append(step_prev)

        # Walk the back-pointers from the best final tag to the first token.
        current = best_score.argmax(dim=1)
        reversed_path = [current]
        for step_prev in reversed(backpointers):
            current = step_prev.gather(1, current.unsqueeze(1)).squeeze(1)
            reversed_path.append(current)
        reversed_path.reverse()
        return torch.stack(reversed_path, dim=1).tolist()

# Convert dataset to PyTorch DataLoader
class NERDataset(Dataset):
    """Dataset that turns raw NER records into index tensors.

    Each record in ``data`` is read as ``record["text"]`` (a string that is
    split on whitespace into words) and ``record["labels"]`` (an iterable of
    tags looked up in ``tag_to_idx``).

    Args:
        data: Indexable collection of records.
        word_to_idx: Mapping from word to integer index.
        tag_to_idx: Mapping from tag to integer index.
        unk_token: Vocabulary entry used for words missing from
            ``word_to_idx``. If this token is itself not in the vocabulary,
            unknown words raise ``KeyError``.
    """

    def __init__(self, data, word_to_idx, tag_to_idx, unk_token="<UNK>"):
        self.data = data
        self.word_to_idx = word_to_idx
        self.tag_to_idx = tag_to_idx
        self.unk_token = unk_token

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(word_indices, tag_indices)`` as two LongTensors.

        Raises:
            KeyError: If a word is not in the vocabulary and no ``unk_token``
                entry exists, or if a tag is not in ``tag_to_idx``.
        """
        record = self.data[idx]
        # None when the vocabulary has no unknown-word entry.
        unk_index = self.word_to_idx.get(self.unk_token)

        sentence = []
        for word in record["text"].split():
            word_index = self.word_to_idx.get(word, unk_index)
            if word_index is None:
                raise KeyError(
                    f"word {word!r} is not in the vocabulary and no "
                    f"{self.unk_token!r} fallback entry exists"
                )
            sentence.append(word_index)

        tags = [self.tag_to_idx[tag] for tag in record["labels"]]
        return torch.LongTensor(sentence), torch.LongTensor(tags)

# Training function
def train(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``train_loader``.

    The model is called as ``model(sentence, tags)`` and its return value is
    used directly as the loss; ``criterion`` is accepted but not used.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    for batch in train_loader:
        tokens, gold = batch
        tokens = tokens.to(device)
        gold = gold.to(device)

        optimizer.zero_grad()
        batch_loss = model(tokens, gold)
        batch_loss.backward()
        optimizer.step()

        running_loss = running_loss + batch_loss.item()

    num_batches = len(train_loader)
    return running_loss / num_batches

# Evaluation function
def evaluate(model, data_loader, criterion, device):
    """Compute the average loss and macro F1 of ``model`` on ``data_loader``.

    Each batch is passed through the model twice with gradients disabled:
    once with tags (the return value is used as the loss) and once without
    (the return value is used as the predictions). ``criterion`` is accepted
    but not used.

    Returns:
        Tuple ``(summed batch loss / number of batches, macro F1)``.
    """
    model.eval()
    loss_sum = 0
    predicted_rows, gold_rows = [], []

    with torch.no_grad():
        for batch_tokens, batch_tags in data_loader:
            batch_tokens = batch_tokens.to(device)
            batch_tags = batch_tags.to(device)

            loss_sum += model(batch_tokens, batch_tags).item()

            # Collect one entry per sequence for predictions and gold tags.
            predicted_rows.extend(model(batch_tokens))
            gold_rows.extend(batch_tags.cpu().numpy())

    # Flatten to token level before scoring.
    flat_gold = np.concatenate(gold_rows)
    flat_pred = np.concatenate(predicted_rows)
    f1 = f1_score(flat_gold, flat_pred, average='macro')
    return loss_sum / len(data_loader), f1

# Training loop
def train_loop(model, train_loader, val_loader, optimizer, criterion, device, epochs=10):
    """Train for ``epochs`` epochs and record loss / F1 after each one.

    Per epoch: one call to ``train`` on ``train_loader``, then ``evaluate``
    on ``train_loader`` (for the training F1) and on ``val_loader`` (for the
    validation loss and F1). A summary line is printed for every epoch.

    Returns:
        Tuple ``(train_losses, val_losses, train_f1_scores, val_f1_scores)``,
        four lists with one entry per epoch.
    """
    train_losses = []
    val_losses = []
    train_f1_scores = []
    val_f1_scores = []

    for epoch in range(epochs):
        train_loss = train(model, train_loader, optimizer, criterion, device)
        # train() only returns the loss, so the training F1 needs its own
        # evaluation pass; previously train_f1 was read without being set.
        _, train_f1 = evaluate(model, train_loader, criterion, device)
        val_loss, val_f1 = evaluate(model, val_loader, criterion, device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_f1_scores.append(train_f1)
        val_f1_scores.append(val_f1)

        print(f'Epoch {epoch + 1}/{epochs}: '
              f'Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, '
              f'Train F1: {train_f1:.4f}, Val F1: {val_f1:.4f}')

    return train_losses, val_losses, train_f1_scores, val_f1_scores

# Data preprocessing
tag_to_idx = {"O": 0, "B": 1, "I": 2}  # Tag to index mapping
embedding_dim = 300  # Matches the 300-dimensional embeddings loaded above
hidden_dim = 128  # Combined (forward + backward) BiLSTM hidden size

# The model and every batch must live on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): `dataset` is expected to already hold the raw records (dicts
# with "text" and "labels" keys, as read by NERDataset); it is not loaded in
# this section. Keep a separate handle so the loops below never wrap an
# already-wrapped dataset.
raw_data = dataset

# Create datasets and loaders for each pre-trained embedding
datasets = []
loaders = []
pretrained_embeddings = [word2vec, glove, fasttext]  # KeyedVectors loaded above

for embedding in pretrained_embeddings:
    # Token ids must be row numbers of this embedding's weight matrix, so each
    # embedding gets a dataset built on its own vocabulary.
    # NOTE(review): words absent from the embedding vocabulary still need
    # handling; NERDataset looks every word up in this mapping.
    word_to_idx = embedding.key_to_index
    ner_dataset = NERDataset(raw_data, word_to_idx, tag_to_idx)
    # NOTE(review): the default collate function stacks samples, so batches of
    # more than one sentence require equal-length sentences or a padding
    # collate_fn.
    loader = DataLoader(ner_dataset, batch_size=64, shuffle=True)
    datasets.append(ner_dataset)
    loaders.append(loader)

# Model training for each pre-trained embedding
for i, (embedding, loader) in enumerate(zip(pretrained_embeddings, loaders)):
    # nn.Embedding.from_pretrained needs a float tensor, not a gensim object.
    embedding_weights = torch.as_tensor(embedding.vectors, dtype=torch.float)
    model = BiLSTM_CRF(
        embedding_dim,
        hidden_dim,
        len(embedding.key_to_index),
        tag_to_idx,
        pretrained_embedding=embedding_weights,
    )
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()  # Unused by train/evaluate: the model returns its own CRF loss

    # NOTE(review): the same loader is used for training and validation, so the
    # "validation" curves are measured on training data until a held-out split exists.
    train_losses, val_losses, train_f1_scores, val_f1_scores = train_loop(model, loader, loader, optimizer, criterion, device)

    # Save the model
    torch.save(model.state_dict(), f'model_{i + 1}.pth')

    # Generate plots
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_f1_scores, label='Training F1 Score')
    plt.plot(val_f1_scores, label='Validation F1 Score')
    plt.xlabel('Epochs')
    plt.ylabel('F1 Score')
    plt.legend()

    plt.tight_layout()
    plt.savefig(f'plot_{i + 1}.png')
    plt.show()
