In [1]:
from utilities.utils import *
from misc.embedding_def import EmbeddingFabric
from misc.indexer_def import Indexer
from misc.metrics_def import *
from trainer.model import *
from trainer.training_model import *
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
import os

# Root folders for the dataset and the pre-trained embeddings. They can be
# overridden with the NER_DATA_DIR / NER_EMBEDDINGS_DIR environment variables so
# the notebook is runnable on machines other than the author's; when the
# variables are unset the defaults below reproduce the original local paths.
DATA_DIR = os.environ.get('NER_DATA_DIR', 'C:/Users/sidew/Desktop/AI/New folder/data')
EMBEDDINGS_DIR = os.environ.get('NER_EMBEDDINGS_DIR', 'C:/Users/sidew/Desktop/AI/embeddings')

# Forward slashes are used on purpose: they keep the default values identical to
# the original strings.
TRAIN_PATH = f'{DATA_DIR}/train.txt'
DEV_PATH = f'{DATA_DIR}/dev.txt'
TEST_PATH = f'{DATA_DIR}/test.txt'
EMBEDDINGS_PATH = f'{EMBEDDINGS_DIR}/glove.6B.100d.txt'

In [4]:
# Load the pre-trained embeddings from the file configured in EMBEDDINGS_PATH.
# `load_embedding_dict` is not defined in this notebook; it presumably arrives
# through one of the star imports in the first cell and, judging by its name,
# returns a token -> vector mapping — TODO confirm against the helper.
glove = load_embedding_dict(EMBEDDINGS_PATH)

100%|███████████████████████████████████████████████████████████████████████| 400000/400000 [00:27<00:00, 14732.91it/s]


In [5]:
# Read the training split. The helper's result is unpacked into two values that
# the rest of the notebook uses as the token data (`words`) and the matching
# label data (`tags`). Presumably the file is in CoNLL format, going by the
# helper's name — TODO confirm.
words, tags = read_ner_data_from_connl(TRAIN_PATH)

In [6]:
# Read the development split with the same reader used for the training data;
# it is stored under the 'dev' key of `data_dict` further down.
val_words, val_tags = read_ner_data_from_connl(DEV_PATH)

In [7]:
# Read the held-out test split with the same reader; these values are used
# directly by the final evaluation cell and also stored under 'test' in `data_dict`.
test_words, test_tags = read_ner_data_from_connl(TEST_PATH)

In [8]:
# Bundle the three splits under the keys 'train', 'dev' and 'test'; each entry
# is a (words, tags) pair. This mapping is what gets handed to `train_model`.
data_dict = dict(
    train=(words, tags),
    dev=(val_words, val_tags),
    test=(test_words, test_tags),
)

In [9]:
# Both lookups are built from the training split only (`words` / `tags` come
# from TRAIN_PATH); the dev and test splits are indexed with these same objects.
words_indexer, tags_indexer = Indexer(words), Indexer(tags)

In [10]:
# Model sizes passed to LSTMTagger. EMBEDDING_DIM is 100, in line with the
# 100-dimensional GloVe file named in EMBEDDINGS_PATH; HIDDEN_DIM happens to
# use the same value.
EMBEDDING_DIM = HIDDEN_DIM = 100

In [11]:
# Registry of untrained models, keyed by the embedding strategy name.
models = dict()

In [12]:
# Name of this configuration: it is both the key in `models` and the third
# argument forwarded to EmbeddingFabric.get_embedding_layer.
strategy = "strategy"

# Number of output classes, taken from the tag indexer.
num_tags = tags_indexer.size()

# The embedding layer is handed over as a zero-argument factory instead of a
# ready-made layer. Note that the lambda looks up `strategy` when it is called,
# not when it is created.
model = models[strategy] = LSTMTagger(
    EMBEDDING_DIM,
    HIDDEN_DIM,
    num_tags,
    lambda: EmbeddingFabric.get_embedding_layer(words_indexer, glove, strategy),
)

In [13]:
# Collect the values of the tag indexer's element -> index mapping; this list is
# what every MetricsHandler below is constructed with.
labels = list(tags_indexer.get_element_to_index_dict().values())

In [14]:
# Two separate, initially empty containers for per-model results.
plotting_data, trained_models = {}, {}

In [None]:
# Train every registered model and keep the results. Previously the values
# returned by `train_model` were dropped at the end of each iteration, leaving
# `trained_models` and `plotting_data` empty.
for name, model in models.items():
    print(f"Training {name} model")

    # NLLLoss consumes log-probabilities — presumably LSTMTagger's output is
    # log-softmaxed; TODO confirm against the model definition.
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    # Fresh handlers per model so that results from different models never mix.
    metric_handler = MetricsHandler(labels)
    valid_metric = MetricsHandler(labels)

    model, train, valid, losses = train_model(
        model,
        optimizer,
        loss_function,
        data_dict,
        128,  # batch size
        words_indexer,
        tags_indexer,
        metric_handler,
        valid_metric,
        num_epochs=5,
    )

    # Persist the outcome under the model's name for later inspection/plotting.
    trained_models[name] = model
    plotting_data[name] = {'train': train, 'valid': valid, 'losses': losses}

In [3]:
# Evaluate every model on the held-out test split and print its latest metrics.

# The indexed test data does not depend on the model, so build it once.
inputs = torch.tensor(words_indexer.elements_to_index(test_words), dtype=torch.long)
true_vals = tags_indexer.elements_to_index(test_tags)

for name, model in models.items():
    print(f"{name} results on test set:")

    # One handler per model, mirroring the training loop, so that one model's
    # predictions cannot leak into another model's scores.
    test_metrics = MetricsHandler(labels)

    # torch.no_grad() only disables gradient tracking; layers such as dropout
    # still behave as in training unless the module is put in eval mode.
    # Assumes the model is a torch.nn.Module (it exposes .parameters() and is
    # callable) — the previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            tag_scores = model(inputs)
            prediction = get_tag_indexes_from_scores(tag_scores)
    finally:
        model.train(was_training)

    test_metrics.update(prediction, true_vals)
    test_metrics.collect()

    # Each entry of metrics_dict is indexed with [-1] to show its latest value.
    for metric, history in test_metrics.metrics_dict.items():
        print(f"{metric} - {history[-1]}")
    print()

strategy results on test set:
Precision - 0.8212440570395734
Recall - 0.6033666775396797
F1-score - 0.6868141812878461
F0.5-score - 0.7584959482622308
