In [1]:
import numpy as np
import codecs
from dataclasses import dataclass
from typing import List, Dict, Any

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn.functional as F
from torch import Tensor, nn
import random

# Reproducibility: feed one shared seed to every random number generator the
# notebook relies on (Python's `random`, NumPy, and PyTorch on CPU and CUDA).
seed = 0
for _seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(seed)

# Ask cuDNN for deterministic behaviour and switch off its benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

import sys
sys.path.append('nlp_project')
from nlp_project.scripts.read_write_data import load_data
from nlp_project.models.classes import DataIterator, Batch, F1_evaluator, Train1BiLSTM

import gensim.models
# Read the binary word2vec file shipped under the project's models directory
# into a gensim KeyedVectors object.
_GOOGLE_EMBS_FILE = './nlp_project/models/GoogleNews-50k.bin'
GoogleEmbs = gensim.models.KeyedVectors.load_word2vec_format(_GOOGLE_EMBS_FILE, binary=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Locations of the three .conll splits, relative to the notebook's working directory.
TRAIN_PATH = "nlp_project/data/processed/train_splits/labeled.conll"
DEV_PATH = "nlp_project/data/processed/dev.conll"
TEST_PATH = "nlp_project/data/processed/test.conll"

# load_data returns four values per file; they are unpacked below as
# (x, y, bio, domain) for each split.
train_fields = load_data(TRAIN_PATH)
x_train, y_train, bio_train, domain_train = train_fields

dev_fields = load_data(DEV_PATH)
x_dev, y_dev, bio_dev, domain_dev = dev_fields

test_fields = load_data(TEST_PATH)
x_test, y_test, bio_test, domain_test = test_fields

In [3]:
# Build a fresh Train1BiLSTM and fit it on the labeled training split, handing
# the dev split to fit() alongside it.
train_pair = (x_train, y_train)
dev_pair = (x_dev, y_dev)

model = Train1BiLSTM(hidden_size=10)
model.fit(
    train=train_pair,
    dev=dev_pair,
    print_metrics=False,
    learning_rate=0.01,
    epochs=20,
)

KeyboardInterrupt: 

In [None]:
# Show the two log attributes stored on the model as the cell output.
# NOTE(review): presumably these are the per-epoch train/dev F1 histories filled
# in by fit() — confirm against Train1BiLSTM.
model.train_f1_log, model.dev_f1_log

In [None]:
# Epoch 0, train: 0.000, dev: 0.000
# Epoch 1, train: 0.000, dev: 0.000
# Epoch 2, train: 0.437, dev: 0.609
# Epoch 3, train: 0.685, dev: 0.630
# Epoch 4, train: 0.707, dev: 0.639
# Epoch 5, train: 0.716, dev: 0.640
# Epoch 6, train: 0.723, dev: 0.639
# Epoch 7, train: 0.733, dev: 0.651
# Epoch 8, train: 0.747, dev: 0.650
# Epoch 9, train: 0.760, dev: 0.648
# Epoch 10, train: 0.769, dev: 0.643
# Epoch 11, train: 0.779, dev: 0.644
# Epoch 12, train: 0.786, dev: 0.646
# Epoch 13, train: 0.792, dev: 0.631
# Epoch 14, train: 0.798, dev: 0.640

In [None]:
# model = BaselineBiLSTM(hidden_size=10)
# model.fit(x_train, y_train, dev=(x_dev, y_dev), 
#           print_metrics=False, 
#           learning_rate=0.001, 
#           epochs=20)

In [None]:
"""
hidden_size=5, lr=0.05 --- Epoch 22, train: 0.861, dev: 0.774
hidden_size=10, lr=0.05 --- Epoch 22, train: 0.849, dev: 0.780
hidden_size=10, lr=0.05 --- Epoch 9, train: 0.849, dev: 0.778
hidden_size=9, lr=0.01 --- Epoch 25, train: 0.919, dev: 0.800

source for saving and loading: https://pytorch.org/tutorials/beginner/saving_loading_models.html#saving-loading-a-general-checkpoint-for-inference-and-or-resuming-training
"""

Saving and loading model:

In [None]:
# Checkpoint location used by the save/load cells.
# The save call below is disabled, so the later torch.load(SAVE_PATH) cell only
# works if this file already exists on disk from an earlier session.
SAVE_PATH = "nlp_project/models/Baseline.pt"
# torch.save(model, SAVE_PATH)

In [None]:
# Restore the object stored at SAVE_PATH into model2.
# NOTE(review): torch.load unpickles the file, and unpickling can execute
# arbitrary code — only load checkpoints you trust. Recent PyTorch releases
# (2.6+) default to weights_only=True, which presumably rejects a fully pickled
# model object like the one the disabled torch.save(model, ...) call would
# write; confirm against the installed torch version before relying on this cell.
model2 = torch.load(SAVE_PATH)

In [None]:
# Run the loaded model's evaluate() on all four dev values loaded earlier and
# keep whatever it returns as baseline_errors.
# NOTE(review): the name suggests an error-analysis result — confirm the return
# type against Train1BiLSTM.evaluate.
baseline_errors = model2.evaluate(x_dev, y_dev, bio_dev, domain_dev)

In [None]:
# Call fit() on the loaded model2 for 5 epochs with the same data and learning
# rate as the first training cell.
# NOTE(review): whether this resumes from the loaded weights or re-initialises
# them depends on Train1BiLSTM.fit — TODO confirm.
extra_fit_args = dict(print_metrics=False, learning_rate=0.01, epochs=5)
model2.fit(train=(x_train, y_train), dev=(x_dev, y_dev), **extra_fit_args)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b2f14aee-af04-4db5-af55-57a3a58b9f40' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>