In [1]:
# ! pip install wandb # colab only
import re
import os
from functools import reduce
import math
from collections import defaultdict
import numpy as np
import torch
from torch import nn
from torchinfo import summary
from pprint import pprint
import wandb
from sklearn.metrics import f1_score
import config as cfg

def download_and_unzip(url, save_dir='.'):
  # downloads and unzips url, if not already downloaded
  # used for downloading dataset and glove embeddings
  import os
  from urllib.request import urlopen
  from io import BytesIO
  from zipfile import ZipFile
  fname = url.split('/')[-1][:-4] if save_dir == '.' else save_dir
  if fname not in os.listdir():
    print(f'downloading and unzipping {fname}...', end=' ')
    r = urlopen(url)
    zipf = ZipFile(BytesIO(r.read()))
    zipf.extractall(path=save_dir)
    print(f'completed')
  else:
    print(f'{fname} already downloaded')

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def get_wandbkey():
    with open('wandbkey.txt') as f:
        return f.read().strip()

In [2]:
PAD_TOKEN = 400000 # TODO used by collate_fn, can be done better probably
def get_glove(emb_size=100, number_token=False):
  """
    Download and load glove embeddings. 
    Parameters:
      emb_size: embedding size (50/100/200/300-dimensional vectors).    
    Returns tuple (voc, emb) where voc is dict from words to idx (in emb) and emb is (numpy) embedding matrix
  """
  n_tokens = 400000 + 1 # glove vocabulary size + PAD
  if emb_size not in (50, 100, 200, 300):
    raise ValueError(f'wrong size parameter: {emb_size}')
  
  if number_token: 
    n_tokens += 1
  download_and_unzip('http://nlp.stanford.edu/data/glove.6B.zip', save_dir='glove')
  vocabulary = dict()
  embedding_matrix = np.ones((n_tokens, emb_size))

  with open(f'glove/glove.6B.{emb_size}d.txt', encoding="utf8") as f:
    for i, line in enumerate(f):
        word, coefs = line.split(maxsplit=1)
        coefs = np.fromstring(coefs, "f", sep=" ")
        embedding_matrix[i] = coefs
        vocabulary[word] = i
  
  # add embedding for and padding and number token
  if number_token:
    embedding_matrix[n_tokens - 2] = 0
    vocabulary['<PAD>'] = n_tokens - 2
    digits = list(filter(lambda s: re.fullmatch('\d+(\.\d*)?', s) is not None, vocabulary.keys()))
    embedding_matrix[n_tokens - 1] = np.mean(embedding_matrix[[vocabulary[d] for d in digits]], axis=0)
    vocabulary['<NUM>'] = n_tokens - 1
  else: 
    embedding_matrix[n_tokens - 1] = 0
    vocabulary['<PAD>'] = n_tokens - 1
  return vocabulary, embedding_matrix

In [3]:
def load_classes():
    download_and_unzip('https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dependency_treebank.zip')
    c = set()
    for doc in range(1, 200):
        with open(f'dependency_treebank/wsj_{doc:04d}.dp') as f:
            for line in f:
                if line.strip(): # check for empty lines
                    _, label, _ = line.split('\t')
                    #print(label)
                    c.add(label)
    return c

classes = {'$', 'NN', ',', 'RBS', 'FW', 'CC', '#', 'VBD', 'PRP', 'RBR', 'LS', ':', 'VBZ', 'MD',
           'EX', 'RB', 'WRB', 'NNS', 'VBG', 'PRP$', 'JJR', 'WP$', 'WP', '-LRB-', 'WDT', '``',
           '.', 'CD', 'JJ', "''", 'UH', 'VBN', 'IN', 'SYM', 'DT', 'JJS', '-RRB-', 'RP', 'VB',
           'POS', 'NNP', 'PDT', 'NNPS', 'VBP', 'TO', '<PAD>'}
punctuation_cls = {'$', ',', '#', ':', '-LRB-', '``', '.', "''", 'SYM', '-RRB-', '<PAD>'}
class2idx = {c: i for i, c in enumerate(classes)}
print(class2idx)

def add_oov(start_voc, oovs, embedding_matrix, sentences, verbose=True):
  """
    Computes new embedding matrix, adding embeddings for oovs
    Parameters:
      start_voc: dict, starting vocabulary that is extended with oovs
      oovs: set of string, oovs to add to the starting vocabulary and embedding matrix
      embedding_matrix: starting embedding matrix (numpy)
      sentences: list of list of strings, set used to compute oov embeddings
    Returns tuple (voc, emb) where voc is dict from words to idx (in emb) and emb is (numpy) embedding matrix with oovs
  """
  oovs = oovs - set(start_voc.keys())
  start_voc_size, emb_size = embedding_matrix.shape
  oov_embeddings = np.zeros((start_voc_size + len(oovs), emb_size))
  oov_embeddings[:start_voc_size] = embedding_matrix
  new_voc = dict(start_voc)

  for i, oov in enumerate(oovs):
    context_words = [new_voc[word] 
                    for sentence in filter(lambda s: oov in s, sentences)
                    for word in sentence if word in new_voc and word not in (oov, '<PAD>')]
    if verbose and len(context_words) == 0:
        print(f'Empty context for oov: {oov}')
        print([sentence for sentence in filter(lambda s: oov in s, sentences)])
    oov_embeddings[start_voc_size + i] = np.mean(oov_embeddings[context_words], axis=0)
    new_voc[oov] = start_voc_size + i
  return new_voc, oov_embeddings
    
def load_data(start, end, start_voc, embedding_matrix, number_token=False,
              drop_punctuation=False, split_docs=True, ret_counts=False):
  """
    Downloads dataset and preprocess data.
    Params:
      start: idx of first file to include in data
      end: idx of last file to include in data
      start_voc: starting vocabulary that is extended with oov terms
      embedding_matrix: embedding matrix that 
      number_token: if True, use a single token for all cardinal numbers
      drop_punctuation: if True, drop punt
      split_docs: if True, each sequence is one sentence; if false, each sequence is one document
      ret_counts: if True, also return counts of each word in the documents
    Returns 
  """
  # download dataset
  download_and_unzip('https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dependency_treebank.zip')

  inputs, labels = [], []
  vocabulary = set()
  counts = defaultdict(int)
  
  # build dataset
  for doc in range(start, end+1):
    with open(f'dependency_treebank/wsj_{doc:04d}.dp') as f:
      
      input_seq, label_seq = [], []
      
      for line in f:
        if line.strip(): # check for empty lines
          word, label, _ = line.split('\t')
          word = word.lower()
          if '\/' in word:
            word = word.replace('\/', '-')
          if number_token and re.fullmatch('\d+(\.\d*)?', word) is not None:
            word = '<NUM>'
          if not drop_punctuation or label.isalpha(): # eventually drop punctuation
            vocabulary.add(word)
            input_seq.append(word)
            label_seq.append(label)
            counts[word] += 1
        elif split_docs: # sentence over, add to input if splitting documents
          inputs.append(input_seq)
          labels.append(label_seq)
          input_seq, label_seq = [], []

      inputs.append(input_seq)
      labels.append(label_seq)

  vocabulary, embedding_matrix = add_oov(start_voc, vocabulary, embedding_matrix, inputs)

  if ret_counts:
    return inputs, labels, vocabulary, embedding_matrix, counts
  else:
    return inputs, labels, vocabulary, embedding_matrix

{'VBG': 0, '.': 1, 'JJS': 2, 'VBD': 3, 'RBR': 4, 'VB': 5, 'NNS': 6, 'JJR': 7, '$': 8, 'WP': 9, 'WDT': 10, 'RBS': 11, '-LRB-': 12, 'FW': 13, 'DT': 14, '#': 15, 'PRP': 16, 'PDT': 17, 'VBZ': 18, 'JJ': 19, 'RB': 20, ',': 21, 'POS': 22, 'UH': 23, 'WRB': 24, 'VBN': 25, 'SYM': 26, 'PRP$': 27, 'VBP': 28, 'TO': 29, 'MD': 30, 'EX': 31, '-RRB-': 32, 'RP': 33, 'NN': 34, '``': 35, 'IN': 36, 'CC': 37, 'NNP': 38, 'NNPS': 39, "''": 40, ':': 41, '<PAD>': 42, 'WP$': 43, 'CD': 44, 'LS': 45}


In [4]:
class POSDataset(torch.utils.data.Dataset):
    """Simple dataset class to use dataloaders (batching) """
    def __init__(self, inputs, labels, vocabulary):
        self.inputs_str = inputs
        self.labels_str = labels
        self.voc = vocabulary
        self.inputs = [[vocabulary[word] for word in sequence] for sequence in inputs]
        self.labels = [[class2idx[label] for label in sequence] for sequence in labels]
    def __getitem__(self, idx):
        return self.inputs[idx], self.labels[idx]
    def __len__(self):
        return len(self.inputs)

def collate_fn(batch):
    """Used by DataLoader to pad batches"""
    max_seq_len = int(np.quantile([len(sample[0]) for sample in batch], 0.99))
    inputs = []
    targets = []
    for i_seq, t_seq in batch:
        if len(i_seq) > max_seq_len:
            inputs.append(i_seq[:max_seq_len])
            targets.append(t_seq[:max_seq_len])
        else:
            inputs.append(i_seq + [PAD_TOKEN] * (max_seq_len - len(i_seq))) # sì brutto con train_voc da fuori ma non mi viene meglio ora
            targets.append(t_seq + [class2idx['<PAD>']] * (max_seq_len - len(t_seq)))
    return torch.as_tensor(inputs), torch.as_tensor(targets)


class EarlyStopping:
    def __init__(self, patience, model, delta=0, path='res'):
        self.patience = patience
        self.delta = delta
        self.model = model
        self.path = path
        if not os.path.isdir(self.path):
            os.mkdir(self.path)
        self.best_score = float('inf')
        self.counter = 0
    
    def step(self, epoch, score):
        """ Updates ES tracker after one epoch.
            Params:
                epoch: current epoch
                score: validation loss
            Returns tuple (stop, checkpoint), 
                where stop is True if early stopping has occurred and False otherwise,
                and checkpoint is last best checkpoint
        """
        if score < self.best_score:
            # print('Validation loss decreasing, storing new checkpoint')
            self.best_score = score
            self.counter = 0
            checkpoint = {'model': self.model.state_dict(), 'epoch': epoch}
            torch.save(checkpoint, 'res/checkpoint.pth')
            return False, checkpoint
        elif abs(score - self.best_score) > self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                print(f'Early stopping occured at epoch {epoch} with patience {self.patience}')
                checkpoint = torch.load('res/checkpoint.pth', map_location='cpu')
                return True, checkpoint
            print(f'Validation loss increasing for {self.counter} epochs')
            return False, None

In [5]:
class POSTagger(torch.nn.Module):

  def __init__(self, embedding_matrix, type, rec_size=1, units=None, hid_size=50):
    """
      A recurrent network performing multiclass classification (POS tagging).
      Params:
        type: type of rnn, either 'lstm' or 'gru'
        embedding_matrix: embedding matrix for embedding layer
        rec_size: number of stacked recurrent modules
        units: int or None, if given then add one additional linear layer with given number of units
        hid_size: size of hidden state of recurrent module
    """
    super().__init__()

    emb_size = embedding_matrix.shape[1]
    self.emb_layer = nn.Embedding.from_pretrained(torch.as_tensor(embedding_matrix))

    if type == 'lstm':
      rec_module = nn.LSTM
    elif type == 'gru':
      rec_module = nn.GRU
    else:
      raise ValueError(f'wrong type {type}, either lstm or gru')
    self.rec_modules = rec_module(input_size=emb_size, hidden_size=hid_size, bidirectional=True, batch_first=True, num_layers=rec_size)
    cls = len(classes)
    self.fc_modules = nn.Sequential(nn.Linear(2 * hid_size, units if units is not None else cls))
    if units is not None:
      self.fc_modules.add_module(nn.ReLU())
      self.fc_modules.add_module(nn.Linear(units, cls))


  def forward(self, x):
    vecs = self.emb_layer(x).float()
    rec_out, _ = self.rec_modules(vecs)
    fc_out = self.fc_modules(rec_out)
    return fc_out

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, ignore_index=-100, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduce = reduce
        self.ignore_index = ignore_index

    def forward(self, inputs, targets):    
        ce_loss = nn.CrossEntropyLoss(ignore_index=self.ignore_index, reduction='none')(inputs, targets)

        pt = torch.exp(-ce_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * ce_loss

        if self.reduce:
            return torch.mean(F_loss)
        else:
            return F_loss

In [6]:
def train_one_epoch(model, optimizer, loss_fn, data_loader, device):
    """ 
        Trains model for one epoch on the given dataloader.
        Parameters:
            model: torch.nn.Module to train
            optimizer: torch.optim optimizer object
            loss_fn: torch.nn criterion to use to compute loss, given outputs and targets
            data_loader: torch.utils.data.DataLoader 
            device: torch.device where training is performed
        Returns log dict {'train/loss' : list(loss values for each batch)} 
    """
    model.train()
    log_dict = {'train/loss': []}

    for inputs, targets in data_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        logprobs = model(inputs).transpose(1, 2)
        loss = loss_fn(logprobs, targets)
        loss_value = loss.item()

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            exit(1)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        log_dict['train/loss'].append(loss_value)

    return log_dict

def evaluate(model, loss_fn, data_loader, device, split, ret_f1_classes=False):
    """ 
        Evaluate model on the given dataloader.
        Parameters:
            model: torch.nn.Module to evaluate
            loss_fn: torch.nn criterion to use to compute loss, given outputs and targets
            data_loader: torch.utils.data.DataLoader 
            device: torch.device where evaluation is performed
            split: either 'valid' or 'test'
            ret_f1_classes: if True, also returns per-class f1 scores
        Returns log dict {'valid/loss' : mean loss, 'valid/{metric}': mean metric} 
    """
    model.eval()
    assert len(data_loader) == 1 # must be a single batch
    with torch.no_grad():
        inputs, targets = next(iter(data_loader))
        inputs = inputs.to(device)
        targets = targets.to(device)

        scores = model(inputs).transpose(1, 2)
        losses = loss_fn(scores, targets).item()
        preds = torch.argmax(scores, 1)

        targets = targets.cpu().numpy()
        preds = preds.cpu().numpy()
        mask = [targets != class2idx[c] for c in punctuation_cls]
        mask = np.array(reduce(lambda a,b: a & b, mask)).reshape(targets.shape)
        acc = np.where(mask, targets==preds, False).sum() / mask.sum()
        cls = [class2idx[c] for c in (classes - punctuation_cls)]
        f1_classes = f1_score(targets.reshape(-1), preds.reshape(-1),
                      labels=cls, average=None, zero_division=1)

    log_dict = {f'{split}/loss': losses,
                f'{split}/accuracy': acc,
                f'{split}/f1': np.mean(f1_classes)}
    if ret_f1_classes:
        return log_dict, {c:s for c,s in zip(cls, f1_classes)}
    else:
        return log_dict

In [7]:
def train(tags=None, verbose=False, test=False, number_token=False, focal_loss=False):
    """ Fully trains one model, based on cfg parameters, on training set and performs evaluation on validation set.
        Returns trained model.
    """
    idx2classes = {i: c for c, i in class2idx.items()}
    cfg_dict = {
        'epochs': cfg.EPOCHS, 'batch_size': cfg.BATCH_SIZE, 'number_token': number_token,
        'model': cfg.TYPE, 'rec_size': cfg.REC_SIZE, 'units': cfg.UNITS, 'hid_size': cfg.HID_SIZE,
        'optim': cfg.OPTIM, 'lr': cfg.LR, 'alpha': cfg.ALPHA, 'betas': cfg.BETAS, 'momentum': cfg.MOMENTUM, 'focal_loss': focal_loss
    }
    if verbose:
        print('CONFIG PARAMETERS:')
        pprint(cfg_dict)
    wandb.login(key=get_wandbkey())
    run = wandb.init(project="assignment-one", entity="nlpetroni", group=f'{"testing" if test else "validation"}',
                     reinit=True, config=cfg_dict, tags=tags)
    wandb.define_metric("train_step")
    wandb.define_metric("epoch")
    wandb.define_metric('train/loss', step_metric="train_step", summary="min")
    wandb.define_metric(f"valid/loss", step_metric="epoch", summary="min")
    wandb.define_metric(f"valid/accuracy", step_metric="epoch", summary="max")
    wandb.define_metric(f"valid/f1", step_metric="epoch", summary="max")
    wandb.define_metric(f"test/accuracy", step_metric="epoch", summary="max")
    wandb.define_metric(f"test/f1", step_metric="epoch", summary="max")

    glove_voc, embedding_matrix = get_glove(number_token=number_token)
    if not test:
        split = 'valid'
        train_set, train_labels, train_voc, embedding_matrix = load_data(1, 100, glove_voc, embedding_matrix, number_token=number_token, drop_punctuation=False)
        valid_set, valid_labels, valid_voc, embedding_matrix = load_data(101, 150, train_voc, embedding_matrix, number_token=number_token, drop_punctuation=False)
        train_ds = POSDataset(train_set, train_labels, train_voc)
        valid_ds = POSDataset(valid_set, valid_labels, valid_voc)
        train_dl = torch.utils.data.DataLoader(train_ds, batch_size=cfg.BATCH_SIZE, collate_fn=collate_fn, shuffle=True)
        valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=len(valid_ds), collate_fn=collate_fn)
    else:
        split = 'test'
        train_set, train_labels, train_voc, embedding_matrix = load_data(1, 150, glove_voc, embedding_matrix, number_token=number_token, drop_punctuation=False)
        test_set, test_labels, test_voc, embedding_matrix = load_data(151, 199, train_voc, embedding_matrix, number_token=number_token, drop_punctuation=False)
        train_ds = POSDataset(train_set, train_labels, train_voc)
        test_ds = POSDataset(test_set, test_labels, test_voc)
        train_dl = torch.utils.data.DataLoader(train_ds, batch_size=cfg.BATCH_SIZE, collate_fn=collate_fn, shuffle=True)
        test_dl = torch.utils.data.DataLoader(test_ds, batch_size=len(test_ds), collate_fn=collate_fn)


    model = POSTagger(embedding_matrix, type=cfg.TYPE, rec_size=cfg.REC_SIZE, units=cfg.UNITS, hid_size=cfg.HID_SIZE).to(device)
    wandb.watch(model, log_graph=True)
    if verbose:
        print(summary(model))

    params = [p for p in model.parameters() if p.requires_grad]
    if cfg.OPTIM == 'rmsprop':
        optimizer = torch.optim.RMSprop(params, lr=cfg.LR, alpha=cfg.ALPHA, momentum=cfg.MOMENTUM, weight_decay=cfg.WEIGHT_DECAY)
    elif cfg.OPTIM == 'adam':
        optimizer = torch.optim.Adam(params, lr=cfg.LR, betas=cfg.BETAS, weight_decay=cfg.WEIGHT_DECAY)
    else:
        raise ValueError(f'wrong optim {cfg.OPTIM}, either rmsprop or adam')
    if focal_loss: 
        loss_fn = FocalLoss(ignore_index=class2idx['<PAD>'])
    else:
        loss_fn = nn.CrossEntropyLoss(ignore_index=class2idx['<PAD>']) # ignore padding
    train_step = 0
    es_tracker = EarlyStopping(10, model)
    epoch = 0
    stop = False
    print('STARTING TRAINING')
    while epoch < cfg.EPOCHS and not stop:
        train_log_dict = train_one_epoch(model, optimizer, loss_fn, train_dl, device)
        if not test:
            valid_log_dict, f1_classes = evaluate(model, loss_fn, valid_dl, device, split=split, ret_f1_classes=True)
            stop, checkpoint = es_tracker.step(epoch, valid_log_dict['valid/loss'])
            wandb.log({'epoch': epoch, 'valid/loss': valid_log_dict['valid/loss'],
                       'valid/accuracy': valid_log_dict['valid/accuracy'], 'valid/f1': valid_log_dict['valid/f1'],
                       'valid/f1_distribution': wandb.Histogram(np_histogram=np.histogram(list(f1_classes.values())))})
            if stop:
                model.load_state_dict(checkpoint['model'])
            if (epoch % 25) == 0:
                print(f'[{epoch:03d}/{cfg.EPOCHS:03d}] train loss: {np.mean(train_log_dict["train/loss"]):.3f}, valid loss: {valid_log_dict["valid/loss"]:.3f}, f1: {valid_log_dict["valid/f1"]:.2f} accuracy: {valid_log_dict["valid/accuracy"]:.2f}')
        for batch_loss in train_log_dict['train/loss']:
            wandb.log({'train_step': train_step, 'epoch': epoch, 'train/loss': batch_loss})
            train_step += 1
        epoch += 1
    # log per-class f1 scores
    data = [[idx2classes[i], score] for i, score in f1_classes.items()]
    table = wandb.Table(data=data, columns=["class", "f1_score"])
    wandb.log({'valid/f1_per_class': wandb.plot.bar(table, "class", "f1_score", title="F1 per class bar chart")})
    if test:
        log_dict, f1_classes = evaluate(model, loss_fn, test_dl, device, split=split, ret_f1_classes=True)
        data = [[idx2classes[i], score] for i,score in f1_classes.items()]
        table = wandb.Table(data=data, columns = ["class", "f1_score"])
        wandb.log({'test/loss': log_dict['test/loss'], 'test/accuracy': log_dict['test/accuracy'], 'test/f1': log_dict['test/f1'],
                   'test/f1_per_class': wandb.plot.bar(table, "class", "f1_score", title="F1 per class bar chart")})

    run.finish()

    return model, run

In [8]:
# hyperparameter tuning
for (tag, type, rec_size, units) in (('lstm_1L', 'lstm', 1, None), ('lstm_2L', 'lstm', 2, None),
                                     ('fc_2L', 'lstm', 1, 128), ('gru', 'gru', 1, None)):
    for optim in ('rmsprop', 'adam'):
        for lr in (1e-4, 2e-4, 8e-5):
            for fl in (True, False):
                for hid_size in (32, 64):
                    cfg.TYPE = type
                    cfg.REC_SIZE = rec_size
                    cfg.UNITS = units
                    cfg.OPTIM = optim
                    cfg.LR = lr
                    cfg.HID_SIZE = hid_size
                    _, run = train(tags=[tag], focal_loss=fl)

[34m[1mwandb[0m: Currently logged in as: [33mdiegochine[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 1.636, valid loss: 1.308, f1: 0.01 accuracy: 0.17
[025/200] train loss: 0.212, valid loss: 0.196, f1: 0.47 accuracy: 0.81
[050/200] train loss: 0.106, valid loss: 0.110, f1: 0.61 accuracy: 0.86
[075/200] train loss: 0.073, valid loss: 0.087, f1: 0.67 accuracy: 0.88
[100/200] train loss: 0.055, valid loss: 0.076, f1: 0.71 accuracy: 0.89
[125/200] train loss: 0.044, valid loss: 0.071, f1: 0.74 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[150/200] train loss: 0.035, valid loss: 0.068, f1: 0.74 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[175/200] train loss: 0.028, valid los

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▆▄▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▅▆▇▇▇▇▇███████████████████████████████
valid/f1,▁▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████████
valid/loss,█▆▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,198
train_step,12337


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 1.484, valid loss: 1.124, f1: 0.01 accuracy: 0.20
[025/200] train loss: 0.126, valid loss: 0.127, f1: 0.57 accuracy: 0.85
[050/200] train loss: 0.067, valid loss: 0.084, f1: 0.66 accuracy: 0.89
[075/200] train loss: 0.042, valid loss: 0.071, f1: 0.72 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
[100/200] train loss: 0.028, valid loss: 0.068, f1: 0.73 accuracy: 0.90
Validation loss increasing for 3 epochs
Validation loss increasing for 4 epochs
Validation loss increasing for 5 epochs
Validation loss increasing for 6 epochs
Validation loss increasing for 7 epochs
Validation loss increasing for 8 epochs
Validation loss increasing for 9 epochs
Validation loss increa

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.76647880421…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,██▅▄▂▃▂▂▂▂▁▁▁▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▅▆▇▇▇▇▇███████████████████████████████
valid/f1,▁▂▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████
valid/loss,█▆▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,121
train_step,7563


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 3.630, valid loss: 3.374, f1: 0.02 accuracy: 0.19
[025/200] train loss: 0.746, valid loss: 0.771, f1: 0.43 accuracy: 0.79
[050/200] train loss: 0.402, valid loss: 0.452, f1: 0.59 accuracy: 0.86
[075/200] train loss: 0.295, valid loss: 0.361, f1: 0.65 accuracy: 0.89
[100/200] train loss: 0.232, valid loss: 0.318, f1: 0.69 accuracy: 0.90
[125/200] train loss: 0.190, valid loss: 0.295, f1: 0.73 accuracy: 0.91
Validation loss increasing for 1 epochs
[150/200] train loss: 0.159, valid loss: 0.282, f1: 0.75 accuracy: 0.91
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
[175/200] train loss: 0.133, valid loss: 0.275, f1: 0.75 accuracy: 0.91
Validation loss increasing for 1 epochs
Valida

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.76399026763…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▆▅▄▃▃▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▅▆▆▇▇▇▇▇██████████████████████████████
valid/f1,▁▂▃▄▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████████████████
valid/loss,█▆▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,199
train_step,12399


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 3.354, valid loss: 2.998, f1: 0.02 accuracy: 0.24
[025/200] train loss: 0.467, valid loss: 0.511, f1: 0.55 accuracy: 0.85
[050/200] train loss: 0.279, valid loss: 0.357, f1: 0.66 accuracy: 0.89
[075/200] train loss: 0.197, valid loss: 0.309, f1: 0.71 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[100/200] train loss: 0.141, valid loss: 0.292, f1: 0.73 accuracy: 0.91
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
Validation loss increasing for 4 epochs
Validation loss increasing for 5 epochs
Validation loss increasing for 6 epochs
Validation loss increa

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.76414865193…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▆▄▄▃▂▂▂▂▂▁▂▂▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▅▆▆▇▇▇▇▇██████████████████████████████
valid/f1,▁▂▃▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
valid/loss,█▆▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,122
train_step,7625


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 1.490, valid loss: 1.113, f1: 0.02 accuracy: 0.25
[025/200] train loss: 0.105, valid loss: 0.110, f1: 0.61 accuracy: 0.86
[050/200] train loss: 0.055, valid loss: 0.075, f1: 0.68 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[075/200] train loss: 0.034, valid loss: 0.065, f1: 0.73 accuracy: 0.91
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
Validation loss increasing for 1 epochs
[100/200] train loss: 0.022, valid loss: 0.063, f1: 0.73 accuracy: 0.91
Validation loss increa

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.76386186770…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▂▄▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▅▆▇▇▇▇▇███████████████████████████████
valid/f1,▁▂▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████████████
valid/loss,█▆▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,109
train_step,6819


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 1.319, valid loss: 0.960, f1: 0.05 accuracy: 0.37
[025/200] train loss: 0.065, valid loss: 0.082, f1: 0.68 accuracy: 0.89
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
[050/200] train loss: 0.027, valid loss: 0.068, f1: 0.75 accuracy: 0.90
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
Validation loss increasing for 4 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
Validation loss increasing for 4 epochs
Validation loss increasing for 5 epochs
Validation loss increasing for 6 epochs
Validation loss increasing for 7 epochs
Validation loss increasing for 8 epochs
Validation loss increasing for 9 epochs
Early stopping occured at epoch 67 with patien

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
train/loss,▇█▃▄▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
valid/accuracy,▁▂▅▆▆▇▇▇▇███████████████████████████████
valid/f1,▁▂▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████████
valid/loss,█▆▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,67
train_step,4215


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 3.276, valid loss: 2.935, f1: 0.02 accuracy: 0.24
[025/200] train loss: 0.397, valid loss: 0.449, f1: 0.58 accuracy: 0.86
[050/200] train loss: 0.237, valid loss: 0.330, f1: 0.67 accuracy: 0.89
[075/200] train loss: 0.168, valid loss: 0.294, f1: 0.72 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
[100/200] train loss: 0.120, valid loss: 0.287, f1: 0.74 accuracy: 0.91
Validation loss increasing for 4 epochs
Validation loss increasing for 5 epochs
Validation loss increasing for 6 epochs
Validation loss increasing for 7 epochs
Validation loss increa

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.76606179499…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▆▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▅▆▇▇▇▇▇▇██████████████████████████████
valid/f1,▁▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████████████████
valid/loss,█▆▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,107
train_step,6695


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 3.043, valid loss: 2.694, f1: 0.05 accuracy: 0.34
[025/200] train loss: 0.285, valid loss: 0.358, f1: 0.66 accuracy: 0.89
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[050/200] train loss: 0.149, valid loss: 0.291, f1: 0.73 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 3 epochs
Validation loss increasing for 4 epochs
Validation loss increasing for 5 epochs
Validation loss increasing for 6 epochs
Validation loss increasing for 7 epochs
Validation loss increasing for 8 epochs
Validation loss increasing for 9 epochs
Early stopping occured at epoch 65 with patience 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
train/loss,█▆▄▄▃▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▂▅▆▆▇▇▇▇▇▇█████████████████████████████
valid/f1,▁▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████████
valid/loss,█▇▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,65
train_step,4091


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 1.680, valid loss: 1.385, f1: 0.01 accuracy: 0.17
[025/200] train loss: 0.265, valid loss: 0.245, f1: 0.41 accuracy: 0.77
[050/200] train loss: 0.123, valid loss: 0.125, f1: 0.59 accuracy: 0.86
[075/200] train loss: 0.086, valid loss: 0.095, f1: 0.63 accuracy: 0.88
[100/200] train loss: 0.066, valid loss: 0.082, f1: 0.67 accuracy: 0.89
[125/200] train loss: 0.052, valid loss: 0.074, f1: 0.70 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[150/200] train loss: 0.044, valid loss: 0.069, f1: 0.72 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[175/200] train loss: 0.036, valid loss: 0.066, f1: 0.74 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Valida

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.76593137254…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▆▆▄▃▃▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▄▅▆▇▇▇▇▇██████████████████████████████
valid/f1,▁▁▂▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
valid/loss,█▆▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,199
train_step,12399


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 1.577, valid loss: 1.205, f1: 0.03 accuracy: 0.23
[025/200] train loss: 0.155, valid loss: 0.149, f1: 0.54 accuracy: 0.84
[050/200] train loss: 0.081, valid loss: 0.092, f1: 0.65 accuracy: 0.88
[075/200] train loss: 0.055, valid loss: 0.077, f1: 0.70 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
[100/200] train loss: 0.039, valid loss: 0.070, f1: 0.74 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increa

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▆▅▃▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▂▄▆▆▇▇▇▇▇██████████████████████████████
valid/f1,▁▁▂▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
valid/loss,█▇▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,139
train_step,8679


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 3.596, valid loss: 3.401, f1: 0.01 accuracy: 0.14
[025/200] train loss: 0.907, valid loss: 0.931, f1: 0.39 accuracy: 0.74
[050/200] train loss: 0.482, valid loss: 0.524, f1: 0.53 accuracy: 0.85
[075/200] train loss: 0.352, valid loss: 0.408, f1: 0.61 accuracy: 0.87
[100/200] train loss: 0.283, valid loss: 0.354, f1: 0.65 accuracy: 0.89
[125/200] train loss: 0.238, valid loss: 0.323, f1: 0.66 accuracy: 0.89
[150/200] train loss: 0.204, valid loss: 0.302, f1: 0.67 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[175/200] train loss: 0.178, valid loss: 0.289, f1: 0.71 accuracy: 0.91
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.76776900296…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▇▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▅▅▆▆▇▇▇▇▇▇████████████████████████████
valid/f1,▁▁▃▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇██████████
valid/loss,█▇▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,199
train_step,12399


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 3.394, valid loss: 3.058, f1: 0.01 accuracy: 0.20
[025/200] train loss: 0.572, valid loss: 0.607, f1: 0.50 accuracy: 0.83
[050/200] train loss: 0.330, valid loss: 0.398, f1: 0.62 accuracy: 0.88
[075/200] train loss: 0.239, valid loss: 0.334, f1: 0.67 accuracy: 0.89
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[100/200] train loss: 0.181, valid loss: 0.305, f1: 0.72 accuracy: 0.90
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
[125/200] train loss: 0.140, valid loss: 0.292, f1: 0.74 accuracy: 0.91
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 2 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for 1 epochs
Validation loss increasing for

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/loss,█▇▅▃▃▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▂▄▅▆▇▇▇▇▇██████████████████████████████
valid/f1,▁▁▂▄▄▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
valid/loss,█▇▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,153
train_step,9547


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 1.745, valid loss: 1.517, f1: 0.02 accuracy: 0.05
[025/200] train loss: 0.509, valid loss: 0.444, f1: 0.28 accuracy: 0.65
[050/200] train loss: 0.249, valid loss: 0.231, f1: 0.43 accuracy: 0.78
[075/200] train loss: 0.166, valid loss: 0.160, f1: 0.52 accuracy: 0.84
[100/200] train loss: 0.125, valid loss: 0.126, f1: 0.58 accuracy: 0.86
[125/200] train loss: 0.101, valid loss: 0.108, f1: 0.62 accuracy: 0.87
[150/200] train loss: 0.085, valid loss: 0.096, f1: 0.65 accuracy: 0.88
[175/200] train loss: 0.073, valid loss: 0.087, f1: 0.67 accuracy: 0.89


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.76790123456…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/loss,█▆▄▄▁▃▂▃▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
valid/accuracy,▁▃▄▄▅▆▆▆▇▇▇▇▇▇▇▇████████████████████████
valid/f1,▁▁▂▂▃▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████████████
valid/loss,█▆▅▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,199
train_step,12399


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING
[000/200] train loss: 1.715, valid loss: 1.433, f1: 0.02 accuracy: 0.20
[025/200] train loss: 0.293, valid loss: 0.265, f1: 0.41 accuracy: 0.76
[050/200] train loss: 0.154, valid loss: 0.147, f1: 0.55 accuracy: 0.85


In [18]:
# test best model
# first best model
cfg.REC_SIZE = 1
cfg.UNITS = 128
train(test=True)
# second best model
cfg.REC_SIZE = 2
cfg.UNITS = None
train(test=True)



0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss,███▇▇▇▇▆▆▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▁▁▁
train_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,0
train_step,101


[34m[1mwandb[0m: wandb version 0.12.7 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test/accuracy,▁
test/f1,▁
test/loss,▁
train/loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,99.0
test/loss,0.48209
train_step,10199.0


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/diego/.netrc
[34m[1mwandb[0m: wandb version 0.12.7 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


glove already downloaded
dependency_treebank already downloaded
dependency_treebank already downloaded


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


STARTING TRAINING


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test/accuracy,▁
test/f1,▁
test/loss,▁
train/loss,█▃▃▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,99.0
test/loss,0.32036
train_step,10199.0


(POSTagger(
   (emb_layer): Embedding(400661, 100)
   (rec_modules): LSTM(100, 64, num_layers=2, batch_first=True, bidirectional=True)
   (fc_modules): Sequential(
     (fc_0): Linear(in_features=128, out_features=46, bias=True)
   )
 ),
 <wandb.sdk.wandb_run.Run at 0x7f7ff4604a00>)