In [None]:
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [None]:
!mkdir -p drive
!google-drive-ocamlfuse drive

In [None]:
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag

platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\10/'    
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'
version='1.0.0'
torch_url=f"http://download.pytorch.org/whl/{accelerator}/torch-{version}-{platform}-linux_x86_64.whl"

!pip install -U {torch_url} torchvision

In [None]:
!ls drive/Colab/datagrad

# Init

In [2]:
import torch.nn.functional as F
import torch
from torch import nn, optim
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import random
import os
import pickle
import copy

local = True
if local:
    DATAPATH = '/Users/alfonso/workplace/datagrad/Data'
    MODELPATH = '/Users/alfonso/workplace/datagrad/Model'
    
else:
    PATH = '/content/drive/'
    DATAPATH = os.path.join(PATH,'Colab/datagrad/data/')
    MODELPATH = os.path.join(PATH,'Colab/datagrad/model/')

START_TAG = "<START>"
STOP_TAG = "<STOP>"
tag_to_ix = {"a": 0, "b": 1, "c": 2, "o": 3,
             START_TAG: 4, STOP_TAG: 5}
ix_to_tag = {tp[1]: tp[0] for tp in tag_to_ix.items()}
ix_to_tag[5] = "o"
ix_to_tag[4] = "o"

# Functions

In [28]:
def fromPickle(path):
    var = None
    with open(path, 'rb') as f:
        var = pickle.load(f)
    return var

def load():
    trn_X = fromPickle(os.path.join(
        DATAPATH, 'trn_X_token.pickle'))[:10]
    trn_y = fromPickle(os.path.join(
        DATAPATH, 'trn_y_token.pickle'))[:10]
    tst = fromPickle(os.path.join(
        DATAPATH, 'tst_X_token.pickle'))[:10]
    word_to_ix = fromPickle(os.path.join(
        DATAPATH, 'word_to_ix.pickle'))
    return trn_X, trn_y, tst, word_to_ix

def savemodel(model, name):
    torch.save(model.state_dict(), os.path.join(MODELPATH, name))

def evalinfo(y_preds, y_trues):
    size = len(y_preds)
    assert len(y_preds) == len(y_trues)
    stat = {'a': [0, 0], 'b': [0, 0], 'c': [0, 0]}
    tags = ['a', 'b', 'c']
    prec = {'a': [], 'b': [], 'c': []}
    recl = {'a': [], 'b': [], 'c': []}
    f1 = {'a': 0, 'b': 0, 'c': 0}
    for j in range(len(y_preds)):
        y_pred, y_true = y_preds[j], y_trues[j]
        prestat, recstat = copy.copy(stat), copy.copy(stat)
        for i in range(len(y_pred)):
            t = ix_to_tag[int(y_true[i])]
            p = ix_to_tag[int(y_pred[i])]
            if p in tags:
                prestat[p][1] += 1
            if p in tags and p == t:
                prestat[p][0] += 1
                recstat[t][0] += 1
            if t in tags:
                recstat[t][1] += 1
        for x in tags:
            if recstat[x][1] != 0:
                recl[x].append(recstat[x][0] /recstat[x][1])
            if prestat[x][1] != 0:
                prec[x].append(prestat[x][0] / prestat[x][1])
    for x in tags:
        prec[x] = 0 if len(prec[x]) == 0 else sum(prec[x])/len(prec[x])
        recl[x] = 0 if len(recl[x]) == 0 else sum(recl[x])/len(recl[x])
        f1[x] = (2 * prec[x] * recl[x]) / (prec[x] + recl[x] + 1e-8)

    for x in tags:
        print('TAG {} \t prec {:.4f} \t recl {:.4f} \t f1 {:.4f}'.format(
            x, prec[x], recl[x], f1[x]))

    print('AVG prec {:.4f} \t recl {:.4f} \t f1 {:.4f}'.format(
        sum(prec[x] for x in tags) / 3,
        sum(recl[x] for x in tags) / 3,
        sum(f1[x] for x in tags) / 3))
    
def argmax(vec):
    _, idx = torch.max(vec, 1)
    return idx.item()

def log_sum_exp(vec):
    max_score = vec[0, argmax(vec)]
    max_score_broadcast = max_score.view(1, -1).expand(1, vec.size()[1])
    return max_score + \
        torch.log(torch.sum(torch.exp(vec - max_score_broadcast)))

def init_embedding(embedding):
    bias = np.sqrt(3.0 / embedding.embedding_dim)
    nn.init.uniform_(embedding.weight, -bias, bias)

def init_linear(linear):
    bias = np.sqrt(6.0 / (linear.weight.size(0) +
                          linear.weight.size(1)))
    nn.init.uniform_(linear.weight, -bias, bias)
    if linear.bias is not None:
        linear.bias.data.zero_()

def init_rnn(input_rnn, rnn='lstm'):
    part_num = 4 if rnn == 'lstm' else 3
    for ind in range(0, input_rnn.num_layers):
        weight = eval('input_rnn.weight_ih_l' + str(ind))
        hid_size = weight.size(0) // part_num
        for i in range(part_num):
            nn.init.xavier_normal_(weight[hid_size * i:hid_size * (i + 1), :])
        weight = eval('input_rnn.weight_hh_l' + str(ind))
        for i in range(part_num):
            nn.init.xavier_normal_(weight[hid_size * i:hid_size * (i + 1), :])
    if input_rnn.bidirectional:
        for ind in range(0, input_rnn.num_layers):
            weight = eval('input_rnn.weight_ih_l' + str(ind) + '_reverse')
            for i in range(part_num):
                nn.init.xavier_normal_(
                    weight[hid_size * i:hid_size * (i + 1), :])
            weight = eval('input_rnn.weight_hh_l' + str(ind) + '_reverse')
            for i in range(part_num):
                nn.init.xavier_normal_(
                    weight[hid_size * i:hid_size * (i + 1), :])
    if input_rnn.bias:
        for ind in range(0, input_rnn.num_layers):
            bias = eval('input_rnn.bias_ih_l' + str(ind))
            bias.data.zero_()
            bias.data[input_rnn.hidden_size: 2 * input_rnn.hidden_size] = 1
            bias = eval('input_rnn.bias_hh_l' + str(ind))
            bias.data.zero_()
            bias.data[input_rnn.hidden_size: 2 * input_rnn.hidden_size] = 1
        if input_rnn.bidirectional:
            for ind in range(0, input_rnn.num_layers):
                bias = eval('input_rnn.bias_ih_l' + str(ind) + '_reverse')
                bias.data.zero_()
                bias.data[input_rnn.hidden_size: 2 *
                          input_rnn.hidden_size] = 1
                bias = eval('input_rnn.bias_hh_l' + str(ind) + '_reverse')
                bias.data.zero_()
                bias.data[input_rnn.hidden_size: 2 *
                          input_rnn.hidden_size] = 1

# Model

## BiLSTM

In [4]:
class BiLSTM(nn.Module):

    def __init__(self, vocab_size, embed_dim=10, hid_dim=10, loss_fn=nn.NLLLoss()):
        super(BiLSTM, self).__init__()
        self.hid_dim = hid_dim
        self.word_embeds = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.LSTM(embed_dim, hid_dim, bidirectional=True)
        self.hid2tag = nn.Linear(hid_dim * 2, len(tag_to_ix))
        self.loss_fn = loss_fn

    def _forward(self, x):
        embeds = self.word_embeds(x)
        lstm_out, _ = self.rnn(embeds.view(len(x), 1, -1))
        tag_space = self.hid2tag(lstm_out.view(len(x), -1))
        output = F.log_softmax(tag_space, dim=1)
        return output

    def forward(self, x):
        output = self._forward(x)
        tag_seq = torch.argmax(output, dim=1).numpy()
        return tag_seq

    def loss(self, x, y):
        output = self._forward(x)
        return self.loss_fn(output, y)

## BiLSTM + CRF

In [5]:
class BiLSTMCRF(nn.Module):

    def __init__(self, vocab_size, embed_dim=10, hid_dim=20, layer_num=2, device='cpu'):
        super(BiLSTMCRF, self).__init__()
        self.embed_dim = embed_dim
        self.hid_dim = hid_dim
        self.vocab_size = vocab_size
        self.tags_size = len(tag_to_ix)
        self.layer_num = layer_num
        self.device = device
        self.word_embeds = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.LSTM(embed_dim, hid_dim // 2,
                           num_layers=self.layer_num, bidirectional=True, dropout=0.1)
        init_rnn(self.rnn, 'lstm')
        self.hid2tag = nn.Linear(hid_dim, self.tags_size)
        init_linear(self.hid2tag)
        self.transitions = nn.Parameter(
            torch.randn(self.tags_size, self.tags_size))
        self.transitions.data[tag_to_ix[START_TAG], :] = -10000
        self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000

    def init_hidden(self):
        return (nn.init.xavier_uniform_(torch.empty(2 * self.layer_num, 1, self.hid_dim // 2), gain=nn.init.calculate_gain('relu')),
                nn.init.xavier_uniform_(torch.empty(2 * self.layer_num, 1, self.hid_dim // 2), gain=nn.init.calculate_gain('relu')))

    def _forward_alg(self, feats):
        init_alphas = torch.full((1, self.tags_size), -10000.)
        init_alphas[0][tag_to_ix[START_TAG]] = 0.
        forward_var = init_alphas
        for feat in feats:
            alphas_t = []
            for next_tag in range(self.tags_size):
                emit_score = feat[next_tag].view(
                    1, -1).expand(1, self.tags_size)
                trans_score = self.transitions[next_tag].view(1, -1)
                next_tag_var = forward_var + trans_score + emit_score
                alphas_t.append(log_sum_exp(next_tag_var).view(1))
            forward_var = torch.cat(alphas_t).view(1, -1)
        terminal_var = forward_var + self.transitions[tag_to_ix[STOP_TAG]]
        alpha = log_sum_exp(terminal_var)
        return alpha

    def _get_lstm_features(self, x):
        hidden = self.init_hidden()
        embeds = self.word_embeds(x).view(
            len(x), 1, -1)
        lstm_out, _ = self.rnn(embeds, hidden)
        lstm_out = lstm_out.view(len(x), self.hid_dim)
        lstm_feats = self.hid2tag(lstm_out)

        return lstm_feats

    def _score_sentence(self, feats, tags):
        score = torch.zeros(1)
        tags = torch.cat(
            [torch.tensor([tag_to_ix[START_TAG]], dtype=torch.long, device=self.device), tags])
        for i, feat in enumerate(feats):
            score = score + \
                self.transitions[tags[i + 1], tags[i]] + feat[tags[i + 1]]
        score = score + self.transitions[tag_to_ix[STOP_TAG], tags[-1]]
        return score

    def _viterbi_decode(self, feats):
        backpointers = []
        init_vvars = torch.full((1, self.tags_size), -10000.)
        init_vvars[0][tag_to_ix[START_TAG]] = 0
        forward_var = init_vvars
        for feat in feats:
            bptrs_t = []
            viterbivars_t = []
            for next_tag in range(self.tags_size):
                next_tag_var = forward_var + self.transitions[next_tag]
                best_tag_id = argmax(next_tag_var)
                bptrs_t.append(best_tag_id)
                viterbivars_t.append(next_tag_var[0][best_tag_id].view(1))
            forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
            backpointers.append(bptrs_t)
        terminal_var = forward_var + \
            self.transitions[tag_to_ix[STOP_TAG]]
        best_tag_id = argmax(terminal_var)
        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        start = best_path.pop()
        assert start == tag_to_ix[START_TAG]  # Sanity check
        best_path.reverse()  # 把从后向前的路径正过来
        return best_path

    def loss(self, x, y):
        feats = self._get_lstm_features(x)
        forward_score = self._forward_alg(feats)
        gold_score = self._score_sentence(feats, y)
        return forward_score - gold_score

    def forward(self, x):  # dont confuse this with _forward_alg above.
        lstm_feats = self._get_lstm_features(x)
        tag_seq = self._viterbi_decode(lstm_feats)
        return tag_seq

## Encoder + Att + Decoder + CRF

In [6]:
class EncoderAttDecoder(nn.Module):

    def __init__(self, vocab_size, in_hdim=128, out_hdim=128, de_hdim: int=128, loss_fn=nn.NLLLoss(),
                 use_crf=True, max_length=1000,
                 bi=2, device='cpu', dropout=0.4, num_layers=2,
                 teacher_forcing_ratio=0.3):
        super(EncoderAttDecoder, self).__init__()
        self.bi = bi
        self.device = device
        self.max_length = max_length
        self.loss_fn = loss_fn if use_crf is False else None
        self.num_layers = num_layers
        self.en_in_hdim = in_hdim
        self.en_out_hdim = out_hdim
        self.tags_size = len(tag_to_ix)
        self.de_hdim = de_hdim
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.use_crf = use_crf
        self.dropout = nn.Dropout(dropout)

        self.en_wrd_embed = nn.Embedding(vocab_size, self.en_in_hdim)
        init_embedding(self.en_wrd_embed)
        self.enrnn = nn.GRU(self.en_in_hdim, self.en_out_hdim, num_layers=num_layers,
                            bidirectional=True if bi == 2 else False,
                            dropout=dropout)
        init_rnn(self.enrnn, 'GRU')
        self.de_embed = nn.Embedding(self.tags_size, self.de_hdim)
        init_embedding(self.de_embed)
        self.attn = nn.Linear(self.de_hdim * 2, self.max_length)
        init_linear(self.attn)
        self.attn_combine = nn.Linear(self.de_hdim * 2, self.de_hdim)
        init_linear(self.attn_combine)
        self.dernn = nn.GRU(self.de_hdim, self.de_hdim)  # , dropout=0.3)
        init_rnn(self.dernn, 'GRU')
        self.hid2tag = nn.Linear(self.de_hdim, self.tags_size)
        init_linear(self.hid2tag)
        self.transitions = nn.Parameter(torch.nn.init.uniform_(
            torch.empty(self.tags_size, self.tags_size)))
        self.transitions.data[tag_to_ix[START_TAG], :] = -10000
        self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000

    def _encoder(self, x):
        eoutputs = torch.zeros(
            self.max_length, self.en_out_hdim * self.bi, device=self.device)
        ehidden = nn.init.xavier_uniform_(
            torch.zeros(self.bi * self.num_layers, 1,
                        self.en_out_hdim, device=self.device),
            gain=nn.init.calculate_gain('relu'))
        for ei in range(x.size(0)):
            eoutput, ehidden = self._encoder_net(
                x[ei], ehidden)
            eoutputs[ei] = eoutput[0, 0]

        return eoutputs, ehidden

    def _encoder_net(self, sentence, hidden):
        embed = self.en_wrd_embed(sentence).view(1, 1, -1)
        output, hidden = self.enrnn(self.dropout(embed), hidden)
        output = F.log_softmax(output, dim=2)
        return output, hidden

    def _decoder(self, eoutputs, ehidden, y=None):
        dinput = torch.tensor([[tag_to_ix[START_TAG]]], device=self.device)
        dhidden = torch.cat(
            [ehidden[0, :, :], ehidden[-1, :, :]], 1).unsqueeze(0)
        doutputs = torch.zeros(
            self.length, self.de_hdim, device=self.device)
        if y is not None:
            for di in range(self.length):
                doutput, dhidden = self._decoder_net(
                    dinput, dhidden, eoutputs)
                dinput = y[di]  # Teacher forcing
                doutputs[di] = doutput[0, 0]
        else:
            for di in range(self.length):
                doutput, decoder_hidden = self._decoder_net(
                    dinput, dhidden, eoutputs)
                topv, topi = doutput.topk(1)
                dinput = topi.squeeze().detach()
                doutputs[di] = doutput[0, 0]
                if dinput.item() == STOP_TAG:
                    break
        return doutputs

    def _decoder_net(self, input, hidden, encoder_outputs):
        embedded = self.de_embed(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.dernn(output, hidden)
        output = self.dropout(output)
        output = F.log_softmax(self.hid2tag(output[0]), dim=1)
        return output, hidden

    def _crf(self, feats):
        return self._viterbi_decode(feats)

    def _forward_alg(self, feats):
        init_alphas = torch.full((1, self.tags_size), -10000.)
        init_alphas[0][tag_to_ix[START_TAG]] = 0.
        forward_var = init_alphas
        # Iterate through the sentence
        for feat in feats:
            alphas_t = []
            for next_tag in range(self.tags_size):
                emit_score = feat[next_tag].view(
                    1, -1).expand(1, self.tags_size)
                trans_score = self.transitions[next_tag].view(1, -1)
                next_tag_var = forward_var + trans_score + emit_score
                alphas_t.append(log_sum_exp(next_tag_var).view(1))
            forward_var = torch.cat(alphas_t).view(1, -1)
        terminal_var = forward_var + self.transitions[tag_to_ix[STOP_TAG]]
        alpha = log_sum_exp(terminal_var)
        return alpha

    def _score_sentence(self, feats, tags):
        score = torch.zeros(1)
        tags = torch.cat(
            [torch.tensor([tag_to_ix[START_TAG]], dtype=torch.long, device=self.device), tags])
        for i, feat in enumerate(feats):
            score += self.transitions[tags[i + 1], tags[i]] + feat[tags[i + 1]]
        score += self.transitions[tag_to_ix[STOP_TAG], tags[-1]]
        return score

    def _neg_log_likelihood(self, feats, tags):
        forward_score = self._forward_alg(feats)
        gold_score = self._score_sentence(feats, tags)
        return forward_score - gold_score

    def _viterbi_decode(self, feats):
        backpointers = []
        init_vvars = torch.full((1, self.tags_size), -10000.)
        init_vvars[0][tag_to_ix[START_TAG]] = 0
        forward_var = init_vvars
        for feat in feats:
            bptrs_t = []
            viterbivars_t = []
            for next_tag in range(self.tags_size):
                next_tag_var = forward_var + self.transitions[next_tag]
                best_tag_id = argmax(next_tag_var)
                bptrs_t.append(best_tag_id)
                viterbivars_t.append(next_tag_var[0][best_tag_id].view(1))
            forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
            backpointers.append(bptrs_t)
        terminal_var = forward_var + \
            self.transitions[tag_to_ix[STOP_TAG]]
        best_tag_id = argmax(terminal_var)
        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        start = best_path.pop()
        assert start == tag_to_ix[START_TAG]
        best_path.reverse()
        return best_path

    def _get_feat(self, x, y=None, use_tf=False):
        self.length = x.size(0)
        eoutputs, ehidden = self._encoder(x)
        if use_tf and random.random() < self.teacher_forcing_ratio:
            doutputs = self._decoder(eoutputs, ehidden, y)
        else:
            doutputs = self._decoder(eoutputs, ehidden)
        output = self.hid2tag(doutputs)
        return output

    def loss(self, x, y):
        output = self._get_feat(x, y, use_tf=True)
        if self.use_crf:
            return self._neg_log_likelihood(output, y)
        else:
            return self.loss_fn(output, y)

    def forward(self, x):
        output = self._get_feat(x)
        if self.use_crf:
            tag_seq = self._crf(output)
        else:
            tag_seq = torch.argmax(output, dim=1).numpy()
        return tag_seq

# Train and Evaluate

## prepare

In [7]:
trn_X, trn_y, tst, word_to_ix = load()
max_length = max(len(x) for x in trn_X)
def train(trn_ixs, model, optimizer):
    avg_loss = 0
    tsize = len(trn_ixs)
    for i in tqdm(np.random.permutation(trn_ixs)):
        x = torch.tensor(trn_X[i], dtype=torch.long, device=device)
        y = torch.tensor(trn_y[i], dtype=torch.long, device=device)
        optimizer.zero_grad()
        loss = model.loss(x, y)
        avg_loss += loss.item() / tsize
        loss.backward()
        optimizer.step()
    return model, avg_loss
def evaluate(vld_ixs, model):
    vld_loss = 0
    vsize = len(vld_ixs)
    y_preds, y_trues = [], []
    for i in tqdm(np.random.permutation(vld_ixs)):
        x = torch.tensor(trn_X[i], dtype=torch.long, device=device)
        y = torch.tensor(trn_y[i], dtype=torch.long, device=device)
        y_pred = model(x)
        loss = model.loss(x, y)
        vld_loss += loss.item() / vsize
        y_preds.append(y_pred[:])
        y_trues.append(trn_y[i])
    return vld_loss, y_preds, y_trues

## setting

In [8]:
split_num = 5
epoch_num = 4
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
direction = 2
en_indim = 64
en_outdim = 64

## BiLSTM

In [29]:
for sid in range(split_num):
    trn_ixs, vld_ixs = train_test_split(
        list(range(len(trn_X))), test_size=1 / split_num,
        shuffle=True, random_state=sid)
    print('SPLIT {} --------------------'.format(sid + 1))
    model = BiLSTM(len(word_to_ix), embed_dim=64, hid_dim=64).to(device)
    optimizer = optim.Adam(
        model.parameters(), lr=0.01, weight_decay=1e-6)
    for epoch in range(epoch_num):
        model.train()
        model, avg_loss = train(trn_ixs, model, optimizer)
        model.eval()
        vld_loss, y_preds, y_trues = evaluate(vld_ixs, model)
        print('Epoch {}/{} \t avg_loss {:.4f} \t vld_loss {:.4f} \t'.format(
            epoch + 1, epoch_num, avg_loss, vld_loss))
        evalinfo(y_preds, y_trues)
        savemodel(model, 'bilstm_{}_{}.pytorch'.format(sid,epoch))

 50%|█████     | 4/8 [00:00<00:00, 33.24it/s]

SPLIT 1 --------------------


100%|██████████| 8/8 [00:00<00:00, 33.70it/s]
100%|██████████| 2/2 [00:00<00:00, 49.48it/s]
 50%|█████     | 4/8 [00:00<00:00, 31.18it/s]

Epoch 1/2 	 avg_loss 1.1474 	 vld_loss 0.5510 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00, 31.26it/s]
100%|██████████| 2/2 [00:00<00:00, 41.42it/s]
 38%|███▊      | 3/8 [00:00<00:00, 26.88it/s]

Epoch 2/2 	 avg_loss 0.4863 	 vld_loss 0.5955 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 2 --------------------


100%|██████████| 8/8 [00:00<00:00, 28.92it/s]
100%|██████████| 2/2 [00:00<00:00, 105.13it/s]
 25%|██▌       | 2/8 [00:00<00:00, 19.52it/s]

Epoch 1/2 	 avg_loss 1.0002 	 vld_loss 0.2338 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00, 29.60it/s]
100%|██████████| 2/2 [00:00<00:00, 102.17it/s]
 38%|███▊      | 3/8 [00:00<00:00, 29.16it/s]

Epoch 2/2 	 avg_loss 0.4069 	 vld_loss 0.3439 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 3 --------------------


100%|██████████| 8/8 [00:00<00:00, 30.46it/s]
100%|██████████| 2/2 [00:00<00:00, 96.70it/s]
 38%|███▊      | 3/8 [00:00<00:00, 23.14it/s]

Epoch 1/2 	 avg_loss 0.9061 	 vld_loss 1.2514 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00, 26.35it/s]
100%|██████████| 2/2 [00:00<00:00, 124.05it/s]
 50%|█████     | 4/8 [00:00<00:00, 37.13it/s]

Epoch 2/2 	 avg_loss 0.3463 	 vld_loss 0.9454 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 4 --------------------


100%|██████████| 8/8 [00:00<00:00, 35.68it/s]
100%|██████████| 2/2 [00:00<00:00, 94.09it/s]
 38%|███▊      | 3/8 [00:00<00:00, 24.92it/s]

Epoch 1/2 	 avg_loss 0.9722 	 vld_loss 0.9596 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00, 28.13it/s]
100%|██████████| 2/2 [00:00<00:00, 82.23it/s]
100%|██████████| 8/8 [00:00<00:00, 44.64it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss 0.2737 	 vld_loss 0.8244 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 5 --------------------


100%|██████████| 2/2 [00:00<00:00, 33.68it/s]
 62%|██████▎   | 5/8 [00:00<00:00, 36.64it/s]

Epoch 1/2 	 avg_loss 1.0243 	 vld_loss 0.6825 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00, 38.28it/s]
100%|██████████| 2/2 [00:00<00:00, 35.39it/s]

Epoch 2/2 	 avg_loss 0.3434 	 vld_loss 0.6073 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000





## BiLSTM + CRF

In [31]:
for sid in range(split_num):
    trn_ixs, vld_ixs = train_test_split(
        list(range(len(trn_X))), test_size=1 / split_num,
        shuffle=True, random_state=sid)
    print('SPLIT {} --------------------'.format(sid + 1))
    model = BiLSTMCRF(len(word_to_ix), embed_dim=64,
                      hid_dim=64, device=device).to(device)
    optimizer = optim.Adam(
        model.parameters(), lr=0.01, weight_decay=1e-6)
    for epoch in range(epoch_num):
        model.train()
        model, avg_loss = train(trn_ixs, model, optimizer)
        model.eval()
        vld_loss, y_preds, y_trues = evaluate(vld_ixs, model)
        print('Epoch {}/{} \t avg_loss {:.4f} \t vld_loss {:.4f} \t'.format(
            epoch + 1, epoch_num, avg_loss, vld_loss))
        evalinfo(y_preds, y_trues)
        savemodel(model, 'bilstmcrf_{}_{}.pytorch'.format(sid,epoch))

 12%|█▎        | 1/8 [00:00<00:00,  9.73it/s]

SPLIT 1 --------------------


100%|██████████| 8/8 [00:00<00:00, 11.16it/s]
100%|██████████| 2/2 [00:00<00:00, 11.94it/s]
 25%|██▌       | 2/8 [00:00<00:00, 10.71it/s]

Epoch 1/2 	 avg_loss 26.5289 	 vld_loss 48.6112 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00, 11.63it/s]
100%|██████████| 2/2 [00:00<00:00, 12.54it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss 13.6288 	 vld_loss 51.6336 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 2 --------------------


100%|██████████| 8/8 [00:00<00:00,  8.76it/s]
100%|██████████| 2/2 [00:00<00:00, 34.29it/s]
 25%|██▌       | 2/8 [00:00<00:00, 17.71it/s]

Epoch 1/2 	 avg_loss 36.5849 	 vld_loss 8.3653 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00,  8.60it/s]
100%|██████████| 2/2 [00:00<00:00, 36.09it/s]
 25%|██▌       | 2/8 [00:00<00:00, 13.92it/s]

Epoch 2/2 	 avg_loss 24.5055 	 vld_loss 8.7215 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 3 --------------------


100%|██████████| 8/8 [00:00<00:00,  9.69it/s]
100%|██████████| 2/2 [00:00<00:00, 31.97it/s]
 38%|███▊      | 3/8 [00:00<00:00, 18.19it/s]

Epoch 1/2 	 avg_loss 32.5792 	 vld_loss 21.0915 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00,  8.26it/s]
100%|██████████| 2/2 [00:00<00:00, 31.19it/s]
 25%|██▌       | 2/8 [00:00<00:00, 12.16it/s]

Epoch 2/2 	 avg_loss 16.4998 	 vld_loss 21.6967 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 4 --------------------


100%|██████████| 8/8 [00:00<00:00, 10.25it/s]
100%|██████████| 2/2 [00:00<00:00, 22.39it/s]
 25%|██▌       | 2/8 [00:00<00:00, 16.63it/s]

Epoch 1/2 	 avg_loss 52.3153 	 vld_loss 13.9593 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00, 10.65it/s]
100%|██████████| 2/2 [00:00<00:00, 20.41it/s]
 12%|█▎        | 1/8 [00:00<00:00,  8.27it/s]

Epoch 2/2 	 avg_loss 22.2382 	 vld_loss 15.3426 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 5 --------------------


100%|██████████| 8/8 [00:00<00:00, 12.90it/s]
100%|██████████| 2/2 [00:00<00:00,  8.40it/s]
 25%|██▌       | 2/8 [00:00<00:00, 12.76it/s]

Epoch 1/2 	 avg_loss 16.9142 	 vld_loss 55.5484 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:00<00:00, 13.18it/s]
100%|██████████| 2/2 [00:00<00:00,  8.43it/s]

Epoch 2/2 	 avg_loss 9.2611 	 vld_loss 70.0871 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000





## Encoder + Att + Decoder

In [32]:
for sid in range(split_num):
    trn_ixs, vld_ixs = train_test_split(
        list(range(len(trn_X))), test_size=1 / split_num,
        shuffle=True, random_state=sid)
    print('SPLIT {} --------------------'.format(sid + 1))

    model = EncoderAttDecoder(len(word_to_ix), in_hdim=en_indim, out_hdim=en_outdim,
                              de_hdim=en_outdim * direction, max_length=max_length,
                              bi=direction, dropout=0.3, num_layers=2,
                              teacher_forcing_ratio=0.3, use_crf=False, device=device).to(device)
    optimizer = optim.Adam(
        model.parameters(), lr=0.01, weight_decay=1e-6)
    for epoch in range(epoch_num):
        model.train()
        model, avg_loss = train(trn_ixs, model, optimizer)
        model.eval()
        vld_loss, y_preds, y_trues = evaluate(vld_ixs, model)
        print('Epoch {}/{} \t avg_loss {:.4f} \t vld_loss {:.4f} \t'.format(
            epoch + 1, epoch_num, avg_loss, vld_loss))
        evalinfo(y_preds, y_trues)
        savemodel(model, 'ead_{}_{}.pytorch'.format(sid,epoch))

  0%|          | 0/8 [00:00<?, ?it/s]

SPLIT 1 --------------------


100%|██████████| 8/8 [00:01<00:00,  5.63it/s]
100%|██████████| 2/2 [00:00<00:00,  9.37it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 1/2 	 avg_loss -6.0156 	 vld_loss -47.1588 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:01<00:00,  5.03it/s]
100%|██████████| 2/2 [00:00<00:00,  9.63it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss -188.2837 	 vld_loss -432.4019 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 2 --------------------


100%|██████████| 8/8 [00:01<00:00,  5.40it/s]
100%|██████████| 2/2 [00:00<00:00, 26.26it/s]
 12%|█▎        | 1/8 [00:00<00:00,  7.10it/s]

Epoch 1/2 	 avg_loss -34.2251 	 vld_loss -150.3462 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:01<00:00,  4.76it/s]
100%|██████████| 2/2 [00:00<00:00, 25.90it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss -359.8362 	 vld_loss -722.7305 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 3 --------------------


100%|██████████| 8/8 [00:01<00:00,  5.04it/s]
100%|██████████| 2/2 [00:00<00:00, 20.71it/s]
 12%|█▎        | 1/8 [00:00<00:01,  5.75it/s]

Epoch 1/2 	 avg_loss -10.7636 	 vld_loss -58.5708 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:01<00:00,  5.14it/s]
100%|██████████| 2/2 [00:00<00:00, 22.91it/s]
 12%|█▎        | 1/8 [00:00<00:01,  6.38it/s]

Epoch 2/2 	 avg_loss -232.9170 	 vld_loss -470.5330 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 4 --------------------


100%|██████████| 8/8 [00:01<00:00,  5.36it/s]
100%|██████████| 2/2 [00:00<00:00, 16.21it/s]
 12%|█▎        | 1/8 [00:00<00:01,  5.61it/s]

Epoch 1/2 	 avg_loss -86.6671 	 vld_loss -249.4149 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:01<00:00,  5.15it/s]
100%|██████████| 2/2 [00:00<00:00, 16.28it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss -546.4884 	 vld_loss -883.2411 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 5 --------------------


100%|██████████| 8/8 [00:01<00:00,  7.01it/s]
100%|██████████| 2/2 [00:00<00:00,  6.10it/s]
 12%|█▎        | 1/8 [00:00<00:01,  5.63it/s]

Epoch 1/2 	 avg_loss -73.4287 	 vld_loss -251.7889 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:01<00:00,  5.89it/s]
100%|██████████| 2/2 [00:00<00:00,  7.22it/s]

Epoch 2/2 	 avg_loss -509.2907 	 vld_loss -908.3806 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000





## Encoder + Att + Decoder + CRF

In [33]:
for sid in range(split_num):
    trn_ixs, vld_ixs = train_test_split(
        list(range(len(trn_X))), test_size=1 / split_num,
        shuffle=True, random_state=sid)
    print('SPLIT {} --------------------'.format(sid + 1))

    model = EncoderAttDecoder(len(word_to_ix), in_hdim=en_indim, out_hdim=en_outdim,
                              de_hdim=en_outdim * direction, max_length=max_length,
                              bi=direction, dropout=0.3, num_layers=2,
                              teacher_forcing_ratio=0.3, use_crf=True, device=device).to(device)
    optimizer = optim.Adam(
        model.parameters(), lr=0.01, weight_decay=1e-6)
    for epoch in range(epoch_num):
        model.train()
        model, avg_loss = train(trn_ixs, model, optimizer)
        model.eval()
        vld_loss, y_preds, y_trues = evaluate(vld_ixs, model)
        print('Epoch {}/{} \t avg_loss {:.4f} \t vld_loss {:.4f} \t'.format(
            epoch + 1, epoch_num, avg_loss, vld_loss))
        evalinfo(y_preds, y_trues)
        savemodel(model, 'eadcrf_{}_{}.pytorch'.format(sid,epoch))

  0%|          | 0/8 [00:00<?, ?it/s]

SPLIT 1 --------------------


100%|██████████| 8/8 [00:01<00:00,  5.41it/s]
100%|██████████| 2/2 [00:00<00:00,  7.06it/s]
 12%|█▎        | 1/8 [00:00<00:00,  9.19it/s]

Epoch 1/2 	 avg_loss 35.1029 	 vld_loss 39.7248 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:01<00:00,  4.87it/s]
100%|██████████| 2/2 [00:00<00:00,  5.48it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss 23.0273 	 vld_loss 39.0893 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 2 --------------------


100%|██████████| 8/8 [00:02<00:00,  3.96it/s]
100%|██████████| 2/2 [00:00<00:00, 15.75it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 1/2 	 avg_loss 104.6514 	 vld_loss 21.1126 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:02<00:00,  3.35it/s]
100%|██████████| 2/2 [00:00<00:00, 16.02it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss 25.5018 	 vld_loss 25.2135 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 3 --------------------


100%|██████████| 8/8 [00:02<00:00,  3.47it/s]
100%|██████████| 2/2 [00:00<00:00, 16.55it/s]
 12%|█▎        | 1/8 [00:00<00:00,  8.94it/s]

Epoch 1/2 	 avg_loss 43.2810 	 vld_loss 28.5988 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:02<00:00,  3.41it/s]
100%|██████████| 2/2 [00:00<00:00, 16.99it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss 48.5163 	 vld_loss 21.3842 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 4 --------------------


100%|██████████| 8/8 [00:01<00:00,  3.94it/s]
100%|██████████| 2/2 [00:00<00:00, 11.61it/s]
 12%|█▎        | 1/8 [00:00<00:00,  9.38it/s]

Epoch 1/2 	 avg_loss 66.1944 	 vld_loss 17.9390 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:01<00:00,  3.28it/s]
100%|██████████| 2/2 [00:00<00:00,  9.82it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

Epoch 2/2 	 avg_loss 34.0217 	 vld_loss 15.4507 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000
SPLIT 5 --------------------


100%|██████████| 8/8 [00:01<00:00,  5.63it/s]
100%|██████████| 2/2 [00:00<00:00,  4.95it/s]
 25%|██▌       | 2/8 [00:00<00:00, 11.40it/s]

Epoch 1/2 	 avg_loss 29.8988 	 vld_loss 45.1030 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000


100%|██████████| 8/8 [00:01<00:00,  5.31it/s]
100%|██████████| 2/2 [00:00<00:00,  4.39it/s]

Epoch 2/2 	 avg_loss 14.5222 	 vld_loss 64.8156 	
TAG a 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG b 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
TAG c 	 prec 0.0000 	 recl 0.0000 	 f1 0.0000
AVG prec 0.0000 	 recl 0.0000 	 f1 0.0000



