# Load Data

In [1]:
'''
To Do:
1. Try different topN and Domain data.
2. 存结果（图，模型，log）
'''

'\nTo Do:\n1. Try different topN and Domain data.\n2. 存结果（图，模型，log）\n'

In [2]:
! pip install pytorch_pretrained_bert

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
from google.colab import drive
drive.mount('/content/drive')

import sys
sys.path.insert(0, '/content/drive/MyDrive/Colab Notebooks/Capstone')

import os
import pandas as pd
import numpy as np

from utils import read_conll_file, read_data


# Locations on the mounted Drive: checkpoint folder, dataset root, and the
# WSJ sub-folder of the fine-grained POS data inside the dataset root.
model_dir = "/content/drive/MyDrive/Colab Notebooks/Capstone/model"
data_dir = "/content/drive/MyDrive/Colab Notebooks/Capstone/data/gweb_sancl"
wsj_dir = os.path.join(data_dir, *("pos_fine", "wsj"))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# CoNLL files of the three WSJ splits, all located directly under wsj_dir.
wsj_train_file, wsj_dev_file, wsj_test_file = [
    os.path.join(wsj_dir, "gweb-wsj-{}.conll".format(split))
    for split in ("train", "dev", "test")
]

In [5]:
# Parse the three WSJ splits. Every read_data call is unpacked into a word list, a tag list
# and a tag set; judging by the cell output it also prints the sample and tag counts.
wsj_train_word_lst, wsj_train_tag_lst, wsj_train_tag_set = read_data(wsj_train_file)
wsj_dev_word_lst, wsj_dev_tag_lst, wsj_dev_tag_set = read_data(wsj_dev_file)
wsj_test_word_lst, wsj_test_tag_lst, wsj_test_tag_set = read_data(wsj_test_file)

The number of samples: 30060
The number of tags 48
The number of samples: 1336
The number of tags 45
The number of samples: 1640
The number of tags 45


In [6]:
# Tag vocabulary: the union of the tags seen in all WSJ splits, sorted for a stable
# order, with "<pad>" placed first so that it always receives index 0.
wsj_tags = ["<pad>"] + sorted(set(wsj_train_tag_set + wsj_dev_tag_set + wsj_test_tag_set))
idx2tag = dict(enumerate(wsj_tags))
tag2idx = {label: position for position, label in idx2tag.items()}
print(len(wsj_tags))

49


# Build Model

In [7]:
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

import os
from tqdm import tqdm_notebook as tqdm
import numpy as np
import torch
import torch.nn as nn
from torch.utils import data
import torch.optim as optim
from pytorch_pretrained_bert import BertTokenizer

In [8]:
# Prefer the GPU when one is visible to PyTorch; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [9]:
# Tokenizer for the 'bert-base-cased' vocabulary; do_lower_case=False so input casing is kept.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)

In [10]:
# tokens = tokenizer.tokenize("mistakenly")
# tokens

In [11]:
# tid = tokenizer.convert_tokens_to_ids(tokens)
# tid

In [12]:
# tokenizer.convert_ids_to_tokens([234,2000,3000,22893])

In [13]:
class PosDataset(data.Dataset):
    """Sentence-level POS dataset that aligns sub-word token ids with tag ids.

    Relies on the notebook-level ``tokenizer`` (word -> sub-word tokens -> ids)
    and ``tag2idx`` (tag string -> integer id) objects.
    """

    def __init__(self, word_lst, tag_lst):
        """Store the sentences with sentinel tokens added.

        Args:
            word_lst: list of sentences, each a list of word strings.
            tag_lst: list of tag sequences, parallel to ``word_lst``.
        """
        sents, tags_li = [], [] # list of lists
        for i in range(len(word_lst)):
            # "[CLS]"/"[SEP]" frame the sentence; they are tagged "<pad>" (no decision).
            sents.append(["[CLS]"] + word_lst[i] + ["[SEP]"])
            tags_li.append(["<pad>"] + tag_lst[i] + ["<pad>"])
        self.sents, self.tags_li = sents, tags_li

    def __len__(self):
        """Number of sentences."""
        return len(self.sents)

    def __getitem__(self, idx):
        """Encode one sentence.

        Returns:
            (words, x, is_heads, tags, y, seqlen) where ``words``/``tags`` are the
            space-joined strings (sentinels included), ``x`` the token ids, ``y`` the
            tag ids, ``is_heads`` a 0/1 list marking the first piece of every word,
            and ``seqlen`` the common length of ``x``, ``y`` and ``is_heads``.
        """
        words, tags = self.sents[idx], self.tags_li[idx] # words, tags: string list

        # We give credits only to the first piece.
        x, y = [], [] # list of ids
        is_heads = [] # list. 1: the token is the first piece of a word
        for w, t in zip(words, tags):
            if w in ("[CLS]", "[SEP]"):
                tokens = [w]
            else:
                # The tokenizer can return an empty list (e.g. for a word made only of
                # characters it strips). Such a word would contribute one tag and one
                # head flag but no id, tripping the length assert below, so it is
                # represented by a single "[UNK]" piece instead.
                tokens = tokenizer.tokenize(w) or ["[UNK]"]
            xx = tokenizer.convert_tokens_to_ids(tokens)

            is_head = [1] + [0]*(len(tokens) - 1)

            t = [t] + ["<pad>"] * (len(tokens) - 1)  # <PAD>: no decision
            yy = [tag2idx[each] for each in t]  # (T,)

            x.extend(xx)
            is_heads.extend(is_head)
            y.extend(yy)

        assert len(x)==len(y)==len(is_heads), "len(x)={}, len(y)={}, len(is_heads)={}".format(len(x), len(y), len(is_heads))

        # seqlen
        seqlen = len(y)

        # to string
        words = " ".join(words)
        tags = " ".join(tags)
        return words, x, is_heads, tags, y, seqlen


In [14]:
def pad(batch):
    """Collate 6-field samples into a batch, right-padding id lists with 0.

    Fields 1 (token ids) and -2 (tag ids) are padded with 0 (the "<pad>" index)
    up to the largest value found in the last field (sequence length) and
    returned as LongTensors; fields 0, 2, 3 and -1 are returned as plain lists.

    Returns:
        (words, x, is_heads, tags, y, seqlens)
    """
    def column(position):
        # Gather one field across all samples of the batch.
        return [sample[position] for sample in batch]

    def right_padded(position, width):
        # Same field, but every list extended with zeros up to `width`.
        return [sample[position] + [0] * (width - len(sample[position])) for sample in batch]

    seqlens = column(-1)
    longest = np.array(seqlens).max()

    x = torch.LongTensor(right_padded(1, longest))
    y = torch.LongTensor(right_padded(-2, longest))

    return column(0), x, column(2), column(3), y, seqlens

In [15]:
from pytorch_pretrained_bert import BertModel

In [16]:
class Net(nn.Module):
    """Token classifier: 'bert-base-cased' encoder followed by one linear layer."""

    def __init__(self, vocab_size=None):
        """
        Args:
            vocab_size: number of output classes of the linear layer (the callers in
                this notebook pass ``len(tag2idx)``).
        """
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-cased')

        # 768 is the feature width of the encoder output that is fed into the classifier.
        self.fc = nn.Linear(768, vocab_size)
        self.device = device

    def forward(self, x, y):
        '''
        x: (N, T). int64
        y: (N, T). int64

        Returns (logits, y, y_hat): logits is (N, T, vocab_size), y is the input
        target moved to the model's device, y_hat is logits.argmax(-1).
        '''
        # Move the inputs to wherever THIS module's weights live. Using the global
        # `device` string here would send every nn.DataParallel replica's inputs to
        # the default GPU, which breaks multi-GPU runs; on a single device the two
        # are the same.
        target_device = self.fc.weight.device
        x = x.to(target_device)
        y = y.to(target_device)

        # NOTE(review): no attention mask is passed, so padded positions (id 0) are
        # attended to like real tokens. Left unchanged to stay compatible with
        # checkpoints trained this way -- confirm whether that is intended.
        if self.training:
            self.bert.train()
            encoded_layers, _ = self.bert(x)
        else:
            # Inference: no graph is needed for the encoder.
            self.bert.eval()
            with torch.no_grad():
                encoded_layers, _ = self.bert(x)
        enc = encoded_layers[-1]  # output of the last encoder layer

        logits = self.fc(enc)
        y_hat = logits.argmax(-1)
        return logits, y, y_hat

In [17]:
def train(model, iterator, optimizer, criterion):
    """Make one pass over ``iterator``, taking one optimizer step per batch.

    Args:
        model: called as ``model(x, y)``; must return ``(logits, y, y_hat)``.
        iterator: yields 6-field batches ``(words, x, is_heads, tags, y, seqlens)``.
        optimizer: optimizer over the model parameters.
        criterion: loss applied to flattened ``(N*T, VOCAB)`` logits and ``(N*T,)`` targets.
    """
    model.train()
    for step, (_, x, _, _, y, _) in enumerate(iterator):
        optimizer.zero_grad()

        # logits: (N, T, VOCAB); y comes back as returned by the model, (N, T)
        logits, y, _ = model(x, y)
        num_classes = logits.shape[-1]

        loss = criterion(logits.view(-1, num_classes), y.view(-1))
        loss.backward()
        optimizer.step()

        # Progress report every 10th batch.
        if step % 10 == 0:
            print("step: {}, loss: {}".format(step, loss.item()))

In [18]:
def eval(model, iterator, average="macro"):
    """Evaluate ``model`` on ``iterator`` and return (precision, recall, f1).

    Only the first piece of every word is scored, and the "[CLS]"/"[SEP]" positions
    are excluded. Side effects: writes one "word gold_tag predicted_tag" line per
    scored word to the file ``result`` in the working directory (blank line between
    sentences), and prints the accuracy and a classification report.

    NOTE: this function shadows the builtin ``eval`` inside the notebook; the name
    is kept because other cells call it.

    Args:
        model: called as ``model(x, y)``; must return ``(logits, y, y_hat)``.
        iterator: yields 6-field batches ``(words, x, is_heads, tags, y, seqlens)``.
        average: averaging mode forwarded to the sklearn precision/recall/f1 scorers.
    """
    model.eval()

    Words, Is_heads, Tags, Y_hat = [], [], [], []
    with torch.no_grad():
        for batch in iterator:
            words, x, is_heads, tags, y, _ = batch

            _, _, y_hat = model(x, y)  # y_hat: (N, T)

            Words.extend(words)
            Is_heads.extend(is_heads)
            Tags.extend(tags)
            Y_hat.extend(y_hat.cpu().numpy().tolist())

    ## gets results and save
    # The gold/predicted ids are collected while the file is written, instead of
    # re-opening and re-parsing "result" afterwards (which also left two file
    # handles unclosed).
    true_ids, pred_ids = [], []
    with open("result", 'w') as fout:
        for words, is_heads, tags, y_hat in zip(Words, Is_heads, Tags, Y_hat):
            # zip() stops at len(is_heads), which also drops the batch padding.
            y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
            preds = [idx2tag[hat] for hat in y_hat]
            word_lst, tag_lst = words.split(), tags.split()
            assert len(preds)==len(word_lst)==len(tag_lst)
            # [1:-1] strips the "[CLS]" / "[SEP]" sentinels.
            for w, t, p in zip(word_lst[1:-1], tag_lst[1:-1], preds[1:-1]):
                fout.write("{} {} {}\n".format(w, t, p))
                true_ids.append(tag2idx[t])
                pred_ids.append(tag2idx[p])
            fout.write("\n")

    ## calc metric
    y_true = np.array(true_ids)
    y_pred = np.array(pred_ids)

    acc = (y_true==y_pred).astype(np.int32).sum() / len(y_true)

    print("acc=%.2f"%acc)
    print("classification_report", classification_report(y_true, y_pred))
    precision_value = precision_score(y_true, y_pred, average=average)
    recall_value = recall_score(y_true, y_pred, average=average)
    f1_value = f1_score(y_true, y_pred, average=average)

    return precision_value, recall_value, f1_value

In [19]:
# Baseline tagger with one output class per tag, placed on `device` and wrapped for
# data-parallel execution (nn.Module.to returns the module itself).
model = nn.DataParallel(Net(vocab_size=len(tag2idx)).to(device))

In [20]:
# WSJ train split for fitting, WSJ test split for evaluation.
train_dataset = PosDataset(wsj_train_word_lst, wsj_train_tag_lst)
eval_dataset = PosDataset(wsj_test_word_lst, wsj_test_tag_lst)

# Both loaders batch 8 sentences and pad them with `pad`; only training is shuffled.
_shared_loader_args = dict(batch_size=8, num_workers=1, collate_fn=pad)
train_iter = data.DataLoader(train_dataset, shuffle=True, **_shared_loader_args)
test_iter = data.DataLoader(eval_dataset, shuffle=False, **_shared_loader_args)

# Targets equal to 0 ("<pad>") do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=0)

optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [21]:
# train(model, train_iter, optimizer, criterion)
# eval(model, test_iter)

# Save Model

In [22]:
# Path of the baseline checkpoint. The save call below is commented out, so this cell
# only defines the path; the next section loads an already existing file from it.
model_file = os.path.join(model_dir, "base_model.pt")
# torch.save(model.state_dict(), model_file)

## Load Model

In [23]:
# Rebuild the same architecture (including the DataParallel wrapper, so the parameter
# names match the saved state dict) and restore the baseline weights.
model = Net(vocab_size=len(tag2idx))
model.to(device)
model = nn.DataParallel(model)
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written on a GPU runtime also loads on a CPU-only runtime.
model.load_state_dict(torch.load(model_file, map_location=device))
# wsj_precision_value, wsj_recall_value, wsj_f1_value = eval(model, test_iter)

<All keys matched successfully>

In [24]:
# wsj_precision_value, wsj_recall_value, wsj_f1_value

# Self Training

In [25]:
def filter_tag(process_words, process_tags, label_tags_set=wsj_tags):
  """Remove tokens whose tag is unknown, and sentences that end up empty.

  Args:
    process_words: list of sentences (lists of words).
    process_tags: list of tag sequences parallel to ``process_words``.
    label_tags_set: collection of accepted tags (defaults to the WSJ tag vocabulary).

  Returns:
    (new_words, new_tags): the filtered sentences and their tags. The number of
    surviving sentences is printed as a side effect.
  """
  new_words, new_tags = [], []
  for words, tags in zip(process_words, process_tags):
    # Positions whose tag belongs to the accepted vocabulary.
    keep = [pos for pos, tag in enumerate(tags) if tag in label_tags_set]
    if not keep:
      continue
    new_words.append([words[pos] for pos in keep])
    new_tags.append([tags[pos] for pos in keep])
  print("after filter tag", len(new_words))
  return new_words, new_tags

In [26]:
file_name_lst = ["answers", "emails", "newsgroups", "reviews", "weblogs"]

In [27]:
# Target domain for self-training and the CoNLL files of its dev / test splits.
domain = "answers"
domain_dir = os.path.join(data_dir, "pos_fine", domain)
domain_dev_file, domain_test_file = [
    os.path.join(domain_dir, "gweb-{}-{}.conll".format(domain, split))
    for split in ("dev", "test")
]

In [28]:
# Load the target-domain dev/test splits, then drop tokens whose tag is outside the WSJ
# tag vocabulary (filter_tag's default). The *_word_lst / *_tag_lst names are rebound to
# the filtered versions, so re-running only the last two lines filters already-filtered data.
domain_dev_word_lst, domain_dev_tag_lst, domain_dev_tag_set = read_data(domain_dev_file)
domain_test_word_lst, domain_test_tag_lst, domain_test_tag_set = read_data(domain_test_file)
domain_dev_word_lst, domain_dev_tag_lst = filter_tag(domain_dev_word_lst, domain_dev_tag_lst)  
domain_test_word_lst, domain_test_tag_lst = filter_tag(domain_test_word_lst, domain_test_tag_lst)

The number of samples: 1745
The number of tags 49
The number of samples: 1744
The number of tags 50
after filter tag 1713
after filter tag 1723


In [29]:
# One history list per metric; every evaluation on the target-domain test set appends to them.
domain_precision_value_lst, domain_recall_value_lst, domain_f1_value_lst = [], [], []

In [30]:
# Score the current model on the target-domain test split; this becomes entry 0
# of the metric histories.
domain_test_dataset = PosDataset(domain_test_word_lst, domain_test_tag_lst)
domain_test_iter = data.DataLoader(
    domain_test_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=1,
    collate_fn=pad,
)

(domain_precision_value,
 domain_recall_value,
 domain_f1_value) = eval(model, domain_test_iter)

domain_f1_value_lst.append(domain_f1_value)
domain_recall_value_lst.append(domain_recall_value)
domain_precision_value_lst.append(domain_precision_value)

acc=0.93
classification_report               precision    recall  f1-score   support

           1       0.64      0.88      0.74         8
           2       0.82      0.41      0.55        44
           3       1.00      0.87      0.93       987
           4       0.99      0.98      0.99       108
           5       0.97      0.98      0.98       115
           6       1.00      0.97      0.99      1600
           7       0.20      0.73      0.31        48
           8       0.00      0.00      0.00         4
           9       0.99      0.98      0.99      1086
          10       0.94      0.98      0.96       386
          11       0.97      0.99      0.98      2229
          12       0.98      0.95      0.97        61
          13       0.27      0.50      0.35        18
          14       0.36      0.75      0.49        28
          15       0.91      0.97      0.94      2566
          16       0.92      0.81      0.86      1511
          17       0.81      0.89      0.84       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
class PosDataset_new(data.Dataset):
    """Dataset over sequences that are returned as stored (no tokenization)."""

    def __init__(self, word_lst, tag_lst):
        """Keep references to the two parallel lists of sequences."""
        self.word_lst = word_lst
        self.tag_lst = tag_lst

    def __len__(self):
        """Number of stored sequences."""
        return len(self.word_lst)

    def __getitem__(self, idx):
        """Return ``(words, tags, seqlen)`` for entry ``idx``; both sequences must have equal length."""
        words = self.word_lst[idx]
        tags = self.tag_lst[idx]
        assert len(words)==len(tags)
        return words, tags, len(words)

def pad_new(batch):
    """Collate ``(ids, tag_ids, seqlen)`` samples, right-padding both id lists with 0.

    Returns:
        (x, y, seqlens): two LongTensors padded to the largest ``seqlen`` in the
        batch, and the list of the individual ``seqlen`` values.
    """
    seqlens = [sample[-1] for sample in batch]
    longest = np.array(seqlens).max()

    def right_padded(position):
        # Extend every list of the chosen field with zeros (0: <pad>) up to `longest`.
        return [sample[position] + [0] * (longest - len(sample[position])) for sample in batch]

    x = torch.LongTensor(right_padded(0))
    y = torch.LongTensor(right_padded(1))
    return x, y, seqlens

def train_new(model, iterator, optimizer, criterion):
    """One training pass over an iterator that yields ``(x, y, seqlens)`` batches.

    Same procedure as ``train``, but for the 3-field batches produced by ``pad_new``.
    """
    model.train()
    step = 0
    for x, y, _seqlens in iterator:
        optimizer.zero_grad()

        logits, y, _ = model(x, y)  # logits: (N, T, VOCAB), y: (N, T)
        flat_logits = logits.view(-1, logits.shape[-1])  # (N*T, VOCAB)
        flat_targets = y.view(-1)  # (N*T,)

        loss = criterion(flat_logits, flat_targets)
        loss.backward()
        optimizer.step()

        # Progress report every 10th batch.
        if step % 10 == 0:
            print("step: {}, loss: {}".format(step, loss.item()))
        step += 1

In [32]:
def gen_pseudo_data(model, domain_dev_iter, topn=300, initial=True):
  """Pseudo-label a pool of sequences and split it by prediction confidence.

  Every sequence is scored with the mean, over the positions where ``y`` is
  non-zero, of the highest softmax probability at that position. The ``topn``
  highest-scoring sequences are returned as new training data, the rest as the
  remaining pool.

  Args:
    model: called as ``model(x, y)``; must return ``(logits, y, y_hat)``.
    domain_dev_iter: iterable of batches. With ``initial=True`` the 6-field batches
      of ``pad`` are expected, otherwise the 3-field batches of ``pad_new``.
    topn: number of sequences to select.
    initial: selects the batch layout (see above).

  Returns:
    (new_train_x, new_train_y, remain_train_x, remain_train_y): lists of id lists.
    The ``x`` entries are the (padded) input ids, the ``y`` entries the predicted
    tag ids at every position of the padded sequence.

  NOTE(review): ``y`` is used only as a mask. The self-training loop feeds the
  ``remain_train_y`` predictions back in as ``y`` in later rounds, so the mask then
  covers every position with a non-zero *prediction* (possibly padding and
  non-head pieces) -- confirm this is intended.
  """
  model.eval()

  mean_probs = []  # one Python float per sequence
  new_x_lst = []
  new_y_lst = []

  with torch.no_grad():
    for batch in domain_dev_iter:
      if initial:
        _, x, _, _, y, _ = batch
      else:
        x, y, _ = batch

      logits, _, y_hat = model(x, y)  # logits: (N, T, VOCAB), y_hat: (N, T)

      # Confidence of the predicted tag at every position.
      max_prob = torch.amax(torch.softmax(logits, dim=2), dim=2)

      # Rank by mean probability over the masked positions. The count is clamped to 1
      # so that a sequence with an empty mask scores 0.0 instead of NaN (0/0), which
      # would make the sort order below undefined.
      mask = y.bool().to(max_prob.device)
      sen_len = mask.sum(dim=1).clamp(min=1)
      mean_prob = (mask * max_prob).sum(dim=1) / sen_len
      # Plain floats: cheap to sort and no tensors are kept alive on the device.
      mean_probs.extend(mean_prob.tolist())

      new_x_lst.extend(x.tolist())
      new_y_lst.extend(y_hat.tolist())

  # Highest score first; ties are broken by the higher position in the pool.
  ind = sorted(range(len(mean_probs)), key=lambda k: (mean_probs[k], k), reverse=True)

  select_ind = ind[: topn]
  not_select_ind = ind[topn: ]

  new_train_x = [new_x_lst[k] for k in select_ind]
  new_train_y = [new_y_lst[k] for k in select_ind]

  remain_train_x = [new_x_lst[k] for k in not_select_ind]
  remain_train_y = [new_y_lst[k] for k in not_select_ind]

  return new_train_x, new_train_y, remain_train_x, remain_train_y

In [33]:
# Self-training schedule: every round moves the `topn` most confident sentences of the
# target-domain dev pool (with the model's own predicted tags) into the training data,
# trains a freshly initialised Net on WSJ + those sentences, and evaluates it on the
# target-domain test set.
factor_list = [1, 2, 5, 10, 20]
factor = factor_list[4] #  to be modified
# topn is `factor` percent of the initial pool. It is clamped to >= 1: with topn == 0
# the pool would never shrink and the `while` condition below would stay true forever.
topn = max(1, round(factor * len(domain_dev_word_lst) / 100))
i = 0
while len(domain_dev_word_lst) >= topn:
  i += 1
  print("\nLoop", i)
  print("domain_dev_word_lst", len(domain_dev_word_lst))

  if i == 1:
    # Round 1: the pool still holds raw words/tags and must be tokenized by PosDataset.
    domain_dev_dataset = PosDataset(domain_dev_word_lst, domain_dev_tag_lst)

    domain_dev_iter = data.DataLoader(dataset=domain_dev_dataset,
                                batch_size=8,
                                shuffle=False,
                                num_workers=1,
                                collate_fn=pad)
  else:
    # Later rounds: the pool holds the id lists returned by gen_pseudo_data.
    domain_dev_dataset = PosDataset_new(domain_dev_word_lst, domain_dev_tag_lst)

    domain_dev_iter = data.DataLoader(dataset=domain_dev_dataset,
                                batch_size=8,
                                shuffle=True,
                                num_workers=1,
                                collate_fn=pad_new)

  initial = (i == 1)
  # The pool variables are rebound to whatever was NOT selected in this round.
  top_words_ids, top_tags_ids, domain_dev_word_lst, domain_dev_tag_lst = gen_pseudo_data(model, domain_dev_iter, topn, initial)

  # Revert ids to words
  top_words = []
  top_tags = []
  for t in range(len(top_words_ids)):
    word_ids = tokenizer.convert_ids_to_tokens(top_words_ids[t])
    tag_ids = list(map(idx2tag.get, top_tags_ids[t]))
    words = []
    tags = []
    for k, w in enumerate(word_ids):
      if w == '[CLS]':
        pass
      elif w == '[SEP]':
        # Everything after [SEP] is batch padding.
        break
      elif w.startswith("##") and words:
        # WordPiece continuation: glue it back onto the word it belongs to and keep
        # only the tag predicted for the word's first piece. Treating "##xyz" as a
        # word of its own would pair it with a prediction from a position the loss
        # never trained (non-head targets are "<pad>") and re-tokenize it wrongly.
        # NOTE(review): pieces split off without a "##" prefix (e.g. punctuation
        # inside a word) cannot be recognised here and still become separate words.
        words[-1] += w[2:]
      else:
        words.append(w)
        tags.append(tag_ids[k])
    top_words.append(words)
    top_tags.append(tags)

  new_train_dataset = PosDataset(wsj_train_word_lst+top_words, wsj_train_tag_lst+top_tags)
  new_train_iter = data.DataLoader(dataset=new_train_dataset,
                              batch_size=8,
                              shuffle=True,
                              num_workers=1,
                              collate_fn=pad)

  print("Train from scratch...")
  model = Net(vocab_size=len(tag2idx))
  model.to(device)
  model = nn.DataParallel(model)

  optimizer = optim.Adam(model.parameters(), lr = 0.0001)
  criterion = nn.CrossEntropyLoss(ignore_index=0)

  train(model, new_train_iter, optimizer, criterion)

  domain_precision_value, domain_recall_value, domain_f1_value = eval(model, domain_test_iter)
  domain_precision_value_lst.append(domain_precision_value)
  domain_recall_value_lst.append(domain_recall_value)
  domain_f1_value_lst.append(domain_f1_value)


Loop 1
domain_dev_word_lst 1713
Train from scratch...
step: 0, loss: 3.873807430267334
step: 10, loss: 1.7628498077392578
step: 20, loss: 0.8586282730102539
step: 30, loss: 0.45573461055755615
step: 40, loss: 0.3006611168384552
step: 50, loss: 0.23682256042957306
step: 60, loss: 0.2827904522418976
step: 70, loss: 0.14820179343223572
step: 80, loss: 0.2087739259004593
step: 90, loss: 0.07022108137607574
step: 100, loss: 0.2536206543445587
step: 110, loss: 0.16104190051555634
step: 120, loss: 0.10658647865056992
step: 130, loss: 0.12622720003128052
step: 140, loss: 0.19432146847248077
step: 150, loss: 0.07479424774646759
step: 160, loss: 0.05321161076426506
step: 170, loss: 0.07537906616926193
step: 180, loss: 0.08082715421915054
step: 190, loss: 0.1110253855586052
step: 200, loss: 0.18054001033306122
step: 210, loss: 0.1799662709236145
step: 220, loss: 0.12505507469177246
step: 230, loss: 0.13595665991306305
step: 240, loss: 0.1348154991865158
step: 250, loss: 0.14813962578773499
step:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train from scratch...
step: 0, loss: 4.006485462188721
step: 10, loss: 1.6920629739761353
step: 20, loss: 0.5858360528945923
step: 30, loss: 0.3194621503353119
step: 40, loss: 0.3381424844264984
step: 50, loss: 0.36956024169921875
step: 60, loss: 0.18898659944534302
step: 70, loss: 0.2660520076751709
step: 80, loss: 0.21164844930171967
step: 90, loss: 0.07743684947490692
step: 100, loss: 0.1992431879043579
step: 110, loss: 0.17276734113693237
step: 120, loss: 0.12371756136417389
step: 130, loss: 0.1008838638663292
step: 140, loss: 0.18373408913612366
step: 150, loss: 0.07199489325284958
step: 160, loss: 0.14443683624267578
step: 170, loss: 0.15286271274089813
step: 180, loss: 0.12981636822223663
step: 190, loss: 0.15759624540805817
step: 200, loss: 0.12324119359254837
step: 210, loss: 0.1274838149547577
step: 220, loss: 0.059503719210624695
step: 230, loss: 0.06552645564079285
step: 240, loss: 0.1769491732120514
step: 250, loss: 0.059940073639154434
step: 260, loss: 0.1874794363975525


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train from scratch...
step: 0, loss: 3.854783535003662
step: 10, loss: 1.9920305013656616
step: 20, loss: 0.8530915379524231
step: 30, loss: 0.23796015977859497
step: 40, loss: 0.2345523089170456
step: 50, loss: 0.19618573784828186
step: 60, loss: 0.3359408378601074
step: 70, loss: 0.08745300024747849
step: 80, loss: 0.2768062949180603
step: 90, loss: 0.17534029483795166
step: 100, loss: 0.1951582133769989
step: 110, loss: 0.22620664536952972
step: 120, loss: 0.13534049689769745
step: 130, loss: 0.16895554959774017
step: 140, loss: 0.10525413602590561
step: 150, loss: 0.2207639068365097
step: 160, loss: 0.12419281899929047
step: 170, loss: 0.11160830408334732
step: 180, loss: 0.07408983260393143
step: 190, loss: 0.1491713523864746
step: 200, loss: 0.13655757904052734
step: 210, loss: 0.04508701339364052
step: 220, loss: 0.3360525667667389
step: 230, loss: 0.25552675127983093
step: 240, loss: 0.0710805281996727
step: 250, loss: 0.17186522483825684
step: 260, loss: 0.0986037626862526
ste

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train from scratch...
step: 0, loss: 3.8841216564178467
step: 10, loss: 2.028573989868164
step: 20, loss: 0.7612056732177734
step: 30, loss: 0.5227319598197937
step: 40, loss: 0.3425378203392029
step: 50, loss: 0.2072959989309311
step: 60, loss: 0.20723769068717957
step: 70, loss: 0.25055497884750366
step: 80, loss: 0.21760807931423187
step: 90, loss: 0.1872178167104721
step: 100, loss: 0.14835122227668762
step: 110, loss: 0.2042236477136612
step: 120, loss: 0.1292334944009781
step: 130, loss: 0.1586228609085083
step: 140, loss: 0.08138562738895416
step: 150, loss: 0.18242476880550385
step: 160, loss: 0.04360223934054375
step: 170, loss: 0.12963241338729858
step: 180, loss: 0.1031276285648346
step: 190, loss: 0.2945893704891205
step: 200, loss: 0.16003382205963135
step: 210, loss: 0.1792760193347931
step: 220, loss: 0.12115529179573059
step: 230, loss: 0.08919410407543182
step: 240, loss: 0.09236670285463333
step: 250, loss: 0.01792966015636921
step: 260, loss: 0.1289737969636917
step:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [34]:
# ids = []
# for w in domain_dev_word_lst[10]:
#   tokens = tokenizer.tokenize(w) if w not in ("[CLS]", "[SEP]") else [w]
#   xx = tokenizer.convert_tokens_to_ids(tokens)
#   ids.append(xx[0])
# print(ids)

In [35]:
# token = tokenizer.convert_ids_to_tokens(ids)
# token

In [36]:
# print(new_train_x[11])
# print(tokenizer.convert_ids_to_tokens(new_train_x[11]))
# print(new_train_y[11])
# print(list(map(idx2tag.get, new_train_y[11])))

In [37]:
import pandas as pd

In [38]:
# Long-format table with one row per (metric, evaluation); "Loop" 0 is the baseline
# evaluation that precedes the self-training rounds.
_n_rounds = len(domain_precision_value_lst)
test_metric = pd.DataFrame({
    "Loop": [*range(_n_rounds)] * 3,
    "metric": [name for name in ("precision", "recall", "f1") for _ in range(_n_rounds)],
    "value": domain_precision_value_lst + domain_recall_value_lst + domain_f1_value_lst,
})

In [39]:
import seaborn as sns
import matplotlib.pyplot as plt

In [40]:
import plotly
import plotly.express as px
import plotly.graph_objects as go

In [41]:
# One line per metric across the self-training rounds.
fig_title = "topn: {}, domain: {}".format(topn, domain)
fig = px.line(
    test_metric,
    x="Loop",
    y="value",
    color='metric',
    markers=True,
    title=fig_title,
)
fig.show()

In [42]:
# Run tag used for the result CSV (and, in the next cell, the model file).
# "\%" is not a valid escape sequence and triggers a warning on newer Python versions;
# the backslash is written explicitly as "\\" so the resulting name stays exactly what
# earlier runs produced ("topn_<factor>\%_domain_<domain>").
file_name = f"topn_{factor}\\%_domain_{domain}"
scratch_model_dir = "/content/drive/MyDrive/Colab Notebooks/Capstone/scratch_fixed/model"
log_model_dir = "/content/drive/MyDrive/Colab Notebooks/Capstone/scratch_fixed/result"
# to_csv does not create missing folders; make sure the target exists first.
os.makedirs(log_model_dir, exist_ok=True)
test_metric.to_csv(os.path.join(log_model_dir, file_name) + '.csv')

In [43]:
# Persist the weights of the last self-training round. torch.save does not create
# missing folders, so make sure the target directory exists first.
os.makedirs(scratch_model_dir, exist_ok=True)
torch.save(model.state_dict(), os.path.join(scratch_model_dir, file_name))