In [1]:

from transformers import BertTokenizerFast
import numpy as np
import torch
# Training hyperparameters shared by the cells below.
learning_rate = 5e-6
batch_size = 192
n_epochs = 10

# The tokenizer is read from a local checkpoint directory. To use an online
# BERT model instead, try replacing the path with 'ckip/bert-base-chinese'.
tokenizer = BertTokenizerFast.from_pretrained('../bert-base-chinese')

# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from collections import namedtuple
def text2token(txt, max_length=64):
  """Encode one piece of text with the notebook-level ``tokenizer``.

  Args:
    txt: Text to tokenize.
    max_length: Length the encoding is padded or truncated to. Defaults to
      64, the value that used to be hard-coded here.

  Returns:
    The mapping produced by ``tokenizer`` (called with ``return_tensors='pt'``),
    with every value replaced by its element at index 0, so each field holds
    the single encoded sequence rather than a batch containing it.
  """
  d = tokenizer(txt, padding="max_length", truncation=True, return_tensors='pt', max_length=max_length)
  # Iterate over a snapshot of the keys: assigning into a mapping while
  # walking its own live items() view is fragile.
  for key in list(d.keys()):
    d[key] = d[key][0]
  return d
# One training example: the tokenized text (``token``) and its class label (``label``).
LabeledData = namedtuple("LabeledData", ("token", "label"))

In [3]:
def load_raw_to_data(path: str, label: int, encoding: str = "utf-8"):
  """Read a text file with one sample per line and label every sample.

  Args:
    path: File to read.
    label: Class id attached to every sample from this file.
    encoding: Text encoding of the file. Defaults to UTF-8 so the result does
      not depend on the platform's locale encoding.

  Returns:
    A list of ``LabeledData`` items, one per non-blank line, each holding the
    ``text2token`` encoding of the stripped line and a shared label tensor.
  """
  label_tensor = torch.tensor(label)
  ret = []
  with open(path, encoding=encoding) as f:
    # Stream the file line by line instead of materialising readlines().
    for line in f:
      text = line.strip()
      if not text:
        # A blank line would otherwise become an all-padding labelled example.
        continue
      ret.append(LabeledData(text2token(text), label_tensor))
  return ret

# Every line of oral.txt is labelled 1; every line of Literature.txt is labelled 0.
t_c = load_raw_to_data(path='oral.txt', label=1)
t_m = load_raw_to_data(path='Literature.txt', label=0)
  

In [4]:
from torch.utils.data import DataLoader
import torch
# The last `holdout` examples of each class are kept for evaluation; everything
# before them is used for training. Only the training loader shuffles.
holdout = 200
train_examples = t_c[:-holdout] + t_m[:-holdout]
eval_examples = t_c[-holdout:] + t_m[-holdout:]

train_dataloader = DataLoader(train_examples, batch_size=batch_size, shuffle=True)
eval_dataloader = DataLoader(eval_examples, batch_size=batch_size)

In [5]:
from transformers import AutoModelForSequenceClassification

# Load the local checkpoint as a two-label sequence classifier and move it to
# the selected device. Module.to() returns the module itself, so chaining it
# into the assignment leaves no trailing expression for the cell to echo.
# NOTE(review): the tokenizer is loaded from '../bert-base-chinese' while these
# weights come from '../bert-base-cantonese' — confirm both share a vocabulary.
model = AutoModelForSequenceClassification.from_pretrained(
    "../bert-base-cantonese",
    num_labels=2,
).to(device)

Some weights of the model checkpoint at ../bert-base-cantonese were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-cantonese

In [6]:
from torch import nn
from torch.optim import AdamW
from transformers import get_scheduler
import torch

# AdamW over every model parameter at the configured learning rate.
optimizer = AdamW(model.parameters(), lr=learning_rate)

# The scheduler is stepped once per batch in the training loop, so the
# schedule length is (batches per epoch) x (number of epochs). No warm-up.
num_training_steps = len(train_dataloader) * n_epochs
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

def bce_loss(logits, labels):
    """Return the negated mean probability assigned to each example's true class.

    Args:
        logits: Tensor indexed as ``[example, class]`` with the two class
            scores on axis 1.
        labels: One entry per example, either boolean or integer 0/1; any
            non-zero entry selects class 1.

    Returns:
        Scalar tensor equal to ``-mean(p_true_class)``.

    Note:
        Despite the name, no logarithm is taken, so this is not a
        cross-entropy; the objective is kept as it was written.
    """
    # Normalise over the class axis explicitly; relying on the implicit
    # softmax dimension is deprecated.
    probs = nn.functional.softmax(logits, dim=1)
    # Build a boolean mask first: applying `~` directly to integer labels is a
    # bitwise NOT (~0 == -1, ~1 == -2), which corrupts the class-0 term.
    is_class_1 = labels != 0
    true_class_prob = probs[:, 0] * ~is_class_1 + probs[:, 1] * is_class_1
    return -true_class_prob.mean()

def forward(params, key, inputs, labels, mask):
    """Run the notebook-level ``model`` on one batch and return its ``bce_loss``.

    ``model`` is used as a PyTorch module elsewhere in this notebook (it is
    moved with ``.to(device)`` and optimised through ``model.parameters()``),
    so it is called with the token ids and attention mask only. The previous
    call also passed ``params=``, ``train=True`` and ``dropout_rng=``, which
    are Flax-style keywords; for a PyTorch module, dropout is governed by
    ``model.train()`` / ``model.eval()`` instead.

    Args:
        params: Unused; kept so the signature stays compatible.
        key: Unused; kept so the signature stays compatible.
        inputs: Token ids passed to the model as ``input_ids``.
        labels: Targets forwarded to ``bce_loss``.
        mask: Attention mask passed to the model as ``attention_mask``.

    Returns:
        The value of ``bce_loss`` for the model's logits and ``labels``.
    """
    outputs = model(input_ids=inputs, attention_mask=mask)
    return bce_loss(outputs.logits, labels)


In [7]:
from tqdm import tqdm

# Put the model in training mode, then make n_epochs passes over the training batches.
model.train()
for epoch in range(n_epochs):
    progress = tqdm(train_dataloader, leave=True)
    for batch in progress:
        # Move this batch's tensors onto the model's device.
        tokens = batch.token
        ids = tokens['input_ids'].to(device)
        mask = tokens['attention_mask'].to(device)
        targets = batch.label.to(device)

        # Passing labels makes the model return its own loss with the logits.
        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss = outputs.loss
        loss.backward()

        # Apply the update, advance the learning-rate schedule, then clear
        # the gradients ready for the next batch.
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        progress.set_description(f'Epoch {epoch}')
        progress.set_postfix(loss=loss.item())


Epoch 0: 100%|██████████| 57/57 [19:08<00:00, 20.15s/it, loss=0.252]
Epoch 1: 100%|██████████| 57/57 [19:17<00:00, 20.31s/it, loss=0.114] 
Epoch 2: 100%|██████████| 57/57 [19:29<00:00, 20.52s/it, loss=0.066] 
Epoch 3: 100%|██████████| 57/57 [19:13<00:00, 20.24s/it, loss=0.0175]
Epoch 4: 100%|██████████| 57/57 [19:37<00:00, 20.65s/it, loss=0.0144]
Epoch 5: 100%|██████████| 57/57 [19:31<00:00, 20.56s/it, loss=0.0156]
Epoch 6: 100%|██████████| 57/57 [19:28<00:00, 20.51s/it, loss=0.0264]
Epoch 7: 100%|██████████| 57/57 [19:31<00:00, 20.55s/it, loss=0.00772]
Epoch 8: 100%|██████████| 57/57 [19:32<00:00, 20.57s/it, loss=0.127]  
Epoch 9: 100%|██████████| 57/57 [19:41<00:00, 20.73s/it, loss=0.0131] 


In [8]:
# Persist the fine-tuned classifier to a local directory.
# NOTE(review): the tokenizer is not saved alongside it — confirm that is intended.
output_dir = "yue-classifier-can"
model.save_pretrained(output_dir)