In [None]:
import torch
import time
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.utils.data.dataloader import DataLoader
from transformers import BertTokenizer,AdamW, get_linear_schedule_with_warmup
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

from dataset import SeqDataset
from model import BertClassifier
from metric import binary_cls_metrics, binary_cls_log
from train_utils import ModelSave, load_checkpoint
import os
# Expose only GPU index 0 to this process, then pick the compute device
# that every tensor and the model will be moved to.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
if not torch.cuda.is_available():
    # CPU fallback: everything still runs, just slower.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

In [None]:
def load(file_name):
    """Read one JSON-lines file from the ChnSentiCorp sample directory.

    Args:
        file_name: File stem (no extension); the file read is
            ``./trainsample/chnsenticorp/<file_name>.txt``.

    Returns:
        list: One decoded JSON value per non-blank line, in file order.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    import json
    file = './trainsample/chnsenticorp/{}.txt'
    data = []
    # Decode explicitly as UTF-8: the corpus is Chinese text and the platform
    # default encoding (e.g. cp1252/gbk on Windows) would mis-decode or fail.
    with open(file.format(file_name), 'r', encoding='utf-8') as f:
        # Stream line by line instead of materialising the whole file.
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank / trailing empty lines instead of crashing
                # in json.loads("").
                continue
            data.append(json.loads(line))
    return data

In [None]:
# ---------------------------------------------------------------------------
# Configuration: every value a reader might want to tune lives here.
# ---------------------------------------------------------------------------
SEED = 42                      # fixed seed so shuffling / weight init are repeatable
torch.manual_seed(SEED)

batch_size = 16
max_seq_len = 128
label_size = 2
pretrain_model = 'bert-base-chinese'
train_params = {
    'lr': 5e-5,
    'eps': 1e-10,
    'epoch_size': 5,
    'batch_size': batch_size,
    'max_seq_len': max_seq_len,
    'pretrain_model': pretrain_model
}

model_params = {
    'cls_dropout': 0.7,
    'label_size': label_size,
    'freeze_bert': False
}

# ---------------------------------------------------------------------------
# Data: tokenizer, datasets and loaders.
# ---------------------------------------------------------------------------
tokenizer = BertTokenizer.from_pretrained(pretrain_model, do_lower_case=True)

# SeqDataset is project code; it receives the split name and the `load`
# callable defined earlier in this notebook.
train_dataset = SeqDataset('train', max_seq_len, tokenizer, load)
valid_dataset = SeqDataset('valid', max_seq_len, tokenizer, load)

# Shuffle training examples; keep validation order fixed.
train_sampler = RandomSampler(train_dataset)
valid_sampler = SequentialSampler(valid_dataset)

train_loader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
valid_loader = DataLoader(valid_dataset, sampler=valid_sampler, batch_size=batch_size)

# ---------------------------------------------------------------------------
# Model, optimizer, LR schedule and loss.
# ---------------------------------------------------------------------------
# Instantiate Bert Classifier
model = BertClassifier(**model_params)
model.to(device)

optimizer = AdamW(model.parameters(),
                  lr=train_params['lr'],    # fine-tuning learning rate (set explicitly above)
                  eps=train_params['eps']   # Adam epsilon (set explicitly above)
                  )

# Record step counts. These must be assigned one after the other: building
# both entries inside a single dict literal would read
# train_params['num_train_steps'] before it exists and raise KeyError.
train_params['num_train_steps'] = len(train_loader)   # optimizer steps per epoch
train_params['total_train_steps'] = train_params['num_train_steps'] * train_params['epoch_size']

# Set up the learning rate scheduler: linear decay over all training steps,
# with no warmup phase.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=train_params['total_train_steps'])
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Checkpoint / TensorBoard output directory.
# NOTE(review): the directory name is spelled 'chnsenticrop' (not
# 'chnsenticorp'); left unchanged because other code may load from this path.
CKPT = './checkpoint/chnsenticrop'
saver = ModelSave(CKPT, continue_train=False)
tb = SummaryWriter(CKPT)

global_step = 0   # counts optimizer steps across all epochs (TensorBoard x-axis)
saver.init()

# try/finally guarantees the SummaryWriter is flushed and closed even when
# training is interrupted or raises.
try:
    for epoch_i in range(train_params['epoch_size']):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        print(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10}  | {'Elapsed':^9}")
        print("-"*60)

        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()
        # total_loss accumulates over the epoch; batch_loss / batch_counts
        # accumulate only since the last progress print.
        total_loss, batch_loss, batch_counts = 0, 0, 0

        model.train()
        for step, batch in enumerate(train_loader):
            global_step += 1
            batch_counts += 1

            # Forward propagate
            model.zero_grad()
            # Relies on the batch dict yielding its values in exactly this
            # order — NOTE(review): confirm against SeqDataset.
            input_ids, token_type_ids, attention_mask, label_ids = tuple(t.to(device) for t in batch.values())
            logits = model(input_ids, token_type_ids, attention_mask)

            # Visualize: log the model graph once, classifier weights every step
            if global_step == 1:
                tb.add_graph(model, (input_ids, token_type_ids, attention_mask))
            tb.add_histogram('weights/classifier', model.classifier[0].weight, global_step=global_step)

            # Calculate loss
            loss = loss_fn(logits, label_ids)
            batch_loss += loss.item()
            total_loss += loss.item()

            # Back propagate, clip the gradient norm to 1.0, then update
            # the weights and the learning rate.
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

            # Print the loss values and time elapsed for every 20 batches
            # (and once more on the final batch of the epoch)
            if (step % 20 == 0 and step != 0) or (step == len(train_loader) - 1):
                # Calculate time elapsed since the previous progress print
                time_elapsed = time.time() - t0_batch

                # Print training results
                print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^9} | {time_elapsed:^9.2f}")
                tb.add_scalar('loss/agg_batch_train', batch_loss / batch_counts, global_step=global_step)
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

        # Epoch Loss: training loss over entire training data
        avg_train_loss = total_loss / len(train_loader)
        print("-"*70)

        # =======================================
        #               Evaluation
        # =======================================
        # NOTE(review): this loop never calls model.eval(); presumably
        # binary_cls_metrics switches modes itself — confirm.
        val_metrics = binary_cls_metrics(model, valid_loader, loss_fn, device)

        # Report train loss and validation metrics for this epoch
        time_elapsed = time.time() - t0_epoch
        # Pass the epoch number as global_step: without it every epoch is
        # written at the same step and the curve collapses to a single point.
        tb.add_scalars('loss/epoch', {'train_loss': avg_train_loss,
                                      'valid_loss': val_metrics['val_loss']},
                       global_step=epoch_i + 1)
        print(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_metrics['val_loss']:^10.6f} | {time_elapsed:^9.2f}")

        binary_cls_log(epoch_i, val_metrics)
        # save model per epoch
        saver(avg_train_loss, val_metrics['val_loss'], epoch_i, model, optimizer, scheduler)
        print("\n")

    # Record the hyper-parameters together with the last epoch's validation
    # metrics.
    tb.add_hparams({
        'lr': train_params['lr'],
        'batch_size': train_params['batch_size'],
        'epoch_size': train_params['epoch_size']
    }, val_metrics)
finally:
    tb.close()