## Set up

### Set up for Colab

In [None]:
# Colab only: mount Google Drive and switch the working directory to the project folder
import os
from google.colab import drive

drive.mount('/content/drive')

root_of_repository = '/content/drive/MyDrive/ADL/Project/'
os.chdir(root_of_repository)

Mounted at /content/drive


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 8.4 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 48.7 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 76.4 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


### Packages

In [1]:
import copy
import gc
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from transformers import BertModel, BertTokenizer, AdamW, logging

logging.set_verbosity_error()

In [2]:
from classifier_utils import SentimentClassifier, train, test
from data_load_utils import convert_text_to_token, genDataLoader

## Data Preprocessing

In [3]:
# Emotion label -> label code; a label's position in the tuple below is its code
LABEL_DICT = {
    emotion: code
    for code, emotion in enumerate(
        ('fear', 'neutral', 'sad', 'surprise', 'angry', 'happy')
    )
}

In [4]:
# Flag read by the fine-tuning cell to pick the checkpoint file name
TESTING = False

# Build one data loader per split, in train -> val -> test order
train_datagen, val_datagen, test_datagen = (
    genDataLoader(split) for split in ('train', 'val', 'test')
)

Processed 2000 sentences for train
Processed 4000 sentences for train
Processed 6000 sentences for train
Processed 8000 sentences for train
Processed 10000 sentences for train
Processed 12000 sentences for train
Processed 14000 sentences for train
Processed 16000 sentences for train
Processed 18000 sentences for train
Processed 20000 sentences for train
Processed 22000 sentences for train
Processed 2000 sentences for val
Processed 4000 sentences for val
Processed 2000 sentences for test
Processed 4000 sentences for test


## Modeling

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
DEVICE

device(type='cuda')


### Load Pretrained Models

- `wwm` marks models that were pretrained with whole word masking on the EXT dataset

In [6]:
# All pretrained models (Chinese) compared in this notebook.
# Maps a display name to (pretrained model path, hidden size); the two tuple
# entries are passed to SentimentClassifier as `pretrain_path` and `hidden_size`.
MODELS_PATHS_UNITS = {
    'BERT': ('bert-base-chinese', 768),
    'BERT-wwm': ('hfl/chinese-bert-wwm-ext', 768),
    'RoBERTa': ('uer/chinese_roberta_L-12_H-768', 768),
    'RoBERTa-wwm': ('hfl/chinese-roberta-wwm-ext', 768),
    'RoBERTa-wwm-large': ('hfl/chinese-roberta-wwm-ext-large', 1024),
    '3L RoBERTa-wwm': ('hfl/rbt3', 768),
    '3L RoBERTa-wwm-large': ('hfl/rbtl3', 1024),
}

In [7]:
# Instantiate one classifier per pretrained checkpoint and move it to DEVICE
models = {}
for model_name, (pretrain_path, hidden_size) in tqdm(MODELS_PATHS_UNITS.items()):
    sentiment_classifier = SentimentClassifier(
        num_classes=6,
        model_name=model_name,
        pretrain_path=pretrain_path,
        hidden_size=hidden_size,
    )
    sentiment_classifier = sentiment_classifier.to(DEVICE)
    models[model_name] = sentiment_classifier
    gc.collect()

  0%|          | 0/7 [00:00<?, ?it/s]

### Fine-tuning

In [None]:
# Fine-tuning configuration
REPLACE_EXIST = False  # Set to True to retrain even if a checkpoint and a log already exist
NUM_EPOCHS = 15
BEST_MODEL_FOLDER = 'result/model/'  # Path to save best model
TRAINING_LOGS_FOLDER = 'result/training/'  # Path to save training logs

# Create the output folders up front: without them the save step fails on a
# fresh checkout only after a model has already been trained for all epochs.
for output_folder in (BEST_MODEL_FOLDER, TRAINING_LOGS_FOLDER):
    os.makedirs(output_folder, exist_ok=True)

# Fine-tune each pretrained model in turn. The loop iterates over a snapshot of
# the keys because entries are deleted from `models` inside the loop.
for model_name in tqdm(list(models.keys())):
    print('-'*10, model_name, '-'*10)
    model_path = f'{BEST_MODEL_FOLDER}best_{model_name}.pth' if not TESTING else f'{BEST_MODEL_FOLDER}best_testing_{model_name}.pth'
    # NOTE(review): unlike model_path, the log path has no TESTING variant, so a
    # TESTING run shares its log file with a full run -- confirm this is intended.
    log_path = f'{TRAINING_LOGS_FOLDER}{model_name}.pickle'
    # Train only when the checkpoint or the log is missing, or when forced.
    if not os.path.exists(model_path) or not os.path.exists(log_path) or REPLACE_EXIST:
        optimizer = AdamW(models[model_name].parameters(), lr=1e-5, eps=1e-8, correct_bias=False, no_deprecation_warning=True)
        training_log = train(models[model_name], model_name, train_datagen, val_datagen, optimizer, DEVICE, NUM_EPOCHS, model_path)
        training_log.to_pickle(log_path)
    else:
        print('already done')
    # Drop the model once it has been handled so its memory can be reclaimed.
    del models[model_name]
    gc.collect()

  0%|          | 0/7 [00:00<?, ?it/s]

---------- 3L RoBERTa-wwm ----------
already done
---------- 3L RoBERTa-wwm-large ----------
already done
---------- BERT-wwm ----------
already done
---------- RoBERTa-wwm ----------
already done
---------- RoBERTa-wwm-large ----------
already done
---------- BERT ----------


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 1 finished, took 568.8s
Test set: Average loss: 0.6438, Accuracy: 3847/5000 (76.9400%), Macro F1: 74.0394%, took 38.2s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 2 finished, took 557.0s
Test set: Average loss: 0.6482, Accuracy: 3841/5000 (76.8200%), Macro F1: 74.1689%, took 38.3s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 3 finished, took 557.4s
Test set: Average loss: 0.7859, Accuracy: 3778/5000 (75.5600%), Macro F1: 72.9767%, took 38.4s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 4 finished, took 557.1s
Test set: Average loss: 0.9499, Accuracy: 3798/5000 (75.9600%), Macro F1: 73.0479%, took 38.7s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 5 finished, took 557.9s
Test set: Average loss: 1.0079, Accuracy: 3798/5000 (75.9600%), Macro F1: 73.0902%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 6 finished, took 557.6s
Test set: Average loss: 1.1057, Accuracy: 3789/5000 (75.7800%), Macro F1: 72.7906%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 7 finished, took 556.7s
Test set: Average loss: 1.2076, Accuracy: 3788/5000 (75.7600%), Macro F1: 72.9567%, took 38.5s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 8 finished, took 556.8s
Test set: Average loss: 1.2467, Accuracy: 3804/5000 (76.0800%), Macro F1: 73.6544%, took 38.5s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 9 finished, took 556.9s
Test set: Average loss: 1.1830, Accuracy: 3795/5000 (75.9000%), Macro F1: 72.2718%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 10 finished, took 556.6s
Test set: Average loss: 1.2481, Accuracy: 3766/5000 (75.3200%), Macro F1: 72.5884%, took 38.5s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 11 finished, took 557.0s
Test set: Average loss: 1.3220, Accuracy: 3730/5000 (74.6000%), Macro F1: 71.7836%, took 38.5s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 12 finished, took 557.0s
Test set: Average loss: 1.4084, Accuracy: 3715/5000 (74.3000%), Macro F1: 70.9934%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 13 finished, took 557.3s
Test set: Average loss: 1.3721, Accuracy: 3797/5000 (75.9400%), Macro F1: 73.3271%, took 38.5s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 14 finished, took 557.1s
Test set: Average loss: 1.4565, Accuracy: 3773/5000 (75.4600%), Macro F1: 72.5723%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 15 finished, took 556.7s
Test set: Average loss: 1.4120, Accuracy: 3774/5000 (75.4800%), Macro F1: 71.6697%, took 38.5s
---------- RoBERTa ----------


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 1 finished, took 564.5s
Test set: Average loss: 0.5989, Accuracy: 3924/5000 (78.4800%), Macro F1: 75.7908%, took 38.5s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 2 finished, took 557.2s
Test set: Average loss: 0.6536, Accuracy: 3835/5000 (76.7000%), Macro F1: 72.9188%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 3 finished, took 557.2s
Test set: Average loss: 0.7756, Accuracy: 3834/5000 (76.6800%), Macro F1: 74.0665%, took 38.5s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 4 finished, took 557.7s
Test set: Average loss: 1.0034, Accuracy: 3816/5000 (76.3200%), Macro F1: 73.5751%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 5 finished, took 558.2s
Test set: Average loss: 1.0204, Accuracy: 3837/5000 (76.7400%), Macro F1: 74.0091%, took 38.7s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 6 finished, took 558.1s
Test set: Average loss: 1.1265, Accuracy: 3761/5000 (75.2200%), Macro F1: 72.3707%, took 38.8s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 7 finished, took 557.3s
Test set: Average loss: 1.1314, Accuracy: 3839/5000 (76.7800%), Macro F1: 73.7076%, took 38.7s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 8 finished, took 557.2s
Test set: Average loss: 1.2747, Accuracy: 3787/5000 (75.7400%), Macro F1: 72.8764%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 9 finished, took 556.7s
Test set: Average loss: 1.2884, Accuracy: 3834/5000 (76.6800%), Macro F1: 73.5238%, took 38.5s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 10 finished, took 557.0s
Test set: Average loss: 1.3396, Accuracy: 3781/5000 (75.6200%), Macro F1: 72.5711%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 11 finished, took 557.3s
Test set: Average loss: 1.3045, Accuracy: 3796/5000 (75.9200%), Macro F1: 72.8343%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 12 finished, took 557.1s
Test set: Average loss: 1.2956, Accuracy: 3818/5000 (76.3600%), Macro F1: 73.5415%, took 38.7s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 13 finished, took 557.1s
Test set: Average loss: 1.3370, Accuracy: 3782/5000 (75.6400%), Macro F1: 72.7186%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 14 finished, took 556.9s
Test set: Average loss: 1.3748, Accuracy: 3815/5000 (76.3000%), Macro F1: 72.9947%, took 38.6s


  0%|          | 0/2846 [00:00<?, ?it/s]

Epoch 15 finished, took 557.3s
Test set: Average loss: 1.3379, Accuracy: 3788/5000 (75.7600%), Macro F1: 73.0581%, took 38.6s
