In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%%capture
!pip install transformers

In [2]:
!nvidia-smi

Wed Jun 28 04:05:13 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
# as Data Scientist
import pandas as pd
import numpy as np

In [19]:
# Torch
import torch
from torch import nn
import torch.nn.functional as F

# Transformers
from transformers import AutoTokenizer, AutoModel

# Utils
from tqdm.auto import tqdm

# Hyperparameters

### Paths

In [6]:
TRAIN_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/train.csv'
VAL_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/val.csv'
TEST_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/test.csv'

# all labels
ALL_LABELS_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/data/labels.csv'

# model path for saving model
MODEL_PATH = '/content/drive/MyDrive/Public/DS102 - Machine Learning/models/'

In [54]:
MAX_SEQUENCE_LENGTH = 200
MODEL_NAME = ''

### Device & torch

In [8]:
torch.cuda.is_available()

True

In [9]:
# device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [10]:
def seed_everything(seed_value):
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

In [11]:
seed_everything(86)

# Load data

In [12]:
# Load data
def load_data(path):
    df = pd.read_csv(path)
    df['information'] = df.description + ' ' + df.requirements
    df.drop(columns=['description', 'requirements', 'industry'], inplace=True)
    df.columns = ['industries', 'information']

    return df

# Create onehot label
def create_onehot(y):
    seperated = y.split(' / ')
    re = np.zeros(NUM_LABELS)
    for i in range(NUM_LABELS):
        if ALL_LABELS[i] in seperated:
            re[i] = 1

    return re

# Reverse to label
def reverse_label(y_onehot):
    label = []
    for i in range(NUM_LABELS):
        if y_onehot == 1:
            label.append(ALL_LABELS[i])

    return ' / '.join(label)

In [13]:
# full = load_data(FULL_PATH)
train_df = load_data(TRAIN_PATH)
val_df = load_data(VAL_PATH)
test_df = load_data(TEST_PATH)

# job labels
ALL_LABELS = pd.read_csv(ALL_LABELS_PATH)['0'].tolist()
NUM_LABELS = len(ALL_LABELS)

# Models

In [57]:
# Simple Neural Net
class MultilabelClassifier_NeuralNet(nn.Module):

    def __init__(self, n_classes=NUM_LABELS, model_name=MODEL_NAME):

        super(MultilabelClassifier_NeuralNet, self).__init__()
        self.n_classes = n_classes

        # Architecture
        self.bert = AutoModel.from_pretrained(model_name, return_dict=True) # Backbone
        self.hidden = nn.Linear(self.bert.config.hidden_size, self.bert.config.hidden_size)
        self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
        self.dropout = nn.Dropout(0.3)

        # Initialization
        nn.init.xavier_uniform_(self.hidden.weight)
        nn.init.xavier_uniform_(self.classifier.weight)

    def forward(self, input_ids, attention_masks):#, token_type_ids):

        # Bert (fine-tuning)
        out = self.bert(input_ids=input_ids, attention_mask=attention_masks)
        pooled_out = torch.mean(out.last_hidden_state, 1)

        # Neural network
        x = self.dropout(pooled_out)
        x = self.hidden(x)
        x = F.relu(x)
        x = self.dropout(x)
        logits = self.classifier(x)

        return logits




# TextCNN
class MultilabelClassifier_TextCNN(nn.Module):

    def __init__(self, n_classes=NUM_LABELS, model_name=MODEL_NAME):

        super(MultilabelClassifier_TextCNN, self).__init__()
        self.n_classes = n_classes

        # Architecture
        self.bert = AutoModel.from_pretrained(model_name, return_dict=True) # Backbone
        self.cnn = nn.Conv1d(self.bert.config.hidden_size, 256, kernel_size=3, padding=1)
        self.classifier = nn.Linear(256, self.n_classes)
        self.dropout = nn.Dropout(0.3)

        # Initialization
        nn.init.xavier_uniform_(self.cnn.weight)
        nn.init.constant_(self.cnn.bias, 0)
        nn.init.xavier_uniform_(self.classifier.weight)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(self, input_ids, attention_masks):#, token_type_ids):

        # Bert (fine-tuning)
        out = self.bert(input_ids=input_ids, attention_mask=attention_masks)
        word_embeddings = out.last_hidden_state.permute(0, 2, 1) # Reshape for CNN input

        # CNN
        cnn_out = self.cnn(word_embeddings)
        cnn_out = F.relu(cnn_out)
        pooled_out = F.max_pool1d(cnn_out, kernel_size=cnn_out.size(2)).squeeze(2)

        # Dropout
        x = self.dropout(pooled_out)

        # Classifier
        logits = self.classifier(x)

        return logits




# Bi-LSTM
class MultilabelClassifier_BiLSTM(nn.Module):

    def __init__(self, n_classes=NUM_LABELS, model_name=MODEL_NAME):

        super(MultilabelClassifier_BiLSTM, self).__init__()
        self.n_classes = n_classes

        # Architecture
        self.bert = AutoModel.from_pretrained(model_name, return_dict=True) # Backbone
        self.b_lstm = nn.LSTM(self.bert.config.hidden_size, self.bert.config.hidden_size, batch_first=True, bidirectional=True)
        self.classifier = nn.Linear(2 * self.b_lstm.hidden_size, self.n_classes)
        self.dropout = nn.Dropout(0.3)

        # Initialization
        nn.init.xavier_uniform_(self.b_lstm.weight_ih_l0)
        nn.init.xavier_uniform_(self.b_lstm.weight_hh_l0)
        nn.init.constant_(self.b_lstm.bias_ih_l0, 0)
        nn.init.constant_(self.b_lstm.bias_hh_l0, 0)

    def forward(self, input_ids, attention_masks):#, token_type_ids):

        # Bert (fine-tuning)
        out = self.bert(input_ids=input_ids, attention_mask=attention_masks)
        pooled_out = torch.mean(out.last_hidden_state, 1)

        # Bi-LSTM
        lstm_out, _ = self.b_lstm(pooled_out.unsqueeze(0))
        lstm_out = lstm_out.squeeze(0)

        # Dropout
        x = self.dropout(lstm_out)

        # Classifier
        logits = self.classifier(x)

        return logits




# Bi-GRU
class MultilabelClassifier_BiGRU(nn.Module):

    def __init__(self, n_classes=NUM_LABELS, model_name=MODEL_NAME):

        super(MultilabelClassifier_BiGRU, self).__init__()
        self.n_classes = n_classes

        # Architecture
        self.bert = AutoModel.from_pretrained(model_name, return_dict=True) # Backbone
        self.b_gru = nn.GRU(self.bert.config.hidden_size, self.bert.config.hidden_size, batch_first=True, bidirectional=True)
        self.classifier = nn.Linear(2 * self.b_gru.hidden_size, self.n_classes)
        self.dropout = nn.Dropout(0.3)

        # Initialization
        nn.init.xavier_uniform_(self.b_gru.weight_ih_l0)
        nn.init.xavier_uniform_(self.b_gru.weight_hh_l0)
        nn.init.constant_(self.b_gru.bias_ih_l0, 0)
        nn.init.constant_(self.b_gru.bias_hh_l0, 0)

    def forward(self, input_ids, attention_masks):#, token_type_ids):

        # Bert (fine-tuning)
        out = self.bert(input_ids=input_ids, attention_mask=attention_masks)
        pooled_out = torch.mean(out.last_hidden_state, 1)

        # Bi-GRU
        gru_out, _ = self.b_gru(pooled_out.unsqueeze(0))
        gru_out = gru_out.squeeze(0)

        # Dropout
        x = self.dropout(gru_out)

        # Classifier
        logits = self.classifier(x)

        return logits

# Utils Functions

In [68]:
# Load model
def load_model(path, c, pretrained):
    if c == 'NeuralNet':
        model = MultilabelClassifier_NeuralNet(model_name=pretrained).to(device)
    elif c == 'TextCNN':
        model = MultilabelClassifier_TextCNN(model_name=pretrained).to(device)
    elif c == 'BiLSTM':
        model = MultilabelClassifier_BiLSTM(model_name=pretrained).to(device)
    elif c == 'BiGRU':
        model = MultilabelClassifier_BiGRU(model_name=pretrained).to(device)
    else:
        print('Lỗi model_type c')
        return

    model.load_state_dict(torch.load(path))

    return model

# Get the name of bert (for get tokenizer)
def get_extractor_name(model_name):
    sep = model_name.split('_')

    if sep[0] == 'phoBERT':
        pretrained_model = 'vinai/phobert-base'
    elif sep[0] == 'XLMBERT':
        pretrained_model = 'bert-base-multilingual-cased'
    else:
        pretrained_model = 'distilbert-base-cased'


    return pretrained_model, sep[1]

def get_tokens(df, idx, tokenizer, max_len=MAX_SEQUENCE_LENGTH):
    # Extract
    text = df.iloc[idx].information

    # Encoding
    encoding = tokenizer.encode_plus(
        text,
        truncation=True,
        padding='max_length',
        max_length=max_len,
        add_special_tokens=True,
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors='pt',
    )

    return {
      'text': text,
      'input_ids': encoding['input_ids'].flatten(),
      'attention_masks': encoding['attention_mask'].flatten()
    }

def tensor_to_numpy(tensor):
    return tensor.cpu().numpy()

def get_prediction(model, ids, attn):

    ids = ids.unsqueeze(0).to(device)
    attn = attn.unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(ids, attn)

    probs = torch.sigmoid(logits)
    preds = torch.round(probs)

    return logits, probs, preds

# Evaluation & Error analysis

In [60]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Hamming score is Accuracy for multi-label
def hamming_score(y_true, y_pred):
    temp = 0
    for i in range(0, len(y_true)):
        temp += sum(np.logical_and(y_true[i], y_pred[i])) / sum(np.logical_or(y_true[i], y_pred[i]))

    return temp / len(y_true)

# Exact Matching Score
def em_score(y_true, y_pred):
    MR = np.all(y_pred == y_true, axis=1).mean()

    return MR

def get_score(y_true, y_pred):
    # Hand
    hamming = hamming_score(y_true, y_pred)
    em = em_score(y_true, y_pred)

    # Machine
    precision = precision_score(y_true, y_pred, average='samples')
    recall = recall_score(y_true, y_pred, average='samples')
    f1 = f1_score(y_true, y_pred, average='samples')

    return hamming, em, precision, recall, f1

In [103]:
# Checking: Liệu rằng càng nhiều label thì model dự đoán đúng càng ít?
def error_analysis(y_true, y_pred):

    true = [np.where(y == 1)[0].tolist() for y in y_true]
    pred = [np.where(y == 1)[0].tolist() for y in y_pred]

    re = pd.DataFrame({'true': true, 'pred': pred})
    re['num_label'] = re['true'].map(lambda x: len(x))
    re['perfectly_correct'] = re['true'] == re['pred']

    return re[['num_label', 'perfectly_correct']].groupby('num_label').sum()

In [104]:
def evaluate(path, df):

    # Get tokenizer and load model
    pretrained_model_name, c = get_extractor_name(path)
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name, use_fast=False)
    model = load_model(MODEL_PATH + path, c, pretrained_model_name)

    y_true, y_pred = [], []
    for i in tqdm(range(len(df))):

        # True label
        y_true.append(create_onehot(df.industries[i]))

        # Predicted label
        encoding = get_tokens(df, i, tokenizer)
        _, _, pred = get_prediction(model, encoding['input_ids'], encoding['attention_masks'])
        y_pred.append(pred.cpu().numpy())

    y_true = np.array(y_true).squeeze()
    y_pred = np.array(y_pred).squeeze()

    # Get scores
    hamming, em, precision, recall, f1 = get_score(y_true, y_pred)

    # Print scores
    print('Evaluation of ' + path)
    print('Hamming score:', hamming)
    print('EM score:', em)
    print('Precision score:', precision)
    print('Recall score:', recall)
    print('F1 score:', f1)

    print('Error analysis: ')
    print(error_analysis(y_true, y_pred))

<hr>
<hr>
<hr>

### XLMBERT + NeuralNet

In [105]:
evaluate('XLMBERT_NeuralNet_9.pth', val_df)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Evaluation of XLMBERT_NeuralNet_9.pth
Hamming score: 0.5669367002384109
EM score: 0.3850532768938841
Precision score: 0.700690896706077
Recall score: 0.621033425777259
F1 score: 0.6311154900338961
Error analysis: 
           perfectly_correct
num_label                   
1                       1856
2                        659
3                        123
4                          0
5                          0
6                          0


In [106]:
evaluate('XLMBERT_NeuralNet_9.pth', test_df)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of XLMBERT_NeuralNet_9.pth
Hamming score: 0.5508881661245922
EM score: 0.36456509048453006
Precision score: 0.6828663164039697
Recall score: 0.6109408445222806
F1 score: 0.6165568620910127
Error analysis: 
           perfectly_correct
num_label                   
1                        906
2                        287
3                         54
4                          2
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### XLMBERT + TextCNN

In [107]:
evaluate('XLMBERT_TextCNN_9.pth', val_df)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Evaluation of XLMBERT_TextCNN_9.pth
Hamming score: 0.5666746599384195
EM score: 0.36476426799007444
Precision score: 0.6823797012601567
Recall score: 0.6524497640247165
F1 score: 0.637035492443462
Error analysis: 
           perfectly_correct
num_label                   
1                       1657
2                        681
3                        159
4                          0
5                          2
6                          0


In [108]:
evaluate('XLMBERT_TextCNN_9.pth', test_df)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of XLMBERT_TextCNN_9.pth
Hamming score: 0.5560350818669547
EM score: 0.35288966725043786
Precision score: 0.6749172990854251
Recall score: 0.6438460790036972
F1 score: 0.6284873377255165
Error analysis: 
           perfectly_correct
num_label                   
1                        840
2                        288
3                         78
4                          3
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### XLMBERT + Bi-LSTM

In [109]:
evaluate('XLMBERT_BiLSTM_9.pth', val_df)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

Evaluation of XLMBERT_BiLSTM_9.pth
Hamming score: 0.5401731412167871
EM score: 0.36841337031090354
Precision score: 0.6865688707244685
Recall score: 0.5779642874519535
F1 score: 0.6012691925405398
Error analysis: 
           perfectly_correct
num_label                   
1                       1861
2                        581
3                         82
4                          0
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


In [110]:
evaluate('XLMBERT_BiLSTM_9.pth', test_df)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of XLMBERT_BiLSTM_9.pth
Hamming score: 0.524498929752869
EM score: 0.3561004086398132
Precision score: 0.673594084452228
Recall score: 0.5616267756372836
F1 score: 0.5851597031106663
Error analysis: 
           perfectly_correct
num_label                   
1                        935
2                        242
3                         42
4                          1
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### XLMBERT + Bi-GRU

In [111]:
evaluate('XLMBERT_BiGRU_9.pth', val_df)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Evaluation of XLMBERT_BiGRU_9.pth
Hamming score: 0.5719658583036216
EM score: 0.38651291782221575
Precision score: 0.6978682291775271
Recall score: 0.6377000924439254
F1 score: 0.638162775449303
Error analysis: 
           perfectly_correct
num_label                   
1                       1800
2                        695
3                        146
4                          5
5                          2
6                          0


In [112]:
evaluate('XLMBERT_BiGRU_9.pth', test_df)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of XLMBERT_BiGRU_9.pth
Hamming score: 0.5702033469546594
EM score: 0.38324576765907764
Precision score: 0.6948871375754037
Recall score: 0.635736524615684
F1 score: 0.6363480944041365
Error analysis: 
           perfectly_correct
num_label                   
1                        931
2                        308
3                         70
4                          4
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### distilBERT + NeuralNet

In [113]:
evaluate('distilBERT_NeuralNet_9.pth', val_df)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Evaluation of distilBERT_NeuralNet_9.pth
Hamming score: 0.4738821583223886
EM score: 0.31104948182747044
Precision score: 0.6162117452440034
Recall score: 0.5123753223373717
F1 score: 0.5329006540581493
Error analysis: 
           perfectly_correct
num_label                   
1                       1650
2                        440
3                         41
4                          0
5                          0
6                          0


In [114]:
evaluate('distilBERT_NeuralNet_9.pth', test_df)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of distilBERT_NeuralNet_9.pth
Hamming score: 0.4642294220665492
EM score: 0.30998248686514884
Precision score: 0.6047090873710838
Recall score: 0.5003356684179802
F1 score: 0.5211213966030078
Error analysis: 
           perfectly_correct
num_label                   
1                        862
2                        179
3                         20
4                          1
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### distilBERT + TextCNN

In [115]:
evaluate('distilBERT_TextCNN_9.pth', val_df)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Evaluation of distilBERT_TextCNN_9.pth
Hamming score: 0.47489070069715433
EM score: 0.3057947744854766
Precision score: 0.6127329343648129
Recall score: 0.5221841093757603
F1 score: 0.5362145023435346
Error analysis: 
           perfectly_correct
num_label                   
1                       1599
2                        449
3                         47
4                          0
5                          0
6                          0


In [116]:
evaluate('distilBERT_TextCNN_9.pth', test_df)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of distilBERT_TextCNN_9.pth
Hamming score: 0.46751799961081864
EM score: 0.30531231757151195
Precision score: 0.6011918661218136
Recall score: 0.5152996691963416
F1 score: 0.527475467711895
Error analysis: 
           perfectly_correct
num_label                   
1                        840
2                        184
3                         22
4                          0
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### distilBERT + Bi-LSTM

In [117]:
evaluate('distilBERT_BiLSTM_9.pth', val_df)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Evaluation of distilBERT_BiLSTM_9.pth
Hamming score: 0.4303313384907325
EM score: 0.2970369289154868
Precision score: 0.5726171361844986
Recall score: 0.4446552814674257
F1 score: 0.47909933204050853
Error analysis: 
           perfectly_correct
num_label                   
1                       1686
2                        311
3                         38
4                          0
5                          0
6                          0


In [118]:
evaluate('distilBERT_BiLSTM_9.pth', test_df)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of distilBERT_BiLSTM_9.pth
Hamming score: 0.41521210352208543
EM score: 0.29363689433741974
Precision score: 0.5471395213076474
Recall score: 0.43021502237789455
F1 score: 0.4603202401801351
Error analysis: 
           perfectly_correct
num_label                   
1                        859
2                        137
3                         10
4                          0
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### distilBERT + Bi-GRU

In [119]:
evaluate('distilBERT_BiGRU_9.pth', val_df)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

Evaluation of distilBERT_BiGRU_9.pth
Hamming score: 0.48691918454726973
EM score: 0.3208290760472924
Precision score: 0.6304067532720283
Recall score: 0.5251447477253929
F1 score: 0.5467522989344622
Error analysis: 
           perfectly_correct
num_label                   
1                       1706
2                        433
3                         58
4                          1
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


In [120]:
evaluate('distilBERT_BiGRU_9.pth', test_df)

Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of distilBERT_BiGRU_9.pth
Hamming score: 0.4794123370305502
EM score: 0.3164039696438996
Precision score: 0.6211471103327496
Recall score: 0.5200087565674255
F1 score: 0.538547892400782
Error analysis: 
           perfectly_correct
num_label                   
1                        879
2                        179
3                         26
4                          0
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### phoBERT + NeuralNet

In [121]:
evaluate('phoBERT_NeuralNet_9.pth', val_df)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Downloading pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of the model checkpoint at vinai/phobert-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Evaluation of phoBERT_NeuralNet_9.pth
Hamming score: 0.5522673089086768
EM score: 0.3733761494672311
Precision score: 0.6972729041989004
Recall score: 0.598138957816377
F1 score: 0.6162586622738425
Error analysis: 
           perfectly_correct
num_label                   
1                       1844
2                        615
3                         99
4                          0
5                          0
6                          0


In [122]:
evaluate('phoBERT_NeuralNet_9.pth', test_df)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at vinai/phobert-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of phoBERT_NeuralNet_9.pth
Hamming score: 0.547183999110443
EM score: 0.3666082895504962
Precision score: 0.6921677369137965
Recall score: 0.5963319712006226
F1 score: 0.6119443749478776
Error analysis: 
           perfectly_correct
num_label                   
1                        941
2                        262
3                         53
4                          0
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### phoBERT + TextCNN

In [123]:
evaluate('phoBERT_TextCNN_9.pth', val_df)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at vinai/phobert-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Evaluation of phoBERT_TextCNN_9.pth
Hamming score: 0.5402495985987483
EM score: 0.34827032549992704
Precision score: 0.6780932224006228
Recall score: 0.6028973872427384
F1 score: 0.6086688769800724
Error analysis: 
           perfectly_correct
num_label                   
1                       1713
2                        572
3                        101
4                          0
5                          0
6                          0


In [124]:
evaluate('phoBERT_TextCNN_9.pth', test_df)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at vinai/phobert-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of phoBERT_TextCNN_9.pth
Hamming score: 0.5419001751313479
EM score: 0.34413309982486867
Precision score: 0.6802101576182137
Recall score: 0.6112765129402606
F1 score: 0.612478108581436
Error analysis: 
           perfectly_correct
num_label                   
1                        878
2                        252
3                         48
4                          1
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### phoBERT + Bi-LSTM

In [125]:
evaluate('phoBERT_BiLSTM_9.pth', val_df)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at vinai/phobert-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

Evaluation of phoBERT_BiLSTM_9.pth
Hamming score: 0.47765533012212563
EM score: 0.3218508246971245
Precision score: 0.6350654405682868
Recall score: 0.4974942830730307
F1 score: 0.5340603735290642
Error analysis: 
           perfectly_correct
num_label                   
1                       1802
2                        391
3                         12
4                          0
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


In [126]:
evaluate('phoBERT_BiLSTM_9.pth', test_df)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at vinai/phobert-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of phoBERT_BiLSTM_9.pth
Hamming score: 0.47122008172796187
EM score: 0.3228254524226503
Precision score: 0.622723292469352
Recall score: 0.4908639813193228
F1 score: 0.5252467128123871
Error analysis: 
           perfectly_correct
num_label                   
1                        946
2                        156
3                          4
4                          0
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


### phoBERT + Bi-GRU

In [127]:
evaluate('phoBERT_BiGRU_9.pth', val_df)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at vinai/phobert-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/6851 [00:00<?, ?it/s]

Evaluation of phoBERT_BiGRU_9.pth
Hamming score: 0.5653397835561046
EM score: 0.37936067727339073
Precision score: 0.7011385199240987
Recall score: 0.6196321704860603
F1 score: 0.6308430237272742
Error analysis: 
           perfectly_correct
num_label                   
1                       1851
2                        617
3                        129
4                          2
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))


In [128]:
evaluate('phoBERT_BiGRU_9.pth', test_df)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at vinai/phobert-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/3426 [00:00<?, ?it/s]

Evaluation of phoBERT_BiGRU_9.pth
Hamming score: 0.5531697384149215
EM score: 0.3613543490951547
Precision score: 0.6911964065641825
Recall score: 0.6124197314652655
F1 score: 0.6207526014881531
Error analysis: 
           perfectly_correct
num_label                   
1                        933
2                        253
3                         50
4                          2
5                          0
6                          0


  _warn_prf(average, modifier, msg_start, len(result))
