# PhoBERT v2 for Shopee reviews

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset, helper
# package and checkpoints from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -q transformers
!pip install transformers[torch]
! pip install --quiet vncorenlp
! pip install underthesea

Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->transformers[torch])
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->transformers[torch])
  Using cached nvidia_cublas_cu

In [None]:
import shutil

# Copy the shared helper package from Drive into the working directory.
# `distutils` is deprecated and no longer part of the standard library from
# Python 3.12 on, so use shutil.copytree instead; dirs_exist_ok=True lets the
# cell be re-run when ./utils/ already exists.
shutil.copytree(
    "/content/drive/MyDrive/Thesis: Topic Modelling/Code/utils",
    "./utils/",
    dirs_exist_ok=True,
)

['./utils/vietnamese-stopwords.txt',
 './utils/bertopic_model/topic_embeddings.safetensors',
 './utils/bertopic_model/topics.json',
 './utils/bertopic_model/config.json',
 './utils/bertopic_model/ctfidf.safetensors',
 './utils/bertopic_model/ctfidf_config.json',
 './utils/vncorenlp/models/dep/vi-dep.xz',
 './utils/vncorenlp/models/ner/vi-ner.xz',
 './utils/vncorenlp/models/ner/vi-500brownclusters.xz',
 './utils/vncorenlp/models/ner/vi-pretrainedembeddings.xz',
 './utils/vncorenlp/models/postagger/vi-tagger',
 './utils/vncorenlp/models/wordsegmenter/wordsegmenter.rdr',
 './utils/vncorenlp/models/wordsegmenter/vi-vocab',
 './utils/vncorenlp/VnCoreNLP-1.2.jar',
 './utils/data_preprocessing.py',
 './utils/data_preprocessing_v2.py',
 './utils/__pycache__/data_preprocessing_v2.cpython-310.pyc']

In [None]:


from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
import shutil

# Make sure ./utils/ exists before `utils.data_preprocessing_v2` is imported
# below. shutil.copytree replaces the deprecated distutils `copy_tree`
# (distutils is gone from the standard library in Python 3.12);
# dirs_exist_ok=True keeps the copy idempotent across re-runs.
shutil.copytree(
    "/content/drive/MyDrive/Thesis: Topic Modelling/Code/utils",
    "./utils/",
    dirs_exist_ok=True,
)

from utils.data_preprocessing_v2 import *
from vncorenlp import VnCoreNLP
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
import torch

# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    """Compute micro-F1, micro ROC AUC and accuracy for multi-label logits.

    Args:
        predictions: Raw logits, array-like of shape (batch_size, num_labels).
        labels: Ground-truth 0/1 indicator matrix of the same shape.
        threshold: Sigmoid probability at or above which a label counts as
            predicted.

    Returns:
        dict with the keys 'f1', 'roc_auc' and 'accuracy'.
    """
    # Logits -> independent per-label probabilities.
    probs = torch.sigmoid(torch.Tensor(predictions))
    # Threshold the probabilities into a 0/1 prediction matrix.
    y_pred = (probs >= threshold).numpy().astype(np.float64)
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    # ROC AUC is a ranking metric: score it on the continuous probabilities.
    # Feeding it the thresholded 0/1 predictions (as before) collapses the
    # curve to a single operating point and understates the model.
    roc_auc = roc_auc_score(y_true, probs.numpy(), average='micro')
    accuracy = accuracy_score(y_true, y_pred)
    return {'f1': f1_micro_average,
            'roc_auc': roc_auc,
            'accuracy': accuracy}

def get_prediction(predictions, threshold=0.5):
    """Turn raw logits into a 0/1 multi-label prediction matrix.

    Args:
        predictions: Logits, array-like of shape (batch_size, num_labels).
        threshold: Sigmoid probability at or above which a label is set to 1.

    Returns:
        A float64 NumPy array shaped like the logits, holding 1.0 where the
        sigmoid probability reaches `threshold` and 0.0 everywhere else.
    """
    probabilities = torch.sigmoid(torch.Tensor(predictions))
    # Compare in torch, then hand the boolean mask over to NumPy.
    hits = (probabilities >= threshold).numpy()
    return hits.astype(np.float64)

def compute_metrics(p: EvalPrediction):
    """Metric callback: score an `EvalPrediction` with `multi_label_metrics`.

    When `p.predictions` is a tuple, only its first element is scored.
    """
    raw_predictions = p.predictions
    if isinstance(raw_predictions, tuple):
        raw_predictions = raw_predictions[0]
    return multi_label_metrics(predictions=raw_predictions, labels=p.label_ids)

class Phobertv2:
  """PhoBERT-base-v2 wrapper for multi-label aspect classification of reviews.

  Holds the Hugging Face tokenizer and sequence-classification model, the
  VnCoreNLP word segmenter, and helpers that turn a table of comments into
  tensors wrapped in a DataLoader.
  """

  # Aspect labels, in the column order used for the label matrix and for the
  # classifier head's id2label / label2id mappings.
  LABELS = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"]

  def __init__(self, data = None):
    """Load the pretrained model/tokenizer and start the word segmenter.

    Args:
      data: Optional dataset kept on the instance as `self.data`.
    """
    self.tokenizer, self.model  = self.get_model()
    # Fix: the argument used to be discarded (`self.data` was always None).
    self.data = data
    self.rdrsegmenter = VnCoreNLP("/content/drive/MyDrive/transformers/vncorenlp/VnCoreNLP-1.1.1.jar", annotators="wseg", max_heap_size='-Xmx500m')

  def trainer(self, train_data, valid_data, save_weight_path = None):
    """Build a Hugging Face `Trainer` for fine-tuning `self.model`.

    Args:
      train_data: Dataset passed to the Trainer as `train_dataset`.
      valid_data: Dataset passed to the Trainer as `eval_dataset`.
      save_weight_path: Accepted for interface compatibility; currently unused.

    Returns:
      The configured `Trainer`. Fix: it used to be built and then dropped, so
      the method returned None and callers had nothing to call `.train()` on.
    """
    batch_size = 8
    metric_name = "f1"
    args = TrainingArguments(
        "bert-finetuned-sem_eval-english",
        evaluation_strategy = "epoch",
        save_strategy = "epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=5,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model=metric_name,
        #push_to_hub=True,
    )
    hf_trainer = Trainer(
        self.model,
        args,
        train_dataset=train_data,
        eval_dataset=valid_data,
        tokenizer=self.tokenizer,
        compute_metrics=compute_metrics
    )
    return hf_trainer

  def get_model(self):
    """Load the `vinai/phobert-base-v2` tokenizer and classification model.

    The model is configured for multi-label classification with one output
    per entry of `LABELS`. Both objects are also stored on the instance.

    Returns:
      Tuple `(tokenizer, model)`.
    """
    labels = self.LABELS
    id2label = {idx:label for idx, label in enumerate(labels)}
    label2id = {label:idx for idx, label in enumerate(labels)}
    self.tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base-v2")
    self.model = AutoModelForSequenceClassification.from_pretrained("vinai/phobert-base-v2",
                                                              problem_type="multi_label_classification",
                                                              num_labels=len(labels),
                                                              id2label=id2label,
                                                              label2id=label2id)
    return (self.tokenizer, self.model)

  def multi_label_metrics(self, predictions, labels, threshold=0.5):
    """Score logits against labels; returns a dict with 'f1', 'roc_auc', 'accuracy'.

    Delegates to the module-level `multi_label_metrics` defined in the same
    cell as this class, so the two implementations cannot drift apart.
    """
    return multi_label_metrics(predictions, labels, threshold=threshold)

  def preprocess(self, data):
    """Word-segment and tokenize comments, and attach the label matrix.

    Args:
      data: Mapping/DataFrame with a "comment" column and one column per
        entry of `LABELS`.

    Returns:
      The tokenizer output (padded/truncated to 125 tokens) with an extra
      "labels" entry: a list of per-example float label rows in `LABELS` order.

    Raises:
      KeyError: if a label column is missing from `data`.
    """
    # Fix: reload when EITHER piece is missing. The old `not a and not b`
    # test only fired when both were falsy, and relied on truthiness rather
    # than an explicit None check.
    if self.tokenizer is None or self.model is None:
      self.tokenizer, self.model = self.get_model()
    labels = self.LABELS

    segmented_comments = []
    for text in data["comment"]:
      # The segmenter returns a list of sentences, each a list of tokens;
      # flatten back to one whitespace-separated string per comment.
      sentences = self.rdrsegmenter.tokenize(text)
      segmented_comments.append(' '.join(' '.join(words) for words in sentences))

    encoding = self.tokenizer(segmented_comments, padding = "max_length", truncation = True, max_length = 125)
    labels_matrix = np.zeros((len(segmented_comments), len(labels)))
    for idx, label in enumerate(labels):
      labels_matrix[:, idx] = data[label]
    encoding["labels"] = labels_matrix.tolist()
    return encoding

  def generate_dataset(self, processed_data, batch_size = 32, shuffle = False):
    """Wrap an encoding produced by `preprocess` in a DataLoader.

    Args:
      processed_data: Mapping with "input_ids", "attention_mask" and "labels".
      batch_size: Number of examples per batch.
      shuffle: When True, draw examples with a RandomSampler (useful for the
        training split). The default keeps the original sequential order.

    Returns:
      DataLoader yielding `(input_ids, attention_mask, labels)` tensor batches.
    """
    inputs = torch.tensor(processed_data["input_ids"])
    labels = torch.tensor(processed_data["labels"])
    masks = torch.tensor(processed_data["attention_mask"])
    dataset = TensorDataset(inputs, masks, labels)
    sampler = RandomSampler(dataset) if shuffle else SequentialSampler(dataset)
    return DataLoader(dataset, sampler=sampler, batch_size=batch_size)

No module named 'fasttext'


In [None]:

import re
from sklearn.model_selection import train_test_split
import pandas as pd

def cleaning_for_phobert(sentence):
    """Clean one review string before it is fed to PhoBERT.

    Applies, in order, the helpers `text_normalize`, `remove_all_tag` and
    `remove_ending_letters_in_sentence`, and returns the result.
    """
    pipeline = (text_normalize, remove_all_tag, remove_ending_letters_in_sentence)
    for step in pipeline:
        sentence = step(sentence)
    return sentence

# Columns kept from every split: the six aspect labels, the star rating and
# the raw review text.
cols = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other", "rating", "comment"]

SPLIT_DIR = "/content/drive/MyDrive/Thesis: Topic Modelling/Data/Splitted data"


def load_split(name):
    """Read `<name>.xlsx` from SPLIT_DIR, keep `cols`, and clean the comments."""
    frame = pd.read_excel(SPLIT_DIR + "/" + name + ".xlsx")[cols]
    # Cast to str first so non-string cells go through the cleaner as text.
    frame["comment"] = frame["comment"].astype(str).apply(cleaning_for_phobert)
    return frame


train = load_split("train")

test = load_split("test")

valid = load_split("valid")

In [None]:
train.shape

(12240, 8)

In [None]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
import torch

# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    """Compute micro-F1, micro ROC AUC and accuracy for multi-label logits.

    Note: this cell redefines (and therefore shadows) the function of the
    same name from the earlier class-definition cell.

    Args:
        predictions: Raw logits, array-like of shape (batch_size, num_labels).
        labels: Ground-truth 0/1 indicator matrix of the same shape.
        threshold: Sigmoid probability at or above which a label counts as
            predicted.

    Returns:
        dict with the keys 'f1', 'roc_auc' and 'accuracy'.
    """
    # Logits -> independent per-label probabilities.
    probs = torch.sigmoid(torch.Tensor(predictions))
    # Threshold the probabilities into a 0/1 prediction matrix.
    y_pred = (probs >= threshold).numpy().astype(np.float64)
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    # ROC AUC is a ranking metric: score it on the continuous probabilities.
    # Feeding it the thresholded 0/1 predictions (as before) collapses the
    # curve to a single operating point and understates the model.
    roc_auc = roc_auc_score(y_true, probs.numpy(), average='micro')
    accuracy = accuracy_score(y_true, y_pred)
    return {'f1': f1_micro_average,
            'roc_auc': roc_auc,
            'accuracy': accuracy}

def get_prediction(predictions, threshold=0.5):
    """Binarize logits: 1.0 where sigmoid(logit) >= threshold, else 0.0.

    Args:
        predictions: Logits, array-like of shape (batch_size, num_labels).
        threshold: Probability cut-off applied after the sigmoid.

    Returns:
        A float64 NumPy array with the same shape as the logits.
    """
    logits = torch.Tensor(predictions)
    above_cutoff = torch.sigmoid(logits) >= threshold
    y_pred = np.zeros(tuple(above_cutoff.shape))
    # Boolean-mask assignment marks every position that cleared the cut-off.
    y_pred[above_cutoff.numpy()] = 1
    return y_pred

def compute_metrics(p: EvalPrediction):
    """Score an `EvalPrediction` with `multi_label_metrics`.

    If `p.predictions` is a tuple, its first element is taken as the logits.
    """
    if isinstance(p.predictions, tuple):
        logits = p.predictions[0]
    else:
        logits = p.predictions
    return multi_label_metrics(predictions=logits, labels=p.label_ids)

In [None]:
# Build the model wrapper, then for each split: segment + tokenize the
# comments (`preprocess`) and wrap the tensors in a DataLoader
# (`generate_dataset`).
phobert = Phobertv2()
train_data_encoding = phobert.preprocess(train)
train_data = phobert.generate_dataset(train_data_encoding)
test_data_encoding = phobert.preprocess(test)
# The test loader yields one example per batch.
test_data = phobert.generate_dataset(test_data_encoding, batch_size = 1)
valid_data_encoding = phobert.preprocess(valid)
valid_data = phobert.generate_dataset(valid_data_encoding)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/678 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/895k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.14M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.13M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/540M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base-v2 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
phobert.model

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(258, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [None]:
PHO_BERT = phobert.model.cuda()

In [None]:
import random
from transformers import RobertaForSequenceClassification, RobertaConfig, AdamW
# `tqdm.tqdm_notebook` is deprecated (it prints a warning on every use);
# import the recommended replacement under the same name.
from tqdm.notebook import tqdm as tqdm_notebook
device = 'cuda'
epochs = 10
# Early stopping: stop once more than `patience` epochs have passed without a
# new best validation accuracy.
patience = 5
# Checkpoint writing is disabled by default (the torch.save calls used to be
# commented out). Set to True to persist the best / last model to Drive.
SAVE_CHECKPOINTS = False

# Apply weight decay to every parameter except biases and LayerNorm weights.
param_optimizer = list(PHO_BERT.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

# Checkpoint of the epoch with the best validation accuracy.
save_best_path =  "/content/drive/MyDrive/THESIS DSEB62: Product review analysis/Baseline-model/Phobertv2" + "/processed_best.pth"
# Checkpoint of the model after the last epoch that ran.
save_last_path = "/content/drive/MyDrive/THESIS DSEB62: Product review analysis/Baseline-model/Phobertv2" + "/processed_last.pth"

# Per-epoch history, one entry per completed epoch.
train_loss_list, eval_loss_list = [], []
train_accuracy_list, eval_accuracy_list = [], []
train_f1_list, eval_f1_list = [], []
saved_status = []  # 1 when the epoch set a new best validation accuracy, else 0
max_acc = 0
# NOTE(review): transformers' own AdamW is deprecated in favour of
# torch.optim.AdamW, which has no `correct_bias` option; swapping it would
# change the optimisation, so it is intentionally left as-is here.
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5, correct_bias=False)
best_epoch = 0
for epoch_i in range(0, epochs):
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    total_loss = 0
    PHO_BERT.train()
    train_accuracy = 0
    nb_train_steps = 0
    train_f1 = 0

    # Wrap the loader itself (not `enumerate(...)`) so tqdm can read its
    # length and show real progress instead of "0it".
    for step, batch in enumerate(tqdm_notebook(train_data)):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        PHO_BERT.zero_grad()
        outputs = PHO_BERT(b_input_ids,
            token_type_ids=None,
            attention_mask=b_input_mask,
            labels=b_labels)
        # With labels supplied, the outputs are (loss, logits, ...).
        loss = outputs[0]
        total_loss += loss.item()

        logits = outputs[1].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        # Metrics are computed per batch and averaged over batches below.
        train_eval = multi_label_metrics(logits, label_ids)
        train_accuracy += train_eval["accuracy"]
        train_f1 += train_eval["f1"]
        nb_train_steps += 1

        loss.backward()
        # Clip the gradient norm to 1.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(PHO_BERT.parameters(), 1.0)
        optimizer.step()

    avg_train_loss = total_loss / len(train_data)
    print(" Accuracy: {0:.4f}".format(train_accuracy/nb_train_steps))
    print(" F1 score: {0:.4f}".format(train_f1/nb_train_steps))
    print(" Average training loss: {0:.4f}".format(avg_train_loss))
    train_loss_list.append(avg_train_loss)
    train_accuracy_list.append(train_accuracy/nb_train_steps)
    train_f1_list.append(train_f1/nb_train_steps)

    print("Running Validation...")
    PHO_BERT.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps = 0
    eval_f1 = 0
    for batch in tqdm_notebook(valid_data):

        batch = tuple(t.to(device) for t in batch)

        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            outputs = PHO_BERT(b_input_ids,
            token_type_ids=None,
            attention_mask=b_input_mask,
            labels=b_labels)
            loss = outputs[0]
            eval_loss += loss.item()
            logits = outputs[1].detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            valid_eval = multi_label_metrics(logits, label_ids)
            eval_accuracy += valid_eval["accuracy"]
            eval_f1 += valid_eval["f1"]
            nb_eval_steps += 1
    avg_eval_loss = eval_loss / len(valid_data)
    print(" Accuracy: {0:.4f}".format(eval_accuracy/nb_eval_steps))
    print(" F1 score: {0:.4f}".format(eval_f1/nb_eval_steps))
    print(" Average validation loss: {0:.4f}".format(avg_eval_loss))
    eval_loss_list.append(avg_eval_loss)
    eval_accuracy_list.append(eval_accuracy/nb_eval_steps)
    eval_f1_list.append(eval_f1/nb_eval_steps)

    if (eval_accuracy/nb_eval_steps) > max_acc:
        max_acc = eval_accuracy/nb_eval_steps
        best_epoch = epoch_i
        saved_status.append(1)
        # Only claim a save when one actually happened; previously
        # "new model saved" was printed although saving was commented out.
        if SAVE_CHECKPOINTS:
            torch.save(PHO_BERT, save_best_path)
            print("new model saved")
        else:
            print("new best model (checkpoint saving disabled)")
    else:
        saved_status.append(0)
    if epoch_i - best_epoch > patience:
        break
print("Training complete!")
if SAVE_CHECKPOINTS:
    torch.save(PHO_BERT, save_last_path)

Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for step, batch in tqdm_notebook(enumerate(train_data)):


0it [00:00, ?it/s]

 Accuracy: 0.8757
 F1 score: 0.9595
 Average training loss: 0.0784
Running Validation...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(valid_data):


  0%|          | 0/43 [00:00<?, ?it/s]

 Accuracy: 0.8228
 F1 score: 0.9386
 Average training loss: 0.1181
new model saved
Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for step, batch in tqdm_notebook(enumerate(train_data)):


0it [00:00, ?it/s]

 Accuracy: 0.9065
 F1 score: 0.9705
 Average training loss: 0.0601
Running Validation...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(valid_data):


  0%|          | 0/43 [00:00<?, ?it/s]

 Accuracy: 0.8316
 F1 score: 0.9414
 Average training loss: 0.1165
new model saved
Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for step, batch in tqdm_notebook(enumerate(train_data)):


0it [00:00, ?it/s]

 Accuracy: 0.9224
 F1 score: 0.9763
 Average training loss: 0.0490
Running Validation...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(valid_data):


  0%|          | 0/43 [00:00<?, ?it/s]

 Accuracy: 0.8308
 F1 score: 0.9397
 Average training loss: 0.1225
Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for step, batch in tqdm_notebook(enumerate(train_data)):


0it [00:00, ?it/s]

 Accuracy: 0.9386
 F1 score: 0.9812
 Average training loss: 0.0408
Running Validation...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(valid_data):


  0%|          | 0/43 [00:00<?, ?it/s]

 Accuracy: 0.8258
 F1 score: 0.9389
 Average training loss: 0.1272
Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for step, batch in tqdm_notebook(enumerate(train_data)):


0it [00:00, ?it/s]

 Accuracy: 0.9514
 F1 score: 0.9853
 Average training loss: 0.0343
Running Validation...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(valid_data):


  0%|          | 0/43 [00:00<?, ?it/s]

 Accuracy: 0.8251
 F1 score: 0.9372
 Average training loss: 0.1390
Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for step, batch in tqdm_notebook(enumerate(train_data)):


0it [00:00, ?it/s]

 Accuracy: 0.9585
 F1 score: 0.9872
 Average training loss: 0.0294
Running Validation...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(valid_data):


  0%|          | 0/43 [00:00<?, ?it/s]

 Accuracy: 0.8251
 F1 score: 0.9376
 Average training loss: 0.1381
Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for step, batch in tqdm_notebook(enumerate(train_data)):


0it [00:00, ?it/s]

 Accuracy: 0.9635
 F1 score: 0.9889
 Average training loss: 0.0257
Running Validation...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(valid_data):


  0%|          | 0/43 [00:00<?, ?it/s]

 Accuracy: 0.8251
 F1 score: 0.9368
 Average training loss: 0.1489
Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for step, batch in tqdm_notebook(enumerate(train_data)):


0it [00:00, ?it/s]

 Accuracy: 0.9700
 F1 score: 0.9909
 Average training loss: 0.0212
Running Validation...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(valid_data):


  0%|          | 0/43 [00:00<?, ?it/s]

 Accuracy: 0.8287
 F1 score: 0.9377
 Average training loss: 0.1531
Training complete!


In [None]:
# Collect the per-epoch training/validation history lists into one table,
# one row per completed epoch. The table is written to disk in the next cell.
df = pd.DataFrame(
    dict(
        train_loss=train_loss_list,
        eval_loss=eval_loss_list,
        train_accuracy=train_accuracy_list,
        eval_accuracy=eval_accuracy_list,
        train_f1=train_f1_list,
        eval_f1=eval_f1_list,
        saved_status=saved_status,
    )
)



In [None]:
# Rebuild the per-epoch history table and export it as an Excel workbook
# (without the row index).
history_names = [
    "train_loss", "eval_loss",
    "train_accuracy", "eval_accuracy",
    "train_f1", "eval_f1",
    "saved_status",
]
history_values = [
    train_loss_list, eval_loss_list,
    train_accuracy_list, eval_accuracy_list,
    train_f1_list, eval_f1_list,
    saved_status,
]
df = pd.DataFrame(dict(zip(history_names, history_values)))
df.to_excel("/content/result.xlsx", index=False)

In [None]:
df

Unnamed: 0,train_loss,eval_loss,train_accuracy,eval_accuracy,train_f1,eval_f1,saved_status
0,0.078386,0.118057,0.875734,0.822845,0.959548,0.938575,1
1,0.060071,0.116476,0.906495,0.831566,0.970533,0.94144,1
2,0.049003,0.122496,0.922405,0.83084,0.976257,0.93966,0
3,0.040808,0.12718,0.938642,0.825838,0.981188,0.938939,0
4,0.034305,0.13898,0.951371,0.825111,0.985305,0.937246,0
5,0.029402,0.138124,0.958469,0.825111,0.987161,0.937649,0
6,0.025717,0.148905,0.963528,0.825111,0.988946,0.936841,0
7,0.021191,0.153056,0.969974,0.828745,0.990872,0.9377,0


In [None]:
train_loss_list

[0.07838583882051878,
 0.06007133251391643,
 0.04900300208304087,
 0.04080767864872052,
 0.03430480235462945,
 0.02940188690669854,
 0.025716921517640042,
 0.021191227273509763]

In [None]:
# Debug forward pass on whatever batch tensors (`b_input_ids`, `b_input_mask`,
# `b_labels`) are currently left in the kernel namespace.
# NOTE(review): the result is bound to `vinoutputs`, which nothing else in the
# visible notebook reads; the following cells display `outputs` (left over
# from the training/validation loop). Possibly a typo for `outputs` — confirm.
# NOTE(review): this call is not wrapped in torch.no_grad().
vinoutputs = PHO_BERT(b_input_ids,
    token_type_ids=None,
    attention_mask=b_input_mask,
    labels=b_labels)


In [None]:
outputs[0]

tensor(0.1438, device='cuda:0')

In [None]:
outputs[1]

tensor([[ 4.5631, -5.0007, -3.1528,  4.0199, -5.3767, -7.5246],
        [ 3.8247,  3.3134, -4.7262,  1.4635, -4.6912, -6.5316],
        [ 5.8529, -5.3810, -5.7075, -5.2932, -4.6429, -6.7878],
        [ 3.0503, -4.9791,  4.3905,  2.9030, -5.0893, -6.2521],
        [ 2.6792, -3.6879, -4.6783,  3.6516, -6.9137, -7.1229],
        [ 2.5363, -3.4675, -5.8423,  2.0198, -7.4178, -6.9966],
        [ 3.0929, -4.0604,  4.8582, -2.6794, -5.0238, -5.9252],
        [ 2.7604, -3.7066,  5.0439, -3.4388, -4.6622, -5.4960],
        [ 3.6667, -1.7092,  5.3531,  0.0733, -5.3875, -6.4734],
        [ 4.5322, -4.0886, -3.1708,  4.6582, -4.8868, -7.5256],
        [-4.0301, -4.6858, -4.9066, -4.7685, -4.9934,  4.3253],
        [ 4.6377, -5.7174, -6.4281, -5.5823, -5.7065, -5.3784],
        [ 3.4848, -4.4025, -3.4315,  3.4587,  5.7573, -5.0177],
        [ 5.9159, -5.3483, -5.6816, -4.6066, -5.5445, -7.0185],
        [-1.1367, -4.5155,  4.4796, -0.4886, -6.1526, -4.1185],
        [ 3.7839, -4.4018,  3.4413,  4.0

In [None]:
PHO_BERT(b_input_ids,
            token_type_ids=None,
            attention_mask=b_input_mask)

SequenceClassifierOutput(loss=None, logits=tensor([[ 3.8405, -4.6877, -3.6963,  2.7867, -4.4526, -6.3020],
        [ 2.6941,  3.4944, -3.8592,  0.8230, -2.9157, -4.9252],
        [ 4.5290, -4.8983, -5.1044, -4.3903, -4.6613, -5.6623],
        [ 3.2516, -4.9612,  2.2091,  2.2430, -5.0999, -6.2596],
        [ 3.4122, -2.7162, -4.1432,  3.6374, -4.8521, -7.1955],
        [ 2.9219, -3.9846, -5.2236,  2.2568, -6.1531, -6.5446],
        [ 2.6455, -4.2368,  3.8145, -1.6810, -4.7356, -5.2130],
        [ 2.0643, -2.9423,  4.5424, -2.6721, -3.8271, -4.1804],
        [ 2.9436, -2.4102,  4.9530, -0.5769, -3.7970, -5.2558],
        [ 4.0664, -3.4858, -3.8813,  3.4260, -3.9262, -6.4963],
        [-3.1274, -3.4620, -4.0554, -3.8721, -3.7722,  2.9473],
        [ 3.9470, -5.0633, -5.2591, -4.0778, -5.4607, -5.1854],
        [ 3.3470, -3.6892, -3.3983,  3.0066,  3.8583, -4.4087],
        [ 4.8782, -4.9050, -5.4784, -3.0552, -5.7438, -6.2963],
        [-0.9378, -3.4340,  3.6344, -0.2057, -4.8583, -3.0534

In [None]:
# Persist the whole model object (not just a state_dict) as "last.pth" on Drive;
# the evaluation cell below loads this file back with torch.load.
torch.save(PHO_BERT, "/content/drive/MyDrive/THESIS DSEB62: Product review analysis/Baseline-model/Phobertv2/last.pth")

In [None]:
import random
from transformers import RobertaForSequenceClassification, RobertaConfig, AdamW
# `tqdm.tqdm_notebook` is deprecated; import the replacement under the same name.
from tqdm.notebook import tqdm as tqdm_notebook
device = "cuda"
# The checkpoint is a whole pickled model object (it was written with
# torch.save(PHO_BERT, ...)), so it has to be loaded with weights_only=False;
# PyTorch >= 2.6 defaults to weights_only=True and would refuse it.
# SECURITY: unpickling can execute arbitrary code — only load checkpoints
# that you created yourself.
model = torch.load("/content/drive/MyDrive/THESIS DSEB62: Product review analysis/Baseline-model/Phobertv2/last.pth", weights_only=False)
# Make sure dropout is disabled regardless of the mode the model was saved in.
model.eval()
predictions = []
labels_l = []
for batch in tqdm_notebook(test_data):
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch
    with torch.no_grad():
        outputs = model(b_input_ids,
        token_type_ids=None,
        attention_mask=b_input_mask)
        # No labels are passed, so the first output element is the logits.
        logits = outputs[0]
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions.append(get_prediction(logits))
        labels_l.append(label_ids)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(test_data):


  0%|          | 0/3401 [00:00<?, ?it/s]

In [None]:
# Stack the per-batch arrays row-wise into (num_examples, num_labels) tables.
# np.vstack also works when batches differ in size (np.array(...) on ragged
# batches does not), and the label count no longer needs to be hard-coded.
label_cols = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"]
prediction = pd.DataFrame(np.vstack(predictions), columns = label_cols)
labels = pd.DataFrame(np.vstack(labels_l), columns = label_cols)

In [None]:
from sklearn.metrics import hamming_loss
from sklearn.metrics import classification_report
import pandas as pd

phobert_pred = prediction

label_cols = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"]
print("Classification report from phobertv2")
print('Hamming Loss: ', round(hamming_loss(labels[label_cols], phobert_pred[label_cols]),3))
# target_names labels the rows with the aspect names instead of the column
# indices 0-5. zero_division=0 yields the same scores as the default but
# without the undefined-metric warnings that cluttered the output.
print(classification_report(labels[label_cols], phobert_pred[label_cols],
                            target_names=label_cols, zero_division=0))
# Per-aspect binary reports (classes 0.0 / 1.0).
for col in label_cols:
  print(f"classification report of {col}")
  print(classification_report(labels[col], phobert_pred[col], zero_division=0))

Classification report from phobertv2
Hamming Loss:  0.038
              precision    recall  f1-score   support

           0       0.95      0.96      0.95      2721
           1       0.85      0.94      0.89       518
           2       0.93      0.98      0.95       581
           3       0.91      0.92      0.92      1366
           4       0.95      0.95      0.95       509
           5       0.94      0.86      0.90       309

   micro avg       0.93      0.94      0.94      6004
   macro avg       0.92      0.93      0.93      6004
weighted avg       0.93      0.94      0.94      6004
 samples avg       0.93      0.94      0.93      6004

classification report of Quality
              precision    recall  f1-score   support

         0.0       0.81      0.79      0.80       680
         1.0       0.95      0.96      0.95      2721

    accuracy                           0.92      3401
   macro avg       0.88      0.87      0.88      3401
weighted avg       0.92      0.92      0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import random
from transformers import RobertaForSequenceClassification, RobertaConfig, AdamW
# `tqdm.tqdm_notebook` is deprecated; import the replacement under the same name.
from tqdm.notebook import tqdm as tqdm_notebook
device = "cuda"
# The checkpoint is a whole pickled model object, so it has to be loaded with
# weights_only=False; PyTorch >= 2.6 defaults to weights_only=True.
# SECURITY: unpickling can execute arbitrary code — only load checkpoints
# that you created yourself.
model = torch.load("/content/drive/MyDrive/THESIS DSEB62: Product review analysis/Baseline-model/Phobertv2/best.pth", weights_only=False)
# Make sure dropout is disabled regardless of the mode the model was saved in.
model.eval()
predictions = []
labels_l = []
for batch in tqdm_notebook(test_data):
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch
    with torch.no_grad():
        outputs = model(b_input_ids,
        token_type_ids=None,
        attention_mask=b_input_mask)
        # No labels are passed, so the first output element is the logits.
        logits = outputs[0]
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions.append(get_prediction(logits))
        labels_l.append(label_ids)

label_cols = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"]
# np.vstack stacks the per-batch arrays row-wise and, unlike
# np.array(...).reshape(-1, 6), also copes with batches of different sizes.
prediction = pd.DataFrame(np.vstack(predictions), columns = label_cols)
labels = pd.DataFrame(np.vstack(labels_l), columns = label_cols)
phobert_pred = prediction

print("Classification report from phobertv2")
print('Hamming Loss: ', round(hamming_loss(labels[label_cols], phobert_pred[label_cols]),3))
# target_names labels the rows with the aspect names instead of 0-5;
# zero_division=0 keeps the default scores without the warnings.
print(classification_report(labels[label_cols], phobert_pred[label_cols],
                            target_names=label_cols, zero_division=0))
for col in label_cols:
  print(f"classification report of {col}")
  print(classification_report(labels[col], phobert_pred[col], zero_division=0))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(test_data):


  0%|          | 0/3401 [00:00<?, ?it/s]

Classification report from phobertv2
Hamming Loss:  0.032
              precision    recall  f1-score   support

           0       0.95      0.96      0.96      2721
           1       0.88      0.93      0.90       518
           2       0.97      0.97      0.97       581
           3       0.91      0.96      0.94      1366
           4       0.95      0.96      0.96       509
           5       0.94      0.90      0.92       309

   micro avg       0.94      0.96      0.95      6004
   macro avg       0.93      0.95      0.94      6004
weighted avg       0.94      0.96      0.95      6004
 samples avg       0.94      0.95      0.94      6004

classification report of Quality
              precision    recall  f1-score   support

         0.0       0.84      0.79      0.81       680
         1.0       0.95      0.96      0.96      2721

    accuracy                           0.93      3401
   macro avg       0.89      0.88      0.88      3401
weighted avg       0.93      0.93      0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
save_best_path =  "/content/drive/MyDrive/THESIS DSEB62: Product review analysis/Baseline-model/Phobertv2" + "/best.pth"
# The file holds a whole pickled model, which needs weights_only=False
# (PyTorch >= 2.6 defaults to True). SECURITY: unpickling can execute
# arbitrary code — only load checkpoints that you created yourself.
model = torch.load(save_best_path, weights_only=False)
# Keep only the encoder of the fine-tuned classifier for feature extraction.
embedder = model.roberta

In [None]:
import random
# `tqdm.tqdm_notebook` is deprecated; import the replacement under the same name.
from tqdm.notebook import tqdm as tqdm_notebook
device = 'cuda'
# NOTE(review): this rebinds `phobert`, which earlier held the Phobertv2
# wrapper instance; after this cell `phobert.preprocess(...)` is no longer
# available without re-running the earlier cell.
phobert = embedder.to(device)


def embed_batches(encoder, loader):
    """Run `encoder` over every batch of `loader` and collect its embeddings.

    Args:
        encoder: Model called as `encoder(input_ids, token_type_ids=None,
            attention_mask=mask)`; the first element of its output is used.
        loader: Iterable of `(input_ids, attention_mask, labels)` batches.

    Returns:
        `(mean_list, pooling_list)`: per-batch tensors holding the mean of
        the first output over dim=1, and the first output itself.
    """
    mean_list, pooling_list = [], []
    with torch.no_grad():
        encoder.eval()
        for batch in tqdm_notebook(loader):
            b_input_ids, b_input_mask, _ = tuple(t.to(device) for t in batch)
            hidden = encoder(b_input_ids,
                             token_type_ids=None,
                             attention_mask=b_input_mask)[0]
            # NOTE(review): a plain mean over dim=1 does not use the attention
            # mask, so padded positions (inputs are padded to max_length in
            # preprocess) presumably contribute to the average — confirm
            # this is intended.
            mean_list.append(torch.mean(hidden, dim=1))
            pooling_list.append(hidden)
    return mean_list, pooling_list


train_embedded_mean, train_embedded_pooling = embed_batches(phobert, train_data)

test_embedded_mean, test_embedded_pooling = embed_batches(phobert, test_data)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(train_data):


  0%|          | 0/383 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch in tqdm_notebook(test_data):


  0%|          | 0/3401 [00:00<?, ?it/s]

In [None]:
# Concatenate the per-batch mean embeddings of each split into a single
# tensor and move it to the CPU as a NumPy array.
test_feature = torch.cat(test_embedded_mean).detach().cpu().numpy()
train_feature = torch.cat(train_embedded_mean).detach().cpu().numpy()

In [None]:
# Export the test-split embedding matrix to Drive as CSV.
phobert_test_feature = pd.DataFrame(test_feature)
phobert_test_feature.to_csv("/content/drive/MyDrive/Thesis: Topic Modelling/Code/Phobert result/phobert_test_feature.csv")

# Same export for the train-split embedding matrix.
phobert_train_feature = pd.DataFrame(train_feature)
phobert_train_feature.to_csv("/content/drive/MyDrive/Thesis: Topic Modelling/Code/Phobert result/phobert_train_feature.csv")

In [None]:
phobert_train_feature.shape

(12240, 768)

In [None]:
phobert_test_feature.shape

(3401, 768)

In [None]:
from sklearn.metrics import hamming_loss
from sklearn.metrics import classification_report
import pandas as pd

# NOTE(review): this cell reports on whatever `prediction` / `labels` are
# currently in the kernel namespace; they are produced by the evaluation
# cells earlier in the notebook.
phobert_pred = prediction

label_cols = ["Quality", "Serve", "Pack", "Shipping", "Price", "Other"]
print("Classification report from phobertv2")
print('Hamming Loss: ', round(hamming_loss(labels[label_cols], phobert_pred[label_cols]),3))
# target_names labels the rows with the aspect names instead of the column
# indices 0-5. zero_division=0 yields the same scores as the default but
# without the undefined-metric warnings that cluttered the output.
print(classification_report(labels[label_cols], phobert_pred[label_cols],
                            target_names=label_cols, zero_division=0))
# Per-aspect binary reports (classes 0.0 / 1.0).
for col in label_cols:
  print(f"classification report of {col}")
  print(classification_report(labels[col], phobert_pred[col], zero_division=0))

Classification report from phobertv2
Hamming Loss:  0.036
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2721
           1       0.85      0.93      0.89       518
           2       0.97      0.96      0.97       581
           3       0.91      0.95      0.93      1366
           4       0.96      0.95      0.95       509
           5       0.92      0.90      0.91       309

   micro avg       0.93      0.95      0.94      6004
   macro avg       0.93      0.94      0.93      6004
weighted avg       0.93      0.95      0.94      6004
 samples avg       0.93      0.95      0.93      6004

classification report of Quality
              precision    recall  f1-score   support

         0.0       0.81      0.79      0.80       680
         1.0       0.95      0.95      0.95      2721

    accuracy                           0.92      3401
   macro avg       0.88      0.87      0.87      3401
weighted avg       0.92      0.92      0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# FIXME(review): this cell originally held only the unfinished statement
# `embedder =`, which is a SyntaxError and stops a full Run All.
# `embedder` is already assigned earlier in the notebook
# (`embedder = model.roberta`, taken from the loaded best checkpoint); fill in
# the intended right-hand side here if a different encoder was meant.

In [None]:
# `tqdm.tqdm_notebook` is deprecated; import the replacement under the same name.
from tqdm.notebook import tqdm as tqdm_notebook
device = "cuda"
# NOTE(review): rebinds `phobert` (earlier the Phobertv2 wrapper) to the encoder.
phobert = embedder.to(device)
with torch.no_grad():
    phobert.eval()
    test_embedded_mean = []
    test_embedded_pooling = []
    # Fix: `test_dataloader` is not defined anywhere in the visible notebook;
    # the test DataLoader built earlier is named `test_data`. The loader itself
    # is wrapped (not `enumerate(...)`) so tqdm can show the total.
    for step, batch in enumerate(tqdm_notebook(test_data)):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        last_hidden_layer = phobert(b_input_ids,
        token_type_ids=None,
        attention_mask=b_input_mask)
        # Mean of the first output over dim=1, plus the first output itself.
        embedded_value_mean = torch.mean(last_hidden_layer[0], dim=1)
        embedded_value_pool = last_hidden_layer[0]
        test_embedded_mean.append(embedded_value_mean)
        test_embedded_pooling.append(embedded_value_pool)