# Dataset

### Import Dataset

In [1]:
import pandas as pd
import numpy as np

DATASET_PATH = 'data/Dataset-Mental-Health-with-Tags.xlsx'
datasets = pd.read_excel(DATASET_PATH)
answers_set = datasets[['Jawaban', 'Tag']]
datasets = datasets[['Pertanyaan', 'Tag']]
datasets.info()
tags = np.unique(datasets['Tag'])
print(datasets.head())
print(tags)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Pertanyaan  500 non-null    object
 1   Tag         500 non-null    object
dtypes: object(2)
memory usage: 7.9+ KB
                                          Pertanyaan   Tag
0  Mengapa saya merasa semakin stres setelah berm...  tag1
1  Apa yang menyebabkan saya merasa depresi setel...  tag1
2  Kenapa perasaan saya memburuk setelah melihat ...  tag1
3  Mengapa saya merasa cemas setelah menghabiskan...  tag1
4  Apa alasan di balik perasaan tertekan saya set...  tag1
['tag1' 'tag10' 'tag11' 'tag12' 'tag13' 'tag14' 'tag15' 'tag16' 'tag17'
 'tag18' 'tag19' 'tag2' 'tag20' 'tag21' 'tag22' 'tag23' 'tag24' 'tag25'
 'tag3' 'tag4' 'tag5' 'tag6' 'tag7' 'tag8' 'tag9']


In [2]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nltk.corpus import stopwords
import re

factory = StemmerFactory()
stemmer = factory.create_stemmer()
stop = stopwords.words('indonesian')

def prepare_question(text: str) -> str:
  text = text.lower()
  text = re.sub(r'[,](?!\s)', ', ', text)  # Add spasi pada koma tanpa spasi
  text = re.sub(r'[.](?!\s)', '. ', text)  # Add spasi pada titik tanpa spasi
  text = text.replace('\\t', ' ').replace('\\n', ' ').replace('\\u', ' ')  # Hapus tab, new line, , dll
  text = text.encode('ascii', 'replace').decode('ascii')  # Hapus karakter non ASCII (emoticon, chinese word, dll)
  text = re.sub(r"(?i)(?:https?:\/\/)?(?:www\.)?(?:[a-zA-Z0-9-.]+)(?:\.[a-zA-Z]{2,6})(?:\/[^\s\r\n]*)?", "", text)  # Hapus URL
  text = re.sub(r'[\!\"\#\$\%\&\'\(\)\*\+\,\.\-\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]', ' ', text)  # Ubah tanda baca ke spasi
  text = text.strip()  # Hapus whitespace di depan/belakang teks
  text = re.sub(r'\\s+', ' ', text)  # Hapus double++ spasi
  text = re.sub(r'\\s+(?=\.)', '', text)  # Hapus spasi sebelum titik
  text = re.sub(r'\.{2,}', r'\.', text)  # Hapus titik++
  text = ' '.join([word for word in text.split() if word not in (stop)])  # Hapus stopwords
  return stemmer.stem(text)  # Return hasil stemming
  # return text

In [3]:
new_datasets = datasets.sample(frac=1, random_state=42)
new_datasets['stem'] = new_datasets['Pertanyaan'].apply(prepare_question)
new_datasets.head()

new_datasets.to_excel('data/Dataset-Mental-Health-with-Tags-Preprocessed.xlsx', index=False)

### Encode dataset

In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Encode labels
le = LabelEncoder()
new_datasets['label'] = le.fit_transform(new_datasets['Tag'])
# Check class distribution
print(new_datasets['label'].value_counts())

# Split dataset into training and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(
    list(new_datasets['Pertanyaan']), list(new_datasets['label']), random_state=42, test_size=0.1
)

label
10    20
7     20
14    20
3     20
5     20
18    20
24    20
8     20
2     20
16    20
9     20
6     20
11    20
19    20
17    20
20    20
13    20
1     20
0     20
15    20
22    20
12    20
21    20
23    20
4     20
Name: count, dtype: int64


# Prepare Training

### Define models to train

In [5]:
from transformers import BertForSequenceClassification
from transformers import AlbertForSequenceClassification
from transformers import DistilBertForSequenceClassification
from transformers import MobileBertForSequenceClassification
from transformers import AutoTokenizer, BertTokenizer
from transformers import Trainer, TrainingArguments
import torch
import time

In [6]:
models = [
    ("bert-base-uncased", BertForSequenceClassification, "bert-base-uncased"),
    ("bert-base-multilingual-uncased", BertForSequenceClassification, "bert-base-multilingual-uncased"),
    ("albert/albert-base-v2", AlbertForSequenceClassification, "albert-base-v2"),
    ("distilbert-base-uncased", DistilBertForSequenceClassification, "distilbert-base-uncased"),
    ('google/mobilebert-uncased', MobileBertForSequenceClassification, "mobilebert-uncased"),
    ('cahya/bert-base-indonesian-522M', BertForSequenceClassification, "bert-base-indonesian-522M"),
]

training_durations = {}

### Check device used

In [7]:
# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


### Define PyTorch dataset

In [8]:
# Define PyTorch datasets
class PyTorchDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

### Create training function

In [9]:
def train_model(model_name, model_class, model_nickname):
    # Load tokenizer and model
    model = model_class.from_pretrained(model_name, num_labels=len(tags))
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    # Tokenize training and validation texts
    train_encodings = tokenizer(list(train_texts), truncation=True, padding=True)
    val_encodings = tokenizer(list(val_texts), truncation=True, padding=True)

    # Create PyTorch datasets
    train_datasets = PyTorchDataset(train_encodings, train_labels)
    eval_datasets = PyTorchDataset(val_encodings, val_labels)

    # Prepare training arguments
    training_args = TrainingArguments(
        output_dir=f"./results/{model_name}",
        eval_strategy="epoch",
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        num_train_epochs=20,
        logging_dir=f"./logs/{model_name}",
    )

    # Define trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_datasets,
        eval_dataset=eval_datasets,
    )

    # Fine-tune the model
    start_time = time.time()
    trainer.train()
    end_time = time.time()
    training_durations[model_nickname] = end_time - start_time
    print(f"Training time for {model_nickname}: {training_durations[model_nickname]} seconds")

    model.save_pretrained(f"./results/{model_name}_fine-tuned_mental-health")
    tokenizer.save_pretrained(f"./results/{model_name}")
    torch.cuda.empty_cache()


### Start training

In [10]:
for model_name, model_class, model_nickname in models:
    print(f"Training {model_name}")
    train_model(model_name, model_class, model_nickname)
    print("*" * 100)
    print()

for training_time in training_durations:
    print(f"Training time for {training_time}: {training_durations[training_time]} seconds")

Training bert-base-uncased


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 3.120062828063965, 'eval_runtime': 0.0666, 'eval_samples_per_second': 751.169, 'eval_steps_per_second': 30.047, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.9546923637390137, 'eval_runtime': 0.0581, 'eval_samples_per_second': 861.257, 'eval_steps_per_second': 34.45, 'epoch': 2.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.5275731086730957, 'eval_runtime': 0.0596, 'eval_samples_per_second': 839.425, 'eval_steps_per_second': 33.577, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.0879650115966797, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.896, 'eval_steps_per_second': 34.156, 'epoch': 4.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.6877589225769043, 'eval_runtime': 0.0581, 'eval_samples_per_second': 861.296, 'eval_steps_per_second': 34.452, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.3840175867080688, 'eval_runtime': 0.0581, 'eval_samples_per_second': 861.282, 'eval_steps_per_second': 34.451, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.180670976638794, 'eval_runtime': 0.0606, 'eval_samples_per_second': 825.634, 'eval_steps_per_second': 33.025, 'epoch': 7.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0024787187576294, 'eval_runtime': 0.0596, 'eval_samples_per_second': 839.502, 'eval_steps_per_second': 33.58, 'epoch': 8.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8054612874984741, 'eval_runtime': 0.0601, 'eval_samples_per_second': 832.567, 'eval_steps_per_second': 33.303, 'epoch': 9.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.6942300200462341, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.861, 'eval_steps_per_second': 34.154, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.5503681898117065, 'eval_runtime': 0.0596, 'eval_samples_per_second': 839.526, 'eval_steps_per_second': 33.581, 'epoch': 11.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.4822544753551483, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.917, 'eval_steps_per_second': 34.157, 'epoch': 12.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.4270804226398468, 'eval_runtime': 0.0591, 'eval_samples_per_second': 846.688, 'eval_steps_per_second': 33.868, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.3935300409793854, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.854, 'eval_steps_per_second': 34.154, 'epoch': 14.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.36267974972724915, 'eval_runtime': 0.0596, 'eval_samples_per_second': 839.489, 'eval_steps_per_second': 33.58, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.32368260622024536, 'eval_runtime': 0.0581, 'eval_samples_per_second': 861.278, 'eval_steps_per_second': 34.451, 'epoch': 16.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.29190608859062195, 'eval_runtime': 0.0581, 'eval_samples_per_second': 861.292, 'eval_steps_per_second': 34.452, 'epoch': 17.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.28558415174484253, 'eval_runtime': 0.0576, 'eval_samples_per_second': 868.771, 'eval_steps_per_second': 34.751, 'epoch': 18.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.2753553092479706, 'eval_runtime': 0.0596, 'eval_samples_per_second': 839.519, 'eval_steps_per_second': 33.581, 'epoch': 19.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.27438193559646606, 'eval_runtime': 0.0415, 'eval_samples_per_second': 1203.607, 'eval_steps_per_second': 48.144, 'epoch': 20.0}
{'train_runtime': 36.0508, 'train_samples_per_second': 249.648, 'train_steps_per_second': 8.322, 'train_loss': 1.0910693359375, 'epoch': 20.0}
Training time for bert-base-uncased: 36.680134534835815 seconds
****************************************************************************************************

Training bert-base-multilingual-uncased


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 3.060262441635132, 'eval_runtime': 0.0596, 'eval_samples_per_second': 839.506, 'eval_steps_per_second': 33.58, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.6407392024993896, 'eval_runtime': 0.0561, 'eval_samples_per_second': 892.029, 'eval_steps_per_second': 35.681, 'epoch': 2.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.1703665256500244, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.854, 'eval_steps_per_second': 34.154, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.5639255046844482, 'eval_runtime': 0.0581, 'eval_samples_per_second': 861.285, 'eval_steps_per_second': 34.451, 'epoch': 4.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0895352363586426, 'eval_runtime': 0.0591, 'eval_samples_per_second': 845.687, 'eval_steps_per_second': 33.827, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8129457831382751, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.882, 'eval_steps_per_second': 34.155, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.684217631816864, 'eval_runtime': 0.0591, 'eval_samples_per_second': 846.688, 'eval_steps_per_second': 33.868, 'epoch': 7.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.5988849401473999, 'eval_runtime': 0.0591, 'eval_samples_per_second': 845.619, 'eval_steps_per_second': 33.825, 'epoch': 8.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.533599317073822, 'eval_runtime': 0.0604, 'eval_samples_per_second': 827.726, 'eval_steps_per_second': 33.109, 'epoch': 9.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.4733303189277649, 'eval_runtime': 0.0591, 'eval_samples_per_second': 846.67, 'eval_steps_per_second': 33.867, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.44566184282302856, 'eval_runtime': 0.0591, 'eval_samples_per_second': 846.182, 'eval_steps_per_second': 33.847, 'epoch': 11.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.36999985575675964, 'eval_runtime': 0.0611, 'eval_samples_per_second': 818.925, 'eval_steps_per_second': 32.757, 'epoch': 12.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.3373372554779053, 'eval_runtime': 0.0583, 'eval_samples_per_second': 856.953, 'eval_steps_per_second': 34.278, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.3296751081943512, 'eval_runtime': 0.0576, 'eval_samples_per_second': 868.738, 'eval_steps_per_second': 34.75, 'epoch': 14.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.2890188694000244, 'eval_runtime': 0.0601, 'eval_samples_per_second': 832.573, 'eval_steps_per_second': 33.303, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.24013803899288177, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.851, 'eval_steps_per_second': 34.154, 'epoch': 16.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.19095295667648315, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.896, 'eval_steps_per_second': 34.156, 'epoch': 17.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.1761508733034134, 'eval_runtime': 0.0581, 'eval_samples_per_second': 861.271, 'eval_steps_per_second': 34.451, 'epoch': 18.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.16545066237449646, 'eval_runtime': 0.0586, 'eval_samples_per_second': 853.882, 'eval_steps_per_second': 34.155, 'epoch': 19.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.16461433470249176, 'eval_runtime': 0.0641, 'eval_samples_per_second': 779.605, 'eval_steps_per_second': 31.184, 'epoch': 20.0}
{'train_runtime': 31.2258, 'train_samples_per_second': 288.224, 'train_steps_per_second': 9.607, 'train_loss': 0.8260383097330729, 'epoch': 20.0}
Training time for bert-base-multilingual-uncased: 31.358631372451782 seconds
****************************************************************************************************

Training albert/albert-base-v2


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 3.2579281330108643, 'eval_runtime': 0.0556, 'eval_samples_per_second': 900.015, 'eval_steps_per_second': 36.001, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 3.290515661239624, 'eval_runtime': 0.0543, 'eval_samples_per_second': 920.078, 'eval_steps_per_second': 36.803, 'epoch': 2.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 3.0545578002929688, 'eval_runtime': 0.0541, 'eval_samples_per_second': 925.058, 'eval_steps_per_second': 37.002, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.8690011501312256, 'eval_runtime': 0.054, 'eval_samples_per_second': 925.094, 'eval_steps_per_second': 37.004, 'epoch': 4.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.781682014465332, 'eval_runtime': 0.0546, 'eval_samples_per_second': 916.567, 'eval_steps_per_second': 36.663, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.5047993659973145, 'eval_runtime': 0.0533, 'eval_samples_per_second': 937.291, 'eval_steps_per_second': 37.492, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.3677496910095215, 'eval_runtime': 0.054, 'eval_samples_per_second': 925.078, 'eval_steps_per_second': 37.003, 'epoch': 7.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.0051004886627197, 'eval_runtime': 0.054, 'eval_samples_per_second': 925.086, 'eval_steps_per_second': 37.003, 'epoch': 8.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.8341248035430908, 'eval_runtime': 0.0546, 'eval_samples_per_second': 916.571, 'eval_steps_per_second': 36.663, 'epoch': 9.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.6513797044754028, 'eval_runtime': 0.0546, 'eval_samples_per_second': 916.511, 'eval_steps_per_second': 36.66, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.5113754272460938, 'eval_runtime': 0.054, 'eval_samples_per_second': 925.074, 'eval_steps_per_second': 37.003, 'epoch': 11.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.3758184909820557, 'eval_runtime': 0.054, 'eval_samples_per_second': 925.082, 'eval_steps_per_second': 37.003, 'epoch': 12.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.293816328048706, 'eval_runtime': 0.0536, 'eval_samples_per_second': 933.665, 'eval_steps_per_second': 37.347, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.2077627182006836, 'eval_runtime': 0.0536, 'eval_samples_per_second': 933.64, 'eval_steps_per_second': 37.346, 'epoch': 14.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0614415407180786, 'eval_runtime': 0.0541, 'eval_samples_per_second': 924.458, 'eval_steps_per_second': 36.978, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0455138683319092, 'eval_runtime': 0.055, 'eval_samples_per_second': 908.267, 'eval_steps_per_second': 36.331, 'epoch': 16.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9694881439208984, 'eval_runtime': 0.0546, 'eval_samples_per_second': 916.535, 'eval_steps_per_second': 36.661, 'epoch': 17.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9114673137664795, 'eval_runtime': 0.0536, 'eval_samples_per_second': 933.669, 'eval_steps_per_second': 37.347, 'epoch': 18.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8939911127090454, 'eval_runtime': 0.0541, 'eval_samples_per_second': 925.066, 'eval_steps_per_second': 37.003, 'epoch': 19.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8598687052726746, 'eval_runtime': 0.0545, 'eval_samples_per_second': 916.627, 'eval_steps_per_second': 36.665, 'epoch': 20.0}
{'train_runtime': 35.9919, 'train_samples_per_second': 250.056, 'train_steps_per_second': 8.335, 'train_loss': 1.6325562540690104, 'epoch': 20.0}
Training time for albert-base-v2: 36.128053426742554 seconds
****************************************************************************************************

Training distilbert-base-uncased


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 3.1034369468688965, 'eval_runtime': 0.0325, 'eval_samples_per_second': 1536.87, 'eval_steps_per_second': 61.475, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.6831326484680176, 'eval_runtime': 0.034, 'eval_samples_per_second': 1469.273, 'eval_steps_per_second': 58.771, 'epoch': 2.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.2649474143981934, 'eval_runtime': 0.0335, 'eval_samples_per_second': 1492.784, 'eval_steps_per_second': 59.711, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.8944324254989624, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.807, 'eval_steps_per_second': 60.552, 'epoch': 4.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.6166726350784302, 'eval_runtime': 0.0335, 'eval_samples_per_second': 1491.011, 'eval_steps_per_second': 59.64, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.2999709844589233, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.741, 'eval_steps_per_second': 60.55, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0954279899597168, 'eval_runtime': 0.034, 'eval_samples_per_second': 1469.232, 'eval_steps_per_second': 58.769, 'epoch': 7.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8880311846733093, 'eval_runtime': 0.034, 'eval_samples_per_second': 1469.18, 'eval_steps_per_second': 58.767, 'epoch': 8.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.7102583050727844, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.763, 'eval_steps_per_second': 60.551, 'epoch': 9.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.5737379193305969, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.654, 'eval_steps_per_second': 60.546, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.47253844141960144, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.785, 'eval_steps_per_second': 60.551, 'epoch': 11.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.40378913283348083, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.73, 'eval_steps_per_second': 60.549, 'epoch': 12.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.3341749608516693, 'eval_runtime': 0.034, 'eval_samples_per_second': 1469.252, 'eval_steps_per_second': 58.77, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.320142924785614, 'eval_runtime': 0.0334, 'eval_samples_per_second': 1497.078, 'eval_steps_per_second': 59.883, 'epoch': 14.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.26075711846351624, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.763, 'eval_steps_per_second': 60.551, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.2690044641494751, 'eval_runtime': 0.0335, 'eval_samples_per_second': 1491.032, 'eval_steps_per_second': 59.641, 'epoch': 16.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.2483348846435547, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.719, 'eval_steps_per_second': 60.549, 'epoch': 17.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.23040463030338287, 'eval_runtime': 0.0335, 'eval_samples_per_second': 1491.032, 'eval_steps_per_second': 59.641, 'epoch': 18.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.2214871048927307, 'eval_runtime': 0.033, 'eval_samples_per_second': 1513.741, 'eval_steps_per_second': 60.55, 'epoch': 19.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.22025078535079956, 'eval_runtime': 0.024, 'eval_samples_per_second': 2081.416, 'eval_steps_per_second': 83.257, 'epoch': 20.0}
{'train_runtime': 20.0769, 'train_samples_per_second': 448.277, 'train_steps_per_second': 14.943, 'train_loss': 0.9723445638020833, 'epoch': 20.0}
Training time for distilbert-base-uncased: 20.19717836380005 seconds
****************************************************************************************************

Training google/mobilebert-uncased


Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 4.929247856140137, 'eval_runtime': 0.0886, 'eval_samples_per_second': 564.438, 'eval_steps_per_second': 22.578, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 3.2494802474975586, 'eval_runtime': 0.0841, 'eval_samples_per_second': 594.22, 'eval_steps_per_second': 23.769, 'epoch': 2.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.643003463745117, 'eval_runtime': 0.0846, 'eval_samples_per_second': 591.132, 'eval_steps_per_second': 23.645, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.046945095062256, 'eval_runtime': 0.0936, 'eval_samples_per_second': 534.231, 'eval_steps_per_second': 21.369, 'epoch': 4.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.179095983505249, 'eval_runtime': 0.0916, 'eval_samples_per_second': 545.933, 'eval_steps_per_second': 21.837, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.7552564740180969, 'eval_runtime': 0.0866, 'eval_samples_per_second': 577.478, 'eval_steps_per_second': 23.099, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.5723366737365723, 'eval_runtime': 0.0861, 'eval_samples_per_second': 580.53, 'eval_steps_per_second': 23.221, 'epoch': 7.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.5987165570259094, 'eval_runtime': 0.0864, 'eval_samples_per_second': 578.777, 'eval_steps_per_second': 23.151, 'epoch': 8.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.2195868343114853, 'eval_runtime': 0.0876, 'eval_samples_per_second': 570.901, 'eval_steps_per_second': 22.836, 'epoch': 9.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.40241119265556335, 'eval_runtime': 0.0911, 'eval_samples_per_second': 548.725, 'eval_steps_per_second': 21.949, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.1938096284866333, 'eval_runtime': 0.0861, 'eval_samples_per_second': 580.858, 'eval_steps_per_second': 23.234, 'epoch': 11.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.3346230387687683, 'eval_runtime': 0.0866, 'eval_samples_per_second': 577.491, 'eval_steps_per_second': 23.1, 'epoch': 12.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.4057622253894806, 'eval_runtime': 0.0846, 'eval_samples_per_second': 591.147, 'eval_steps_per_second': 23.646, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.3527800440788269, 'eval_runtime': 0.0881, 'eval_samples_per_second': 567.543, 'eval_steps_per_second': 22.702, 'epoch': 14.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.5080309510231018, 'eval_runtime': 0.0856, 'eval_samples_per_second': 584.232, 'eval_steps_per_second': 23.369, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.2958228290081024, 'eval_runtime': 0.0885, 'eval_samples_per_second': 564.959, 'eval_steps_per_second': 22.598, 'epoch': 16.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.411457359790802, 'eval_runtime': 0.0856, 'eval_samples_per_second': 584.238, 'eval_steps_per_second': 23.37, 'epoch': 17.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.18016472458839417, 'eval_runtime': 0.0881, 'eval_samples_per_second': 567.657, 'eval_steps_per_second': 22.706, 'epoch': 18.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.14836856722831726, 'eval_runtime': 0.0936, 'eval_samples_per_second': 534.265, 'eval_steps_per_second': 21.371, 'epoch': 19.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.15663351118564606, 'eval_runtime': 0.0846, 'eval_samples_per_second': 591.138, 'eval_steps_per_second': 23.646, 'epoch': 20.0}
{'train_runtime': 47.1797, 'train_samples_per_second': 190.76, 'train_steps_per_second': 6.359, 'train_loss': 227350.32, 'epoch': 20.0}
Training time for mobilebert-uncased: 47.37594723701477 seconds
****************************************************************************************************

Training cahya/bert-base-indonesian-522M


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cahya/bert-base-indonesian-522M and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.1544735431671143, 'eval_runtime': 0.05, 'eval_samples_per_second': 999.101, 'eval_steps_per_second': 39.964, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1554046869277954, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.381, 'eval_steps_per_second': 47.575, 'epoch': 2.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.5732111930847168, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.374, 'eval_steps_per_second': 47.575, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.25969040393829346, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.415, 'eval_steps_per_second': 47.577, 'epoch': 4.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.1688808798789978, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.388, 'eval_steps_per_second': 47.576, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.12350311130285263, 'eval_runtime': 0.041, 'eval_samples_per_second': 1218.41, 'eval_steps_per_second': 48.736, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.1105523556470871, 'eval_runtime': 0.046, 'eval_samples_per_second': 1085.966, 'eval_steps_per_second': 43.439, 'epoch': 7.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.09116998314857483, 'eval_runtime': 0.04, 'eval_samples_per_second': 1248.84, 'eval_steps_per_second': 49.954, 'epoch': 8.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.09144019335508347, 'eval_runtime': 0.04, 'eval_samples_per_second': 1248.87, 'eval_steps_per_second': 49.955, 'epoch': 9.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.09216375648975372, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.374, 'eval_steps_per_second': 47.575, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.08808144927024841, 'eval_runtime': 0.041, 'eval_samples_per_second': 1218.417, 'eval_steps_per_second': 48.737, 'epoch': 11.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.09240054339170456, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.388, 'eval_steps_per_second': 47.576, 'epoch': 12.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.09451120346784592, 'eval_runtime': 0.041, 'eval_samples_per_second': 1218.403, 'eval_steps_per_second': 48.736, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.1056601032614708, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.394, 'eval_steps_per_second': 47.576, 'epoch': 14.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.10223520547151566, 'eval_runtime': 0.045, 'eval_samples_per_second': 1110.109, 'eval_steps_per_second': 44.404, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.09521691501140594, 'eval_runtime': 0.041, 'eval_samples_per_second': 1218.41, 'eval_steps_per_second': 48.736, 'epoch': 16.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.097079336643219, 'eval_runtime': 0.046, 'eval_samples_per_second': 1085.966, 'eval_steps_per_second': 43.439, 'epoch': 17.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.10558206588029861, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.415, 'eval_steps_per_second': 47.577, 'epoch': 18.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.10759995132684708, 'eval_runtime': 0.042, 'eval_samples_per_second': 1189.408, 'eval_steps_per_second': 47.576, 'epoch': 19.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.10719986259937286, 'eval_runtime': 0.0245, 'eval_samples_per_second': 2038.782, 'eval_steps_per_second': 81.551, 'epoch': 20.0}
{'train_runtime': 23.2434, 'train_samples_per_second': 387.207, 'train_steps_per_second': 12.907, 'train_loss': 0.31978235880533856, 'epoch': 20.0}
Training time for bert-base-indonesian-522M: 23.38695478439331 seconds
****************************************************************************************************

Training time for bert-base-uncased: 36.680134534835815 seconds
Training time for bert-base-multilingual-uncased: 31.358631372451782 seconds
Training time for albert-base-v2: 36.128053426742554 seconds
Training time for distilbert-base-uncased: 20.19717836380005 seconds
Training time for mobilebert-uncased: 47.37594723701477 seconds
Training time for bert-base-indonesian-522M: 23.38695478439331 seconds


# Deploy Trained Models

In [11]:
import re
from torchinfo import summary

In [12]:
TEST_SET_PATH = 'data/Test-Set-Mental-Health-with-Tags.xlsx'
test_sets = pd.read_excel(TEST_SET_PATH)
test_sets = test_sets[['Pertanyaan', 'Tag']]
test_sets.info()

test_sets = test_sets.sample(frac=1, random_state=42)
# new_test_sets['stem'] = new_test_sets['Pertanyaan'].apply(prepare_question)
# print(new_test_sets.head())
# print(np.unique(new_test_sets['Tag']))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Pertanyaan  250 non-null    object
 1   Tag         250 non-null    object
dtypes: object(2)
memory usage: 4.0+ KB


In [13]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [14]:
def get_prediction(text, tokenizer, model):
    cleaned_text = re.sub(r"[^a-zA-Z ]+", "", text)
    cleaned_text = prepare_question(cleaned_text)

    # Tokenize input text
    inputs = tokenizer(
        cleaned_text, padding=True, truncation=True, return_tensors="pt"
    ).to(device)

    # Perform inference
    with torch.no_grad():
        outputs = model(**inputs)

    # Get predicted logits
    logits = outputs.logits

    # Convert logits to probabilities
    probs = torch.softmax(logits, dim=-1)

    # Get predicted label (index of the maximum probability)
    predicted_label_index = torch.argmax(probs, dim=-1).item()

    # Get corresponding label name
    predicted_label = tags[predicted_label_index]

    return predicted_label

In [19]:
def deploy_model(model_name, model_class):
    model = model_class.from_pretrained(f"./results/{model_name}_fine-tuned_mental-health", num_labels=len(tags))
    tokenizer = AutoTokenizer.from_pretrained(f"./results/{model_name}")

    model.to(device)
    print(summary(model))

    prediction_results = []
    prediction_time = []

    for index, row in test_sets.iterrows():
        start_time = time.time()
        text = row['Pertanyaan']
        predicted_label = get_prediction(text, tokenizer, model)
        end_time = time.time()

        print(f"Input: {text}")
        print(f"Tag: {row['Tag']}")
        print(f"Predicted: {predicted_label}")
        for i in range(len(answers_set)):
            if answers_set.iloc[i]['Tag'] == predicted_label:
                print(f"Answer: {answers_set.iloc[i]['Jawaban']}")
                break
        print("=" * 100)
        print()

        prediction_results.append((predicted_label))
        prediction_time.append(end_time - start_time)
    
    return prediction_results, prediction_time


In [20]:
model_results = {}
from copy import deepcopy
for model_name, model_class, model_nickname in models:
    print(f"Deploying ./results/{model_name}_fine-tuned_mental-health")
    prediction_results, prediction_time = deploy_model(model_name, model_class)
    print("*" * 100)
    print()

    test_sets['Predicted Tag'] = prediction_results
    test_sets['Prediction Time'] = prediction_time

    model_results[model_nickname] = deepcopy(test_sets)

Deploying ./results/bert-base-uncased_fine-tuned_mental-health
Layer (type:depth-idx)                                       Param #
BertForSequenceClassification                                --
├─BertModel: 1-1                                             --
│    └─BertEmbeddings: 2-1                                   --
│    │    └─Embedding: 3-1                                   23,440,896
│    │    └─Embedding: 3-2                                   393,216
│    │    └─Embedding: 3-3                                   1,536
│    │    └─LayerNorm: 3-4                                   1,536
│    │    └─Dropout: 3-5                                     --
│    └─BertEncoder: 2-2                                      --
│    │    └─ModuleList: 3-6                                  85,054,464
│    └─BertPooler: 2-3                                       --
│    │    └─Linear: 3-7                                      590,592
│    │    └─Tanh: 3-8                                        --
├─Dr

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Layer (type:depth-idx)                                            Param #
MobileBertForSequenceClassification                               --
├─MobileBertModel: 1-1                                            --
│    └─MobileBertEmbeddings: 2-1                                  --
│    │    └─Embedding: 3-1                                        3,906,816
│    │    └─Embedding: 3-2                                        262,144
│    │    └─Embedding: 3-3                                        1,024
│    │    └─Linear: 3-4                                           197,120
│    │    └─NoNorm: 3-5                                           1,024
│    │    └─Dropout: 3-6                                          --
│    └─MobileBertEncoder: 2-2                                     --
│    │    └─ModuleList: 3-7                                       20,213,760
│    └─MobileBertPooler: 2-3                                      --
├─Dropout: 1-2                                                    -

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Layer (type:depth-idx)                                       Param #
BertForSequenceClassification                                --
├─BertModel: 1-1                                             --
│    └─BertEmbeddings: 2-1                                   --
│    │    └─Embedding: 3-1                                   24,576,000
│    │    └─Embedding: 3-2                                   393,216
│    │    └─Embedding: 3-3                                   1,536
│    │    └─LayerNorm: 3-4                                   1,536
│    │    └─Dropout: 3-5                                     --
│    └─BertEncoder: 2-2                                      --
│    │    └─ModuleList: 3-6                                  85,054,464
│    └─BertPooler: 2-3                                       --
│    │    └─Linear: 3-7                                      590,592
│    │    └─Tanh: 3-8                                        --
├─Dropout: 1-2                                               --
├─L

# Evaluasi

In [21]:
def save_to_excel(data, file_name, sheet_name):
    # Try to open the existing Excel file and replace the sheet, or create a new file if it doesn't exist
    try:
        # Load the existing Excel file
        with pd.ExcelWriter(file_name, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
            # Write to the specified sheet, replacing if it exists
            data.to_excel(writer, sheet_name=sheet_name, index=False)
    except FileNotFoundError:
        # Create a new Excel file if it doesn't exist
        with pd.ExcelWriter(file_name, engine='openpyxl') as writer:
            data.to_excel(writer, sheet_name=sheet_name, index=False)

In [22]:
RESULT_FILE = 'results/mental_health_prediction_results.xlsx'
model_nicknames = []
model_training_time = []
model_accuracy = []
model_average_prediction_time = []
for model_nickname in model_results:
    print(f"Results for {model_nickname}")
    # print(model_results[model_nickname].columns)
    print(model_results[model_nickname].head())
    print()

    training_duration = training_durations[model_nickname]
    correct_predictions = (model_results[model_nickname]['Tag'] == model_results[model_nickname]['Predicted Tag']).sum()
    total_predictions = len(model_results[model_nickname])
    accuracy = correct_predictions / total_predictions
    average_prediction_time = model_results[model_nickname]['Prediction Time'].mean()

    print(f"Training duration: {training_duration} seconds")
    print(f"Accuracy: {accuracy}")
    print(f"Average prediction time: {average_prediction_time} seconds")
    print()

    model_nicknames.append(model_nickname)
    model_training_time.append(training_duration)
    model_accuracy.append(accuracy)
    model_average_prediction_time.append(average_prediction_time)

    # Sort by tag name
    model_results[model_nickname] = model_results[model_nickname].sort_values(by=['Tag'])
    # Generate the classification report as a dictionary
    report_dict = classification_report(model_results[model_nickname]['Tag'], 
                                        model_results[model_nickname]['Predicted Tag'], 
                                        zero_division=0, output_dict=True)
    # Convert the report dictionary into a DataFrame
    df_report = pd.DataFrame(report_dict).transpose()
    # Sort the DataFrame by precision (descending order)
    df_report_sorted = df_report.sort_values(by='precision', ascending=False)
    # Print the sorted DataFrame
    print(df_report_sorted)
    print(classification_report(model_results[model_nickname]['Tag'], model_results[model_nickname]['Predicted Tag'], zero_division=0))
    print()

    print(np.unique(model_results[model_nickname]['Tag']))
    print(confusion_matrix(model_results[model_nickname]['Tag'], model_results[model_nickname]['Predicted Tag']))
    print("*" * 100)
    print()

    save_to_excel(model_results[model_nickname], RESULT_FILE, model_nickname)

comparison_results = pd.DataFrame({
    'Model': model_nicknames,
    'Training Time': model_training_time,
    'Accuracy': model_accuracy,
    'Average Prediction Time': model_average_prediction_time
})

comparison_results = comparison_results.sort_values(by=['Accuracy'], ascending=False)
print(comparison_results)

save_to_excel(comparison_results, RESULT_FILE, 'Comparison Results')

Results for bert-base-uncased
                                            Pertanyaan    Tag Predicted Tag  \
142  Bagaimana fokus pada kekuatan dan pencapaian d...  tag15         tag19   
6    Bagaimana cara mengatasi perasaan tertekan kar...   tag1          tag4   
97   Apa yang terjadi di otak ketika saya tidak tid...  tag10          tag6   
60   Apa saja gejala yang menunjukkan bahwa kurang ...   tag7          tag6   
112  Bagaimana cara mendengarkan teman yang sedang ...  tag12         tag15   

     Prediction Time  
142         0.038538  
6           0.036033  
97          0.034031  
60          0.034537  
112         0.032029  

Training duration: 36.680134534835815 seconds
Accuracy: 0.42
Average prediction time: 0.008240313529968261 seconds

              precision  recall  f1-score  support
tag14          1.000000    0.40  0.571429    10.00
tag12          0.875000    0.70  0.777778    10.00
tag13          0.875000    0.70  0.777778    10.00
tag22          0.800000    0.40  0.5