In [1]:
import json
import torch
import pickle
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from torch.utils.data import Dataset
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.utils.class_weight import compute_class_weight
from pyvi import ViTokenizer

2025-04-08 14:33:14.222595: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-08 14:33:14.231289: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744122794.240277  944078 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744122794.243151  944078 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744122794.251125  944078 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# 1. Read the intents JSON file.
# Alternative dataset: /home/asd/testModel/MentalModel/Dataset/Chatbot_remake.json
with open(
    "/home/asd/testModel/MentalModel/Dataset/Chatbot.json",
    mode="r",
    encoding="utf-8",
) as intents_file:
    # Parse the whole file into `data`; later cells read data["intents"].
    data = json.loads(intents_file.read())

In [3]:
# 2. Build the parallel lists of texts and labels (with word segmentation).
# Every pattern is segmented with pyvi's ViTokenizer and paired with the tag
# of the intent it belongs to; the pair order follows the order in the file.
_segmented_pairs = [
    (ViTokenizer.tokenize(pattern), intent["tag"])
    for intent in data["intents"]
    for pattern in intent["patterns"]
]
texts = [segmented for segmented, _tag in _segmented_pairs]
labels = [tag for _segmented, tag in _segmented_pairs]

In [4]:
# 3. Encode the string labels as integer class ids.
label_encoder = LabelEncoder()
label_encoder.fit(labels)
labels_encoded = label_encoder.transform(labels)
# Number of distinct classes; used as `num_labels` when building the model.
num_classes = len(np.unique(labels_encoded))

In [5]:
# 4. Load the tokenizer and the model (DistilBERT).
# NOTE(review): the cross-validation cell rebinds `model` to a freshly loaded
# instance for every fold, so the instance created here is only an initial load.
model_name = "distilbert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

_classifier_options = {
    "num_labels": num_classes,
    "problem_type": "single_label_classification",
}
model = AutoModelForSequenceClassification.from_pretrained(model_name, **_classifier_options)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# 5. Define the Dataset.
class IntentDataset(Dataset):
    """Dataset that tokenizes one text per lookup and pairs it with its label.

    Args:
        texts: Indexable collection of input strings.
        labels: Indexable collection of labels aligned with ``texts``; each
            label is converted to a ``torch.long`` tensor on access.
        tokenizer: Callable invoked as ``tokenizer(text, padding="max_length",
            truncation=True, max_length=..., return_tensors="pt")``; its result
            must be indexable by ``"input_ids"`` and ``"attention_mask"``.
        max_length: Length forwarded to the tokenizer (default 64).
    """

    def __init__(self, texts, labels, tokenizer, max_length=64):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return it together with its label.

        Returns:
            dict with ``"input_ids"`` and ``"attention_mask"`` (the tokenizer
            outputs with dimension 0 squeezed away) and ``"labels"`` (a
            ``torch.long`` tensor built from the label).
        """
        sample_text, sample_label = self.texts[idx], self.labels[idx]

        tokenizer_options = dict(
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        encoded = self.tokenizer(sample_text, **tokenizer_options)

        # return_tensors="pt" is requested above; squeeze(0) drops dimension 0
        # of each returned tensor.
        item = {
            field: encoded[field].squeeze(0)
            for field in ("input_ids", "attention_mask")
        }
        item["labels"] = torch.tensor(sample_label, dtype=torch.long)
        return item

In [7]:
# 6. Define the compute_metrics function.
def compute_metrics(pred):
    """Compute accuracy plus weighted- and macro-averaged precision/recall/F1.

    Args:
        pred: Object exposing ``label_ids`` (the true class ids) and
            ``predictions`` (per-class scores; the argmax over axis 1 is taken
            as the predicted class).

    Returns:
        dict with the keys ``accuracy``, ``precision_weighted``,
        ``recall_weighted``, ``f1_weighted``, ``precision_macro``,
        ``recall_macro`` and ``f1_macro``.
    """
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)

    metrics = {"accuracy": accuracy_score(labels, preds)}
    for average in ("weighted", "macro"):
        # zero_division=0 yields the same 0.0 score the default ("warn") gives
        # for classes without predicted or true samples, but without emitting
        # an UndefinedMetricWarning on every evaluation.
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average=average, zero_division=0
        )
        metrics[f"precision_{average}"] = precision
        metrics[f"recall_{average}"] = recall
        metrics[f"f1_{average}"] = f1
    return metrics

In [8]:
# 7. Compute the class weights.
# "balanced" weights are computed over all encoded labels, then turned into a
# float tensor placed on the GPU when one is available, otherwise on the CPU.
_weights_device = "cuda" if torch.cuda.is_available() else "cpu"
_balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(labels_encoded),
    y=labels_encoded,
)
class_weights = torch.tensor(_balanced_weights, dtype=torch.float).to(_weights_device)

In [9]:
# 8. Custom Trainer
class CustomTrainer(Trainer):
    """Trainer whose loss is cross-entropy weighted by the module-level ``class_weights``."""

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute the class-weighted cross-entropy loss for one batch.

        Args:
            model: Model to run the forward pass with.
            inputs: Mapping of model inputs that includes a ``"labels"`` entry.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss.
            **kwargs: Accepted and ignored, so extra arguments passed by the
                caller do not break the override.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.get("labels")
        # Forward everything except the labels: the loss is computed below, so
        # the model has no need to compute its own (unweighted) loss as well.
        # A filtered copy is used so `inputs` itself is left unmodified.
        model_inputs = {key: value for key, value in inputs.items() if key != "labels"}
        outputs = model(**model_inputs)
        logits = outputs.get("logits")
        # `class_weights` was placed on a device when its cell ran; move it to
        # wherever the logits actually live to avoid a device mismatch.
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [10]:
# 9. Configure TrainingArguments.
training_args = TrainingArguments(
    # Output and logging locations.
    output_dir="./distilbert_intent_model",
    logging_dir="./logs",
    logging_steps=10,
    # Optimisation settings.
    num_train_epochs=200,
    learning_rate=3e-5,
    weight_decay=0.01,
    warmup_steps=20,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate and checkpoint once per epoch, keeping at most two checkpoints.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    # Reload the best checkpoint at the end; "f1_macro" is one of the keys
    # returned by compute_metrics, and higher is better.
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,
)



In [11]:
# 10. Run K-fold cross-validation.
N_SPLITS = 5  # number of folds; also used in the progress message below
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
fold_results = []
best_f1_macro = -1  # Best macro F1 seen so far
best_model = None   # Model from the best-scoring fold
best_fold = -1      # 1-based number of the best-scoring fold

# (printed title, key in the evaluation results) for the per-fold report.
_FOLD_REPORT = (
    ("Accuracy", "eval_accuracy"),
    ("Weighted Precision", "eval_precision_weighted"),
    ("Weighted Recall", "eval_recall_weighted"),
    ("Weighted F1", "eval_f1_weighted"),
    ("Macro Precision", "eval_precision_macro"),
    ("Macro Recall", "eval_recall_macro"),
    ("Macro F1", "eval_f1_macro"),
)

# NOTE(review): every fold reuses the same `training_args`, so all folds write
# their checkpoints under the same output_dir — confirm this is intended.
for fold, (train_idx, eval_idx) in enumerate(kf.split(texts)):
    print(f"\nTraining Fold {fold + 1}/{N_SPLITS}")

    # Split the data for the current fold.
    texts_train = [texts[i] for i in train_idx]
    labels_train = [labels_encoded[i] for i in train_idx]
    texts_eval = [texts[i] for i in eval_idx]
    labels_eval = [labels_encoded[i] for i in eval_idx]

    # Build the datasets.
    train_dataset = IntentDataset(texts_train, labels_train, tokenizer)
    eval_dataset = IntentDataset(texts_eval, labels_eval, tokenizer)

    # Start every fold from a freshly loaded model.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=num_classes,
        problem_type="single_label_classification"
    )

    # Create the trainer with early stopping (patience of 10 evaluations).
    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=10)]
    )

    # Train.
    trainer.train()

    # Evaluate on this fold's held-out split.
    eval_results = trainer.evaluate()
    fold_results.append(eval_results)

    # Print every metric for the current fold.
    print(f"\nFold {fold + 1} Results:")
    for title, key in _FOLD_REPORT:
        print(f"{title}: {eval_results[key]:.4f}")

    # Keep the model of the fold with the highest macro F1.
    current_f1_macro = eval_results["eval_f1_macro"]
    if current_f1_macro > best_f1_macro:
        best_f1_macro = current_f1_macro
        best_model = model
        best_fold = fold + 1

print(f"\nBest Fold: {best_fold} with F1 Macro: {best_f1_macro:.4f}")


Training Fold 1/5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,F1 Weighted,Precision Macro,Recall Macro,F1 Macro
1,4.2491,4.236205,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.2366,4.214064,0.053191,0.004271,0.053191,0.007781,0.003586,0.06,0.006628
3,4.0852,4.076545,0.042553,0.006966,0.042553,0.011496,0.008311,0.037736,0.013086
4,3.8983,3.897638,0.053191,0.017376,0.053191,0.022081,0.024294,0.062147,0.029284
5,3.6407,3.782869,0.085106,0.10765,0.085106,0.054344,0.048473,0.088218,0.035776
6,3.2025,3.633778,0.085106,0.082004,0.085106,0.061734,0.06709,0.091808,0.062406
7,3.0875,3.475327,0.148936,0.165284,0.148936,0.119326,0.09669,0.137778,0.0875
8,2.7315,3.396481,0.159574,0.183688,0.159574,0.136879,0.107778,0.124444,0.094259
9,2.3237,3.307589,0.170213,0.181796,0.170213,0.15295,0.105695,0.125269,0.098956
10,2.2137,3.17164,0.212766,0.217908,0.212766,0.184202,0.144722,0.185,0.137057


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Fold 1 Results:
Accuracy: 0.3191
Weighted Precision: 0.3611
Weighted Recall: 0.3191
Weighted F1: 0.3064
Macro Precision: 0.2659
Macro Recall: 0.2606
Macro F1: 0.2392

Training Fold 2/5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,F1 Weighted,Precision Macro,Recall Macro,F1 Macro
1,4.2659,4.254081,0.010638,0.000409,0.010638,0.000788,0.000726,0.018868,0.001398
2,4.2018,4.185296,0.031915,0.001473,0.031915,0.002816,0.001642,0.035714,0.00314
3,4.0633,4.042021,0.06383,0.060844,0.06383,0.052128,0.048099,0.058333,0.041667
4,3.775,3.885556,0.06383,0.034402,0.06383,0.033924,0.032468,0.075,0.03463
5,3.4115,3.73924,0.074468,0.024392,0.074468,0.035198,0.020648,0.07377,0.03043
6,3.0961,3.582512,0.138298,0.146217,0.138298,0.117,0.111464,0.142479,0.105836
7,2.7727,3.484076,0.138298,0.150355,0.138298,0.11492,0.091005,0.13681,0.091498
8,2.4988,3.350635,0.212766,0.16253,0.212766,0.174569,0.109549,0.16969,0.123734
9,2.3678,3.271763,0.244681,0.171927,0.244681,0.192097,0.128861,0.206727,0.149206
10,1.9998,3.15867,0.234043,0.211082,0.234043,0.203829,0.136156,0.191244,0.143818


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Fold 2 Results:
Accuracy: 0.3617
Weighted Precision: 0.3456
Weighted Recall: 0.3617
Weighted F1: 0.3390
Macro Precision: 0.2612
Macro Recall: 0.2816
Macro F1: 0.2585

Training Fold 3/5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,F1 Weighted,Precision Macro,Recall Macro,F1 Macro
1,4.2481,4.232627,0.010753,0.000512,0.010753,0.000978,0.001013,0.021277,0.001934
2,4.1458,4.130229,0.021505,0.001147,0.021505,0.002171,0.002177,0.040816,0.004121
3,4.013,3.990658,0.010753,0.000538,0.010753,0.001024,0.001,0.02,0.001905
4,3.6761,3.841429,0.064516,0.081413,0.064516,0.043452,0.045373,0.077044,0.030019
5,3.4709,3.720458,0.086022,0.134886,0.086022,0.082535,0.096768,0.083333,0.068705
6,3.1826,3.59844,0.139785,0.161879,0.139785,0.122084,0.094996,0.111582,0.078314
7,2.986,3.490833,0.129032,0.145541,0.129032,0.12144,0.100226,0.106322,0.089028
8,2.6992,3.422219,0.096774,0.112391,0.096774,0.100324,0.060469,0.055556,0.055455
9,2.4236,3.334166,0.11828,0.167503,0.11828,0.119816,0.092769,0.082011,0.071504
10,1.9991,3.261893,0.139785,0.14482,0.139785,0.130606,0.08483,0.084699,0.077078


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Fold 3 Results:
Accuracy: 0.3333
Weighted Precision: 0.3543
Weighted Recall: 0.3333
Weighted F1: 0.3247
Macro Precision: 0.2480
Macro Recall: 0.2442
Macro F1: 0.2364

Training Fold 4/5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,F1 Weighted,Precision Macro,Recall Macro,F1 Macro
1,4.2657,4.241809,0.010753,0.000439,0.010753,0.000843,0.0004,0.009804,0.000769
2,4.1243,4.108566,0.010753,0.000458,0.010753,0.000878,0.000387,0.009091,0.000742
3,3.936,3.96038,0.043011,0.085407,0.043011,0.033794,0.025815,0.028822,0.016604
4,3.6564,3.814299,0.064516,0.093958,0.064516,0.058781,0.035916,0.047215,0.033145
5,3.3395,3.627017,0.096774,0.114098,0.096774,0.091039,0.052731,0.075061,0.053672
6,3.1334,3.519278,0.086022,0.100358,0.086022,0.086282,0.039617,0.050351,0.041431
7,2.8515,3.438069,0.11828,0.120456,0.11828,0.101912,0.065183,0.09719,0.070036
8,2.646,3.319945,0.107527,0.113799,0.107527,0.100978,0.058743,0.083138,0.062742
9,2.3005,3.245988,0.096774,0.111111,0.096774,0.097035,0.05791,0.069007,0.059784
10,2.1641,3.230267,0.129032,0.142857,0.129032,0.123319,0.078845,0.091335,0.076494


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Fold 4 Results:
Accuracy: 0.2903
Weighted Precision: 0.3554
Weighted Recall: 0.2903
Weighted F1: 0.3028
Macro Precision: 0.2417
Macro Recall: 0.2220
Macro F1: 0.2165

Training Fold 5/5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,F1 Weighted,Precision Macro,Recall Macro,F1 Macro
1,4.2491,4.255461,0.010753,0.000448,0.010753,0.00086,0.000801,0.019231,0.001538
2,4.191,4.185708,0.021505,0.022103,0.021505,0.015469,0.010101,0.022727,0.007974
3,3.9121,4.061662,0.043011,0.025762,0.043011,0.028147,0.040607,0.067797,0.044367
4,3.6358,3.943701,0.086022,0.055479,0.086022,0.059123,0.057348,0.109195,0.065491
5,3.4038,3.830407,0.096774,0.100358,0.096774,0.082284,0.083333,0.10929,0.079547
6,3.2851,3.69712,0.086022,0.065233,0.086022,0.06595,0.06828,0.104839,0.073656
7,2.8228,3.614873,0.129032,0.194444,0.129032,0.131388,0.125,0.118687,0.102814
8,2.6528,3.496958,0.129032,0.161649,0.129032,0.129032,0.120313,0.122396,0.10625
9,2.3166,3.432514,0.150538,0.170251,0.150538,0.150538,0.129487,0.129487,0.121795
10,2.149,3.349542,0.150538,0.15233,0.150538,0.143369,0.121212,0.141414,0.123737


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize


Fold 5 Results:
Accuracy: 0.2903
Weighted Precision: 0.3297
Weighted Recall: 0.2903
Weighted F1: 0.2929
Macro Precision: 0.2292
Macro Recall: 0.2227
Macro F1: 0.2129

Best Fold: 2 with F1 Macro: 0.2585


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
# 11. Save the best model.
if best_model is None:
    # No fold produced a model to keep.
    print("‚ö†Ô∏è Kh√¥ng t√¨m th·∫•y m√¥ h√¨nh t·ªët nh·∫•t ƒë·ªÉ l∆∞u!")
else:
    # Persist the model, the tokenizer and the fitted label encoder so the
    # predicted class ids can be mapped back to intent tags later.
    best_model.save_pretrained("distilbert_intent_model_best")
    tokenizer.save_pretrained("distilbert_intent_tokenizer")
    with open("label_encoder.pkl", "wb") as handle:
        pickle.dump(label_encoder, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("üéâ ƒê√£ l∆∞u m√¥ h√¨nh t·ªët nh·∫•t t·ª´ 5 folds!")

üéâ ƒê√£ l∆∞u m√¥ h√¨nh t·ªët nh·∫•t t·ª´ 5 folds!


In [13]:
# 12. Average the results over the 5 folds.
# (metric name, printed title); the per-fold results store each metric under
# the key "eval_<metric name>".
_AVERAGE_REPORT = (
    ("accuracy", "Accuracy"),
    ("precision_weighted", "Weighted Precision"),
    ("recall_weighted", "Weighted Recall"),
    ("f1_weighted", "Weighted F1"),
    ("precision_macro", "Macro Precision"),
    ("recall_macro", "Macro Recall"),
    ("f1_macro", "Macro F1"),
)

avg_results = {
    metric: np.mean([fold_metrics[f"eval_{metric}"] for fold_metrics in fold_results])
    for metric, _title in _AVERAGE_REPORT
}

print("\nAverage Results Across 5 Folds:")
for metric, title in _AVERAGE_REPORT:
    print(f"{title}: {avg_results[metric]:.4f}")


Average Results Across 5 Folds:
Accuracy: 0.3190
Weighted Precision: 0.3492
Weighted Recall: 0.3190
Weighted F1: 0.3132
Macro Precision: 0.2492
Macro Recall: 0.2462
Macro F1: 0.2327
