# Beevibe - Multilabel tutorial


## Manage Packages

### Install packages

In [None]:
# Install Beevibe
! pip install beevibe --quiet --progress-bar off

In [7]:
# Install Watermark
! pip install watermark --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m68.8 MB/s[0m eta [36m0:00:00[0m
[?25h

### Import packages

In [25]:
import sys
import pandas as pd
from datasets import load_dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
import numpy as np
from watermark import watermark
import torch.nn as nn
from torch.optim import AdamW
from beevibe import BeeTrainer, BeeMLMClassifier
from beevibe import transform_to_multilabel_by_top_labels
from beevibe import transform_to_multilabel_by_label_names


## GPU Card

In [9]:
# Shell command: report the NVIDIA driver/CUDA versions and the GPU(s) of this runtime
!nvidia-smi

Mon Feb 10 12:43:32 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   30C    P0             42W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

## Drive Directory


In [10]:
# Path to the project: put the parent directory first on the module search path
sys.path.insert(0, "..")

## Package versions

In [11]:
# Print the default watermark report (timestamp, Python/IPython versions, machine info)
print(watermark())

Last updated: 2025-02-10T12:43:37.550348+00:00

Python implementation: CPython
Python version       : 3.11.11
IPython version      : 7.34.0

Compiler    : GCC 11.4.0
OS          : Linux
Release     : 6.1.85+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 12
Architecture: 64bit



In [12]:
# Print the installed version of each listed package, to record the environment used
print(watermark(packages="pandas,numpy,scipy,sklearn,torch,transformers,tokenizers,sentencepiece,datasets,beevibe"))

pandas       : 2.2.2
numpy        : 1.26.4
scipy        : 1.13.1
sklearn      : 1.6.1
torch        : 2.5.1+cu124
transformers : 4.48.2
tokenizers   : 0.21.0
sentencepiece: 0.2.0
datasets     : 3.2.0
beevibe      : 0.1.0.dev13



## Load dataset

### Get Train & Test

In [13]:
# CSV files of the dataset repository, one per split
data_files = dict(
    train="elegana_train_v0_1.csv",
    test="elegana_test_v0_1.csv",
)

# The CSV files use "|" as field separator
dataset = load_dataset("Franbul/elegana_relation_client_FR", data_files=data_files, sep="|")

# Convert both splits to pandas DataFrames
pd_train, pd_test = (dataset[split].to_pandas() for split in ("train", "test"))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

### Get Themes

In [14]:
# The themes table is loaded from the same repository as its own "themes" split
data_files = dict(themes="elegana_themes_v0_1.csv")

# The CSV file uses "|" as field separator
dataset = load_dataset("Franbul/elegana_relation_client_FR", data_files=data_files, sep="|")

# Convert the split to a pandas DataFrame
pd_themes = dataset["themes"].to_pandas()


Generating themes split: 0 examples [00:00, ? examples/s]

### Merge data

In [15]:
# Merge train and test with the themes table on the THEME column.
# validate="many_to_one" makes pandas raise a MergeError when a THEME value is
# duplicated in pd_themes, instead of silently duplicating rows in the left join.
pd_data = pd.merge(pd_train, pd_themes, on="THEME", how="left", validate="many_to_one")
pd_data_test = pd.merge(pd_test, pd_themes, on="THEME", how="left", validate="many_to_one")

# Optional: uncomment to work on a small, reproducible sample of the training data
#pd_data = pd_data.sample(200, random_state=1811)

In [16]:
# (rows, columns) of the merged training frame
pd_data.shape

(2364, 10)

In [17]:
# Preview the first rows of the merged training frame
pd_data.head()

Unnamed: 0,CLIENT,CONSEILLER,THEME,DESCRIPTION,2_CLASSES,5_CLASSES,LABEL_1,LABEL_2,LABEL_3,LABEL_4
0,Quelle est la taille de ce chapeau ?,La taille de ce chapeau est de 58 cm de circon...,Demande d'informations produit,Questions spécifiques sur les caractéristiques...,Informations et services spécialisés,Conseils et informations produits,Produit,Caractéristique,Spécifique,Demande
1,Quels sont vos délais de livraison pour les co...,Nos délais de livraison pour les commandes en ...,Demande d'informations sur les achats en gros,Conditions et possibilités pour les achats en ...,Informations et services spécialisés,"Services exclusifs, programmes et personnalisa...",Gros,Condition,Professionnel,Possibilité
2,Nous avons besoin de tenues pour un événement ...,"Oui, en fonction du thème de l'événement, nous...",Demande de conseils pour les achats de groupe,Conseils pour effectuer des achats groupés (po...,Informations et services spécialisés,Conseils et informations produits,Achat,Groupe,Événement,Mariage
3,Le gilet que j'ai commandé est trop petit. Pui...,Je suis désolé d'apprendre que la taille du gi...,Échange de produit,"Demande d'échange pour un autre taille, couleu...",Support client opérationnel,"Commandes, livraison et suivi",Produit,Taille,Couleur,Option
4,Est-ce que vous proposez des remises pour les ...,"Oui, nous offrons des remises aux professionne...",Demande d'informations sur les achats en gros,Conditions et possibilités pour les achats en ...,Informations et services spécialisés,"Services exclusifs, programmes et personnalisa...",Gros,Condition,Professionnel,Possibilité


### Get texts & labels for training

In [18]:
# Train set: build the multilabel frame from LABEL_1..LABEL_4, keeping the
# top 50 labels (nb_top_labels=50)
df_train, top_labels_counts = transform_to_multilabel_by_top_labels(
    pd_data,
    text_column="CLIENT",
    label_columns=["LABEL_1", "LABEL_2", "LABEL_3", "LABEL_4"],
    nb_top_labels=50
)
labels_names = list(top_labels_counts.keys())
labels = df_train.drop(columns="CLIENT").values.tolist()
# Read the texts from the same frame as the labels so that both lists are
# row-aligned by construction
texts = df_train["CLIENT"].values.tolist()
print(f"Train : Nb texts:{len(texts)}, Nb labels:{len(labels)}, Nb classes:{len(labels_names)}")

# Test set: encode with the label names selected on the train set.
# The counts go to their own variable so that `top_labels_counts` keeps
# describing the train set.
df_test, test_labels_counts = transform_to_multilabel_by_label_names(
    pd_data_test,
    text_column="CLIENT",
    label_columns=["LABEL_1", "LABEL_2", "LABEL_3", "LABEL_4"],
    labels_to_use=labels_names
)
test_labels = df_test.drop(columns="CLIENT").values.tolist()
# Same alignment guarantee as for the train set
test_texts = df_test["CLIENT"].values.tolist()
print(f"Test  : Nb texts:{len(test_texts)}, Nb labels:{len(test_labels)}")


Train : Nb texts:2364, Nb labels:2364, Nb classes:50
Test  : Nb texts:591, Nb labels:591


In [19]:
# Preview the multilabel training frame (text column followed by one column per label)
df_train.head()

Unnamed: 0,CLIENT,Produit,Information,Problème,Demande,Commande,Option,Article,Client,Mode,...,Pratique,Organisation,Qualité,Taille,Question,Conformité,Collection,Possibilité,Fonctionnement,Accès
0,Quelle est la taille de ce chapeau ?,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Quels sont vos délais de livraison pour les co...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,Nous avons besoin de tenues pour un événement ...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Le gilet que j'ai commandé est trop petit. Pui...,1,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,Est-ce que vous proposez des remises pour les ...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [20]:
# Preview the multilabel test frame (text column followed by one column per label)
df_test.head()

Unnamed: 0,CLIENT,Produit,Information,Problème,Demande,Commande,Option,Article,Client,Mode,...,Pratique,Organisation,Qualité,Taille,Question,Conformité,Collection,Possibilité,Fonctionnement,Accès
0,J'essaie d'ajouter plusieurs articles à mon pa...,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,J'ai finalisé ma commande mais je souhaite cha...,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Comment puis-je ajouter un manteau à mon panie...,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Je n'arrive pas à finaliser mon achat, la page...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Allez-vous sortir des éditions limitées dans l...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [21]:
# Display the label -> count mapping
top_labels_counts

{'Produit': 129,
 'Information': 70,
 'Problème': 56,
 'Demande': 56,
 'Commande': 41,
 'Option': 47,
 'Article': 51,
 'Client': 58,
 'Mode': 46,
 'Achat': 50,
 'Assistance': 43,
 'Conseil': 43,
 'Difficulté': 24,
 'Statut': 28,
 'Processus': 30,
 'Livraison': 19,
 'Correction': 20,
 'Disponibilité': 20,
 'Spécial': 22,
 'Retour': 26,
 'Cadeau': 26,
 'Site': 26,
 'International': 27,
 'Spécifique': 30,
 'Détail': 9,
 'Compte': 15,
 'Inscription': 17,
 'Technique': 17,
 'Condition': 18,
 'Engagement': 19,
 'Stock': 19,
 'Collaboration': 21,
 'Soutien': 8,
 'Service': 30,
 'Mise à jour': 8,
 'Signalement': 9,
 'Marque': 9,
 'Utilisateur': 9,
 'Partenariat': 10,
 'Mobile': 10,
 'Pratique': 10,
 'Organisation': 10,
 'Qualité': 10,
 'Taille': 10,
 'Question': 11,
 'Conformité': 11,
 'Collection': 11,
 'Possibilité': 11,
 'Fonctionnement': 12,
 'Accès': 12}

## Holdout

### Starter code

In [48]:
# Training hyper-parameters
num_epochs = 30
batch_size = 8
patience = 3
min_delta = 0.001
val_size = 0.3


# Get number of classes to predict
num_labels = len(labels_names)

# Create Classification Model from "Camembert-base"
# (no custom head configuration is passed in this starter example)
model = BeeMLMClassifier(
    model_name = "camembert-base",
    num_labels = num_labels,
)

# Create the Trainer in multilabel mode (no LoRA options are set here)
trainer = BeeTrainer(model=model,
                     lr=5e-5,
                     multilabel=True, # Set multilabel to true
                    )

# Train over a Holdout with Earlystopping.
# The finally clause releases the model even when training raises or is
# interrupted, so a failed run does not keep memory allocated.
try:
    ret = trainer.holdout(texts=texts,
                        labels=labels,
                        val_size=val_size,
                        num_epochs=num_epochs,
                        batch_size=batch_size,
                        patience=patience,
                        min_delta=min_delta
                        )
finally:
    # Free CPU/GPU memory
    trainer.release_model()

Device : cuda
Holdout creates a 0.3 % validation texts and labels from train
Use optimizer : Adam
 - {'lr': 5e-05}
No scheduler used
Epoch 0/29, Train Loss: 0.4348, Val Loss: 0.2889, Val F1-Micro: 0.0000, lr: 5.000e-05
Epoch 1/29, Train Loss: 0.2352, Val Loss: 0.1939, Val F1-Micro: 0.0000, lr: 5.000e-05
Epoch 2/29, Train Loss: 0.1820, Val Loss: 0.1658, Val F1-Micro: 0.0114, lr: 5.000e-05
Epoch 3/29, Train Loss: 0.1603, Val Loss: 0.1476, Val F1-Micro: 0.1249, lr: 5.000e-05
Epoch 4/29, Train Loss: 0.1418, Val Loss: 0.1318, Val F1-Micro: 0.1539, lr: 5.000e-05
Epoch 5/29, Train Loss: 0.1231, Val Loss: 0.1166, Val F1-Micro: 0.1731, lr: 5.000e-05
Epoch 6/29, Train Loss: 0.1069, Val Loss: 0.1036, Val F1-Micro: 0.2541, lr: 5.000e-05
Epoch 7/29, Train Loss: 0.0921, Val Loss: 0.0928, Val F1-Micro: 0.4272, lr: 5.000e-05
Epoch 8/29, Train Loss: 0.0798, Val Loss: 0.0844, Val F1-Micro: 0.5005, lr: 5.000e-05
Epoch 9/29, Train Loss: 0.0712, Val Loss: 0.0751, Val F1-Micro: 0.6801, lr: 5.000e-05
Epoch 1

### Modify the classification head

In [50]:
# Training hyper-parameters
num_epochs = 30
batch_size = 8
patience = 3
min_delta = 0.001
val_size = 0.3


# Get number of classes to predict
num_labels = len(labels_names)

# Define a custom classification head: 768 -> 512 -> 256 -> num_labels.
# The first layer enables batch_norm, the second layer_norm; the last layer
# has no activation or normalization entry.
head_layer_configs = [
        {"input_size": 768, "output_size": 512, "activation": nn.ReLU, "batch_norm": True},
        {"input_size": 512, "output_size": 256, "activation": nn.ReLU, "layer_norm": True},
        {"input_size": 256, "output_size": num_labels},
    ]

# Create Classification Model from "Camembert-base" with the custom head
model = BeeMLMClassifier(
    model_name = "camembert-base",
    num_labels = num_labels,
    layer_configs=head_layer_configs
)

# Create the Trainer in multilabel mode (no LoRA options are set here)
trainer = BeeTrainer(model=model,
                     lr=5e-5,
                     multilabel=True, # Set multilabel to true
                     labels_names=labels_names
                     )

# Train over a Holdout with Earlystopping.
# The finally clause releases the model even when training raises or is
# interrupted, so a failed run does not keep memory allocated.
try:
    ret = trainer.holdout(texts=texts,
                        labels=labels,
                        val_size=val_size,
                        num_epochs=num_epochs,
                        batch_size=batch_size,
                        patience=patience,
                        min_delta=min_delta
                        )
finally:
    # Free CPU/GPU memory
    trainer.release_model()


Device : cuda
Holdout creates a 0.3 % validation texts and labels from train
Use optimizer : Adam
 - {'lr': 5e-05}
No scheduler used
Epoch 0/29, Train Loss: 0.3721, Val Loss: 0.2305, Val F1-Micro: 0.0000, lr: 5.000e-05
Epoch 1/29, Train Loss: 0.2024, Val Loss: 0.1752, Val F1-Micro: 0.0780, lr: 5.000e-05
Epoch 2/29, Train Loss: 0.1661, Val Loss: 0.1478, Val F1-Micro: 0.1516, lr: 5.000e-05
Epoch 3/29, Train Loss: 0.1415, Val Loss: 0.1271, Val F1-Micro: 0.1878, lr: 5.000e-05
Epoch 4/29, Train Loss: 0.1215, Val Loss: 0.1099, Val F1-Micro: 0.3186, lr: 5.000e-05
Epoch 5/29, Train Loss: 0.1041, Val Loss: 0.0967, Val F1-Micro: 0.5312, lr: 5.000e-05
Epoch 6/29, Train Loss: 0.0888, Val Loss: 0.0842, Val F1-Micro: 0.6727, lr: 5.000e-05
Epoch 7/29, Train Loss: 0.0749, Val Loss: 0.0752, Val F1-Micro: 0.7561, lr: 5.000e-05
Epoch 8/29, Train Loss: 0.0632, Val Loss: 0.0672, Val F1-Micro: 0.8149, lr: 5.000e-05
Epoch 9/29, Train Loss: 0.0531, Val Loss: 0.0620, Val F1-Micro: 0.8386, lr: 5.000e-05
Epoch 1

### Use AdamW

In [51]:
# Training hyper-parameters
num_epochs = 30
batch_size = 8
patience = 3
min_delta = 0.001
val_size = 0.3

# Get number of classes to predict
num_labels = len(labels_names)

# Define a custom classification head: 768 -> 512 -> 256 -> num_labels.
# The first layer enables batch_norm, the second layer_norm; the last layer
# has no activation or normalization entry.
head_layer_configs = [
        {"input_size": 768, "output_size": 512, "activation": nn.ReLU, "batch_norm": True},
        {"input_size": 512, "output_size": 256, "activation": nn.ReLU, "layer_norm": True},
        {"input_size": 256, "output_size": num_labels},
    ]

# Create Classification Model from "Camembert-base" with the custom head
model = BeeMLMClassifier(
    model_name = "camembert-base",
    num_labels = num_labels,
    layer_configs=head_layer_configs
)

# Create the Trainer in multilabel mode, using AdamW as optimizer class
# (no LoRA options are set here)
trainer = BeeTrainer(model=model,
                     lr=5e-5,
                     multilabel=True, # Set multilabel to true
                     labels_names=labels_names,
                     optimizer_class=AdamW
                     )

# Train over a Holdout with Earlystopping.
# The finally clause releases the model even when training raises or is
# interrupted, so a failed run does not keep memory allocated.
try:
    ret = trainer.holdout(texts=texts,
                        labels=labels,
                        val_size=val_size,
                        num_epochs=num_epochs,
                        batch_size=batch_size,
                        patience=patience,
                        min_delta=min_delta
                        )
finally:
    # Free CPU/GPU memory
    trainer.release_model()

Device : cuda
Holdout creates a 0.3 % validation texts and labels from train
Use optimizer : AdamW
 - {'lr': 5e-05}
No scheduler used
Epoch 0/29, Train Loss: 0.3721, Val Loss: 0.2294, Val F1-Micro: 0.0000, lr: 5.000e-05
Epoch 1/29, Train Loss: 0.2031, Val Loss: 0.1753, Val F1-Micro: 0.0422, lr: 5.000e-05
Epoch 2/29, Train Loss: 0.1661, Val Loss: 0.1474, Val F1-Micro: 0.1288, lr: 5.000e-05
Epoch 3/29, Train Loss: 0.1419, Val Loss: 0.1267, Val F1-Micro: 0.1878, lr: 5.000e-05
Epoch 4/29, Train Loss: 0.1219, Val Loss: 0.1102, Val F1-Micro: 0.3300, lr: 5.000e-05
Epoch 5/29, Train Loss: 0.1047, Val Loss: 0.0977, Val F1-Micro: 0.4942, lr: 5.000e-05
Epoch 6/29, Train Loss: 0.0893, Val Loss: 0.0865, Val F1-Micro: 0.6593, lr: 5.000e-05
Epoch 7/29, Train Loss: 0.0760, Val Loss: 0.0757, Val F1-Micro: 0.7570, lr: 5.000e-05
Epoch 8/29, Train Loss: 0.0635, Val Loss: 0.0656, Val F1-Micro: 0.8259, lr: 5.000e-05
Epoch 9/29, Train Loss: 0.0539, Val Loss: 0.0591, Val F1-Micro: 0.8673, lr: 5.000e-05
Epoch 

### Add a LoRA configuration

In [52]:
# Training hyper-parameters
num_epochs = 50
batch_size = 8
patience = 3
min_delta = 0.001
val_size = 0.3

# Get number of classes to predict
num_labels = len(labels_names)

# Define a custom classification head: 768 -> 512 -> 256 -> num_labels.
# The first layer enables batch_norm, the second layer_norm; the last layer
# has no activation or normalization entry.
head_layer_configs = [
        {"input_size": 768, "output_size": 512, "activation": nn.ReLU, "batch_norm": True},
        {"input_size": 512, "output_size": 256, "activation": nn.ReLU, "layer_norm": True},
        {"input_size": 256, "output_size": num_labels},
    ]

# Create Classification Model from "Camembert-base" with the custom head
model = BeeMLMClassifier(
    model_name = "camembert-base",
    num_labels = num_labels,
    layer_configs=head_layer_configs
)

# Create Trainer with Lora parameters (r=64, alpha=128, dropout=0.01)
trainer = BeeTrainer(model=model,
                     lr=5e-5,
                     multilabel=True, # Set multilabel to true
                     labels_names=labels_names,
                     optimizer_class=AdamW,
                     use_lora=True,
                     lora_r = 64,
                     lora_alpha= 128,
                     lora_dropout = 0.01,
                     )

# Train over a Holdout with Earlystopping.
# The finally clause releases the model even when training raises or is
# interrupted, so a failed run does not keep memory allocated.
try:
    ret = trainer.holdout(texts=texts,
                        labels=labels,
                        val_size=val_size,
                        num_epochs=num_epochs,
                        batch_size=batch_size,
                        patience=patience,
                        min_delta=min_delta
                        )
finally:
    # Free CPU/GPU memory
    trainer.release_model()

Device : cuda
Holdout creates a 0.3 % validation texts and labels from train
Using Lora
Target modules : ['base_model.encoder.layer.0.attention.self.query', 'base_model.encoder.layer.0.attention.self.key', 'base_model.encoder.layer.0.attention.output.dense', 'base_model.encoder.layer.0.intermediate.dense', 'base_model.encoder.layer.0.output.dense', 'base_model.encoder.layer.1.attention.self.query', 'base_model.encoder.layer.1.attention.self.key', 'base_model.encoder.layer.1.attention.output.dense', 'base_model.encoder.layer.1.intermediate.dense', 'base_model.encoder.layer.1.output.dense', 'base_model.encoder.layer.2.attention.self.query', 'base_model.encoder.layer.2.attention.self.key', 'base_model.encoder.layer.2.attention.output.dense', 'base_model.encoder.layer.2.intermediate.dense', 'base_model.encoder.layer.2.output.dense', 'base_model.encoder.layer.3.attention.self.query', 'base_model.encoder.layer.3.attention.self.key', 'base_model.encoder.layer.3.attention.output.dense', 'base_

## TODO Cross-validation

In [19]:
# Training hyper-parameters
num_epochs = 100
batch_size = 8
patience = 3
min_delta = 0.001
n_splits = 5

# Get number of classes to predict
num_labels = len(labels_names)

# Define a custom classification head: 768 -> 512 -> 256 -> num_labels.
# The first layer enables batch_norm, the second layer_norm; the last layer
# has no activation or normalization entry.
head_layer_configs = [
        {"input_size": 768, "output_size": 512, "activation": nn.ReLU, "batch_norm": True},
        {"input_size": 512, "output_size": 256, "activation": nn.ReLU, "layer_norm": True},
        {"input_size": 256, "output_size": num_labels},
    ]

# Create Classification Model from "Camembert-base" with the custom head
# NOTE(review): the holdout cells pass this list as `layer_configs=` while this
# cell uses `head_layers=` - confirm which keyword the installed beevibe expects.
model = BeeMLMClassifier(
    model_name = "camembert-base",
    num_labels = num_labels,
    head_layers=head_layer_configs
)

# Create Trainer with Lora parameters (r=64, alpha=128, dropout=0.01)
trainer = BeeTrainer(model=model,
                     lr=5e-5,
                     multilabel=True, # Set multilabel to true
                     labels_names=labels_names,
                     optimizer_class=AdamW,
                     use_lora=True,
                     lora_r = 64,
                     lora_alpha= 128,
                     lora_dropout = 0.01,
                     )

# Cross validate with Earlystopping.
# The finally clause releases the model even when a fold raises or the run is
# interrupted, so a failed run does not keep memory allocated.
try:
    rets = trainer.cross_validation(texts=texts,
                                    labels=labels,
                                    n_splits=n_splits,
                                    num_epochs=num_epochs,
                                    batch_size=batch_size,
                                    patience=patience,
                                    min_delta=min_delta
                                    )
finally:
    # Free CPU/GPU memory
    trainer.release_model()

Device : cuda
Use MultilabelStratifiedKFold


> Fold 1
Using Lora
Target modules : ['base_model.encoder.layer.0.attention.self.query', 'base_model.encoder.layer.0.attention.self.key', 'base_model.encoder.layer.0.attention.output.dense', 'base_model.encoder.layer.0.intermediate.dense', 'base_model.encoder.layer.0.output.dense', 'base_model.encoder.layer.1.attention.self.query', 'base_model.encoder.layer.1.attention.self.key', 'base_model.encoder.layer.1.attention.output.dense', 'base_model.encoder.layer.1.intermediate.dense', 'base_model.encoder.layer.1.output.dense', 'base_model.encoder.layer.2.attention.self.query', 'base_model.encoder.layer.2.attention.self.key', 'base_model.encoder.layer.2.attention.output.dense', 'base_model.encoder.layer.2.intermediate.dense', 'base_model.encoder.layer.2.output.dense', 'base_model.encoder.layer.3.attention.self.query', 'base_model.encoder.layer.3.attention.self.key', 'base_model.encoder.layer.3.attention.output.dense', 'base_model.encoder.layer.3.

In [5]:
# Best epoch reported by each cross-validation fold
epochs_list = [fold.get("best_epoch") for fold in rets.get("cv_folds")]
# Median over the folds, truncated to an integer
median_epoch = int(np.median(epochs_list))
# The value is a median, not a maximum: label it accordingly
print("CV Folds median best epoch:", median_epoch)

CV Folds max epoch: 24


## Full Training

In [22]:
# Training hyper-parameters
num_epochs = 24 # median_epoch
batch_size = 8
train_size = 1.0

# Get number of classes to predict
num_labels = len(labels_names)

# Define a custom classification head: 768 -> 512 -> 256 -> num_labels.
# The first layer enables batch_norm, the second layer_norm; the last layer
# has no activation or normalization entry.
head_layer_configs = [
        {"input_size": 768, "output_size": 512, "activation": nn.ReLU, "batch_norm": True},
        {"input_size": 512, "output_size": 256, "activation": nn.ReLU, "layer_norm": True},
        {"input_size": 256, "output_size": num_labels},
    ]

# Create Classification Model from "Camembert-base" with the custom head
# NOTE(review): the holdout cells pass this list as `layer_configs=` while this
# cell uses `head_layers=` - confirm which keyword the installed beevibe expects.
model = BeeMLMClassifier(
    model_name = "camembert-base",
    num_labels = num_labels,
    head_layers=head_layer_configs
)

# Create Trainer with Lora parameters (r=64, alpha=128, dropout=0.01)
trainer = BeeTrainer(model=model,
                     lr=5e-5,
                     multilabel=True, # Set multilabel to true
                     labels_names=labels_names,
                     optimizer_class=AdamW,
                     use_lora=True,
                     lora_r = 64,
                     lora_alpha= 128,
                     lora_dropout = 0.01,
                     )

# The finally clause releases the model even when training or saving raises,
# so a failed run does not keep memory allocated.
try:
    # Train the model
    ret = trainer.train(texts=texts,
                        labels=labels,
                        train_size=train_size,
                        num_epochs=num_epochs,
                        batch_size=batch_size
                        )

    # Save model and adapter (must happen before the model is released)
    trainer.save_model("./model-multilabel_v1")
    trainer.save_adaptater("./adaptater-multilabel_v1")
finally:
    # Free CPU/GPU memory
    trainer.release_model()

Device : cuda
Using Lora
Target modules : ['base_model.encoder.layer.0.attention.self.query', 'base_model.encoder.layer.0.attention.self.key', 'base_model.encoder.layer.0.attention.output.dense', 'base_model.encoder.layer.0.intermediate.dense', 'base_model.encoder.layer.0.output.dense', 'base_model.encoder.layer.1.attention.self.query', 'base_model.encoder.layer.1.attention.self.key', 'base_model.encoder.layer.1.attention.output.dense', 'base_model.encoder.layer.1.intermediate.dense', 'base_model.encoder.layer.1.output.dense', 'base_model.encoder.layer.2.attention.self.query', 'base_model.encoder.layer.2.attention.self.key', 'base_model.encoder.layer.2.attention.output.dense', 'base_model.encoder.layer.2.intermediate.dense', 'base_model.encoder.layer.2.output.dense', 'base_model.encoder.layer.3.attention.self.query', 'base_model.encoder.layer.3.attention.self.key', 'base_model.encoder.layer.3.attention.output.dense', 'base_model.encoder.layer.3.intermediate.dense', 'base_model.encoder.

## Predictions

### Reload model

In [23]:
# Load the trained model back from the directory written by trainer.save_model
bee_mlm_model = BeeMLMClassifier.load_model_safetensors("./model-multilabel_v1")

### Test predictions

In [27]:
# Predict the test texts on CPU, 50 texts per batch
y_preds = bee_mlm_model.predict(
    test_texts,
    batch_size=50,
    device="cpu",
)

# Micro-averaged F1 between the true test labels and the predictions
f1_micro = f1_score(
    y_true=test_labels,
    y_pred=y_preds,
    average="micro",
    zero_division=0,
)
print(f1_micro)

0.9246908954664669


# End of game