# Beevibe - Multiclass tutorial


## Manage Packages

### Install packages

In [1]:
# Install Beevibe
! pip install beevibe --quiet --progress-bar off

In [None]:
# Install Watermark
! pip install watermark --quiet --progress-bar off

### Import packages

In [None]:
import sys
import pandas as pd
from datasets import load_dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import matthews_corrcoef
import numpy as np
from watermark import watermark
import torch.nn as nn
from torch.optim import AdamW
from beevibe import BeeTrainer, BeeMLMClassifier


## GPU Card

In [None]:
# Show the GPU visible to this runtime (model, driver/CUDA version, memory usage)
!nvidia-smi

Thu Feb  6 16:19:41 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   46C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## Drive Directory

**Note:** adjust the path in the cell below so that it matches the location of your notebook.

In [None]:
# Path to the project: put the parent directory first on the import search path
sys.path.insert(0, "..")

## Package versions

In [None]:
# Print watermark's default report (timestamp, Python and platform details)
print(watermark())

Last updated: 2025-02-06T16:19:41.750498+00:00

Python implementation: CPython
Python version       : 3.11.11
IPython version      : 7.34.0

Compiler    : GCC 11.4.0
OS          : Linux
Release     : 6.1.85+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 8
Architecture: 64bit



In [None]:
# Report the installed version of each library this tutorial relies on
pkgs = "pandas,numpy,scipy,sklearn,torch,transformers,tokenizers,sentencepiece,datasets,beevibe"
print(watermark(packages=pkgs))

pandas       : 2.2.2
numpy        : 1.26.4
scipy        : 1.13.1
sklearn      : 1.6.1
torch        : 2.5.1+cu124
transformers : 4.47.1
tokenizers   : 0.21.0
sentencepiece: 0.2.0
datasets     : 3.2.0
beevibe      : 0.1.0.dev13



## Load dataset

### Get Train & Test

In [None]:
# Load the train and test CSV files of the
# "Franbul/elegana_relation_client_FR" dataset; fields are separated by "|".
data_files = dict(
    train="elegana_train_v0_1.csv",
    test="elegana_test_v0_1.csv",
)
dataset = load_dataset(
    "Franbul/elegana_relation_client_FR", data_files=data_files, sep="|"
)

# Convert both splits to pandas DataFrames for the rest of the notebook
pd_train, pd_test = (dataset[split].to_pandas() for split in ("train", "test"))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


### Get Themes

In [None]:
# Load the themes table from the same dataset repository ("|"-separated CSV).
# Note: this rebinds `data_files` and `dataset` from the previous cell.
data_files = dict(themes="elegana_themes_v0_1.csv")
dataset = load_dataset(
    "Franbul/elegana_relation_client_FR", data_files=data_files, sep="|"
)

# Keep the themes as a pandas DataFrame
pd_themes = dataset["themes"].to_pandas()


### Merge data

In [None]:
# Merge the columns of pd_themes into the train and test frames.
# THEME is the join key and how="left" keeps every row of pd_train / pd_test.
# validate="many_to_one" makes pandas raise a MergeError if a THEME value
# appears more than once in pd_themes, instead of silently duplicating rows.
pd_data = pd.merge(pd_train, pd_themes, on="THEME", how="left", validate="many_to_one")
pd_data_test = pd.merge(pd_test, pd_themes, on="THEME", how="left", validate="many_to_one")

# Optional: uncomment to work on a 200-row sample of the training data
#pd_data = pd_data.sample(200, random_state=1811)

In [None]:
# (rows, columns) of the merged training frame
pd_data.shape

(2364, 10)

In [None]:
# Preview the first rows of the merged training frame
pd_data.head()

Unnamed: 0,CLIENT,CONSEILLER,THEME,DESCRIPTION,2_CLASSES,5_CLASSES,LABEL_1,LABEL_2,LABEL_3,LABEL_4
0,Quelle est la taille de ce chapeau ?,La taille de ce chapeau est de 58 cm de circon...,Demande d'informations produit,Questions spécifiques sur les caractéristiques...,Informations et services spécialisés,Conseils et informations produits,Produit,Caractéristique,Spécifique,Demande
1,Quels sont vos délais de livraison pour les co...,Nos délais de livraison pour les commandes en ...,Demande d'informations sur les achats en gros,Conditions et possibilités pour les achats en ...,Informations et services spécialisés,"Services exclusifs, programmes et personnalisa...",Gros,Condition,Professionnel,Possibilité
2,Nous avons besoin de tenues pour un événement ...,"Oui, en fonction du thème de l'événement, nous...",Demande de conseils pour les achats de groupe,Conseils pour effectuer des achats groupés (po...,Informations et services spécialisés,Conseils et informations produits,Achat,Groupe,Événement,Mariage
3,Le gilet que j'ai commandé est trop petit. Pui...,Je suis désolé d'apprendre que la taille du gi...,Échange de produit,"Demande d'échange pour un autre taille, couleu...",Support client opérationnel,"Commandes, livraison et suivi",Produit,Taille,Couleur,Option
4,Est-ce que vous proposez des remises pour les ...,"Oui, nous offrons des remises aux professionne...",Demande d'informations sur les achats en gros,Conditions et possibilités pour les achats en ...,Informations et services spécialisés,"Services exclusifs, programmes et personnalisa...",Gros,Condition,Professionnel,Possibilité


### Get texts & labels for training

In [None]:
# Number of training rows per value of the 5-class target column
pd_data['5_CLASSES'].value_counts()

Unnamed: 0_level_0,count
5_CLASSES,Unnamed: 1_level_1
"Services exclusifs, programmes et personnalisations",1126
"Engagements, événements et initiatives",413
"Commandes, livraison et suivi",370
Conseils et informations produits,300
Assistance technique et support immédiat,155


In [None]:
# The classification target is the "5_CLASSES" column: fit an encoder on its
# distinct values so every class name gets an integer id.
classes = np.unique(pd_data["5_CLASSES"])
le_esg = LabelEncoder().fit(classes)  # fit() returns the encoder itself

# Training data: class names indexed by encoded id, raw texts, integer labels
labels_names = classes[le_esg.transform(classes)]
texts = pd_data["CLIENT"].tolist()
labels = le_esg.transform(pd_data["5_CLASSES"]).tolist()
# From here on `classes` holds the distinct integer ids found in the training labels
classes = np.unique(labels)
print(f"Train : Nb texts:{len(texts)}, Nb labels:{len(labels)}, Nb classes:{len(classes)}")

# Test data: same text column, labels encoded with the encoder fitted above
test_labels = le_esg.transform(pd_data_test["5_CLASSES"]).tolist()
test_texts = pd_data_test["CLIENT"].tolist()
print(f"Test  : Nb texts:{len(test_texts)}, Nb labels:{len(test_labels)}, Nb classes:{len(np.unique(test_labels))}")


Train : Nb texts:2364, Nb labels:2364, Nb classes:5
Test  : Nb texts:591, Nb labels:591, Nb classes:5


## Holdout

In [None]:
# Hyper-parameters of the holdout run
num_epochs, batch_size = 20, 8
patience, min_delta = 3, 0.001
val_size = 0.3

num_labels = len(labels_names)

# Custom classification head: a single layer with 768 inputs and one output
# per class, no activation (768 presumably matches the encoder's hidden size
# -- confirm for the chosen checkpoint).
custom_layer_configs = [
    dict(input_size=768, output_size=num_labels, activation=None),
]

model = BeeMLMClassifier(
    model_name="cmarkea/distilcamembert-base",
    num_labels=num_labels,
    head_layers=custom_layer_configs,
)

# Trainer using the AdamW optimizer with LoRA enabled (r=64, alpha=128, dropout=0.01)
trainer = BeeTrainer(
    model=model,
    labels_names=labels_names,
    optimizer_class=AdamW,
    use_lora=True,
    lora_r=64,
    lora_alpha=128,
    lora_dropout=0.01,
)

# Holdout training; patience / min_delta presumably drive early stopping
# (confirm in the beevibe documentation).
holdout_args = dict(
    texts=texts,
    labels=labels,
    val_size=val_size,
    num_epochs=num_epochs,
    batch_size=batch_size,
    patience=patience,
    min_delta=min_delta,
    balanced=True,
)
ret = trainer.holdout(**holdout_args)

# Free CPU/GPU memory
trainer.release_model()

Device : cuda
Using Lora
Target modules : ['base_model.encoder.layer.0.attention.self.query', 'base_model.encoder.layer.0.attention.self.key', 'base_model.encoder.layer.0.attention.output.dense', 'base_model.encoder.layer.0.intermediate.dense', 'base_model.encoder.layer.0.output.dense', 'base_model.encoder.layer.1.attention.self.query', 'base_model.encoder.layer.1.attention.self.key', 'base_model.encoder.layer.1.attention.output.dense', 'base_model.encoder.layer.1.intermediate.dense', 'base_model.encoder.layer.1.output.dense', 'base_model.encoder.layer.2.attention.self.query', 'base_model.encoder.layer.2.attention.self.key', 'base_model.encoder.layer.2.attention.output.dense', 'base_model.encoder.layer.2.intermediate.dense', 'base_model.encoder.layer.2.output.dense', 'base_model.encoder.layer.3.attention.self.query', 'base_model.encoder.layer.3.attention.self.key', 'base_model.encoder.layer.3.attention.output.dense', 'base_model.encoder.layer.3.intermediate.dense', 'base_model.encoder.

### Starter code

In [None]:
# Hyper-parameters of the holdout run
num_epochs, batch_size = 30, 8
patience, min_delta = 3, 0.001
val_size = 0.3

# One output per class name
num_labels = len(labels_names)

# Classifier built on "camembert-base" with the library's default head
model = BeeMLMClassifier(model_name="camembert-base", num_labels=num_labels)

# Trainer with every option left at its default (no LoRA here; the recorded
# log shows the Adam optimizer with lr 1e-05 and no scheduler)
trainer = BeeTrainer(model=model)

# Train on a holdout split with early stopping
ret = trainer.holdout(
    texts=texts,
    labels=labels,
    val_size=val_size,
    num_epochs=num_epochs,
    batch_size=batch_size,
    patience=patience,
    min_delta=min_delta,
    balanced=True,
)

# Free CPU/GPU memory
trainer.release_model()

Device : cuda
Use optimizer : Adam
 - {'lr': 1e-05}
No scheduler used
Epoch 0/29, Train Loss: 1.3343, Val Loss: 1.0701, Val MCC: 0.5585, lr: 1.000e-05
Epoch 1/29, Train Loss: 0.7855, Val Loss: 0.7265, Val MCC: 0.7286, lr: 1.000e-05
Epoch 2/29, Train Loss: 0.5525, Val Loss: 0.5388, Val MCC: 0.8015, lr: 1.000e-05
Epoch 3/29, Train Loss: 0.4241, Val Loss: 0.4429, Val MCC: 0.8559, lr: 1.000e-05
Epoch 4/29, Train Loss: 0.3519, Val Loss: 0.4439, Val MCC: 0.8669, lr: 1.000e-05
Epoch 5/29, Train Loss: 0.3068, Val Loss: 0.3898, Val MCC: 0.8633, lr: 1.000e-05
Epoch 6/29, Train Loss: 0.2496, Val Loss: 0.3820, Val MCC: 0.8634, lr: 1.000e-05
Epoch 7/29, Train Loss: 0.2250, Val Loss: 0.3387, Val MCC: 0.8801, lr: 1.000e-05
Epoch 8/29, Train Loss: 0.1892, Val Loss: 0.3322, Val MCC: 0.8763, lr: 1.000e-05
Epoch 9/29, Train Loss: 0.1657, Val Loss: 0.3570, Val MCC: 0.8757, lr: 1.000e-05
Epoch 10/29, Train Loss: 0.1544, Val Loss: 0.3085, Val MCC: 0.8926, lr: 1.000e-05
Epoch 11/29, Train Loss: 0.1374, Val L

### Modify the classification head

In [None]:
# Hyper-parameters of the holdout run
num_epochs, batch_size = 30, 8
patience, min_delta = 3, 0.001
val_size = 0.3

# One output per class name
num_labels = len(labels_names)

# Custom classification head, 768 -> 512 -> 256 -> num_labels:
# ReLU after the first two layers, batch norm on the first, layer norm on the second
head_layer_configs = [
    dict(input_size=768, output_size=512, activation=nn.ReLU, batch_norm=True),
    dict(input_size=512, output_size=256, activation=nn.ReLU, layer_norm=True),
    dict(input_size=256, output_size=num_labels),
]

# Classifier built on "camembert-base" with the custom head
model = BeeMLMClassifier(
    model_name="camembert-base",
    num_labels=num_labels,
    head_layers=head_layer_configs,
)

# Trainer with default optimizer settings (no LoRA in this step)
trainer = BeeTrainer(model=model, labels_names=labels_names)

# Train on a holdout split with early stopping
ret = trainer.holdout(
    texts=texts,
    labels=labels,
    val_size=val_size,
    num_epochs=num_epochs,
    batch_size=batch_size,
    patience=patience,
    min_delta=min_delta,
    balanced=True,
)

# Free CPU/GPU memory
trainer.release_model()


Device : cuda
Use optimizer : Adam
 - {'lr': 1e-05}
No scheduler used
Epoch 0/29, Train Loss: 1.3351, Val Loss: 1.1422, Val MCC: 0.4502, lr: 1.000e-05
Epoch 1/29, Train Loss: 0.8332, Val Loss: 0.8430, Val MCC: 0.6287, lr: 1.000e-05
Epoch 2/29, Train Loss: 0.5602, Val Loss: 0.5850, Val MCC: 0.7402, lr: 1.000e-05
Epoch 3/29, Train Loss: 0.3758, Val Loss: 0.3997, Val MCC: 0.8080, lr: 1.000e-05
Epoch 4/29, Train Loss: 0.2665, Val Loss: 0.3226, Val MCC: 0.8547, lr: 1.000e-05
Epoch 5/29, Train Loss: 0.1953, Val Loss: 0.2997, Val MCC: 0.8510, lr: 1.000e-05
Epoch 6/29, Train Loss: 0.1546, Val Loss: 0.2894, Val MCC: 0.8617, lr: 1.000e-05
Epoch 7/29, Train Loss: 0.1105, Val Loss: 0.2820, Val MCC: 0.8770, lr: 1.000e-05
Epoch 8/29, Train Loss: 0.0943, Val Loss: 0.2702, Val MCC: 0.8855, lr: 1.000e-05
Epoch 9/29, Train Loss: 0.0759, Val Loss: 0.3019, Val MCC: 0.8802, lr: 1.000e-05
Epoch 10/29, Train Loss: 0.0854, Val Loss: 0.2804, Val MCC: 0.8958, lr: 1.000e-05
Epoch 11/29, Train Loss: 0.0681, Val L

### Use AdamW

In [None]:
# Hyper-parameters of the holdout run
num_epochs, batch_size = 30, 8
patience, min_delta = 3, 0.001
val_size = 0.3

# One output per class name
num_labels = len(labels_names)

# Custom classification head, 768 -> 512 -> 256 -> num_labels:
# ReLU after the first two layers, batch norm on the first, layer norm on the second
head_layer_configs = [
    dict(input_size=768, output_size=512, activation=nn.ReLU, batch_norm=True),
    dict(input_size=512, output_size=256, activation=nn.ReLU, layer_norm=True),
    dict(input_size=256, output_size=num_labels),
]

# Classifier built on "camembert-base" with the custom head
model = BeeMLMClassifier(
    model_name="camembert-base",
    num_labels=num_labels,
    head_layers=head_layer_configs,
)

# Trainer that optimizes with AdamW instead of the default optimizer (no LoRA yet)
trainer = BeeTrainer(
    model=model,
    labels_names=labels_names,
    optimizer_class=AdamW,
)

# Train on a holdout split with early stopping
ret = trainer.holdout(
    texts=texts,
    labels=labels,
    val_size=val_size,
    num_epochs=num_epochs,
    batch_size=batch_size,
    patience=patience,
    min_delta=min_delta,
    balanced=True,
)

# Free CPU/GPU memory
trainer.release_model()

Device : cuda
Use optimizer : AdamW
 - {'lr': 1e-05}
No scheduler used
Epoch 0/29, Train Loss: 1.3326, Val Loss: 1.1319, Val MCC: 0.4692, lr: 1.000e-05
Epoch 1/29, Train Loss: 0.8206, Val Loss: 0.8275, Val MCC: 0.6353, lr: 1.000e-05
Epoch 2/29, Train Loss: 0.5458, Val Loss: 0.5490, Val MCC: 0.7632, lr: 1.000e-05
Epoch 3/29, Train Loss: 0.3644, Val Loss: 0.4060, Val MCC: 0.8193, lr: 1.000e-05
Epoch 4/29, Train Loss: 0.2526, Val Loss: 0.3247, Val MCC: 0.8441, lr: 1.000e-05
Epoch 5/29, Train Loss: 0.1851, Val Loss: 0.2847, Val MCC: 0.8629, lr: 1.000e-05
Epoch 6/29, Train Loss: 0.1469, Val Loss: 0.2835, Val MCC: 0.8625, lr: 1.000e-05
Epoch 7/29, Train Loss: 0.1113, Val Loss: 0.2829, Val MCC: 0.8811, lr: 1.000e-05
Epoch 8/29, Train Loss: 0.0884, Val Loss: 0.2654, Val MCC: 0.8944, lr: 1.000e-05
Epoch 9/29, Train Loss: 0.0799, Val Loss: 0.2702, Val MCC: 0.8987, lr: 1.000e-05
Epoch 10/29, Train Loss: 0.0552, Val Loss: 0.3107, Val MCC: 0.8910, lr: 1.000e-05
Epoch 11/29, Train Loss: 0.0505, Val 

### Add a LoRA configuration

In [None]:
# Hyper-parameters of the holdout run
num_epochs, batch_size = 30, 8
patience, min_delta = 3, 0.001
val_size = 0.3

# One output per class name
num_labels = len(labels_names)

# Custom classification head, 768 -> 512 -> 256 -> num_labels:
# ReLU after the first two layers, batch norm on the first, layer norm on the second
head_layer_configs = [
    dict(input_size=768, output_size=512, activation=nn.ReLU, batch_norm=True),
    dict(input_size=512, output_size=256, activation=nn.ReLU, layer_norm=True),
    dict(input_size=256, output_size=num_labels),
]

# Classifier built on "camembert-base" with the custom head
model = BeeMLMClassifier(
    model_name="camembert-base",
    num_labels=num_labels,
    head_layers=head_layer_configs,
)

# Trainer using AdamW with LoRA enabled (r=64, alpha=128, dropout=0.01)
trainer = BeeTrainer(
    model=model,
    labels_names=labels_names,
    optimizer_class=AdamW,
    use_lora=True,
    lora_r=64,
    lora_alpha=128,
    lora_dropout=0.01,
)

# Train on a holdout split with early stopping
ret = trainer.holdout(
    texts=texts,
    labels=labels,
    val_size=val_size,
    num_epochs=num_epochs,
    batch_size=batch_size,
    patience=patience,
    min_delta=min_delta,
    balanced=True,
)

# Free CPU/GPU memory
trainer.release_model()

Device : cuda
Using Lora
Target modules : ['base_model.encoder.layer.0.attention.self.query', 'base_model.encoder.layer.0.attention.self.key', 'base_model.encoder.layer.0.attention.output.dense', 'base_model.encoder.layer.0.intermediate.dense', 'base_model.encoder.layer.0.output.dense', 'base_model.encoder.layer.1.attention.self.query', 'base_model.encoder.layer.1.attention.self.key', 'base_model.encoder.layer.1.attention.output.dense', 'base_model.encoder.layer.1.intermediate.dense', 'base_model.encoder.layer.1.output.dense', 'base_model.encoder.layer.2.attention.self.query', 'base_model.encoder.layer.2.attention.self.key', 'base_model.encoder.layer.2.attention.output.dense', 'base_model.encoder.layer.2.intermediate.dense', 'base_model.encoder.layer.2.output.dense', 'base_model.encoder.layer.3.attention.self.query', 'base_model.encoder.layer.3.attention.self.key', 'base_model.encoder.layer.3.attention.output.dense', 'base_model.encoder.layer.3.intermediate.dense', 'base_model.encoder.

## Cross-validation

In [None]:
# Hyper-parameters of the cross-validation run
num_epochs, batch_size = 30, 8
patience, min_delta = 3, 0.001
n_splits = 5

# One output per class name
num_labels = len(labels_names)

# Custom classification head, 768 -> 512 -> 256 -> num_labels:
# ReLU after the first two layers, batch norm on the first, layer norm on the second
head_layer_configs = [
    dict(input_size=768, output_size=512, activation=nn.ReLU, batch_norm=True),
    dict(input_size=512, output_size=256, activation=nn.ReLU, layer_norm=True),
    dict(input_size=256, output_size=num_labels),
]

# Classifier built on "camembert-base" with the custom head
model = BeeMLMClassifier(
    model_name="camembert-base",
    num_labels=num_labels,
    head_layers=head_layer_configs,
)

# Trainer using AdamW with LoRA enabled (r=64, alpha=128, dropout=0.01)
trainer = BeeTrainer(
    model=model,
    labels_names=labels_names,
    optimizer_class=AdamW,
    use_lora=True,
    lora_r=64,
    lora_alpha=128,
    lora_dropout=0.01,
)

# Cross-validate over n_splits folds with early stopping
rets = trainer.cross_validation(
    texts=texts,
    labels=labels,
    n_splits=n_splits,
    num_epochs=num_epochs,
    batch_size=batch_size,
    patience=patience,
    min_delta=min_delta,
    balanced=True,
)

# Free CPU/GPU memory
trainer.release_model()

Device : cuda


> Fold 1
Using Lora
Target modules : ['base_model.encoder.layer.0.attention.self.query', 'base_model.encoder.layer.0.attention.self.key', 'base_model.encoder.layer.0.attention.output.dense', 'base_model.encoder.layer.0.intermediate.dense', 'base_model.encoder.layer.0.output.dense', 'base_model.encoder.layer.1.attention.self.query', 'base_model.encoder.layer.1.attention.self.key', 'base_model.encoder.layer.1.attention.output.dense', 'base_model.encoder.layer.1.intermediate.dense', 'base_model.encoder.layer.1.output.dense', 'base_model.encoder.layer.2.attention.self.query', 'base_model.encoder.layer.2.attention.self.key', 'base_model.encoder.layer.2.attention.output.dense', 'base_model.encoder.layer.2.intermediate.dense', 'base_model.encoder.layer.2.output.dense', 'base_model.encoder.layer.3.attention.self.query', 'base_model.encoder.layer.3.attention.self.key', 'base_model.encoder.layer.3.attention.output.dense', 'base_model.encoder.layer.3.intermediate.dense', 'base_mod

## Full Training

In [None]:
# Hyper-parameters of the final training run
num_epochs = 9
batch_size = 8    # original note: 2
train_size = 1.0  # original note: 0.15

num_labels = len(labels_names)

# Custom classification head, 768 -> 512 -> 256 -> num_labels:
# ReLU after the first two layers, batch norm on the first, layer norm on the second
head_layer_configs = [
    dict(input_size=768, output_size=512, activation=nn.ReLU, batch_norm=True),
    dict(input_size=512, output_size=256, activation=nn.ReLU, layer_norm=True),
    dict(input_size=256, output_size=num_labels),
]

# Classifier built on "camembert-base" with the custom head
model = BeeMLMClassifier(
    model_name="camembert-base",
    num_labels=num_labels,
    head_layers=head_layer_configs,
)

# Trainer using AdamW with LoRA enabled (r=64, alpha=128, dropout=0.01)
trainer = BeeTrainer(
    model=model,
    labels_names=labels_names,
    optimizer_class=AdamW,
    use_lora=True,
    lora_r=64,
    lora_alpha=128,
    lora_dropout=0.01,
)

# Train for a fixed number of epochs; no validation or early-stopping
# arguments are passed to this call
ret = trainer.train(
    texts=texts,
    labels=labels,
    train_size=train_size,
    num_epochs=num_epochs,
    batch_size=batch_size,
    balanced=True,
)

# Save the model and the adapter to two separate directories
# ("adaptater" is the spelling used by the trainer method)
trainer.save_model("./model-multiclass_v1")
trainer.save_adaptater("./adaptater-multiclass_v1")

# Free CPU/GPU memory
trainer.release_model()

Device : cuda
Using Lora
Target modules : ['base_model.encoder.layer.0.attention.self.query', 'base_model.encoder.layer.0.attention.self.key', 'base_model.encoder.layer.0.attention.output.dense', 'base_model.encoder.layer.0.intermediate.dense', 'base_model.encoder.layer.0.output.dense', 'base_model.encoder.layer.1.attention.self.query', 'base_model.encoder.layer.1.attention.self.key', 'base_model.encoder.layer.1.attention.output.dense', 'base_model.encoder.layer.1.intermediate.dense', 'base_model.encoder.layer.1.output.dense', 'base_model.encoder.layer.2.attention.self.query', 'base_model.encoder.layer.2.attention.self.key', 'base_model.encoder.layer.2.attention.output.dense', 'base_model.encoder.layer.2.intermediate.dense', 'base_model.encoder.layer.2.output.dense', 'base_model.encoder.layer.3.attention.self.query', 'base_model.encoder.layer.3.attention.self.key', 'base_model.encoder.layer.3.attention.output.dense', 'base_model.encoder.layer.3.intermediate.dense', 'base_model.encoder.

## Predictions

### Reload model

In [None]:
# Load the trained model from the directory written by trainer.save_model(...)
# NOTE(review): the directory written by trainer.save_adaptater(...)
# ("./adaptater-multiclass_v1") is not read here -- confirm that the LoRA
# weights are included in what this call restores.
bee_mlm_model = BeeMLMClassifier.load_model_safetensors("./model-multiclass_v1")

### Predictions

In [None]:
# Predict on the test texts on CPU in batches of 50 (predict presumably returns
# encoded class ids comparable with test_labels -- confirm), then score the
# predictions with the Matthews correlation coefficient.
# NOTE(review): the recorded result is about -0.04, i.e. chance level, while the
# holdout runs in this notebook log a validation MCC of about 0.89. The reloaded
# model may be missing the trained LoRA weights -- investigate before relying
# on this number.
y_preds = bee_mlm_model.predict(test_texts, batch_size=50, device="cpu")
mcc = matthews_corrcoef(test_labels, y_preds)
print(mcc)

-0.0421334602516051


# End of game