In [43]:
!pip install -q -U accelerate datasets peft transformers trl wandb bitsandbytes

In [None]:
import os
# Redémarre le carnet Jupyter immédiatement après l'intallation des deps, pour êtres à jours
os._exit(0)

In [44]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM

# Specify the checkpoint for SmolLM2 and set the device.
checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"

# Load the tokenizer and model.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="auto",
        )

In [6]:
tokenizer.pad_token_id, tokenizer.eos_token_id, tokenizer.pad_token, tokenizer.eos_token

(2, 2, '<|im_end|>', '<|im_end|>')

In [11]:
tokenizer.decode(tokenizer.encode("19603-445954-697843-603692-781009-502356-747355-640146-569094-1070777-536437-121342-325358-667538-960033-496450-447365-1037644-298618-713704-1121116"))

'19603-445954-697843-603692-781009-502356-747355-640146-569094-1070777-536437-121342-325358-667538-960033-496450-447365-1037644-298618-713704-1121116'

# Dataset

Json structure output: https://huggingface.co/datasets/ChristianAzinn/json-training

In [45]:
from datasets import load_dataset

dataset_name = "wykonos/movies"
ds = load_dataset(dataset_name)["train"]

In [46]:
# Créer un dictionnaire ID -> Titre
id_to_title = {row["id"]: row["title"] for row in ds}

In [47]:
# Filtrer les exemples qui ont une recommandation
ds_filtre = ds.filter(lambda example: bool(example["recommendations"]))

# Fonction pour remplacer les IDs par les titres
def replace_ids_with_titles(example):
    titles = []
    for movie_id in example["recommendations"].split('-'):
        if id_to_title.get(int(movie_id)):
            titles.append(id_to_title.get(int(movie_id)))

        if len(titles) == 4:
            break

    example["recommendations"] = None if len(titles) == 0 else  ", ".join(titles)
    return example

# Appliquer la transformation sur le dataset filtré
ds_filtre = ds_filtre.map(replace_ids_with_titles).filter(lambda example: bool(example["recommendations"]))

Map:   0%|          | 0/34720 [00:00<?, ? examples/s]

Filter:   0%|          | 0/34720 [00:00<?, ? examples/s]

In [48]:

# Perform Train-Test Split
#split_ds = ds["train"].train_test_split(test_size=0.2, seed=42)

# Access train and test splits
train_dataset = ds_filtre.select(range(50))
test_dataset = train_dataset

In [49]:
for row in train_dataset:
    print(row["title"], "::",row["recommendations"])

Fast X :: Ciao, Acts of Vengeance, Extraction 2, John Wick: Chapter 4
The Pope's Exorcist :: Evil Dead Rise, The Devil Conspiracy, The Super Mario Bros. Movie, Suzume
Ant-Man and the Wasp: Quantumania :: Diabolik - Ginko all'attacco!, Infelices para Siempre, Ghosted, Miracle in Kasama
Creed III :: Lord of the Streets, There Are No Saints, Prizefighter: The Life of Jem Belcher, Bandit
The Super Mario Bros. Movie :: Evil Dead Rise, Fast X, Ant-Man and the Wasp: Quantumania, Erotic Ghost Story
Knock at the Cabin :: Plane, Huesera: The Bone Woman, Black Panther: Wakanda Forever, Private Peaceful
John Wick: Chapter 4 :: Demon Slayer: Kimetsu no Yaiba Mt. Natagumo Arc, The Super Mario Bros. Movie, Fast X, B.O.O.O.M.
Sisu :: The Mother, Evil Dead Rise, Guy Ritchie's The Covenant, The Devil Conspiracy
Plane :: Black Panther: Wakanda Forever, Unwelcome, Pathaan, Knock at the Cabin
Spider-Man: Across the Spider-Verse :: Miraculous: Ladybug & Cat Noir, The Movie, Transformers: Rise of the Beasts,

In [51]:
# Set the response template to match the chat format.
# (Ensure this string exactly matches the beginning of the assistant's response as output by apply_chat_template.)
response_template = "<|im_start|>assistant\n"
instruction_template = "<|im_start|>user\n"
PROMPT_TEMPLATE = """Suggest movies similar to {title}
movie recommendations:"""


def formatting_prompts_func(example):
    """
    Converts each example into a conversation string using the tokenizer's chat template.
    Assumes each example contains lists under "instruction" and "output".
    """
    output_texts = []
    for i in range(len(example["title"])):
        if example["recommendations"][i] is None:
            continue
        # Build a conversation with a user message and an assistant reply.
        messages = [
            {
                "role":    "system",
                "content": "You are are an expert in movie recommendation"
                },
            {"role": "user", "content": PROMPT_TEMPLATE.format(title=example["title"][i])},
            # Note: It is important that the assistant message content here does not
            # include the assistant marker, because the chat template will insert it.
            # + "<|im_end|>"
            {"role": "assistant", "content": example["recommendations"][i]}
            ]
        # Use the chat template to generate the formatted text.
        text = tokenizer.apply_chat_template(messages, tokenize=False)
        output_texts.append(text)
    print(output_texts)
    return output_texts


# Create the data collator.
# It will search for the response_template (here "Assistant:") in the formatted text
# and ensure that only tokens after that marker contribute to the loss.
collator = DataCollatorForCompletionOnlyLM(response_template=response_template,
                                           instruction_template=instruction_template,
                                           tokenizer=tokenizer,
                                           mlm=False)

In [52]:
tokenizer.apply_chat_template([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello, how are you?"},
    {"role": "assistant", "content": "I am good, thank you."}
    ], tokenize=False)

'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI am good, thank you.<|im_end|>\n'

# Lora Config

In [53]:
from peft import LoraConfig

# Note that r, in the figure above, is a hyperparameter here that we can use to specify the rank of the low-rank matrices used for adaptation.
# A smaller r leads to a simpler low-rank matrix, which results in fewer parameters to learn during adaptation.
# This can lead to faster training and potentially reduced computational requirements.
# However, with a smaller r, the capacity of the low-rank matrix to capture task-specific information decreases.
# This may result in lower adaptation quality, and the model might not perform as well on the new task compared to a higher r.
lora_config = LoraConfig(
        r=64,
        lora_alpha=128,
        lora_dropout=0.05,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",],
        bias="none",
        task_type="CAUSAL_LM",
        )

# Wandb

Creat token and account: https://wandb.ai/home

In [54]:
import wandb
import getpass

token = getpass.getpass()
wandb.login(key=token)



True

# Hub login

Obtenir votre token API en Write: https://huggingface.co/settings/tokens

In [55]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Crée un model dans votre organisation puis modifier la var suivent: https://huggingface.co/new

In [56]:
hub_model_id = "RealDragonMA/Pelliculum-Chatbot"

# SFT Trainer config

In [57]:
OUTPUT_DIR = checkpoint.split("/")[-1] + "-movie-reco"

# setup the trainer
trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        args=SFTConfig(
                per_device_train_batch_size=2,
                gradient_accumulation_steps=4,
                warmup_steps=10,
                num_train_epochs=3,
                max_steps=1000,
                learning_rate=0.0002,
                lr_scheduler_type="cosine",
                eval_strategy="steps",
                eval_steps=150,
                weight_decay=0.01,
                bf16=True,
                logging_strategy="steps",
                logging_steps=10,
                output_dir="./" + OUTPUT_DIR,
                optim="paged_adamw_8bit",
                seed=42,
                run_name=f"train-{OUTPUT_DIR}",
                report_to="wandb",
                save_steps=31,
                push_to_hub=True,
                hub_model_id=hub_model_id,
                save_total_limit=4,
                ),
        peft_config=lora_config,
        formatting_func=formatting_prompts_func,
        data_collator=collator,
        )

['<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to F\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\nC<|im_end|>\n', '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to a\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\ni<|im_end|>\n', '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to s\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\na<|im_end|>\n', '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to t\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\no<|im_end|>\n', '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to  \nmovie recommendations:<|im_end|>\n<|im_start|>assistant\n,<|im_end|>\n', '<|im_sta

Applying formatting function to train dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

['<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to Fast X\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\nCiao, Acts of Vengeance, Extraction 2, John Wick: Chapter 4<|im_end|>\n', "<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to The Pope's Exorcist\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\nEvil Dead Rise, The Devil Conspiracy, The Super Mario Bros. Movie, Suzume<|im_end|>\n", "<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to Ant-Man and the Wasp: Quantumania\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\nDiabolik - Ginko all'attacco!, Infelices para Siempre, Ghosted, Miracle in Kasama<|im_end|>\n", '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to Creed III\nmovie r

Converting train dataset to ChatML:   0%|          | 0/50 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

['<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to F\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\nC<|im_end|>\n', '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to a\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\ni<|im_end|>\n', '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to s\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\na<|im_end|>\n', '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to t\nmovie recommendations:<|im_end|>\n<|im_start|>assistant\no<|im_end|>\n', '<|im_start|>system\nYou are are an expert in movie recommendation<|im_end|>\n<|im_start|>user\nSuggest movies similar to  \nmovie recommendations:<|im_end|>\n<|im_start|>assistant\n,<|im_end|>\n', '<|im_sta

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [58]:
import os
from transformers import is_torch_xpu_available, is_torch_npu_available
import torch

# Lancement du processus d'entraînement du modèle.
# Ici, 'trainer.train()' déclenche la phase de fine-tuning,
# dans laquelle les paramètres du modèle sont ajustés sur une tâche spécifique
# en utilisant des données d'entraînement pertinentes.
trainer.train()

# Une fois l'entraînement terminé, on sauvegarde l'adaptateur LoRA (fine-tuning léger).
# LoRA (Low-Rank Adaptation) est une technique destinée à fine-tuner les grands
# modèles en modifiant uniquement un sous-ensemble restreint de paramètres.
final_checkpoint_dir = os.path.join(OUTPUT_DIR, "final_checkpoint")
trainer.save_model(final_checkpoint_dir)

Step,Training Loss,Validation Loss
150,0.2988,0.203932
300,0.0075,0.005232
450,0.0021,0.001892
600,0.0015,0.001331
750,0.0013,0.001148
900,0.0013,0.00109


Push du model dans le hub

In [59]:
trainer.push_to_hub(dataset_name=dataset_name)

CommitInfo(commit_url='https://huggingface.co/RealDragonMA/Pelliculum-Chatbot/commit/28f514640dee8e9b19f51c7fa6719363c1afe6a4', commit_message='End of training', commit_description='', oid='28f514640dee8e9b19f51c7fa6719363c1afe6a4', pr_url=None, repo_url=RepoUrl('https://huggingface.co/RealDragonMA/Pelliculum-Chatbot', endpoint='https://huggingface.co', repo_type='model', repo_id='RealDragonMA/Pelliculum-Chatbot'), pr_revision=None, pr_num=None)

In [60]:
# Nettoyage des ressources mémoire pour libérer l'espace GPU ou autres accélérateurs,
# ce qui est utile avant de fusionner l'adaptateur LoRA avec le modèle de base.
del model  # Suppression explicite du modèle de la mémoire.

# Vider les caches des accélérateurs (XPU, NPU ou GPU en fonction de la disponibilité).
# Cela optimise l'utilisation future des ressources.
if is_torch_xpu_available():
    torch.xpu.empty_cache()  # Vide les caches spécifiques pour XPU.
elif is_torch_npu_available():
    torch.npu.empty_cache()  # Vide les caches spécifiques pour NPU.
else:
    torch.cuda.empty_cache()  # Vide les caches GPU standard.

# Chargement du modèle adapté (en incluant l'adaptateur LoRA) pour effectuer une fusion
# avec le modèle de base. Cela permet de sauvegarder un modèle autonome optimisé.
from peft import AutoPeftModelForCausalLM

# Chargement du modèle préalablement sauvegardé depuis le répertoire OUTPUT_DIR.
# Les paramètres 'device_map' et 'torch_dtype' permettent d'optimiser le chargement :
# - 'device_map="auto"' ajuste automatiquement le placement sur le GPU, CPU ou autre.
# - 'torch_dtype=torch.bfloat16' utilise un format numérique bfloat16, qui réduit
#    la mémoire nécessaire tout en maintenant des performances stables.
model = AutoPeftModelForCausalLM.from_pretrained(
        OUTPUT_DIR,
        device_map="auto",
        torch_dtype=torch.bfloat16
        )

# Fusion de l'adaptateur LoRA directement dans le modèle de base,
# afin de produire un modèle final unique tout en réduisant ses redondances.
model = model.merge_and_unload()

# Sauvegarde du modèle fusionné dans un répertoire spécifique.
# 'safe_serialization=True' garantit que le modèle est stocké au format sûr,
# pour une compatibilité future et une intégrité des données.
output_merged_dir = os.path.join(OUTPUT_DIR, "final_merged_checkpoint")
model.save_pretrained(output_merged_dir, safe_serialization=True)

push du model dans le hub mais avec l'adaptateur lora merge directement

In [61]:
model.push_to_hub(hub_model_id)

README.md:   0%|          | 0.00/1.84k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/RealDragonMA/Pelliculum-Chatbot/commit/f14f6fba9a6a2117171ed8153d16b80a78f122e2', commit_message='Upload LlamaForCausalLM', commit_description='', oid='f14f6fba9a6a2117171ed8153d16b80a78f122e2', pr_url=None, repo_url=RepoUrl('https://huggingface.co/RealDragonMA/Pelliculum-Chatbot', endpoint='https://huggingface.co', repo_type='model', repo_id='RealDragonMA/Pelliculum-Chatbot'), pr_revision=None, pr_num=None)

# inference

In [62]:
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # for GPU usage or "cpu" for CPU usage
tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
model = AutoModelForCausalLM.from_pretrained(OUTPUT_DIR).to(device)

In [63]:
model = model.eval()

In [64]:
idx = 1
messages = [
          {
                "role":    "system",
                "content": "You are are an expert in movie recommendation"
                },
    {
        "role":    "user",
        "content": PROMPT_TEMPLATE.format(title=train_dataset[idx]["title"])
        },
    ]
print(train_dataset[idx]["title"], "-->",train_dataset[idx]["recommendations"])
device = "cuda"
input_text = tokenizer.apply_chat_template(messages, tokenize=False)
print("----------------- Generated text -----------------")
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
outputs = model.generate(inputs,
                         max_new_tokens=50,
                         temperature=0.2,
                         top_p=0.45,
                         eos_token_id=tokenizer.eos_token_id,   # <-- crucial
                         pad_token_id=tokenizer.eos_token_id,    # often set pad = eos
                         do_sample=True)
print(tokenizer.decode(outputs[0]))

The Pope's Exorcist --> Evil Dead Rise, The Devil Conspiracy, The Super Mario Bros. Movie, Suzume
----------------- Generated text -----------------
<|im_start|>system
You are are an expert in movie recommendation<|im_end|>
<|im_start|>user
Suggest movies similar to The Pope's Exorcist
movie recommendations:<|im_end|>
<|im_start|>assistant
Evil Dead Rise, The Devil Conspiracy, The Super Mario Bros. Movie, Suzume, 3-3, The Devotion, The Super Mario Bros. Movie, The Super Marioatu, The Devil
