In [62]:
!pip install -q -U accelerate datasets peft transformers trl wandb bitsandbytes

In [63]:

from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM

# Hub identifier of the SmolLM2 instruct checkpoint that will be fine-tuned.
checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"

# Load the tokenizer and the base model from that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# device_map="auto" lets the library choose device placement instead of pinning one by hand.
model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="auto",  # alternative noted by the author: {"": PartialState().process_index}
        )

In [64]:
# Display the tokenizer's repr (special tokens, padding side, vocabulary size).
tokenizer

GPT2TokenizerFast(name_or_path='HuggingFaceTB/SmolLM2-135M-Instruct', vocab_size=49152, model_max_length=8192, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|im_start|>', 'eos_token': '<|im_end|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|im_end|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	3: AddedToken("<repo_name>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	4: AddedToken("<reponame>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	5: AddedToken("<file_sep>"

In [65]:
# Display the module tree of the loaded base model.
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(49152, 576, padding_idx=2)
    (layers): ModuleList(
      (0-29): 30 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=576, out_features=576, bias=False)
          (k_proj): Linear(in_features=576, out_features=192, bias=False)
          (v_proj): Linear(in_features=576, out_features=192, bias=False)
          (o_proj): Linear(in_features=576, out_features=576, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=576, out_features=1536, bias=False)
          (up_proj): Linear(in_features=576, out_features=1536, bias=False)
          (down_proj): Linear(in_features=1536, out_features=576, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((576,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((576,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((576,), eps=1e-05)
    (rotary_emb)

# Dataset

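Bro Code cases with verdicts (columns `case` and `verdict`): https://huggingface.co/datasets/TommyDIL/Bro-Cases

(This notebook appears to be adapted from a JSON structured-output example that used https://huggingface.co/datasets/ChristianAzinn/json-training; that dataset is not loaded here.)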

In [66]:
from datasets import load_dataset

# Fetch the Bro Code cases from the Hub.
ds = load_dataset("TommyDIL/Bro-Cases")

# Hold out 20% of the "train" split for evaluation; the fixed seed keeps the split reproducible.
split_ds = ds["train"].train_test_split(seed=42, test_size=0.2)

# Unpack both partitions in one statement.
train_dataset, test_dataset = split_ds["train"], split_ds["test"]

In [67]:
# Display the training split (column names and row count).
train_dataset

Dataset({
    features: ['case', 'verdict'],
    num_rows: 3320
})

In [68]:
# Markers that delimit the turns in the chat-formatted text.
# response_template must exactly match the text that apply_chat_template emits at the
# start of the assistant turn; instruction_template does the same for the user turn.
response_template = "<|im_start|>assistant\n"
instruction_template = "<|im_start|>user\n"
# System prompt shared by every conversation. It contains no str.format placeholders;
# the literal "<<<>>>" only tells the model where the case text is delimited.
PROMPT_TEMPLATE = """You are an AI Judge specialized in interpreting and enforcing the sacred laws of the Bro Code. Given a case describing a social situation among Bros after <<<>>>, your role is to analyze the events and determine whether a violation of the Bro Code has occurred.

Using the official Bro Code as your reference, carefully assess the actions of the Bros involved. If a violation is found, clearly state which Bromandment or Article was broken and explain why. If the situation is ambiguous or does not constitute a clear violation, provide a reasoned verdict based on the principles of Bro justice.

Your response should be structured as follows:

    Verdict – State whether a violation has occurred.
    Bro Code Reference – Cite the relevant Bromandment(s) or Article(s).
    Explanation – Provide a clear and concise reasoning for the verdict.
    Possible Consequences or Resolutions – If applicable, suggest an appropriate response (e.g., an apology, a duel, or invoking a Broll).

Maintain a fair and neutral tone, upholding the spirit of brotherhood and justice while ensuring that Bros are held accountable to the sacred Code."""

# User-turn wrapper: the case text is substituted for {text} between the <<< >>> delimiters.
USER_PROMPT_TEMPLATE = """<<<
Case:
{text}
>>>
"""

def formatting_prompts_func(example):
    """
    Render a batch of Bro Code cases as chat-formatted training strings.

    `example` is expected to be a batched mapping whose "case" and "verdict"
    entries are parallel lists. Every (case, verdict) pair becomes a
    system / user / assistant conversation rendered with the tokenizer's
    chat template.

    Returns a list with one formatted string per case. When "case" is not a
    list (for instance a single, un-batched example) an empty list is returned.
    """
    cases = example["case"]
    if not isinstance(cases, list):
        return []

    def _render(idx):
        # Build the three-turn conversation for the idx-th pair of the batch.
        conversation = [
            {"role": "system", "content": PROMPT_TEMPLATE},
            {"role": "user", "content": USER_PROMPT_TEMPLATE.format(text=example["case"][idx])},
            # The verdict is passed verbatim, without any assistant marker:
            # the chat template inserts that marker itself.
            {"role": "assistant", "content": example["verdict"][idx]},
        ]
        return tokenizer.apply_chat_template(conversation, tokenize=False)

    return [_render(idx) for idx in range(len(cases))]


# Create the data collator.
# It searches the formatted text for response_template (here "<|im_start|>assistant\n")
# and ensures that only tokens after that marker contribute to the loss;
# instruction_template marks where a user turn begins.
collator = DataCollatorForCompletionOnlyLM(response_template=response_template,
                                           instruction_template=instruction_template,
                                           tokenizer=tokenizer,
                                           mlm=False)



In [69]:
# Preview one fully rendered conversation.
# The system and user turns are built from the shared PROMPT_TEMPLATE / USER_PROMPT_TEMPLATE
# (instead of hand-copied literals) so this preview cannot drift from the text that
# formatting_prompts_func feeds to the trainer.
tokenizer.apply_chat_template([
    {"role": "system", "content": PROMPT_TEMPLATE},
    {"role": "user", "content": USER_PROMPT_TEMPLATE.format(
        text="Bro Ethan agreed to be Bro Jake’s wingman at a party. However, instead of helping Jake talk to his crush, Ethan ended up flirting with her himself and later left the party with her."
    )},
    {"role": "assistant", "content": """Verdict: Violation of the Bro Code.
Bro Code Reference: Bromandment I – Bros before hoes; Article 3 – The Conquest Clause.
Explanation: As a wingman, Bro Ethan had a duty to support Bro Jake, not compete with him. By making a move on Jake’s crush, he directly violated both the principle of brotherly loyalty and the Conquest Clause.
Possible Consequences or Resolutions: Bro Ethan should apologize and recognize his failure as a wingman. If the situation is severe, Bro Jake may challenge him to a Duel of Conquest (Article 5)."""}
    ], tokenize=False)

'<|im_start|>system\nYou are an AI Judge specialized in interpreting and enforcing the sacred laws of the Bro Code. Given a case describing a social situation among Bros after <<<>>>, your role is to analyze the events and determine whether a violation of the Bro Code has occurred.\n\nUsing the official Bro Code as your reference, carefully assess the actions of the Bros involved. If a violation is found, clearly state which Bromandment or Article was broken and explain why. If the situation is ambiguous or does not constitute a clear violation, provide a reasoned verdict based on the principles of Bro justice.\n\nYour response should be structured as follows:\n\n    Verdict – State whether a violation has occurred.\n    Bro Code Reference – Cite the relevant Bromandment(s) or Article(s).\n    Explanation – Provide a clear and concise reasoning for the verdict.\n    Possible Consequences or Resolutions – If applicable, suggest an appropriate response (e.g., an apology, a duel, or invok

# LoRA Config

In [70]:
from peft import LoraConfig

# `r` is the rank of the low-rank matrices used for adaptation.
#   - A smaller rank means fewer trainable parameters, hence faster training and a
#     lighter compute footprint.
#   - The price is capacity: a low rank can capture less task-specific information,
#     so adaptation quality may be worse than with a higher rank.
# Only the attention projections (q/k/v/o_proj) are targeted; the MLP projections
# (gate/up/down_proj) are not adapted.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["o_proj", "k_proj", "q_proj", "v_proj"],
)

# Wandb

Create a Weights & Biases account and an API key: https://wandb.ai/home

In [None]:
import wandb
import getpass

# Read the W&B API key interactively so it is never written into the notebook source,
# then log in with it.
token = getpass.getpass()
wandb.login(key=token)

In [None]:
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub from inside the notebook.
notebook_login()

In [None]:
# Hub repository id; passed to SFTConfig(hub_model_id=...) and to model.push_to_hub(...).
hub_model_id = "TommyDIL/BroBot"

In [None]:
# Local output folder, named after the last path component of the base checkpoint id.
OUTPUT_DIR = f'{checkpoint.split("/")[-1]}-structure-output'

# Hyper-parameters for supervised fine-tuning, grouped by concern.
sft_args = SFTConfig(
    # --- bookkeeping ---
    output_dir="./" + OUTPUT_DIR,
    run_name=f"train-{OUTPUT_DIR}",
    hub_model_id=hub_model_id,
    report_to="wandb",
    seed=42,
    # --- batching: 2 samples per device x 4 accumulation steps per optimizer update ---
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # --- optimisation ---
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    max_steps=1000,
    weight_decay=0.01,
    bf16=True,
    # --- logging / evaluation / checkpoint cadence (all counted in steps) ---
    logging_strategy="steps",
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=150,
    save_steps=31,
    save_total_limit=4,
)

# Wire everything together: base model, LoRA config, data splits, the chat formatting
# function and the completion-only collator.
trainer = SFTTrainer(
    model=model,
    args=sft_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    peft_config=lora_config,
    formatting_func=formatting_prompts_func,
    data_collator=collator,
)

# SFT Training

In [None]:
import os
from transformers import is_torch_xpu_available, is_torch_npu_available
import torch

# Start the training process.
# `trainer.train()` runs the fine-tuning phase, in which the model's parameters
# are adjusted for the specific task using the training data.
trainer.train()

# Once training has finished, save the LoRA adapter (lightweight fine-tuning).
# LoRA (Low-Rank Adaptation) is a technique for fine-tuning large models by
# modifying only a restricted subset of parameters.
final_checkpoint_dir = os.path.join(OUTPUT_DIR, "final_checkpoint")
trainer.model.save_pretrained(final_checkpoint_dir)

In [None]:
# Upload the trainer's output to the Hub.
# NOTE(review): dataset_name is presumably forwarded to the generated model card - confirm.
trainer.push_to_hub(dataset_name="TommyDIL/Bro-Cases")

In [None]:
import gc

# Release the training objects before re-loading the model for the merge.
# `trainer` keeps its own reference to the model, so dropping only `model` would not make
# the memory reclaimable. pop() (instead of `del`) keeps this cell re-runnable.
for _stale_name in ("model", "trainer"):
    globals().pop(_stale_name, None)
gc.collect()

# Empty the accelerator cache (XPU, NPU or CUDA, depending on what is available).
if is_torch_xpu_available():
    torch.xpu.empty_cache()
elif is_torch_npu_available():
    torch.npu.empty_cache()
else:
    torch.cuda.empty_cache()

# Load the base model together with the LoRA adapter so the two can be merged
# into a single standalone model.
from peft import AutoPeftModelForCausalLM

# The training cell writes the adapter to OUTPUT_DIR/final_checkpoint. Load it from there:
# the OUTPUT_DIR root only contains an adapter if the (optional) Hub push cell was run.
adapter_dir = os.path.join(OUTPUT_DIR, "final_checkpoint")

# - device_map="auto" chooses the device placement automatically.
# - torch_dtype=torch.bfloat16 loads the weights in bfloat16 to reduce memory use.
model = AutoPeftModelForCausalLM.from_pretrained(
        adapter_dir,
        device_map="auto",
        torch_dtype=torch.bfloat16
        )

# Fold the LoRA weights into the base model, producing one plain model.
model = model.merge_and_unload()

# Save the merged model; safe_serialization=True writes the safetensors format.
output_merged_dir = os.path.join(OUTPUT_DIR, "final_merged_checkpoint")
model.save_pretrained(output_merged_dir, safe_serialization=True)

In [None]:
# Push the model currently bound to `model` to the Hub repository hub_model_id.
# NOTE(review): the trainer is configured with the same hub_model_id, so adapter files and
# these weights would land in one repository - confirm that this is intended.
model.push_to_hub(hub_model_id)

# Inference

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

import os

import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Run inference on the merged, standalone model written by the merge cell. The OUTPUT_DIR
# root is not a reliable source: it only holds model/tokenizer files if the Hub push ran.
merged_model_dir = os.path.join(OUTPUT_DIR, "final_merged_checkpoint")

# Fine-tuning added no tokens, so the base checkpoint's tokenizer (and chat template) applies.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# For multiple GPUs, install accelerate and pass device_map="auto" instead of calling .to(device).
model = AutoModelForCausalLM.from_pretrained(merged_model_dir).to(device)

In [None]:
# JSON-schema fixtures (schema, natural-language query, example response) for a
# structured-output task.
# NOTE(review): PROMPT_TEMPLATE as defined in this notebook has no {query}/{schema}
# placeholders, so these values cannot influence a prompt built from it. They are presumably
# leftovers from an earlier notebook - confirm before removing.
test_json_schema = """{
  "type": "object",
  "properties": {
    "weather_data": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "year": { "type": "integer" },
          "station": { "type": "string" },
          "temperature": {
            "type": "object",
            "properties": {
              "min": { "type": "number" },
              "max": { "type": "number" }
            },
            "required": ["min", "max"]
          },
          "events": {
            "type": "array",
            "items": { "type": "string" }
          }
        },
        "required": ["year", "station", "temperature", "events"]
      }
    },
    "required": ["weather_data"]
  }
}"""

test_query = "Provide a detailed breakdown of meteorological data recorded in the city of Berlin from 2015 to 2020. The data should include the year, meteorological station, temperature ranges (minimum and maximum), and any significant events."

test_response = """{
  "weather_data": [
    {
      "year": 2015,
      "station": "Berlin Central Station",
      "temperature": { "min": -5.2, "max": 35.1 },
      "events": ["Heavy snowfall in January", "Heatwave in July"]
    },
    {
      "year": 2017,
      "station": "Berlin East Station",
      "temperature": { "min": -4.0, "max": 32.8 },
      "events": ["Thunderstorms in April", "Flooding in June"]
    },
    {
      "year": 2020,
      "station": "Berlin West Station",
      "temperature": { "min": -3.9, "max": 36.5 },
      "events": ["Drought in September", "Blizzards in February"]
    }
  ]
}"""

# Build the inference prompt exactly like the training conversations: PROMPT_TEMPLATE as the
# system turn and the case wrapped by USER_PROMPT_TEMPLATE as the user turn.
# (Previously PROMPT_TEMPLATE.format(query=..., schema=...) was used as the user turn; the
# template has no such placeholders, so both arguments were silently ignored and the model
# received the system prompt as the user message.)
# A held-out case from the evaluation split serves as the smoke test.
test_case = test_dataset[0]["case"]

messages = [
    {
        "role":    "system",
        "content": PROMPT_TEMPLATE
        },
    {
        "role":    "user",
        "content": USER_PROMPT_TEMPLATE.format(text=test_case)
        },
    ]

In [None]:
# add_generation_prompt=True appends the assistant header ("<|im_start|>assistant\n") so the
# model starts writing its own turn instead of continuing the user's message.
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(input_text)
print("----------------- Generated text -----------------")
# The chat template already inserted every special token; add_special_tokens=False keeps the
# tokenizer from adding them a second time.
inputs = tokenizer.encode(input_text, return_tensors="pt", add_special_tokens=False).to(device)
outputs = model.generate(inputs, max_new_tokens=1024, temperature=0.2, top_p=0.9, do_sample=True)
# Decodes the whole sequence (prompt + completion), special tokens included.
print(tokenizer.decode(outputs[0]))