Install dependencies

In [1]:
%%capture
# Installe Unsloth et les dépendances pour T4
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
!pip install scipy

Fine-tune a model

In [2]:
import json
import random
import numpy as np
import torch
import re
import statistics
from scipy.stats import spearmanr
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
from tqdm import tqdm
from google.colab import files

# ==========================================
# 1. CHARGEMENT ET PRÉPARATION (AVEC UPLOAD)
# ==========================================
print("--- ÉTAPE 1 : Importation et Préparation ---")

# --- WIDGET D'UPLOAD ---
print("Veuillez sélectionner le fichier train.json depuis votre ordinateur :")
uploaded = files.upload()

# Récupération automatique du nom du fichier
filename = next(iter(uploaded))
print(f"Fichier reçu : {filename}")

# Chargement
with open(filename, 'r') as f:
    data = json.load(f)

dataset = []
meaning_map = {1: "Impossible", 2: "Unlikely", 3: "Neutral", 4: "Plausible", 5: "Perfect"}

for key, item in data.items():
    # Cible
    if 'choices' in item:
        mean_val = np.mean(item['choices'])
    else:
        mean_val = item.get('average', 0)
    label = int(round(mean_val))

    # --- CORRECTION CRITIQUE DES IDS ---
    sample_id = key

    # Prompt Chain of Thought
    input_text = (
        f"Context: {item['precontext']}\n"
        f"Sentence: {item['sentence']}\n"
        f"Word: {item['homonym']}\n"
        f"Meaning: {item['judged_meaning']}\n"
        f"Task: Analyze if the word implies this meaning in this context. Explain briefly then rate 1-5."
    )
    output_text = f"Rationale: The meaning fits {meaning_map.get(label, 'well')}.\nScore: {label}"

    entry = {
        "id": sample_id,
        "input_text": input_text,
        "output_text": output_text
    }

    # --- OVERSAMPLING ---
    # x3 pour les extrêmes (1 et 5)
    if label == 1 or label == 5:
        dataset.extend([entry] * 3)
    elif label == 2 or label == 4:
        dataset.extend([entry] * 1)
    else:
        dataset.extend([entry] * 1)

# Mélange
random.seed(42)
random.shuffle(dataset)

# --- CHANGEMENT ICI : PAS DE SPLIT ---
# On prend tout le dataset pour l'entraînement
train_data = dataset
print(f"Données prêtes. Total Entraînement: {len(train_data)} (incluant duplications)")

def save_jsonl(data_list, filename):
    with open(filename, 'w') as f:
        for e in data_list:
            json_obj = {
                "instruction": e['input_text'],
                "input": "",
                "output": e['output_text'],
                "id": e['id']
            }
            f.write(json.dumps(json_obj) + '\n')

# On sauvegarde tout dans un seul fichier d'entraînement
save_jsonl(train_data, 'train_full.jsonl')

# ==========================================
# 2. CONFIGURATION MODÈLE
# ==========================================
print("\n--- ÉTAPE 2 : Chargement Modèle ---")
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "openai/gpt-oss-20b",
    max_seq_length = max_seq_length,
    dtype = None,
    load_in_4bit = True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 32,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 64,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    outputs      = examples["output"]
    texts = []
    for instruction, output in zip(instructions, outputs):
        text = alpaca_prompt.format(instruction, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }

# Chargement du dataset complet
train_dataset = load_dataset("json", data_files={"train": "train_full.jsonl"}, split="train")
train_dataset = train_dataset.map(formatting_prompts_func, batched = True)

# ==========================================
# 3. ENTRAÎNEMENT
# ==========================================
print("\n--- ÉTAPE 3 : Entraînement ---")
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 4,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)
trainer.train()

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
--- ÉTAPE 1 : Importation et Préparation ---
Veuillez sélectionner le fichier train.json depuis votre ordinateur :


Saving train.json to train.json
Fichier reçu : train.json
Données prêtes. Total Entraînement: 3508 (incluant duplications)

--- ÉTAPE 2 : Chargement Modèle ---
==((====))==  Unsloth 2025.12.8: Fast Gpt_Oss patching. Transformers: 4.57.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.37G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.16G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/165 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/27.9M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/446 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

Unsloth: Making `model.base_model.model.model` require gradients


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/3508 [00:00<?, ? examples/s]


--- ÉTAPE 3 : Entraînement ---


Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/3508 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3,508 | Num Epochs = 4 | Total steps = 1,756
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)
  | |_| | '_ \/ _` / _` |  _/ -_)
wandb: (1) Create a W&B account
wandb: (2) Use an existing W&B account
wandb: (3) Don't visualize my results
wandb: Enter your choice:

 3


wandb: You chose "Don't visualize my results"


wandb: Detected [huggingface_hub.inference, openai] in use.
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/


Step,Training Loss
10,3.2023
20,2.1774
30,1.7686
40,1.6328
50,1.3606
60,1.4294
70,1.5769
80,1.2934
90,1.2976
100,1.3489




0,1
train/epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██
train/grad_norm,▁ █ ▁ ▂▃▃▁ ▆
train/learning_rate,█████▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁
train/loss,█▅▆▄▄▄▄▄▄▄▃▃▄▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
total_flos,2.292775389146212e+17
train/epoch,4.0
train/global_step,1756.0
train/grad_norm,1.75529
train/learning_rate,0.0
train/loss,0.2076
train_loss,0.69597
train_runtime,22763.3316
train_samples_per_second,0.616
train_steps_per_second,0.077


TrainOutput(global_step=1756, training_loss=0.6959691054457141, metrics={'train_runtime': 22763.3316, 'train_samples_per_second': 0.616, 'train_steps_per_second': 0.077, 'total_flos': 2.292775389146212e+17, 'train_loss': 0.6959691054457141, 'epoch': 4.0})

Chat with the model to draw results from the dev.json dataset

In [4]:
import json
import torch
import re
from pathlib import Path
from unsloth import FastLanguageModel
from tqdm import tqdm
from pydantic import BaseModel
from typing import Literal

# ==========================================
# 1. SETUP & MODEL LOADING
# ==========================================
print("--- Loading Model from previous training ---")

# Define the Prompt Format used in training
alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
"""

# Load the model and tokenizer from the 'outputs' directory
# (assumes the previous script ran and saved checkpoints in ./outputs)
# If you are running this in the SAME Colab session as training,
# you can skip loading if 'model' and 'tokenizer' are already in memory.
max_seq_length = 2048
dtype = None
load_in_4bit = True

# We verify if 'model' is already defined (e.g. same notebook session)
# otherwise we load it from the outputs
try:
    model
    tokenizer
    print("Model found in memory. Using existing model.")
except NameError:
    print("Loading model from local 'outputs' directory...")
    # NOTE: Unsloth usually saves to "outputs/checkpoint-X".
    # We load the latest adapter. If this fails, point specific path.
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "outputs", # Load local adapter
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )

FastLanguageModel.for_inference(model)

# ==========================================
# 2. DEFINITIONS
# ==========================================

class PredictionResponse(BaseModel):
    id: str
    prediction: Literal[1, 2, 3, 4, 5]

def generate_prediction(entry, obj_nr, model, tokenizer):
    # 1. Map the new JSON format to the TRAINING format
    # The model expects: Context, Sentence, Word, Meaning, Task
    input_text = (
        f"Context: {entry.get('precontext', '')}\n"
        f"Sentence: {entry.get('sentence', '')}\n"
        f"Word: {entry.get('homonym', '')}\n"
        f"Meaning: {entry.get('judged_meaning', '')}\n"
        f"Task: Analyze if the word implies this meaning in this context. Explain briefly then rate 1-5."
    )

    # 2. Format with Alpaca
    prompt = alpaca_prompt.format(input_text)

    # 3. Tokenize
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    # 4. Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128, # Short generation (Rationale + Score)
            use_cache=True,
            temperature=0.1,    # Low temp for deterministic scores
        )

    # 5. Decode
    output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Extract only the generated response (after "### Response:")
    response_part = output_text.split("### Response:")[-1].strip()

    # 6. Parse Score using Regex
    # Expected output: "Rationale: ... \nScore: 5"
    match = re.search(r"Score:\s*(\d)", response_part)

    if match:
        score = int(match.group(1))
        # Clamp between 1 and 5 just in case
        score = max(1, min(5, score))
    else:
        # Fallback if model hallucinates format
        score = 3
        print(f"Warning: Could not parse score for ID {obj_nr}. Output: {response_part[:50]}...")

    return PredictionResponse(id=str(obj_nr), prediction=score)

def read_json(path: str | Path):
    path = Path(path)
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)

# ==========================================
# 3. MAIN LOOP
# ==========================================

# Upload the dev file if not present (Colab specific)
if not Path("dev.cleaned.extra.json").exists():
    from google.colab import files
    print("Please upload dev.cleaned.extra.json:")
    files.upload()

data = read_json("dev.cleaned.extra.json")
out_dir = Path("output")
out_dir.mkdir(parents=True, exist_ok=True)
out_file = out_dir / "output_gpt_1.jsonl"

print(f"Starting inference on {len(data)} items...")

# Clear file if exists
with out_file.open("w", encoding="utf-8") as f:
    pass

for key, entry in tqdm(data.items()):
    try:
        prediction = generate_prediction(
            entry=entry,
            obj_nr=key,
            model=model,
            tokenizer=tokenizer
        )

        with out_file.open("a", encoding="utf-8") as f:
            f.write(prediction.model_dump_json() + "\n")

    except Exception as e:
        print(f"Error processing ID {key}: {e}")

print(f"Done! Results saved to {out_file}")

--- Loading Model from previous training ---
Model found in memory. Using existing model.
Starting inference on 588 items...


100%|██████████| 588/588 [18:53<00:00,  1.93s/it]

Done! Results saved to output/output_gpt_1.jsonl





Push the model to huggingface



In [6]:
from huggingface_hub import login
from unsloth import FastLanguageModel

# ==========================================
# 1. CONFIGURATION
# ==========================================
# Replace with your actual username and desired model name
HF_USERNAME = "afafasdfasfa"
MODEL_NAME = "gpt-oss-20b-wsd-finetune"
REPO_ID = f"{HF_USERNAME}/{MODEL_NAME}"

# Choose your saving method:
# "lora" = Saves only the LoRA adapters (much smaller, ~100MB). Recommended.
# "merged_16bit" = Merges LoRA into base model (huge, ~40GB). Use only if necessary.
SAVE_METHOD = "lora"

# ==========================================
# 2. AUTHENTICATION
# ==========================================
print("--- Authenticating with Hugging Face ---")
print("Paste your Write Token below (Profile -> Settings -> Access Tokens -> New Token (Write))")
login()

# ==========================================
# 3. LOAD & PUSH
# ==========================================
# Ensure the model is loaded. If you just ran training in this session,
# 'model' and 'tokenizer' are already in memory.
# If not, uncomment the lines below to load from local outputs:

# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "outputs",
#     max_seq_length = 2048,
#     dtype = None,
#     load_in_4bit = True,
# )

print(f"\n--- Pushing to {REPO_ID} using method: {SAVE_METHOD} ---")

if SAVE_METHOD == "lora":
    model.push_to_hub_merged(
        REPO_ID,
        tokenizer,
        save_method = "lora",
        token = True,
    )
elif SAVE_METHOD == "merged_16bit":
    model.push_to_hub_merged(
        REPO_ID,
        tokenizer,
        save_method = "merged_16bit",
        token = True,
    )
elif SAVE_METHOD == "merged_4bit":
    model.push_to_hub_merged(
        REPO_ID,
        tokenizer,
        save_method = "merged_4bit",
        token = True,
    )

print("✅ Upload Complete!")
print(f"View your model at: https://huggingface.co/{REPO_ID}")

--- Authenticating with Hugging Face ---
Paste your Write Token below (Profile -> Settings -> Access Tokens -> New Token (Write))


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…


--- Pushing to afafasdfasfa/gpt-oss-20b-wsd-finetune using method: lora ---


config.json: 0.00B [00:00, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...d-finetune/tokenizer.json: 100%|##########| 27.9MB / 27.9MB            

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...
Cache check failed: model-00000-of-00002.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00000-of-00002.safetensors:   0%|          | 0.00/4.79G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  33%|███▎      | 1/3 [00:13<00:27, 13.71s/it]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.80G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  67%|██████▋   | 2/3 [01:24<00:47, 47.56s/it]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.17G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|██████████| 3/3 [01:37<00:00, 32.39s/it]


Note: tokenizer.model not found (this is OK for non-SentencePiece models)


Unsloth: Merging weights into 16bit:   0%|          | 0/3 [00:00<?, ?it/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...0000-of-00002.safetensors:   0%|          | 41.9MB / 16.5GB            

Unsloth: Merging weights into 16bit:  33%|███▎      | 1/3 [04:08<08:16, 248.02s/it]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...0001-of-00002.safetensors:   0%|          | 41.9MB / 16.5GB            

Unsloth: Merging weights into 16bit:  67%|██████▋   | 2/3 [08:22<04:11, 251.92s/it]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...0002-of-00002.safetensors:   1%|          | 58.7MB / 8.85GB            

Unsloth: Merging weights into 16bit: 100%|██████████| 3/3 [10:43<00:00, 214.55s/it]


Unsloth: Regenerating safetensors index for dequantized MXFP4 model...
Unsloth: Merge process complete. Saved to `/content/afafasdfasfa/gpt-oss-20b-wsd-finetune`
✅ Upload Complete!
View your model at: https://huggingface.co/afafasdfasfa/gpt-oss-20b-wsd-finetune


Save the model and the outputs locally


In [7]:
import shutil
import os
from google.colab import files

# ==========================================
# 1. DEFINE DIRECTORIES
# ==========================================
# The folder where checkpoints were saved during training (from TrainingArguments)
checkpoint_dir = "output"

# The folder where we will save the final, clean adapters
final_model_dir = "final_model_adapters"

# ==========================================
# 2. SAVE FINAL ADAPTERS
# ==========================================
print(f"--- Saving final adapters to '{final_model_dir}' ---")
# This saves the final LoRA weights and tokenizer configuration
model.save_pretrained(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

# ==========================================
# 3. COMPRESS (ZIP) DIRECTORIES
# ==========================================
print("--- Zipping files... this may take a moment ---")

# Zip the final adapters
shutil.make_archive(final_model_dir, 'zip', final_model_dir)
print(f"Created {final_model_dir}.zip")

# Zip the training checkpoints (outputs folder)
# Check if it exists first to avoid errors
if os.path.exists(checkpoint_dir):
    shutil.make_archive(checkpoint_dir, 'zip', checkpoint_dir)
    print(f"Created {checkpoint_dir}.zip")
else:
    print(f"Warning: '{checkpoint_dir}' folder not found. Skipping.")

# ==========================================
# 4. DOWNLOAD TO LOCAL MACHINE
# ==========================================
print("--- Starting Downloads ---")

# Download Final Adapters
files.download(f"{final_model_dir}.zip")

# Download Checkpoints (if they exist)
if os.path.exists(f"{checkpoint_dir}.zip"):
    files.download(f"{checkpoint_dir}.zip")

--- Saving final adapters to 'final_model_adapters' ---
--- Zipping files... this may take a moment ---
Created final_model_adapters.zip
Created output.zip
--- Starting Downloads ---


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>