In [None]:
# ------------------------------------------------------------------------
# STEP 1: INSTALLATION & SETUP
# ------------------------------------------------------------------------
# Install Unsloth straight from its GitHub repository (the "colab-new" extra).
# xformers is taken from the PyTorch cu121 wheel index; trl (<0.9.0), peft,
# accelerate and bitsandbytes are installed with --no-deps, i.e. without
# pulling in their own dependencies.
# NOTE(review): only trl carries a version constraint here; every other
# package is unpinned, so a re-run may resolve to different versions.
# NOTE(review): the recorded install log shows unsloth_zoo requiring
# trl>=0.18.2,<=0.24.0, which conflicts with the "trl<0.9.0" pin below --
# confirm which trl version actually ends up installed.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers --index-url https://download.pytorch.org/whl/cu121
!pip install --no-deps "trl<0.9.0" peft accelerate bitsandbytes

import os
from google.colab import drive

from unsloth import FastLanguageModel
import torch

# --- SAFE MODE CONFIGURATION ---
# Everything is persisted on Google Drive so that progress survives a Colab
# disconnect; the Drive has to be mounted before the paths below are usable.
drive.mount('/content/drive')

# Drive folder that receives the training checkpoints and the final model.
OUTPUT_DIR = "/content/drive/My Drive/LLM project/DATA/Llama3.1-Joke-Finetune(2)"

# Report whether we start from an existing folder or a freshly created one.
if os.path.exists(OUTPUT_DIR):
    print(f"Directory exists: {OUTPUT_DIR}. Will look for checkpoints here.")
else:
    os.makedirs(OUTPUT_DIR)
    print(f"Created directory: {OUTPUT_DIR}")

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-q1r389zz/unsloth_6fb1dde7397b4745817302a3af942f48
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-q1r389zz/unsloth_6fb1dde7397b4745817302a3af942f48
  Resolved https://github.com/unslothai/unsloth.git to commit 4cb7229ac1c346e143524b6f9a6ad544259364d6
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting trl!=0.19.0,<=0.24.0,>=0.18.2 (from unsloth_zoo>=2026.1.4->unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Using cached trl-0.24.0-py3-none-any.whl.metadata (11 kB)
Using cached trl-0.24.0-py3-none-any.whl (423 kB)
Inst

In [None]:
# ------------------------------------------------------------------------
# STEP 2: LOAD MODEL (Saved to Drive)
# ------------------------------------------------------------------------

# --- LOADING OPTIONS ---
# These three values are forwarded unchanged to FastLanguageModel.from_pretrained.
max_seq_length = 2048
dtype = None
load_in_4bit = True

# --- MODEL CACHE CONFIGURATION ---
# Drive folder handed to the loader as `cache_dir`, so the downloaded base
# model is kept on Drive instead of being fetched again.
model_cache_dir = "/content/drive/My Drive/LLM project/DATA/Llama3_Cache"

# Make sure the cache folder is there before the loader uses it.
if not os.path.exists(model_cache_dir):
    os.makedirs(model_cache_dir)
    print(f"Created cache directory: {model_cache_dir}")

print("Loading model... (This may take a moment to read from Drive if already downloaded)")

# Base model + tokenizer; `cache_dir` is what points the download at Drive.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    cache_dir=model_cache_dir,
)

# Wrap the base model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Loading model... (This may take a moment to read from Drive if already downloaded)
==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2026.1.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# ------------------------------------------------------------------------
# STEP 3: DATA PREPARATION (Updated for JSONL)
# ------------------------------------------------------------------------
from datasets import load_dataset

# The training data is a JSONL file stored on Google Drive, so the Drive has
# to be mounted before this cell runs.
dataset_file = "/content/drive/My Drive/LLM project/DATA/best_jokes.jsonl"

# Fail early, with a message that matches where the file actually lives.
if not os.path.exists(dataset_file):
    raise FileNotFoundError(
        f"Dataset file not found: '{dataset_file}'. "
        "Make sure Google Drive is mounted and the file exists at that path."
    )

# JSON Lines files are read through the "json" loader of the datasets library.
dataset = load_dataset("json", data_files=dataset_file, split="train")



# 1. The Prompt Template
# Despite its name, `alpaca_prompt` is laid out with Llama 3.1 chat markers:
# a system turn, a user turn and an assistant turn, each closed by <|eot_id|>.
# The three `{}` placeholders are filled positionally by
# formatting_prompts_func with the instruction, the input text and the joke.
# NOTE(review): the template hard-codes <|begin_of_text|>; if the tokenizer
# also prepends a BOS token during training, every sequence would start with
# two of them -- confirm.
alpaca_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a witty AI assistant and satirical comedian.<|eot_id|><|start_header_id|>user<|end_header_id|>

{}
Input: {}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{}<|eot_id|>"""

# NOTE(review): EOS_TOKEN is not referenced by formatting_prompts_func, which
# appends a literal "<|end_of_text|>" instead -- confirm this is intended.
EOS_TOKEN = tokenizer.eos_token

# 2. The Formatting Function (Handles the logic)
def formatting_prompts_func(examples):
    """Turn one batch of dataset columns into fully formatted training texts.

    Args:
        examples: Batch mapping with the columns "input_original",
            "generated_joke" and "type", as supplied by
            `dataset.map(..., batched=True)`.

    Returns:
        A dict with the single key "text" holding one string per row:
        `alpaca_prompt` filled with (instruction, input, joke) followed by
        the literal "<|end_of_text|>".
    """
    source_texts = examples["input_original"]
    jokes = examples["generated_joke"]
    kinds = examples["type"]

    # Rows tagged "headline" get the headline instruction; every other type
    # is treated as the two-words task.
    headline_instruction = "Write a satirical joke based on the following news headline."
    words_instruction = "Write a joke that incorporates the following two words."
    instructions = [
        headline_instruction if kinds[row] == "headline" else words_instruction
        for row in range(len(source_texts))
    ]

    # Template order is: instruction -> input -> output.
    return {
        "text": [
            alpaca_prompt.format(instruction, source, joke) + "<|end_of_text|>"
            for instruction, source, joke in zip(instructions, source_texts, jokes)
        ]
    }

# Run the formatter over the dataset in batches; the "text" entry it returns
# is the field the trainer is later told to read.
dataset = dataset.map(formatting_prompts_func, batched=True)

In [None]:
# ------------------------------------------------------------------------
# STEP 4: TRAINING (With Resume Capability)
# ------------------------------------------------------------------------
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Check for existing checkpoints to resume.
# Only sub-directories named exactly "checkpoint-<number>" are considered, so
# stray files or folders such as "checkpoint-backup" can no longer crash the
# integer parsing used for sorting.
import re

last_checkpoint = None
if os.path.isdir(OUTPUT_DIR):
    checkpoints = [
        d for d in os.listdir(OUTPUT_DIR)
        if re.fullmatch(r"checkpoint-\d+", d)
        and os.path.isdir(os.path.join(OUTPUT_DIR, d))
    ]
    if checkpoints:
        # Sort numerically by step so checkpoint-100 ranks above checkpoint-25.
        checkpoints.sort(key=lambda x: int(x.split('-')[1]))
        last_checkpoint = os.path.join(OUTPUT_DIR, checkpoints[-1])
        print(f"Found checkpoint! Resuming training from: {last_checkpoint}")


# 1. Define the Trainer: model, tokenizer and the formatted dataset
#    (the "text" column produced by formatting_prompts_func).
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,

    # 2. Pass the training settings inside the Trainer
    args = TrainingArguments(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 4, # Effective batch size = 4 x 4 = 16 on one GPU
        warmup_ratio = 0.1,
        max_steps = 225,                  # Approx. 3 epochs (1,200 examples / 16 per step = 75 steps per epoch)
        learning_rate = 2e-5,             # Gentle learning rate
        # Exactly one of fp16/bf16 is enabled, depending on bf16 support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 5,
        optim = "paged_adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        # Checkpoints go to the Drive folder that the resume logic scans.
        output_dir = OUTPUT_DIR,
        save_strategy = "steps",
        save_steps = 25,
        save_total_limit = 10,
    ),
)

# Start training; last_checkpoint is None when no checkpoint directory was
# found above, otherwise it is the path of the checkpoint to resume from.
trainer_stats = trainer.train(resume_from_checkpoint=last_checkpoint)


Found checkpoint! Resuming training from: /content/drive/My Drive/LLM project/DATA/Llama3.1-Joke-Finetune(2)/checkpoint-150


Map (num_proc=2):   0%|          | 0/1200 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,200 | Num Epochs = 3 | Total steps = 225
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss
155,2.376
160,2.2741
165,2.2481
170,2.1654
175,2.1082
180,2.1486
185,2.0141
190,2.0122
195,1.9298
200,1.9702


In [None]:
# ------------------------------------------------------------------------
# STEP 5: SAVING THE FINAL MODEL
# ------------------------------------------------------------------------
# Model and tokenizer are written side by side into the same "lora_model"
# sub-folder of OUTPUT_DIR.
lora_model_dir = os.path.join(OUTPUT_DIR, "lora_model")
model.save_pretrained(lora_model_dir)
tokenizer.save_pretrained(lora_model_dir)

print("Training complete! Model saved to Google Drive.")

Training complete! Model saved to Google Drive.


In [None]:
def create_headline_prompt(headline_text):
    """Build the few-shot chat prompt asking for one joke about a headline.

    The prompt consists of a fixed system turn (persona, rules and three
    worked examples), a user turn containing `headline_text`, and an open
    assistant header so that generation continues with the joke.

    Args:
        headline_text: The headline to joke about; inserted verbatim.

    Returns:
        The complete prompt string.
    """
    # System turn: static text, identical for every headline.
    system_turn = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a witty, cynical stand-up comedian. Your task is to write EXACTLY ONE punchy joke (1-2 sentences) based on the provided headline.

Key Rules:
1. The joke must be STANDALONE - include the headline context in the setup
2. Be clever, cynical, or ironic
3. NO explanations or filler - output ONLY the joke
4. Format: One or two sentences maximum

Examples:

Headline: "Study finds 90% of office meetings could be emails."
Joke: A new study found that 90% of office meetings could be emails, which implies the other 10% could have just been silence.

Headline: "Billionaire builds giant clock inside a mountain."
Joke: Jeff Bezos is building a giant clock inside a mountain, finally providing a way to tell time for the five people who actually survive the apocalypse.

Headline: "Scientists discover new species of deep-sea jelly."
Joke: Scientists have discovered a new species of jelly at the bottom of the ocean, mostly because they were tired of looking for the ones in their donuts.<|eot_id|>"""

    # User turn: the only part that depends on the argument.
    user_turn = (
        "<|start_header_id|>user<|end_header_id|>\n"
        f"Target Headline: {headline_text}\n"
        "\n"
        "Write a standalone joke based on this headline.<|eot_id|>"
    )

    # Open assistant header; the model's reply is generated after it.
    assistant_header = "<|start_header_id|>assistant<|end_header_id|>\n"

    return system_turn + user_turn + assistant_header

# LOADING THE MODEL
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
dtype = None
load_in_4bit = True

# Reuse the Drive cache folder that the training step downloads the base
# model into, so this cell does not fetch the multi-GB weights again.
model_cache_dir = "/content/drive/My Drive/LLM project/DATA/Llama3_Cache"

# Step 1: Load the BASE model first
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct",  # 👈 Original base model
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    cache_dir = model_cache_dir,
)

# Step 2: Load your trained LoRA adapter on top
# NOTE(review): this path points at ".../Llama3.1-Joke-Finetune/lora_model",
# while the training step saves to ".../Llama3.1-Joke-Finetune(2)/lora_model"
# -- confirm which adapter is meant to be evaluated here.
from peft import PeftModel
model = PeftModel.from_pretrained(
    model,
    "/content/drive/My Drive/LLM project/DATA/Llama3.1-Joke-Finetune/lora_model"
)

# Step 3: Enable inference mode
FastLanguageModel.for_inference(model)

print("✅ Model with your finetuned LoRA adapter loaded successfully!")

# Create prompt
headline = "Ryanair to cut 1 million more passenger seats in Spain"
prompt = create_headline_prompt(headline)

# Generate
# The prompt already starts with <|begin_of_text|>, so the tokenizer is told
# not to add special tokens; otherwise the sequence would carry a second BOS.
inputs = tokenizer(
    [prompt],
    return_tensors="pt",
    add_special_tokens=False,
).to("cuda")
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    use_cache=True,
    temperature=0.9,
    top_p=0.9,
    do_sample=True
)

# Print result: drop the prompt tokens and decode only what was generated.
input_length = inputs["input_ids"].shape[1]
generated_tokens = outputs[0][input_length:]
print(tokenizer.decode(generated_tokens, skip_special_tokens=True))


==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

KeyboardInterrupt: 