In [None]:
# ------------------------------------------------------------------------
# STEP 1: INSTALLATION & SETUP
# ------------------------------------------------------------------------
# We install Unsloth and specific versions of PyTorch/Xformers for Colab T4
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers --index-url https://download.pytorch.org/whl/cu121
!pip install --no-deps "trl<0.9.0" peft accelerate bitsandbytes

import os
from google.colab import drive

# --- SAFE MODE CONFIGURATION ---
# Mount Google Drive to ensure we don't lose progress if Colab disconnects
drive.mount('/content/drive')

# Define where to save checkpoints and the final model in your Drive
OUTPUT_DIR = "/content/drive/My Drive/Humor_Project/Llama_Model/Llama3.1-Joke-Finetune"

# Create the directory if it doesn't exist
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
    print(f"Created directory: {OUTPUT_DIR}")
else:
    print(f"Directory exists: {OUTPUT_DIR}. Will look for checkpoints here.")

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-izbwgnvq/unsloth_3c2d04f52f594608a1e4b2554f0b3e5d
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-izbwgnvq/unsloth_3c2d04f52f594608a1e4b2554f0b3e5d
  Resolved https://github.com/unslothai/unsloth.git to commit 4cb7229ac1c346e143524b6f9a6ad544259364d6
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2026.1.4 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2026.1.4-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.git-

In [None]:
# ------------------------------------------------------------------------
# STEP 2: LOAD MODEL (Saved to Drive)
# ------------------------------------------------------------------------
from unsloth import FastLanguageModel
import torch
import os

# --- MODEL CACHE CONFIGURATION ---
# We define a folder in your Drive to store the base model
# This effectively "saves" the download so you don't do it twice.
model_cache_dir = "/content/drive/My Drive/Humor_Project/Llama_Model/Llama3_Cache"

# Create the folder if it doesn't exist
if not os.path.exists(model_cache_dir):
    os.makedirs(model_cache_dir)
    print(f"Created cache directory: {model_cache_dir}")

max_seq_length = 2048
dtype = None
load_in_4bit = True

print("Loading model... (This may take a moment to read from Drive if already downloaded)")

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # THIS LINE IS THE KEY:
    cache_dir = model_cache_dir,
)

# Add LoRA adapters (Standard Setup)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 8,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
ü¶• Unsloth Zoo will now patch everything to make training faster!
Loading model... (This may take a moment to read from Drive if already downloaded)
==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Unsloth 2026.1.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# ------------------------------------------------------------------------
# STEP 3: DATA PREPARATION (Corrected for your JSONL format)
# ------------------------------------------------------------------------
import os
from datasets import load_dataset

# 1. Load the Dataset
# Note: "json" is the correct builder for .jsonl files too.
dataset_file = "/content/drive/My Drive/Humor_Project/Llama_Model/outputs_qwen_rag.jsonl"

if not os.path.exists(dataset_file):
    raise FileNotFoundError(f"Please upload '{dataset_file}' to the Colab files sidebar or Drive.")

# We load it as 'json' (Hugging Face handles jsonl automatically this way)
dataset = load_dataset("json", data_files=dataset_file, split="train")

# 2. Define the Template (Standard Llama 3 Format)
alpaca_prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a witty AI assistant and satirical comedian.<|eot_id|><|start_header_id|>user<|end_header_id|>

{instruction}
Input: {input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{output}<|eot_id|>"""

# 3. The Formatting Function
EOS_TOKEN = tokenizer.eos_token # Must be defined from your loaded model

def formatting_prompts_func(examples):
    # Extract the columns from your specific JSON structure
    input_originals = examples["input_original"]
    generated_jokes = examples["generated_joke"]
    types           = examples["type"]  # We use this for the logic

    texts = []

    # We use zip() to loop through all 3 lists at the same time
    for input_text, output_text, doc_type in zip(input_originals, generated_jokes, types):

        # --- LOGIC: Dynamic Instruction based on 'type' ---
        if doc_type == "headline":
            instruction = "Write a satirical joke based on the following news headline."
        else:
            # Fallback for 'words' or any other type
            instruction = "Write a joke that incorporates the following two words."

        # --- Fill the Template ---
        text = alpaca_prompt.format(
            instruction = instruction,
            input       = input_text,
            output      = output_text
        ) + EOS_TOKEN # Crucial for training

        texts.append(text)

    return { "text" : texts }

# 4. Apply the formatting
dataset = dataset.map(formatting_prompts_func, batched = True)

# Optional: Print one to verify it looks correct
print("Sample Data Check:")
print(dataset[0]["text"])

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1200 [00:00<?, ? examples/s]

Sample Data Check:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a witty AI assistant and satirical comedian.<|eot_id|><|start_header_id|>user<|end_header_id|>

Write a satirical joke based on the following news headline.
Input: Ryanair to cut 1 million more passenger seats in Spain<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Ryanair slashes a million seats in Spain, proving they can fly even when they're empty.<|eot_id|><|eot_id|>


In [None]:
# ------------------------------------------------------------------------
# STEP 4: TRAINING (With Resume Capability - NEW PARAMETERS TEST)
# ------------------------------------------------------------------------
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Check for existing checkpoints to resume
last_checkpoint = None
if os.path.isdir(OUTPUT_DIR):
    checkpoints = [d for d in os.listdir(OUTPUT_DIR) if d.startswith("checkpoint-")]
    if checkpoints:
        checkpoints.sort(key=lambda x: int(x.split('-')[1]))
        last_checkpoint = os.path.join(OUTPUT_DIR, checkpoints[-1])
        print(f"Found checkpoint! Resuming training from: {last_checkpoint}")

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,

    # 2. Pass the Arguments (The Settings) inside the Trainer
    args = TrainingArguments(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 2, # Effective Batch Size = 32
        warmup_steps = 20,
        num_train_epochs = 2,
        learning_rate = 2e-4,             # Gentle learning rate
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = OUTPUT_DIR,
        save_strategy = "steps",
        save_steps = 10,
        save_total_limit = 10,
    ),
)


# Start Training
trainer_stats = trainer.train(resume_from_checkpoint=last_checkpoint)

Found checkpoint! Resuming training from: /content/drive/My Drive/Humor_Project/Llama_Model/Llama3.1-Joke-Finetune/checkpoint-150


Map (num_proc=2):   0%|          | 0/1200 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,200 | Num Epochs = 2 | Total steps = 300
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)
wandb: (1) Create a W&B account
wandb: (2) Use an existing W&B account
wandb: (3) Don't visualize my results
wandb: Enter your choice:

 3


wandb: You chose "Don't visualize my results"
wandb: Using W&B in offline mode.
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin


wandb: Detected [openai] in use.
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
160,1.3598
170,1.3678
180,1.3911
190,1.3162
200,1.3821
210,1.3576
220,1.343
230,1.3472
240,1.3522
250,1.363


In [None]:
def create_headline_prompt(headline_text):
    prompt_text = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a witty, cynical stand-up comedian. Your task is to write EXACTLY ONE punchy joke (1-2 sentences) based on the provided headline.

**Key Rules:**
1. The joke must be STANDALONE - include the headline context in the setup
2. Be clever, cynical, or ironic
3. NO explanations or filler - output ONLY the joke
4. Format: One or two sentences maximum

**Examples:**

Headline: "Study finds 90% of office meetings could be emails."
Joke: A new study found that 90% of office meetings could be emails, which implies the other 10% could have just been silence.

Headline: "Billionaire builds giant clock inside a mountain."
Joke: Jeff Bezos is building a giant clock inside a mountain, finally providing a way to tell time for the five people who actually survive the apocalypse.

Headline: "Scientists discover new species of deep-sea jelly."
Joke: Scientists have discovered a new species of jelly at the bottom of the ocean, mostly because they were tired of looking for the ones in their donuts.<|eot_id|><|start_header_id|>user<|end_header_id|>
Target Headline: {headline_text}

Write a standalone joke based on this headline.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    return prompt_text

FastLanguageModel.for_inference(model)

# Create prompt
headline = "Ryanair to cut 1 million more passenger seats in Spain"
prompt = create_headline_prompt(headline)

# Generate
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    use_cache=True,
    temperature=0.7,
    top_p=0.9,
    do_sample=True
)

# Print result
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

system
You are a witty, cynical stand-up comedian. Your task is to write EXACTLY ONE punchy joke (1-2 sentences) based on the provided headline.

**Key Rules:**
1. The joke must be STANDALONE - include the headline context in the setup
2. Be clever, cynical, or ironic
3. NO explanations or filler - output ONLY the joke
4. Format: One or two sentences maximum

**Examples:**

Headline: "Study finds 90% of office meetings could be emails."
Joke: A new study found that 90% of office meetings could be emails, which implies the other 10% could have just been silence.

Headline: "Billionaire builds giant clock inside a mountain."
Joke: Jeff Bezos is building a giant clock inside a mountain, finally providing a way to tell time for the five people who actually survive the apocalypse.

Headline: "Scientists discover new species of deep-sea jelly."
Joke: Scientists have discovered a new species of jelly at the bottom of the ocean, mostly because they were tired of looking for the ones in their d