In [1]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" -q
!pip install --no-deps xformers trl peft accelerate bitsandbytes -q

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m506.8/506.8 kB[0m [31m42.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 kB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.2/181.2 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m134.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m38.0 MB/s[0m eta [36m0

In [2]:
!git clone https://github.com/Sinaeskandari/semeval2026-task1-mwahaha.git

Cloning into 'semeval2026-task1-mwahaha'...
remote: Enumerating objects: 213, done.[K
remote: Counting objects: 100% (213/213), done.[K
remote: Compressing objects: 100% (164/164), done.[K
remote: Total 213 (delta 92), reused 143 (delta 41), pack-reused 0 (from 0)[K
Receiving objects: 100% (213/213), 3.53 MiB | 9.47 MiB/s, done.
Resolving deltas: 100% (92/92), done.


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
from datasets import Dataset
from pathlib import Path
import glob
import json
import csv
import pandas as pd
import torch
from tqdm import tqdm
from unsloth import FastLanguageModel
from trl import DPOTrainer, DPOConfig
from peft import PeftModel
import os

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# Dataset Loading

In [5]:
def create_headline_prompt(headline_text):
    prompt_text = f"""### Instruction
You are a witty, cynical stand-up comedian.
Your task is to write EXACTLY ONE punchy joke (1–2 sentences) based on the provided headline.

### Examples
Here is how to turn a headline into a standalone joke (weaving the context into the setup):

Headline: "Study finds 90% of office meetings could be emails."
Joke: "A new study found that 90% of office meetings could be emails, which implies the other 10% could have just been silence."

Headline: "Billionaire builds giant clock inside a mountain."
Joke: "Jeff Bezos is building a giant clock inside a mountain, finally providing a way to tell time for the five people who actually survive the apocalypse."

Headline: "Scientists discover new species of deep-sea jelly."
Joke: "Scientists have discovered a new species of jelly at the bottom of the ocean, mostly because they were tired of looking for the ones in their donuts."

### Task
Target Headline: "{headline_text}"

### Constraints
1. The joke must be **STANDALONE**. Do not assume the audience has read the headline; include the premise in the joke itself.
2. Be clever, cynical, or ironic.
3. **NO** explanations or conversational filler (e.g., do not write "Here is the joke").
4. Output **ONLY** the joke.

### Response
Joke:"""

    return prompt_text


def create_words_prompt(word1, word2):
    prompt_text = f"""You are a witty, cynical stand-up comedian.

Task: Write EXACTLY ONE punchy joke (1–2 sentences) that connects the following two concepts: "{word1}" and "{word2}".

Here are examples of how to connect random words creatively:

Example 1 (Metaphor/Analogy):
Words: "unplug" + "fridge"
Joke: "My current relationship is exactly like an unplugged fridge: it's cold, dark, and I'm terrified to open it and see what's rotting inside."

Example 2 (Ironic Failure):
Words: "hammer" + "banana"
Joke: "I tried to fix my diet with the same tool I use to fix my furniture, but it turns out taking a hammer to a banana just makes a smoothie with too much crunch."

Example 3 (Cynical Observation):
Words: "measure" + "pizza"
Joke: "Trying to measure happiness with money is like trying to measure a pizza with a thermometer: you're using the wrong tool and you're just going to burn your hand."

MANDATORY Rules:
- You can use the words literally OR metaphorically.
- The logic must hold up (e.g., do not say a laptop cooks food).
- Do NOT explain the joke.
- Do NOT use filler like "Here is a joke."

Words to connect: "{word1}" and "{word2}"
Joke:"""

    return prompt_text

In [None]:
def load_type_mapping(tsv_path):
    """
    Load the TSV file and create a mapping of id -> (type, content).

    Returns:
        dict: {id: {'type': 'headline' or 'two_words', 'word1': str, 'word2': str, 'headline': str}}
    """
    type_mapping = {}

    with open(tsv_path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f, delimiter='\t')

        for row in reader:
            record_id = row['id']
            word1 = row['word1']
            word2 = row['word2']
            headline = row['headline']

            # Determine type based on which fields have content (not just "-")
            if word1 != '-' and word2 != '-':
                type_mapping[record_id] = {
                    'type': 'two_words',
                    'word1': word1,
                    'word2': word2
                }
            elif headline != '-':
                type_mapping[record_id] = {
                    'type': 'headline',
                    'headline': headline
                }

    return type_mapping

def create_prompt_from_type_info(type_info):
    """Create the appropriate prompt based on type information."""
    if type_info['type'] == 'two_words':
        return create_words_prompt(type_info['word1'], type_info['word2'])
    else:  # headline
        return create_headline_prompt(type_info['headline'])

def load_jsonl(file_path):
    """Load a JSONL file and return list of dictionaries."""
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            data.append(json.loads(line.strip()))
    return data

def process_record(record, type_mapping):
    """
    Process a single record and return standardized format.
    Returns None if record should be filtered out.
    """
    # Filter out ties
    if record.get('winner', '').lower() == 'tie':
        return None

    # Get type information
    record_id = record.get('id')
    if not record_id or record_id not in type_mapping:
        return None

    winner = record.get('winner', '').lower()

    # Map winner to the corresponding joke field
    joke_mapping = {
        'llama': 'joke_lama',
        'gemma': 'joke_gemma',
        'qwen': 'joke_qwen'
    }

    # Find which joke fields exist in this record
    available_jokes = {}
    for model, field in joke_mapping.items():
        if field in record and record[field]:
            available_jokes[model] = record[field]

    # Determine winner model name (handle variations)
    winner_model = None
    if 'llama' in winner or 'lama' in winner:
        winner_model = 'llama'
    elif 'gemma' in winner:
        winner_model = 'gemma'
    elif 'qwen' in winner:
        winner_model = 'qwen'
    else:
        return None  # Unknown winner format

    # Get the winning joke
    if winner_model not in available_jokes:
        return None

    chosen_joke = available_jokes[winner_model]

    # Get the losing joke (any other available joke)
    rejected_joke = None
    for model, joke in available_jokes.items():
        if model != winner_model:
            rejected_joke = joke
            break

    if not rejected_joke:
        return None  # No losing joke available

    # Create the prompt using type information
    type_info = type_mapping[record_id]
    prompt = create_prompt_from_type_info(type_info)

    return {
        'prompt': prompt,
        'chosen': chosen_joke,
        'rejected': rejected_joke
    }

def create_dataset_from_jsonl_files(file_paths, tsv_path):
    """
    Create a HuggingFace dataset from multiple JSONL files with type mapping from TSV.

    Args:
        file_paths: List of paths to JSONL files
        tsv_path: Path to TSV file with type information

    Returns:
        HuggingFace Dataset object
    """
    # Load type mapping
    print(f"Loading type mapping from {tsv_path}...")
    type_mapping = load_type_mapping(tsv_path)
    print(f"Loaded {len(type_mapping)} type mappings")

    all_processed = []
    stats = {
        'total': 0,
        'filtered_ties': 0,
        'filtered_no_type': 0,
        'filtered_other': 0,
        'processed': 0
    }

    for file_path in file_paths:
        print(f"Processing {file_path}...")
        data = load_jsonl(file_path)

        for record in data:
            stats['total'] += 1

            if record.get('winner', '').lower() == 'tie':
                stats['filtered_ties'] += 1
                continue

            record_id = record.get('id')
            if not record_id or record_id not in type_mapping:
                stats['filtered_no_type'] += 1
                continue

            processed = process_record(record, type_mapping)
            if processed:
                all_processed.append(processed)
                stats['processed'] += 1
            else:
                stats['filtered_other'] += 1

    print(f"\n=== Processing Statistics ===")
    print(f"Total records: {stats['total']}")
    print(f"Filtered (ties): {stats['filtered_ties']}")
    print(f"Filtered (no type mapping): {stats['filtered_no_type']}")
    print(f"Filtered (other): {stats['filtered_other']}")
    print(f"Successfully processed: {stats['processed']}")

    # Create HuggingFace dataset
    dataset = Dataset.from_list(all_processed)
    return dataset

In [None]:
file_paths = glob.glob("semeval2026-task1-mwahaha/data/evaluated/*.jsonl")
raw_data_path = "semeval2026-task1-mwahaha/data/raw/task-a-en.tsv"

dataset = create_dataset_from_jsonl_files(file_paths, raw_data_path)

Loading type mapping from semeval2026-task1-mwahaha/data/raw/task-a-en.tsv...
Loaded 1200 type mappings
Processing semeval2026-task1-mwahaha/data/evaluated/result_competition_gemma_llama.jsonl...
Processing semeval2026-task1-mwahaha/data/evaluated/result_competition_gemma_qwen.jsonl...
Processing semeval2026-task1-mwahaha/data/evaluated/result_competition_qwen_llama.jsonl...

=== Processing Statistics ===
Total records: 3600
Filtered (ties): 17
Filtered (no type mapping): 0
Filtered (other): 0
Successfully processed: 3583


In [None]:
dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 3583
})

In [None]:
dataset[0]

{'prompt': '### Instruction\nYou are a witty, cynical stand-up comedian.\nYour task is to write EXACTLY ONE punchy joke (1–2 sentences) based on the provided headline.\n\n### Examples\nHere is how to turn a headline into a standalone joke (weaving the context into the setup):\n\nHeadline: "Study finds 90% of office meetings could be emails."\nJoke: "A new study found that 90% of office meetings could be emails, which implies the other 10% could have just been silence."\n\nHeadline: "Billionaire builds giant clock inside a mountain."\nJoke: "Jeff Bezos is building a giant clock inside a mountain, finally providing a way to tell time for the five people who actually survive the apocalypse."\n\nHeadline: "Scientists discover new species of deep-sea jelly."\nJoke: "Scientists have discovered a new species of jelly at the bottom of the ocean, mostly because they were tired of looking for the ones in their donuts."\n\n### Task\nTarget Headline: "Ryanair to cut 1 million more passenger seats 

# Model

In [None]:
max_seq_length = 2048
dtype = None  # None for auto detection
load_in_4bit = True  # Use 4-bit quantization

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.1-8B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

==((====))==  Unsloth 2026.1.3: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank
    lora_alpha=16,
    # lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=True,  # Use Rank-Stabilized LoRA
)

Unsloth 2026.1.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
dpo_config = DPOConfig(
    output_dir="./dpo_llama_output",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=5e-4,
    num_train_epochs=1,
    warmup_steps=50,
    # logging_steps=10,
    save_steps=50,
    # eval_strategy="no",  # Set to "steps" if you have eval data
    # bf16=True,  # Use bfloat16 if GPU supports it (A100, RTX 4090, etc.)
    fp16=True,  # Use fp16 for older GPUs
    optim="paged_adamw_8bit",
    weight_decay=0.01,
    seed=42,
    remove_unused_columns=False,
    max_length=max_seq_length,
    max_prompt_length=1024,
    beta=0.1,  # DPO beta parameter
)

In [None]:
trainer = DPOTrainer(
    model=model,
    args=dpo_config,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=None,  # Already set up with LoRA
)

Extracting prompt in train dataset (num_proc=4):   0%|          | 0/3583 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=4):   0%|          | 0/3583 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=4):   0%|          | 0/3583 [00:00<?, ? examples/s]

In [None]:
trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3,583 | Num Epochs = 1 | Total steps = 448
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,rewards / chosen,rewards / rejected,rewards / accuracies,rewards / margins,logps / chosen,logps / rejected,logits / chosen,logits / rejected,eval_logits / chosen,eval_logits / rejected,nll_loss
1,0.6931,0.0,0.0,0.0,0.0,-74.670441,-73.167221,-1.274337,-1.256491,0,0,0
2,0.6931,0.0,0.0,0.0,0.0,-76.321823,-69.535812,-1.210879,-1.194559,No Log,No Log,No Log
3,0.6799,0.006516,-0.020346,1.0,0.026862,-87.353958,-81.679886,-1.255622,-1.385416,No Log,No Log,No Log
4,0.6849,-0.032069,-0.054632,0.625,0.022562,-76.325912,-67.839165,-1.067419,-1.322474,No Log,No Log,No Log
5,0.5644,0.065927,-0.251685,0.625,0.317612,-72.345726,-66.863983,-1.190324,-1.248361,No Log,No Log,No Log
6,0.6589,-0.235544,-0.335923,0.5,0.100379,-74.59938,-84.627541,-1.2243,-1.222683,No Log,No Log,No Log
7,1.0097,-0.888191,-0.45671,0.375,-0.431481,-87.234871,-71.902634,-1.185725,-1.26065,No Log,No Log,No Log
8,0.7034,-0.735102,-0.903703,0.375,0.1686,-90.194862,-80.021347,-1.172957,-1.056093,No Log,No Log,No Log
9,0.9869,-1.507065,-1.110401,0.5,-0.396663,-93.959099,-80.358688,-1.251644,-1.156569,No Log,No Log,No Log
10,0.672,-0.861326,-0.949195,0.5,0.087869,-85.456009,-86.624084,-1.128096,-1.177988,No Log,No Log,No Log


Step,Training Loss,rewards / chosen,rewards / rejected,rewards / accuracies,rewards / margins,logps / chosen,logps / rejected,logits / chosen,logits / rejected,eval_logits / chosen,eval_logits / rejected,nll_loss
1,0.6931,0.0,0.0,0.0,0.0,-74.670441,-73.167221,-1.274337,-1.256491,0,0,0
2,0.6931,0.0,0.0,0.0,0.0,-76.321823,-69.535812,-1.210879,-1.194559,No Log,No Log,No Log
3,0.6799,0.006516,-0.020346,1.0,0.026862,-87.353958,-81.679886,-1.255622,-1.385416,No Log,No Log,No Log
4,0.6849,-0.032069,-0.054632,0.625,0.022562,-76.325912,-67.839165,-1.067419,-1.322474,No Log,No Log,No Log
5,0.5644,0.065927,-0.251685,0.625,0.317612,-72.345726,-66.863983,-1.190324,-1.248361,No Log,No Log,No Log
6,0.6589,-0.235544,-0.335923,0.5,0.100379,-74.59938,-84.627541,-1.2243,-1.222683,No Log,No Log,No Log
7,1.0097,-0.888191,-0.45671,0.375,-0.431481,-87.234871,-71.902634,-1.185725,-1.26065,No Log,No Log,No Log
8,0.7034,-0.735102,-0.903703,0.375,0.1686,-90.194862,-80.021347,-1.172957,-1.056093,No Log,No Log,No Log
9,0.9869,-1.507065,-1.110401,0.5,-0.396663,-93.959099,-80.358688,-1.251644,-1.156569,No Log,No Log,No Log
10,0.672,-0.861326,-0.949195,0.5,0.087869,-85.456009,-86.624084,-1.128096,-1.177988,No Log,No Log,No Log


TrainOutput(global_step=448, training_loss=1.2259503778436087, metrics={'train_runtime': 8676.935, 'train_samples_per_second': 0.413, 'train_steps_per_second': 0.052, 'total_flos': 0.0, 'train_loss': 1.2259503778436087, 'epoch': 1.0})

In [None]:
model.save_pretrained("llama_dpo_lora_model")
tokenizer.save_pretrained("llama_dpo_lora_model")

('llama_dpo_lora_model/tokenizer_config.json',
 'llama_dpo_lora_model/special_tokens_map.json',
 'llama_dpo_lora_model/chat_template.jinja',
 'llama_dpo_lora_model/tokenizer.json')

In [None]:
def backup_to_drive(model_path, drive_path):
    """Backup model to Google Drive"""
    import shutil
    import os
    os.makedirs(drive_path, exist_ok=True)
    if os.path.exists(model_path):
        for item in os.listdir(model_path):
            src = os.path.join(model_path, item)
            dst = os.path.join(drive_path, item)
            if os.path.isdir(src):
                shutil.copytree(src, dst, dirs_exist_ok=True)
            else:
                shutil.copy2(src, dst)
        print(f"Model backed up to {drive_path}")

In [None]:
backup_to_drive("llama_dpo_lora_model", "/content/drive/My Drive/llama_dpo_lora_model")

Model backed up to /content/drive/My Drive/llama_dpo_lora_model


# Joke Generation

In [6]:
def load_trained_model(lora_model_path="llama_dpo_lora_model", max_seq_length=2048):
    """
    Load the DPO-trained Llama model with LoRA adapters.

    Args:
        lora_model_path: Path to the saved LoRA model (local path)
        max_seq_length: Maximum sequence length for inference

    Returns:
        model, tokenizer: Loaded model and tokenizer ready for inference
    """
    from peft import PeftConfig

    print("Loading base model...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/Llama-3.1-8B-Instruct-bnb-4bit",
        max_seq_length=max_seq_length,
        dtype=None,
        load_in_4bit=True,
    )

    print(f"Loading LoRA adapters from {lora_model_path}...")

    # Convert to absolute path to handle spaces and special characters
    lora_model_path = os.path.abspath(lora_model_path)

    # Load config first to verify the path exists
    try:
        peft_config = PeftConfig.from_pretrained(lora_model_path)
        print(f"✓ Found adapter_config.json at {lora_model_path}")
    except Exception as e:
        print(f"Error: Could not load adapter config from {lora_model_path}")
        print(f"Details: {e}")
        raise

    # Load the LoRA adapters
    model = PeftModel.from_pretrained(
        model,
        lora_model_path,
        is_trainable=False
    )

    print("Preparing model for inference...")
    FastLanguageModel.for_inference(model)

    return model, tokenizer

In [7]:
def generate_single_joke(
    model,
    tokenizer,
    prompt,
    max_new_tokens=150,
    temperature=0.9,
    top_p=0.9,
    do_sample=True
):
    """
    Generate a single joke given a prompt.

    Args:
        model: The loaded Llama model
        tokenizer: The tokenizer
        prompt: The formatted prompt text
        max_new_tokens: Maximum tokens to generate
        temperature: Creativity parameter
        top_p: Nucleus sampling parameter
        do_sample: Whether to use sampling

    Returns:
        str: The generated joke
    """
    # Apply chat template
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Create attention mask explicitly
    attention_mask = (inputs != tokenizer.pad_token_id).long().to(model.device)

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=do_sample,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
        )

    # Decode and clean
    response = outputs[0][inputs.shape[-1]:]
    joke = tokenizer.decode(response, skip_special_tokens=True).strip()

    return joke


def generate_jokes_from_file(
    model,
    tokenizer,
    input_path,
    output_path="dpo_generated_jokes.json",
    resume=True
):
    """
    Generate jokes for all entries in a TSV file using the trained model.

    Args:
        model: The loaded Llama model
        tokenizer: The tokenizer
        input_path: Path to TSV file with headlines/word pairs
        output_path: Path to save generated jokes as JSON
        resume: If True, skip already processed entries
    """
    df = pd.read_csv(input_path, delimiter="\t")

    processed_ids = set()

    # Check for existing output and resume
    if resume and os.path.isfile(output_path):
        print(f"Found existing output file: {output_path}")
        try:
            with open(output_path, "r", encoding="utf-8") as f:
                existing_data = json.load(f)
            processed_ids = set(existing_data["ids"].keys())
            print(f"Resuming from {len(processed_ids)} already processed entries.")
        except Exception as e:
            print(f"Warning: Could not read existing file ({e}). Starting fresh.")
            processed_ids = set()
    else:
        print(f"Starting fresh with new output file: {output_path}")

    # Initialize output structure
    output_data = {"ids": {}}
    if resume and os.path.isfile(output_path):
        with open(output_path, "r", encoding="utf-8") as f:
            output_data = json.load(f)

    # Process each row
    for idx, row in tqdm(df.iterrows(), total=len(df), desc="Generating jokes"):
        record_id = str(row["id"])

        if record_id in processed_ids:
            continue

        # Determine which prompt to use
        if pd.notna(row['headline']) and str(row['headline']).strip() != '-':
            prompt = create_headline_prompt(row['headline'])
            input_original = row['headline']
            entry_type = "headline"
        else:
            prompt = create_words_prompt(row['word1'], row['word2'])
            input_original = f"{row['word1']}, {row['word2']}"
            entry_type = "words"

        # Generate joke
        joke = generate_single_joke(model, tokenizer, prompt)
        clean_joke = joke.replace('\t', ' ').replace('\n', ' ').strip()

        # Store result
        output_data["ids"][record_id] = {
            "type": entry_type,
            "input_original": input_original,
            "generated_joke": clean_joke
        }

        # Periodic save to prevent data loss
        if (idx + 1) % 10 == 0:
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(output_data, f, indent=2, ensure_ascii=False)

    # Final save
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)

    print(f"Generation complete! Results saved to {output_path}")
    return output_data

In [8]:
LORA_MODEL_PATH = "/content/drive/MyDrive/llama_dpo_lora_model"
INPUT_TSV = "semeval2026-task1-mwahaha/data/raw/task-a-en.tsv"
OUTPUT_JSON = "dpo_generated_jokes.json"

# Load model
model, tokenizer = load_trained_model(LORA_MODEL_PATH)

# Generate jokes
results = generate_jokes_from_file(
    model,
    tokenizer,
    input_path=INPUT_TSV,
    output_path=OUTPUT_JSON,
    resume=True
)

print("\nSample results:")
sample_ids = list(results["ids"].keys())[:3]
for sample_id in sample_ids:
    entry = results["ids"][sample_id]
    print(f"\nID: {sample_id}")
    print(f"Type: {entry['type']}")
    print(f"Input: {entry['input_original']}")
    print(f"Joke: {entry['generated_joke']}")

Loading base model...
==((====))==  Unsloth 2026.1.3: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Loading LoRA adapters from /content/drive/MyDrive/llama_dpo_lora_model...
✓ Found adapter_config.json at /content/drive/MyDrive/llama_dpo_lora_model
Preparing model for inference...
Starting fresh with new output file: dpo_generated_jokes.json


Generating jokes: 100%|██████████| 1200/1200 [1:10:01<00:00,  3.50s/it]

Generation complete! Results saved to dpo_generated_jokes.json

Sample results:

ID: en_0001
Type: headline
Input: Ryanair to cut 1 million more passenger seats in Spain
Joke: "Bright" – I know this means you're planning to provide 1 billion passengers with basically one choice... but I'd rather say we'll give them all Ryan Air's patented brand of neglect."

ID: en_0002
Type: headline
Input: Looted by Nazis, a 17th-Century Painting Resurfaces. But Not for Long.
Joke: "Lived" under Nazi rule - not'stole', probably also 'planned' by the Reich... It's too easy when all you know about someone is some art and what kind of world would've been like if we'd lost most of our art? Now I feel guilty every day."

ID: en_0003
Type: headline
Input: Analysis: Spotlight on childcare reforms reveals how far there is to go
Joke: "You know about childcare reforms - an easy thing when someone leaves you out as a working mother with 'choice'...right until it's only real choice was leaving everything." That


