In [None]:
import os
import json
import torch
from tqdm import tqdm
from datasets import load_dataset
from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration

# ---------------- CONFIG ----------------

# Dataset to read jokes from, and the split of it that is processed.
DATASET_ID: str = "theblackcat102/joke_explaination"
DATASET_SPLIT: str = "train"

# JSONL file that receives one generated explanation per line.
OUT_PATH: str = "cache/joke_explanations_qwen.jsonl"

# Checkpoint used for generation and the upper bound on newly generated tokens.
MODEL_ID: str = "Qwen/Qwen2-Audio-7B-Instruct"
MAX_NEW_TOKENS: int = 120

# -------------- PROMPT -----------------

def build_messages(joke):
    """Build the chat messages that ask the model to explain *joke*.

    Returns a two-element list of ``{"role", "content"}`` dicts: a fixed
    system message followed by a user message holding the instruction text
    and, on its own line at the end, the joke itself.
    """
    # Each entry is one line of the user prompt; the trailing empty entry
    # makes the joined text end with a newline after the joke.
    prompt_lines = [
        "Explain the following joke.",
        "",
        "Instructions:",
        "- Do NOT explain your analysis process.",
        "- Focus ONLY on why the joke is humorous.",
        "- Mention wordplay, ambiguity, or implied meaning if present.",
        "- If the joke is not based on wordplay, explain the humor mechanism briefly.",
        "",
        "Write a concise paragraph (3–6 sentences).",
        "",
        "Joke:",
        f"{joke}",
        "",
    ]

    system_message = {"role": "system", "content": "You are an expert linguist."}
    user_message = {"role": "user", "content": "\n".join(prompt_lines)}
    return [system_message, user_message]

# -------------- MAIN -----------------

def main():
    """Generate an explanation for every joke in the dataset and save them as JSONL.

    Loads the processor and model named by ``MODEL_ID`` and the dataset
    ``DATASET_ID`` / ``DATASET_SPLIT``. For each record with a non-empty
    ``joke`` field, one JSON object is written per line to ``OUT_PATH`` with
    the keys ``id``, ``Joke``, ``Explanation``, ``URL`` and
    ``GoldExplanation``. Records without a joke are skipped. The output file
    is opened in write mode, so a previous run's file is overwritten.
    """
    # Inference only: turn autograd off for the rest of the process.
    torch.set_grad_enabled(False)

    print(f"Loading model: {MODEL_ID}")
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    model = Qwen2AudioForConditionalGeneration.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float16,
    ).eval()

    # The model is placed by device_map="auto", so send inputs to wherever the
    # model reports it lives instead of assuming a CUDA device exists.
    device = model.device
    tokenizer = processor.tokenizer

    print(f"Loading dataset: {DATASET_ID}")
    ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)

    # os.path.dirname() is "" for a bare filename, and os.makedirs("") raises.
    out_dir = os.path.dirname(OUT_PATH)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    def generate(joke: str) -> str:
        """Return the model's greedy-decoded explanation of *joke*."""
        prompt = tokenizer.apply_chat_template(
            build_messages(joke),
            tokenize=False,
            add_generation_prompt=True,
        )

        inputs = processor(
            text=prompt,
            return_tensors="pt",
            padding=True,
        ).to(device)

        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                min_new_tokens=40,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

        # generate() returns prompt + continuation; keep only the new tokens.
        prompt_len = inputs["input_ids"].shape[1]
        gen_tokens = out[0][prompt_len:]
        return tokenizer.decode(
            gen_tokens,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        ).strip()

    with open(OUT_PATH, "w", encoding="utf-8") as f:
        for idx, item in tqdm(enumerate(ds), total=len(ds), desc="Explaining jokes"):
            # `or ""` also covers records where the field is present but None.
            joke = (item.get("joke") or "").strip()
            if not joke:
                continue

            explanation = generate(joke)

            out_obj = {
                "id": idx,
                "Joke": joke,
                "Explanation": explanation,
                "URL": item.get("url"),
                "GoldExplanation": item.get("explanation"),
            }

            # Flush after every record so partial results survive an
            # interrupted run.
            f.write(json.dumps(out_obj, ensure_ascii=False) + "\n")
            f.flush()

            torch.cuda.empty_cache()

    print("Done.")
    print(f"Output -> {OUT_PATH}")

# --------------- RUN ----------------

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Loading model: Qwen/Qwen2-Audio-7B-Instruct


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/876 [00:00<?, ?it/s]

Loading dataset: theblackcat102/joke_explaination


Explaining jokes:   0%|          | 0/377 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Explaining jokes: 100%|██████████| 377/377 [37:00<00:00,  5.89s/it]

Done.
Output → cache/joke_explanations_qwen.jsonl



