In [1]:
import torch
import pandas as pd
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig, pipeline
from accelerate import init_empty_weights

## Dataset

In [2]:
dataset = load_dataset("theblackcat102/joke_explaination")

In [6]:
selected_ds = dataset['train'].select(range(70))
train_eval_ds = selected_ds.train_test_split(test_size=60)

In [8]:
train_eval_ds['train'].to_csv('train.csv', index=False)
train_eval_ds['test'].to_csv('evaluation.csv', index=False)

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

60437

## Models

In [2]:
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    trust_remote_code=True,
    use_safetensors=True,
)
quan_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="bfloat16",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type='nf4',
)



In [3]:
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    device_map='auto',
    trust_remote_code=True,
    use_safetensors=True,
    torch_dtype=torch.bfloat16,
    quantization_config=quan_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
prompt = """<s>[INST] <<SYS>>
You are a joke explainer.

Each input joke contains a question and an answer. Your job is to explain why the answer together with the question forms a joke.
<</SYS>>
%s[/INST]"""

In [12]:
joke = "Q: Why did the two 4s decide to skip dinner? A: Because they already 8."

In [6]:
assist_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [13]:
sequences = assist_pipeline(
    prompt % joke,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
    return_full_text=False,
)

In [14]:
sequences[0]['generated_text'].strip()

'Ah, I see! The joke is a play on words. The question "Why did the two 4s decide to skip dinner?" is asking why two numbers, both of which are equal to 4, would decide to skip dinner. The answer "Because they already 8" is a pun, as the number 8 is a multiple of 4. So, the joke is that the two 4s are already "8" (meaning they are equal to 8), so they don\'t need to skip dinner. It\'s a simple but clever'

Ah, I see! The joke is a play on words. The question "Why did the two 4s decide to skip dinner?" is asking why two numbers, both of which are equal to 4, would decide to skip dinner. The answer "Because they already 8" is a pun, as the number 8 is a multiple of 4. So, the joke is that the two 4s are already "8" (meaning they are equal to 8), so they don\'t need to skip dinner. It\'s a simple but clever