In [8]:
import torch
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import yaml
import re


In [2]:
def _load_model(
    base_model_id: str = "Qwen/Qwen2.5-3B-Instruct",
    adapter_path: str = "checkpoint-30",
) -> PeftModel:
    """Load the 4-bit quantized base model with a PEFT adapter attached, for inference.

    This loader is inference-only: the returned model is put in eval mode, so
    the training-time preparation steps (k-bit training prep, gradient
    checkpointing, disabling the KV cache) are intentionally not applied.

    Args:
        base_model_id: Hub id or local path of the base causal LM.
        adapter_path: Path (or Hub id) of the saved PEFT adapter to attach.

    Returns:
        The PeftModel wrapping the quantized base model, in eval mode.
    """
    # Prefer bfloat16 compute when the GPU supports it; otherwise use float16.
    dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=dtype,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        quantization_config=quant_config,
        device_map="auto",
        low_cpu_mem_usage=True,
    )

    # Attach the trained adapter weights on top of the frozen quantized base.
    model = PeftModel.from_pretrained(base_model, adapter_path)
    model.eval()
    return model

model = _load_model()

Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.81s/it]


In [3]:
# Tokenizer is loaded from the same Hub id as the base model so that the chat
# template and vocabulary match the model used for generation.
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2.5-3B-Instruct",
)

In [4]:
# Example input to classify.
sentence = "Those 2 drinks are part of the HK culture and has years of history. It is so bad."

# Chat-format conversation: a generic system message plus a user turn asking
# for a one-word sentiment label.
prompt = [
    {
        "role": "system",
        "content": ("You are a helpful assistant."),
    },
    {
        "role": "user",
        "content": (
            f"Determine the sentiment of the following text:\n{sentence}"
            "\n Answer in one word. The answer can only be positive, negative, or neutral."
        ),
    },
]

# Render the conversation to a single string. add_generation_prompt=True ends
# the rendered text with the assistant-turn header, so generation begins inside
# the assistant's reply instead of the model having to open that turn itself.
inputs = {
    "prompt": tokenizer.apply_chat_template(
        prompt,
        tokenize=False,
        add_generation_prompt=True,
    )
}


In [5]:
# The prompt string was already rendered by the chat template, which inserts
# the special tokens itself; add_special_tokens=False keeps the tokenizer from
# adding them a second time.
tokenized = tokenizer(
    inputs["prompt"],
    return_tensors="pt",
    truncation=True,
    max_length=4096,
    add_special_tokens=False,
).to(model.device)


In [6]:
# Sampling is stochastic: seed torch's RNG so re-running this cell (or the
# whole notebook) draws the same completion on the same setup.
torch.manual_seed(0)

with torch.no_grad():
    outputs = model.generate(
        **tokenized,
        max_new_tokens=100,
        do_sample=True,
    )

# Decode the first (only) returned sequence, dropping special tokens.
decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [12]:
def extract_answer(result: str) -> str:
    """Get predicted answer from completion - positive, neutral, negative or error.

    The answer is taken from the last line of ``result`` after surrounding
    whitespace (including trailing newlines) has been removed, and must match
    one of the allowed labels exactly (case-sensitive).

    Args:
        result: Decoded model output; only its final non-blank line is used.

    Returns:
        "positive", "negative" or "neutral" when the last line is exactly one
        of those labels; otherwise "error" (also for non-string input).
    """
    if not isinstance(result, str):
        print(f"extract_answer: expected str, got {type(result).__name__}")
        return "error"

    # Strip BEFORE splitting: a trailing newline would otherwise make the
    # "last line" an empty string and turn a valid answer into "error".
    answer = result.strip().rsplit("\n", 1)[-1].strip()

    print(f"extracted answer: {answer}")
    if answer not in ("positive", "negative", "neutral"):
        return "error"

    return answer

In [13]:
# Parse the last line of the decoded text into a sentiment label (or "error").
extract_answer(decoded)

extracted answer: negative


'negative'