In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from torch import cuda, bfloat16

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Names defined here and consumed by later cells: `device`, `model`, `tokenizer`.
device = "cuda"  # device string that later cells move the tokenized inputs to

# The weights and the tokenizer are resolved from two different identifiers.
# NOTE(review): the first looks like a local directory and the second like a hub
# repo id -- confirm both refer to the same checkpoint.
model_source = "model/Qwen1.5-4B-Chat"
tokenizer_source = "Qwen/Qwen1.5-4B-Chat"

# 4-bit NF4 weights with double quantization; matmuls are computed in bfloat16.
quant_kwargs = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": bfloat16,
}
bnb_config = BitsAndBytesConfig(**quant_kwargs)

# device_map="auto" leaves weight placement to the library; torch_dtype="auto"
# defers the dtype of the non-quantized tensors to the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_source,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype="auto",
)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_source)



Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.06s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
# Build a single-turn conversation (system + user) and render it into the
# prompt string expected by the model via the tokenizer's chat template.
prompt = "What is human heart?"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
# tokenize=False returns the rendered prompt as text; add_generation_prompt=True
# appends the assistant-turn prefix so generation starts a fresh reply.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
# Move the encoded batch to wherever the model actually lives instead of a
# hard-coded device string: the model is loaded with device_map="auto", so its
# placement is decided at load time and should be read back from the model.
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

In [6]:
# Generate up to 512 new tokens. Unpacking the whole encoding forwards the
# attention_mask together with input_ids; passing input_ids alone drops the
# mask, which makes generate() warn and can mis-handle padded batches.
output_ids = model.generate(
    **model_inputs,
    max_new_tokens=512
)

# generate() returns prompt + continuation for each sequence; strip the prompt
# prefix so only the newly generated tokens are decoded.
generated_ids = [
    full_ids[len(prompt_ids):]
    for prompt_ids, full_ids in zip(model_inputs.input_ids, output_ids)
]

# The batch holds a single conversation, so the first decoded item is the reply.
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
response


'The human heart is a muscular organ that pumps blood throughout the body. It is located in the chest and consists of four chambers: two atria (upper chambers) and two ventricles (lower chambers). The left atrium receives blood from the head, neck, and upper body via two veins, while the right atrium receives blood from the legs and lower body via two veins. The left ventricle then pumps the oxygenated blood to the lungs via the aorta, while the right ventricle pumps deoxygenated blood back to the heart via the pulmonary artery. The heart is protected by several layers of muscle, including the epicardium, which lines the outer surface of the heart, and the myocardium, which is the inner lining of the heart muscle. The heart also has valves that prevent blood from flowing backward and helps regulate blood pressure.'