In [1]:
import json
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

In [2]:
query = "Syncope during bathing in infants, a pediatric form of water-induced urticaria?"
system_message = (
    "You are a medical assistant answering user queries concisely and accurately. "
    "Always rely strictly on your knowledge about the topic to answer medical questions. "
    "Provide your answers in 2-3 clear, informative sentences, avoiding speculation. Do NOT use bullet points or lists."
    "Example:\n"
    "Question: What are the outcomes of two different techniques for cataract surgery?\n"
    "Answer: Both techniques for cataract surgery are effective, and most patients recover well. There are no major differences in safety or vision outcomes between them. The choice usually depends on the surgeon’s experience and the patient’s unique needs. In general, both approaches are considered safe and successful. Your doctor can help choose the best option for you.\n\n"
)

messages_with_context = [
    {
        "role": "system",
        "content": system_message,
    },
    {
        "role": "user",
        "content": query,
    },
]

In [3]:
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Mist

In [4]:
prompt = tokenizer.apply_chat_template(messages_with_context, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(device)
print("Token count:", len(tokenizer(prompt)["input_ids"]))

print("Tokenization complete")

print("Prompt length:", len(prompt))
outputs = model.generate(
    **inputs,
    max_new_tokens=300,
    do_sample=False,
    temperature=0.1,
    top_p=0.95
)
generated = outputs[0][inputs["input_ids"].shape[1]:]
answer = tokenizer.decode(generated, skip_special_tokens=True).strip()

print("Question:", query)
print("Answer:", answer)

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Token count: 185
Tokenization complete
Prompt length: 837




Question: Syncope during bathing in infants, a pediatric form of water-induced urticaria?
Answer: Syncope during bathing in infants is not directly related to water-induced urticaria. Urticaria, or hives, is a skin condition characterized by itchy, red, raised areas of skin. Syncope, or fainting, is a loss of consciousness due to insufficient blood flow to the brain. While some infants may experience urticaria during or after bathing, syncope is typically caused by various factors such as dehydration, low blood sugar, or certain medical conditions. If you have concerns about your infant's syncope during bathing, it's best to consult a healthcare professional for proper evaluation and guidance.


In [None]:
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()