In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch

# Inference demo: load the 4-bit quantized base model, attach the LoRA
# adapter, and greedily generate a summary for one hard-coded article.

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# device_map="auto" lets the library decide where the weights are placed, so
# nothing below may assume a particular device name.
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-1.7B",
    quantization_config=bnb_config,
    device_map="auto",
)
# NOTE(review): the tokenizer is read from the local "./qwen3" directory while
# the base model comes from the hub id above -- confirm the two match.
tokenizer = AutoTokenizer.from_pretrained("./qwen3")

# Attach the LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(
    base_model,
    "GermanovDev/qwen3-pubmed-summarization",  # <- replace with your repo
)

# Prompt layout: instruction, then the article, then a trailing "Summary:"
# cue for the model to continue from.
prompt = """You are a helpful assistant who writes concise, factual summaries of articles. Summarize the following article into a few sentences.
Article:
GLP-1 receptor agonists have demonstrated cardiovascular benefits in patients with type 2 diabetes, reducing major adverse cardiac events by 14% in recent trials.
Summary:"""

# Move the inputs to wherever the model was actually placed instead of
# hard-coding "cuda": with device_map="auto" the two can otherwise disagree,
# and a fixed "cuda" fails on machines without a CUDA device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
prompt_len = inputs.input_ids.shape[1]

outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=False,  # greedy decoding
    pad_token_id=tokenizer.eos_token_id,  # EOS id doubles as the padding id
)

# The returned sequence starts with the prompt tokens; slice them off so only
# the newly generated continuation is decoded.
summary = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
print(summary)

2025-11-23 14:27:15.850908: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-23 14:27:15.859048: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763897235.868562  126614 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763897235.871783  126614 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763897235.880077  126614 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

 The article discusses the cardiovascular benefits of GLP-1 receptor agonists in patients with type 2 diabetes, showing a 14% reduction in major adverse cardiac events in recent clinical trials.
