# Run the model

In [None]:
from PIL import Image
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoProcessor

# Locations of the base checkpoint and of the trained adapter (placeholders to edit).
model_id = "/path/to//Phi-3-vision-128k-instruct"
peft_model_id = "/path/to/your/trained/Phi-3V/lora_model"

# Load the base model onto the GPU in float16, using the "eager" attention
# implementation. trust_remote_code=True allows the checkpoint's own model
# code to be executed, so only point model_id at a checkpoint you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    _attn_implementation="eager",
)
# Attach the adapter stored at peft_model_id to the base model.
model.load_adapter(peft_model_id)

# The processor comes from the same checkpoint as the base model.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

In [None]:
from IPython.display import display

# Ask the model a question about a single image and show its answer.
query = "What's in the image?"
image = Image.open("images/island.jpeg")

display(image)

# One user turn; <|image_1|> marks where the first image of the list passed
# to the processor below is inserted into the prompt.
messages = [
    {"role": "user", "content": f"<|image_1|>\n{query}"},
]

# Render the chat as plain text, ending with the marker that tells the model
# to start its reply.
prompt = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Move the inputs to whatever device the model lives on instead of a
# hard-coded "cuda", so this cell keeps working if the model is placed elsewhere.
inputs = processor(prompt, [image], return_tensors="pt").to(model.device)

# Greedy decoding. No temperature is given: it only applies when sampling,
# so combining it with do_sample=False has no effect and only makes
# transformers emit a warning about an unused generation flag.
generation_args = {
    "max_new_tokens": 1000,
    "do_sample": False,
}

generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args)

# remove input tokens: generate() returns prompt + completion, keep only the completion
generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

display(response)