# **Setup**

In [15]:
!pip install -q torch transformers pillow pyyaml iopath bitsandbytes accelerate

In [1]:
!git clone https://github.com/Vision-CAIR/MiniGPT-Med.git
%cd MiniGPT-Med

Cloning into 'MiniGPT-Med'...
remote: Enumerating objects: 418, done.[K
remote: Counting objects: 100% (418/418), done.[K
remote: Compressing objects: 100% (321/321), done.[K
remote: Total 418 (delta 109), reused 353 (delta 59), pack-reused 0 (from 0)[K
Receiving objects: 100% (418/418), 4.52 MiB | 28.04 MiB/s, done.
Resolving deltas: 100% (109/109), done.
/kaggle/working/MiniGPT-Med


In [3]:
# List the evaluation configs in the repository (path is relative to the current directory).
!ls eval_configs/

minigptv2_benchmark_evaluation.yaml  minigptv2_eval.yaml


In [4]:
import yaml

# Rewrite the evaluation config so that `model.ckpt` points at the MiniGPT-Med checkpoint
# under /kaggle/input.
yaml_file_path = "/kaggle/working/MiniGPT-Med/eval_configs/minigptv2_eval.yaml"

with open(yaml_file_path, 'r') as file:
    config = yaml.safe_load(file)

config['model']['ckpt'] = "/kaggle/input/minigpt-med/pytorch/default/1/miniGPT_Med.pth"

# safe_dump is the counterpart of safe_load; sort_keys=False writes keys back in the order
# they were read instead of re-sorting them alphabetically.
# Note: YAML comments in the file are not preserved by this load/dump round trip.
with open(yaml_file_path, 'w') as file:
    yaml.safe_dump(config, file, sort_keys=False)

In [19]:
!pip install flash-attn

Collecting accelerate
  Downloading accelerate-1.11.0-py3-none-any.whl.metadata (19 kB)
Downloading accelerate-1.11.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.8/375.8 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: accelerate
Successfully installed accelerate-1.11.0


# **LLaVA-Med**

In [20]:
from transformers import LlavaForConditionalGeneration, AutoProcessor
import torch

# Hugging Face Hub id of the checkpoint used for the LLaVA-Med section.
model_path = "chaoyinshe/llava-med-v1.5-mistral-7b-hf"

# Processor for the same checkpoint; later cells use its tokenizer and call it on image + text.
processor = AutoProcessor.from_pretrained(model_path)

# Weights are loaded in bfloat16. device_map="auto" requires the `accelerate` package
# (transformers raises a ValueError otherwise).
model = LlavaForConditionalGeneration.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)

ValueError: Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate`

In [None]:
import json

# JSON file that stores the chat template under the "chat_template" key.
template_path = "/kaggle/input/chat-template-json/chat_template.json"

with open(template_path, "r") as template_file:
    template_payload = json.load(template_file)

# Template string that a later cell assigns to the tokenizer.
chat_template = template_payload["chat_template"]

In [None]:
# Attach the template loaded from JSON to the tokenizer; the inference cells build their
# prompts through processor.tokenizer.apply_chat_template.
processor.tokenizer.chat_template = chat_template

In [None]:
# Print the template the tokenizer now reports, as a sanity check.
print(processor.tokenizer.get_chat_template())

In [None]:
import requests
from PIL import Image
from io import BytesIO

# image url
image_path = "https://media.istockphoto.com/id/182796373/photo/x-ray-image-of-chest.jpg?s=612x612&w=0&k=20&c=Yiomsj4fwqQaZ5xiLm3SXTZKlX9fAgRF8UjHoT53uKU="

# Fetch the image; the timeout keeps the cell from hanging on a stalled connection.
response = requests.get(image_path, timeout=30)

# Fail loudly on a bad response. Only printing a message would leave `image` undefined
# (or stale from an earlier run) and push the failure into a later cell.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve the image. (HTTP {response.status_code})")

image = Image.open(BytesIO(response.content))

# inference prompt: a single user turn made of an image placeholder followed by the question
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Is there a tumor in this X-Ray?"}
        ]
    }
]

In [None]:
# Show the downloaded image as the cell output.
image

In [None]:
# Render the conversation into a prompt string using the tokenizer's chat template.
prompt = processor.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Send the inputs to the model's device and cast them to the model's own dtype.
# The model is loaded in bfloat16, so hard-coding float16 here would not match its weights.
inputs = processor(
    images=[image], text=prompt, return_tensors="pt"
).to(model.device, model.dtype)

# Inference
with torch.inference_mode():
    out = model.generate(**inputs, max_new_tokens=256)

# Decode and print result
print(processor.decode(out[0], skip_special_tokens=True))

In [None]:
# Second single-turn conversation about the same image, asking for the tumor location.
messages_2 = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Show me exactly where the tumor is located in the x-ray?"}
        ]
    }
]

# Render the conversation into a prompt string using the tokenizer's chat template.
prompt_2 = processor.tokenizer.apply_chat_template(
    messages_2, tokenize=False, add_generation_prompt=True
)

# Send the inputs to the model's device and cast them to the model's own dtype.
# The model is loaded in bfloat16, so hard-coding float16 here would not match its weights.
inputs = processor(
    images=[image], text=prompt_2, return_tensors="pt"
).to(model.device, model.dtype)

# Inference
with torch.inference_mode():
    out = model.generate(**inputs, max_new_tokens=256)

# Decode and print result
print(processor.decode(out[0], skip_special_tokens=True))

# **MiniGPT-Med**

In [5]:
!pip install -q psutil==5.9.4 regex==2022.10.31 tqdm==4.64.1 timm==0.6.13 webdataset==0.2.48 omegaconf==2.3.0 opencv-python==4.7.0.72 decord==0.6.0 peft==0.2.0 sentence-transformers gradio==3.47.1 accelerate==0.20.3 scikit-image visual-genome wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.3/57.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.2/280.2 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m781.1/781.1 kB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.9/51.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.8/61.8 MB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0mm
[2K   [90m━━━━━━━━━━━━

In [6]:
from kaggle_secrets import UserSecretsClient
# Client used to read the secrets attached to this notebook.
user_secrets = UserSecretsClient()

# Hugging Face credentials are read from the secrets client instead of being hard-coded.
username, access_token = (
    user_secrets.get_secret(secret_name)
    for secret_name in ("HF_USERNAME", "HF_ACCESS_TOKEN")
)

# HTTPS clone URL for the Llama-2-7b-chat-hf repository with the credentials embedded.
# Treat this string as a secret: do not print or display it.
clone_string = "https://{}:{}@huggingface.co/meta-llama/Llama-2-7b-chat-hf".format(
    username, access_token
)

In [None]:
!git clone --depth 1 {clone_string}

In [None]:
# Patch the MiniGPT-v2 model config inside the repository checkout.
yaml_file_path = "/kaggle/working/MiniGPT-Med/minigpt4/configs/models/minigpt_v2.yaml"

with open(yaml_file_path, 'r') as file:
    config = yaml.safe_load(file)

# Kept from the original cell: same checkpoint path that is written to the eval config.
config['model']['ckpt'] = "/kaggle/input/minigpt-med/pytorch/default/1/miniGPT_Med.pth"

# Point the model at the Llama-2 weights cloned earlier; without this nothing in the
# notebook references that clone.
# NOTE(review): assumes this file is where the repo expects `llama_model`, and that the clone
# ran with /kaggle/working/MiniGPT-Med as the current directory. Confirm against the repo's
# setup instructions.
config['model']['llama_model'] = "/kaggle/working/MiniGPT-Med/Llama-2-7b-chat-hf"

# safe_dump is the counterpart of safe_load; sort_keys=False keeps the keys in file order.
with open(yaml_file_path, 'w') as file:
    yaml.safe_dump(config, file, sort_keys=False)

In [None]:
# Launch demo_v2.py with the eval config patched earlier, passing GPU id 0.
# Both paths are relative, so the current directory must be the MiniGPT-Med checkout.
!python demo_v2.py --cfg-path eval_configs/minigptv2_eval.yaml --gpu-id 0