In [None]:
!pip install -q transformers accelerate bitsandbytes gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m57.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m41.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from transformers import BitsAndBytesConfig, AutoProcessor, AutoModelForImageTextToText, pipeline
from PIL import Image
import torch
import gradio as gr


In [None]:
from huggingface_hub import get_token, notebook_login

# Show the interactive Hugging Face login widget only when `get_token()`
# reports that no access token is currently available.
if get_token() is None:
    notebook_login()

In [None]:
# Checkpoint selection: the variant suffix is appended to the MedGemma repo prefix.
model_variant = "4b-it"
model_id = "google/medgemma-" + model_variant

# When True, the model-load config cell attaches a BitsAndBytes 4-bit
# quantization config to the load arguments.
use_quantization = True

In [None]:
# Keyword arguments forwarded to `from_pretrained` when the model is loaded.
# Weights are requested in bfloat16 (the author left torch.float32 as a
# commented-out alternative) and `device_map="auto"` is passed for placement.
model_kwargs = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}

# Optionally request 4-bit loading through bitsandbytes.
if use_quantization:
    model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)

In [None]:
# Instantiate the model for `model_id` using the load options prepared in
# `model_kwargs`, then fetch the matching processor for the same checkpoint.
model = AutoModelForImageTextToText.from_pretrained(model_id, **model_kwargs)
processor = AutoProcessor.from_pretrained(model_id)

In [None]:
# Wrap the already-loaded model and processor in an image-text-to-text pipeline.
# `model_kwargs` is intentionally not forwarded here: it only carries
# `from_pretrained` load options, and both `model` and `processor` are passed
# as instantiated objects, so there is nothing left for the pipeline to load.
pipe = pipeline(
    "image-text-to-text",
    model=model,
    processor=processor,
)

# Turn sampling off in the model's generation config.
pipe.model.generation_config.do_sample = False

Device set to use cuda:0


In [None]:
# Inference function for Gradio
def medgemma_infer(uploaded_image, prompt):
    """Run the MedGemma pipeline on an uploaded image and a text prompt.

    Args:
        uploaded_image: Image object supplied by the Gradio image input, or
            None when nothing has been uploaded.
        prompt: Text entered by the user; None or whitespace-only text is
            treated as missing.

    Returns:
        The ``content`` of the last message in the pipeline's
        ``generated_text`` for the first result, or a reminder string when
        either the image or the prompt is missing.
    """
    # Test the image against None explicitly instead of relying on its
    # truthiness (which is ambiguous or raises for array-like images), and
    # tolerate a None prompt instead of failing on `.strip()`.
    if uploaded_image is None or not (prompt or "").strip():
        return "Please upload an image and enter a prompt."

    # Chat-style input: a fixed system persona followed by the user's turn,
    # which carries both the text prompt and the image.
    messages = [
        {"role": "system", "content": [{"type": "text", "text": "You are an expert radiologist."}]},
        {"role": "user", "content": [
            {"type": "text", "text": prompt},
            {"type": "image", "image": uploaded_image}
        ]}
    ]

    output = pipe(text=messages, max_new_tokens=300)
    # Take the last message of the first result's conversation.
    response = output[0]["generated_text"][-1]["content"]
    return response


In [14]:
# Build the Gradio UI: an image upload and a free-text box feed
# `medgemma_infer`, whose string result is shown in a single textbox.
image_input = gr.Image(type="pil", label="Upload Medical Image")
prompt_input = gr.Textbox(lines=2, label="Describe symptoms or findings")
diagnosis_output = gr.Textbox(label="MedGemma Diagnosis")

demo = gr.Interface(
    fn=medgemma_infer,
    inputs=[image_input, prompt_input],
    outputs=diagnosis_output,
    title="🧠 MedGemma Medical Visual Assistant",
    description="Upload a medical image and describe symptoms. Model will return an expert-like diagnosis.",
)

# share=True requests a public share URL for the app.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://696090078db9a1ac2b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


