# MedGemma client & runner

In [None]:
!pip install -U transformers
!pip install -U huggingface_hub transformers
!pip install transformers psutil torch datasets



In [None]:
import os
import sys

# True only on consumer Google Colab: the `google.colab` module is loaded and
# no VERTEX_PRODUCT is set in the environment.
google_colab = (
    not os.environ.get("VERTEX_PRODUCT")
    and "google.colab" in sys.modules
)

if not google_colab:
    # Colab Enterprise: keep Hugging Face data under `/content`
    if os.environ.get("VERTEX_PRODUCT") == "COLAB_ENTERPRISE":
        os.environ["HF_HOME"] = "/content/hf"
    # Prompt for a Hugging Face login only when no token is available yet
    from huggingface_hub import get_token, notebook_login
    if get_token() is None:
        notebook_login()
else:
    # Google Colab: read the token from the notebook's stored secrets
    from google.colab import userdata
    os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")

In [None]:
 # Packages used by the model-loading configuration in the next cell:
 # bitsandbytes backs the 4-bit `BitsAndBytesConfig`; accelerate is presumably
 # what `device_map="auto"` relies on (TODO confirm).
 !pip install --upgrade --quiet accelerate bitsandbytes transformers

In [None]:
from transformers import BitsAndBytesConfig
import torch

# --- Model selection (Colab form fields) ------------------------------------
model_variant = "4b-it"  # @param ["4b-it", "27b-it", "27b-text-it", "4b-pt"]
model_id = f"google/medgemma-{model_variant}"

use_quantization = True  # @param {type: "boolean"}

# @markdown Set `is_thinking` to `True` to turn on thinking mode. **Note:** Thinking is supported for the 27B variants only.
is_thinking = False  # @param {type: "boolean"}

# If running a 27B variant in Google Colab, check that the runtime satisfies
# the memory requirements. `torch.cuda.get_device_name` raises when no CUDA
# device is present, so check availability first; a CPU-only runtime then gets
# the explanatory ValueError below instead of an unrelated CUDA error.
if "27b" in model_variant and google_colab:
    has_a100 = torch.cuda.is_available() and "A100" in torch.cuda.get_device_name(0)
    if not (has_a100 and use_quantization):
        raise ValueError(
            "Runtime has insufficient memory to run a 27B variant. "
            "Please select an A100 GPU and use 4-bit quantization."
        )

# Keyword arguments intended for model loading: bfloat16 weights, automatic
# device placement, and (optionally) 4-bit quantization.
model_kwargs = dict(
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

if use_quantization:
    model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)

In [None]:
# These packages are also installed by the first setup cell, so this line is
# effectively a no-op once that cell has run.
!pip install transformers psutil torch datasets



In [None]:
from google.colab import files
from PIL import Image
# Ask the user to pick an image file from the local system
uploaded = files.upload()

# A cancelled or empty upload yields no entries; fail with a clear message
# instead of the bare StopIteration that `next(iter(...))` would raise.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose an image file.")

# Open the first uploaded file by the name it is keyed under
image_path = next(iter(uploaded))
image = Image.open(image_path)


Saving xray_image_test.png to xray_image_test (6).png


In [None]:
import json
import time
from io import BytesIO

import requests
import torch
from google.colab import files
from PIL import Image
from transformers import pipeline

# Build the MedGemma pipeline from the settings chosen in the configuration
# cell (`model_id`, `model_kwargs`) instead of a hard-coded 4B / bf16 / CUDA
# setup, so the variant and quantization options selected there take effect.
# Device placement comes from `device_map="auto"` inside `model_kwargs`.
# NOTE(review): the "image-text-to-text" task assumes a multimodal variant;
# "27b-text-it" would presumably need "text-generation" instead - confirm.
pipe = pipeline(
    "image-text-to-text",
    model=model_id,
    model_kwargs=model_kwargs,
)

# Reference point for the elapsed-time values stored in output["timings"];
# taken after the model is loaded so load time is excluded.
start_time = time.time()

# Load and validate an image from a URL or a local file
def load_image(image_url=None, image_path=None, timeout=30):
    """Load an image from a URL or a local file and verify it.

    Args:
        image_url: HTTP(S) address of the image. Takes precedence over
            ``image_path`` when both are given.
        image_path: Path of a local image file.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The opened image on success. On failure no exception is raised;
        instead a dict is returned: ``{"error": "Failed to load image",
        "message": <details>}`` when loading fails, or
        ``{"error": "No image source provided"}`` when neither source is given.
    """
    if image_url:
        try:
            response = requests.get(image_url, timeout=timeout)
            response.raise_for_status()  # Turn HTTP error statuses into exceptions
            payload = response.content
            # `verify()` leaves the Image object unusable, so run the integrity
            # check on a throwaway handle and return a freshly opened image.
            with Image.open(BytesIO(payload)) as probe:
                probe.verify()
            return Image.open(BytesIO(payload))
        except Exception as e:
            return {"error": "Failed to load image", "message": str(e)}
    elif image_path:
        try:
            # Same pattern as above: verify one handle, return another.
            with Image.open(image_path) as probe:
                probe.verify()
            return Image.open(image_path)
        except Exception as e:
            return {"error": "Failed to load image", "message": str(e)}
    return {"error": "No image source provided"}

# Test with an image URL (uncomment to use):
# image_url = "https://upload.wikimedia.org/wikipedia/commons/c/c8/Chest_Xray_PA_3-8-2010.png"  # Change URL here
# image = load_image(image_url=image_url)
# Or upload a local image (uncomment to use):
# uploaded = files.upload()
# image_path = next(iter(uploaded))

# Load the image
# image = load_image(image_path=image_path)  # Pass by keyword: the first positional argument is image_url

# Result record that is printed as JSON at the end of the cell
output = {
    "answer": None,
    "citations": [],
    "used_context": [],
    "timings": [],
    "error": None
}

# `load_image` signals failure by returning a dict with an "error" key
if isinstance(image, dict) and "error" in image:
    # Not every failure dict has a "message" entry (the "No image source
    # provided" case only has "error"), so fall back to the "error" text
    # rather than raising KeyError.
    output["error"] = image.get("message", image["error"])
    output["timings"].append(time.time() - start_time)
else:
    # Define the system instruction and user prompt
    system_instruction = "You are an expert radiologist."
    prompt = "Describe this X-ray"

    # Chat-style input: a system turn followed by a user turn with text + image
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": system_instruction}]
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image", "image": image}
            ]
        }
    ]

    # Ask the model the question about the image
    try:
        response = pipe(text=messages, max_new_tokens=200)  # Get model inference

        generated = response[0].get("generated_text", "No answer")
        # For chat input, "generated_text" is the whole conversation (system
        # and user turns, including the PIL image, plus the assistant turn).
        # Keep only the final assistant reply as the answer.
        if isinstance(generated, list):
            generated = generated[-1]["content"] if generated else "No answer"
        output["answer"] = generated
        output["citations"].append("Cited from MedGemma API")  # Add real citations here if applicable
        output["used_context"].append(prompt)
        output["timings"].append(time.time() - start_time)
    except Exception as e:
        # If the model inference fails, capture the error
        output["error"] = f"Model inference error: {str(e)}"
        output["timings"].append(time.time() - start_time)

# Emit real JSON (double quotes, null) rather than a Python dict repr;
# `default=str` keeps printing from failing on an unexpected non-JSON value.
print(json.dumps(output, indent=2, default=str))


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda


{'answer': [{'role': 'system', 'content': [{'type': 'text', 'text': 'You are an expert radiologist.'}]}, {'role': 'user', 'content': [{'type': 'text', 'text': 'Describe this X-ray'}, {'type': 'image', 'image': <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=2412x1956 at 0x79B8B9D99F10>}]}, {'role': 'assistant', 'content': "Okay, here's a description of the chest X-ray based on the image provided:\n\n**Overall Impression:**\n\nThe image shows a standard PA (Posterior-Anterior) chest X-ray. The lungs appear clear, and there are no obvious signs of acute pathology.\n\n**Specific Findings:**\n\n*   **Heart Size:** The heart size appears within normal limits.\n*   **Mediastinum:** The mediastinum (the space between the lungs containing the heart, great vessels, trachea, etc.) appears unremarkable.\n*   **Bones:** The ribs, clavicles, and other bony structures are intact.\n*   **Lungs:** The lungs are clear bilaterally, with no evidence of consolidation, nodules, masses, or significant