In [None]:
# pip install -r requirements.txt

In [None]:
import ast
import io
import os
from io import BytesIO

import pandas as pd
import numpy as np
from google.cloud import storage
from PIL import Image
import matplotlib.pyplot as plt
import faiss
import openai
from dotenv import load_dotenv
# Load environment configuration (python-dotenv); the API key below is then
# read from the process environment rather than hard-coded.
load_dotenv()

# NOTE(review): presumably a workaround for more than one OpenMP runtime being
# loaded in the same process — confirm it is still needed.
os.environ.update({"KMP_DUPLICATE_LIB_OK": "TRUE"})

# A missing OPENAI_API_KEY raises KeyError here, before any API call is attempted.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Only the type is printed, never the key itself.
print(type(openai.api_key))

In [None]:
# --- Storage configuration ---
bucket_name = "srobb-datathon"          # GCS bucket name
gcs_base_path = "padchest"              # key prefix inside the bucket
local_base_path = "./padchest_sample"   # NOTE(review): not referenced by the cells shown — confirm use
metadata_gcs_path = f"gs://{bucket_name}/{gcs_base_path}/metadata/chest_x_ray_images_labels_sample.csv"

# GCS client and bucket handle.
# NOTE: `client` is rebound to the OpenAI client in a later cell; `bucket`
# keeps pointing at the storage bucket.
client = storage.Client()
bucket = client.bucket(bucket_name)

In [None]:
# Read the metadata CSV straight from its gs:// URL into a DataFrame.
df = pd.read_csv(metadata_gcs_path)

# Preview the first few free-text reports.
df.loc[:, "Report"].head()

In [None]:
# Three random rows, restricted to the columns used in the rest of the notebook.
df.loc[:, ["ImageID", "Labels", "Report"]].sample(n=3)

In [None]:
# Choose a sample row (the first row of the metadata frame).
sample = df.iloc[0]
image_id = sample["ImageID"]
report = sample["Report"]
labels = sample.get("Labels", "Unknown")  # fallback in case column is named differently

# Pull the image bytes from GCS. The object key is built from `gcs_base_path`
# so it stays in sync with the configuration cell instead of repeating the prefix.
image_path = f"{gcs_base_path}/images/{image_id}"
blob = bucket.blob(image_path)
image_data = blob.download_as_bytes()

# Show image + report.
img = Image.open(io.BytesIO(image_data))
plt.imshow(img, cmap='gray')
plt.axis('off')
plt.title(f"Labels: {labels}")
plt.show()

print("Report:")
print(report)


In [None]:
# Second report in the dataset (positional index 1).
report_text = df["Report"].iat[1]
print(report_text)

In [None]:
# First two reports, in dataset order; these feed the extraction prompt.
report_texts = [df["Report"].iat[position] for position in (0, 1)]
print(report_texts)

In [None]:
# Extraction prompt: asks the model for a Python dict literal mapping report
# number (1, 2) to a list of findings. The two reports are interpolated from
# `report_texts`; `{{` / `}}` are escaped braces that render as literal `{` / `}`.
prompt = f"""
You're a clinical assistant reviewing radiology reports.

Below are two radiology reports. For each one, extract key clinical findings.

Return ONLY a valid Python dictionary, using this format:
{{1: [...], 2: [...]}}

Do not include any explanation, markdown, or extra text—only the dictionary.

Reports:
1.
\"\"\"{report_texts[0]}\"\"\"

2.
\"\"\"{report_texts[1]}\"\"\"
"""

In [None]:
# OpenAI client. NOTE: this rebinds `client`, which held the GCS storage
# client until now; the `bucket` handle created earlier is unaffected.
client = openai.OpenAI()

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": prompt}],
    temperature=0.2,
)

# The reply is untrusted text. Parse it as a Python literal only —
# `ast.literal_eval` never executes code, unlike `eval`.
raw_findings = (response.choices[0].message.content or "").strip()
if raw_findings.startswith("```"):
    # Tolerate a Markdown code fence around the dictionary: drop the opening
    # fence line (``` or ```python) and everything from the closing fence on.
    raw_findings = raw_findings.split("\n", 1)[-1].rsplit("```", 1)[0].strip()

clinical_findings = ast.literal_eval(raw_findings)
if not isinstance(clinical_findings, dict):
    raise ValueError(
        f"Expected a dictionary of findings, got {type(clinical_findings).__name__}"
    )
print(clinical_findings)

## What Is an Image Memory?

An `ImageMemory` is a structured, semantically meaningful representation of a medical imaging event. It acts like a *memory cell* for an agent, containing everything needed to understand, compare, and reason about a specific case.

Think of it as the AI’s version of a clinical chart entry—but embedded with intelligence.

---

### Core Components of an Image Memory

| Field         | Purpose                                                                 |
|---------------|-------------------------------------------------------------------------|
| `image_id`    | Unique identifier for the image                                         |
| `gcs_path`    | Cloud location to access the raw image                                  |
| `report`      | Free-text radiology report associated with the image                   |
| `label`       | Diagnostic label(s), structured (e.g., “pneumonia”, “cardiomegaly”)     |
| `findings`    | Extracted short phrases summarizing key observations                   |
| `embedding`   | 1536-d vector capturing semantic meaning of the extracted findings      |

---

### Conceptually:
An `ImageMemory` is to a radiologist’s brain what a chunked memory is to an agent:
> "I’ve seen something like this before—it had similar findings, here’s what it looked like, and here’s what I thought at the time."

It bridges:
- Visual data (X-ray)
- Text data (report, findings)
- Structured logic (labels, embeddings)
- Agentic cognition (memory, reflection, recall)

---

### Why This Is Powerful:
It gives your agent the ability to think longitudinally, not just answer in the moment.

Imagine future steps:
- Retrieve similar past ImageMemories based on semantic similarity
- Reflect: “What makes this case different from those?”
- Flag: “Findings are rare—are we missing something?”
- Summarize: “Out of 40 similar cases, 30 were diagnosed with X”

---

The data structure is kept lean and flexible for now; it can be refined further as the agent's needs become clearer.

With the structure settled, the next step is to decide how to store and query these ImageMemories.

In [None]:
# Grab reports and image ids for the first two rows.
samples = df.iloc[:2]

# Build one memory dict per row.
image_memories = []

# The row label is bound to `row_label` rather than `index`, so re-running this
# cell cannot overwrite the FAISS `index` object created in a later cell.
for i, (row_label, row) in enumerate(samples.iterrows(), start=1):
    memory = {
        "image_id": row["ImageID"],
        # Built from the configuration cell's bucket/prefix instead of repeating them.
        "gcs_path": f"gs://{bucket_name}/{gcs_base_path}/images/{row['ImageID']}",
        "report": row["Report"],
        "label": row.get("Labels", "unknown"),
        # Findings are keyed 1, 2 as requested in the prompt; also accept "1", "2"
        # in case the model returned string keys.
        "findings": clinical_findings.get(i, clinical_findings.get(str(i), [])),
    }
    image_memories.append(memory)

# Display result.
image_memories

In [None]:
# Initialize FAISS (exact L2 index). `dimension` must equal the length of the
# vectors returned by the embedding model used below.
dimension = 1536
index = faiss.IndexFlatL2(dimension)
memory_map = {}

# Embed every memory and add it to the index.
for idx, memory in enumerate(image_memories):
    # Build the embedding input from the findings. The embeddings endpoint
    # rejects an empty string, so fall back to the report text when no findings
    # were extracted for this memory.
    findings_text = "; ".join(str(item) for item in memory["findings"]) or str(memory["report"])

    # Get embedding.
    embedding_response = client.embeddings.create(
        model="text-embedding-3-small",
        input=findings_text
    )
    embedding = embedding_response.data[0].embedding
    memory["embedding"] = embedding

    # Add to FAISS as a (1, dimension) float32 row.
    vec = np.array([embedding], dtype="float32")
    index.add(vec)

    # Map index position to memory so search results can be resolved.
    memory_map[idx] = memory

print(f"Embedded and indexed {len(image_memories)} ImageMemories.")


In [None]:
# Use an already-stored memory as the query (try 0, 1, or a new one later).
query_memory = image_memories[1]
query_vec = np.asarray(query_memory["embedding"], dtype="float32").reshape(1, -1)

# Nearest stored memory only.
D, I = index.search(query_vec, k=1)

# Row 0 holds the results for the single query vector.
matched_index, matched_distance = I[0, 0], D[0, 0]
matched_memory = memory_map[matched_index]

# Display the match.
print("Search our memory for a match:")
print(
    f"- Image ID: {matched_memory['image_id']}",
    f"- Distance Score: {matched_distance:.4f}",
    f"- Findings: {matched_memory['findings']}",
    f"- Report: {matched_memory['report']}",
    sep="\n",
)


In [None]:
# Ask for two neighbours so the self-match and the other memory are both listed.
D, I = index.search(query_vec, k=2)

print("Search results:")
for rank, (idx, dist) in enumerate(zip(I[0], D[0])):
    match = memory_map[idx]
    print(f"\nMatch {rank + 1}:")
    print(f"- Image ID: {match['image_id']}")
    print(f"- Distance Score: {dist:.4f}")
    print(f"- Findings: {match['findings']}")

In [None]:
import open_clip
from PIL import Image
import torch

# --- Load the model & tokenizer ---
# One identifier for both calls so model and tokenizer cannot drift apart.
# Alternative checkpoint tried previously: 'hf-hub:mgbam/OpenCLIP-BiomedCLIP-Finetuned'
biomedclip_id = 'hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224'
model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(biomedclip_id)
tokenizer = open_clip.get_tokenizer(biomedclip_id)
model.eval()

# --- Load and preprocess your image ---
# NOTE(review): this reads from a local "./data/padchest_sample" directory, which
# differs from `local_base_path` in the configuration cell — confirm the path.
image_path = "./data/padchest_sample/images/" + image_memories[0]["image_id"]
image = Image.open(image_path).convert("RGB")
image_input = preprocess_val(image).unsqueeze(0)  # add a leading batch dimension

# --- Define text prompts ---
prompts = ["normal chest x-ray", "aortic elongation", "signs of COPD"]
text_inputs = tokenizer(prompts)

# --- Encode both ---
with torch.no_grad():
    image_features = model.encode_image(image_input)
    text_features = model.encode_text(text_inputs)

# --- Normalize (important for cosine similarity) ---
image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)

# --- Compute similarity ---
similarity = (image_features @ text_features.T).squeeze(0)

# --- Show results ---
# The loop variable is `prompt_text`, not `prompt`, so the LLM prompt string held
# in the global `prompt` is not overwritten by this cell.
for i, prompt_text in enumerate(prompts):
    print(f"{prompt_text}: {similarity[i]:.4f}")

In [None]:
# Load and preprocess the second image (Memory 1).
image_path = "./data/padchest_sample/images/" + image_memories[1]["image_id"]
image = Image.open(image_path).convert("RGB")
image_input = preprocess_val(image).unsqueeze(0)  # add a leading batch dimension

# Re-run the image embedding and compare against the text features computed
# for `prompts` in the previous cell.
with torch.no_grad():
    image_features = model.encode_image(image_input)
    image_features /= image_features.norm(dim=-1, keepdim=True)

    similarity = (image_features @ text_features.T).squeeze(0)

# The loop variable is `prompt_text`, not `prompt`, so the LLM prompt string held
# in the global `prompt` is not overwritten by this cell.
for i, prompt_text in enumerate(prompts):
    print(f"{prompt_text}: {similarity[i]:.4f}")

In [None]:
# Illustrative schema of one ImageMemory record. The values are placeholders:
# "..." strings and [...] lists (a list holding the Ellipsis object).
ImageMemory = dict(
    image_id="...png",
    gcs_path="...",
    report="...",
    label="...",
    findings=[...],
    text_embedding=[...],     # OpenAI embedding of report or findings
    image_embedding=[...],    # BiomedCLIP embedding of image
)

In [None]:

# Reuses the BiomedCLIP model/preprocess loaded earlier and the OpenAI client
# bound to `client`.

for mem in image_memories:
    # --- TEXT EMBEDDING ---
    # Findings are joined into one string and embedded with the OpenAI model.
    findings_text = "; ".join(mem["findings"])
    text_response = client.embeddings.create(
        model="text-embedding-3-small",
        input=findings_text,
    )
    mem["text_embedding"] = text_response.data[0].embedding

    # --- IMAGE EMBEDDING ---
    image_path = f"./data/padchest_sample/images/{mem['image_id']}"
    image = Image.open(image_path).convert("RGB")
    image_tensor = preprocess_val(image).unsqueeze(0)  # add a leading batch dimension

    with torch.no_grad():
        img_emb = model.encode_image(image_tensor)
        # Scale to unit length along the feature axis.
        img_emb = img_emb / img_emb.norm(dim=-1, keepdim=True)

    # Stored as a plain Python list so the memory dict holds no tensors.
    mem["image_embedding"] = img_emb.squeeze().cpu().tolist()

print("All ImageMemories now contain both text and image embeddings.")

In [None]:
# Sanity check: each memory should carry both embeddings as plain lists.
# `.get` returns None for a missing key, which fails the isinstance test.
for i, mem in enumerate(image_memories):
    has_text = isinstance(mem.get("text_embedding"), list)
    has_image = isinstance(mem.get("image_embedding"), list)
    print(f"ImageMemory {i}: Text → {has_text} | Image → {has_image}")


In [None]:
# Number of ImageMemory records built so far.
len(image_memories)

In [None]:

# TEXT INDEX SETUP
# The dimension is taken from the stored embeddings instead of being hard-coded.
dim = len(image_memories[0]["text_embedding"])
text_index = faiss.IndexFlatL2(dim)

# Position in the index -> memory, in the same order the rows are added below.
text_id_map = dict(enumerate(image_memories))

# Populate the index with one (n_memories, dim) float32 matrix.
text_matrix = np.array(
    [entry["text_embedding"] for entry in image_memories], dtype="float32"
)
text_index.add(text_matrix)

print(f"Stored {text_index.ntotal} text embeddings in FAISS.")

In [None]:
# Query the text index with Memory 1's own text embedding.
query_vec = np.asarray(image_memories[1]["text_embedding"], dtype="float32").reshape(1, -1)
D, I = text_index.search(query_vec, k=1)

# Row 0 holds the results for the single query vector.
matched_index, matched_distance = I[0, 0], D[0, 0]
matched_memory = text_id_map[matched_index]

print("Closest Match by Report:")
print(
    f"- Image ID: {matched_memory['image_id']}",
    f"- Distance: {matched_distance:.4f}",
    f"- Findings: {matched_memory['findings']}",
    f"- Report: {matched_memory['report']}",
    sep="\n",
)

In [None]:
# Search top 2 in the text index so the self-match is included.
D, I = text_index.search(query_vec, k=2)

print("Search results:")
for rank in range(2):
    idx = I[0][rank]
    dist = D[0][rank]
    # Resolve positions through the text index's own map, not the map that
    # belongs to the earlier findings index.
    match = text_id_map[idx]
    print(f"\nMatch {rank + 1}:")
    print(f"- Image ID: {match['image_id']}")
    print(f"- Distance Score: {dist:.4f}")
    print(f"- Findings: {match['findings']}")

In [None]:
# IMAGE INDEX SETUP
dim = len(image_memories[0]["image_embedding"])
image_index = faiss.IndexFlatL2(dim)
image_id_map = {}

# Populate the index; without this step it stays empty and nothing can be searched.
for i, mem in enumerate(image_memories):
    vec = np.array([mem["image_embedding"]], dtype="float32")
    image_index.add(vec)
    image_id_map[i] = mem

print(f"Stored {image_index.ntotal} image embeddings in FAISS.")

# Query with Memory 1's image embedding; top 2 includes the self-match.
query_vec = np.array([image_memories[1]["image_embedding"]], dtype="float32")
D, I = image_index.search(query_vec, k=2)

print("Search results:")
for rank in range(2):
    idx = I[0][rank]
    dist = D[0][rank]
    match = image_id_map[idx]
    print(f"\nMatch {rank + 1}:")
    print(f"- Image ID: {match['image_id']}")
    print(f"- Distance Score: {dist:.4f}")
    print(f"- Findings: {match['findings']}")

In [None]:
# Build one combined vector per memory: text embedding first, image embedding second.
for mem in image_memories:
    parts = [
        np.asarray(mem[key], dtype="float32")
        for key in ("text_embedding", "image_embedding")
    ]
    # Stored as a plain list, like the per-modality embeddings.
    mem["multimodal_embedding"] = np.concatenate(parts).tolist()

print("Added multimodal_embedding to all ImageMemories.")

In [None]:
# MULTIMODAL INDEX
dim = len(image_memories[0]["multimodal_embedding"])
multimodal_index = faiss.IndexFlatL2(dim)

# Position in the index -> memory, in the same order the rows are added below.
multimodal_id_map = dict(enumerate(image_memories))

# Add all vectors to FAISS as one (n_memories, dim) float32 matrix.
multimodal_matrix = np.array(
    [entry["multimodal_embedding"] for entry in image_memories], dtype="float32"
)
multimodal_index.add(multimodal_matrix)

print(f"Stored {multimodal_index.ntotal} multimodal embeddings in FAISS.")

In [None]:
# Query vector: Memory 1's combined embedding as a (1, dim) float32 row.
query_vec = np.asarray(image_memories[1]["multimodal_embedding"], dtype="float32").reshape(1, -1)

# Two neighbours, so the self-match is part of the listing.
D, I = multimodal_index.search(query_vec, k=2)

# Display results.
print("Multimodal Search Results:")
for rank, (idx, dist) in enumerate(zip(I[0], D[0])):
    match = multimodal_id_map[idx]
    print(f"\nMatch {rank + 1}:")
    print(f"- Image ID: {match['image_id']}")
    print(f"- Distance Score: {dist:.4f}")
    print(f"- Findings: {match['findings']}")

In [None]:
# The two cases to compare. NOTE: both are picked by fixed position for the
# two-memory demo; `match` is not derived from the search results.
query, match = image_memories[1], image_memories[0]

# Prompt for the agentic reflection.
prompt = f"""
You are a clinical assistant comparing two radiology cases.

Case A (new input):
- Findings: {query['findings']}
- Report: {query['report']}

Case B (closest past memory):
- Findings: {match['findings']}
- Report: {match['report']}

Please reflect on the similarity and difference between these two cases.
Summarize in plain language what they have in common, and what is clinically distinct.
"""

# Single-turn GPT-4o request carrying the prompt as the user message.
reflection_messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(
    model="gpt-4o",
    messages=reflection_messages,
    temperature=0.2,
)

print("Agent Reflection:\n")
print(response.choices[0].message.content)