In [2]:
# Hierarchical Multi-Agent System for Medical Diagnosis Verification
# This notebook uses small pre-trained models for testing purposes.

import random
import numpy as np
import torch
from transformers import pipeline, AutoTokenizer, AutoModel
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
import torchvision
import os
import requests
from PIL import Image
import zipfile
import pandas as pd
import kagglehub

# Seed for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)

# Download and extract dataset using KaggleHub
path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")
print("Path to dataset files:", path)

data_dir = os.path.join(path, "chest_xray")

class ChestXrayDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.image_labels = []
        self.image_paths = []

        for label in os.listdir(image_dir):
            label_dir = os.path.join(image_dir, label)
            if os.path.isdir(label_dir):
                for img_name in os.listdir(label_dir):
                    self.image_paths.append(os.path.join(label_dir, img_name))
                    self.image_labels.append(label)

        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.image_labels[idx]
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

# Define transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load dataset
data_dir_train = os.path.join(data_dir, "train")
train_dataset = ChestXrayDataset(data_dir_train, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Text data (simulated for this example)
synthetic_texts = [
    "Patient has symptoms of cough and fever.",
    "The chest X-ray shows clear signs of infection.",
    "No significant abnormalities detected in the X-ray.",
    "Signs consistent with pneumonia present."
]
labels_text = ["positive", "positive", "negative", "positive"]

# Define models for Agent and Supervisor
# Agent uses CLIP base model
from transformers import CLIPProcessor, CLIPModel
clip_agent_model_name = "openai/clip-vit-base-patch32"
clip_agent_model = CLIPModel.from_pretrained(clip_agent_model_name)
clip_agent_processor = CLIPProcessor.from_pretrained(clip_agent_model_name)

# Supervisor uses a fine-tuned CLIP model (simulated as a different CLIP instance for now)
clip_supervisor_model_name = "openai/clip-vit-base-patch32"
clip_supervisor_model = CLIPModel.from_pretrained(clip_supervisor_model_name)
clip_supervisor_processor = CLIPProcessor.from_pretrained(clip_supervisor_model_name)

# Multi-modal input processing for Agent
def process_with_agent(image, text):
    image_inputs = clip_agent_processor(images=image, return_tensors="pt", padding=True)
    text_inputs = clip_agent_processor(text=text, return_tensors="pt", padding=True)
    image_features = clip_agent_model.get_image_features(**image_inputs)
    text_features = clip_agent_model.get_text_features(**text_inputs)
    return image_features, text_features

# Multi-modal input processing for Supervisor
def process_with_supervisor(image, text):
    image_inputs = clip_supervisor_processor(images=image, return_tensors="pt", padding=True)
    text_inputs = clip_supervisor_processor(text=text, return_tensors="pt", padding=True)
    image_features = clip_supervisor_model.get_image_features(**image_inputs)
    text_features = clip_supervisor_model.get_text_features(**text_inputs)
    return image_features, text_features

# Decision-making
def multi_modal_decision_agent(image, text):
    image_features, text_features = process_with_agent(image, text)
    similarity = (image_features @ text_features.T).squeeze().item()
    decision = "Positive Diagnosis" if similarity > 0.5 else "Negative Diagnosis"
    return similarity, decision

def multi_modal_decision_supervisor(image, text):
    image_features, text_features = process_with_supervisor(image, text)
    similarity = (image_features @ text_features.T).squeeze().item()
    decision = "Positive Diagnosis" if similarity > 0.5 else "Negative Diagnosis"
    return similarity, decision

# Test multi-modal decision with Agent and Supervisor
print("Testing Agent...")
agent_image = Image.open(train_dataset.image_paths[0]).convert("RGB")
agent_text = synthetic_texts[0]
agent_similarity, agent_decision = multi_modal_decision_agent(agent_image, agent_text)
print(f"Agent Similarity: {agent_similarity:.4f}, Decision: {agent_decision}")

print("Testing Supervisor...")
supervisor_image = Image.open(train_dataset.image_paths[1]).convert("RGB")
supervisor_text = synthetic_texts[1]
supervisor_similarity, supervisor_decision = multi_modal_decision_supervisor(supervisor_image, supervisor_text)
print(f"Supervisor Similarity: {supervisor_similarity:.4f}, Decision: {supervisor_decision}")


Downloading from https://www.kaggle.com/api/v1/datasets/download/paultimothymooney/chest-xray-pneumonia?dataset_version_number=2...


100%|██████████| 2.29G/2.29G [00:26<00:00, 93.3MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/paultimothymooney/chest-xray-pneumonia/versions/2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

Testing Agent...
Agent Similarity: 26.9805, Decision: Positive Diagnosis
Testing Supervisor...
Supervisor Similarity: 26.4444, Decision: Positive Diagnosis
