In [1]:
#datagen.py
# Import required libraries
import os
import pinecone
from faker import Faker
import random
from datetime import datetime, timedelta
from llama_index.embeddings import get_embedding

# Shared module-level helpers: a Faker instance for synthetic demographics and a
# handle on the Pinecone index that receives the document embeddings.
# Credentials and environment name are read from the process environment.
fake = Faker()
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)
pinecone_index = pinecone.Index("llama-memory-index")

# Value pools the generators below sample from when building synthetic records.

# Diagnoses that may appear in a visit's assessment.
conditions = [
    "Hypertension",
    "Type 2 Diabetes",
    "Asthma",
    "Arthritis",
    "Depression",
    "Anxiety",
    "GERD",
    "Migraine",
    "Hypothyroidism",
    "Hyperlipidemia",
]

# Prescriptions (drug plus dose/form) that may appear in a visit.
medications = [
    "Lisinopril 10mg",
    "Metformin 500mg",
    "Albuterol inhaler",
    "Sertraline 50mg",
    "Omeprazole 20mg",
    "Levothyroxine 75mcg",
    "Atorvastatin 40mg",
    "Amlodipine 5mg",
    "Metoprolol 25mg",
    "Gabapentin 300mg",
]

# Allergens a patient may be assigned; note that "None" is itself an entry.
allergies = [
    "Penicillin",
    "Sulfa",
    "Latex",
    "Peanuts",
    "Shellfish",
    "Iodine",
    "Aspirin",
    "Morphine",
    "None",
    "Dairy",
]

# Generate patient vitals
def generate_vitals():
    """Return one randomly drawn set of vital signs, formatted for display.

    Returns:
        dict: Maps each vital-sign label ("Blood Pressure", "Heart Rate",
        "Temperature", "Respiratory Rate", "Weight", "Height") to a string
        holding the value and its unit.
    """
    # The draws happen in the same order as the keys below, so a seeded
    # ``random`` module produces reproducible vitals.
    systolic = random.randint(110, 140)
    diastolic = random.randint(60, 90)
    pulse = random.randint(60, 100)
    temperature = round(random.uniform(97.0, 99.5), 1)
    breaths = random.randint(12, 20)
    pounds = random.randint(120, 220)
    inches = random.randint(60, 75)

    vitals = {}
    vitals["Blood Pressure"] = f"{systolic}/{diastolic}"
    vitals["Heart Rate"] = f"{pulse} bpm"
    vitals["Temperature"] = f"{temperature}°F"
    vitals["Respiratory Rate"] = f"{breaths} breaths/min"
    vitals["Weight"] = f"{pounds} lbs"
    vitals["Height"] = f"{inches} inches"
    return vitals

# Generate patient medical history
def generate_patient_history():
    """Build a list of synthetic visits for one patient, newest first.

    Between 3 and 8 visits are generated. The first is dated with the current
    date and every following visit lies a further 30-180 days in the past.

    Returns:
        list[dict]: One dict per visit with the keys "Date" (YYYY-MM-DD),
        "Chief Complaint", "Vitals", "Assessment", "Medications" and "Plan".
    """
    visits = []
    visit_date = datetime.now()

    for _ in range(random.randint(3, 8)):
        complaint = fake.sentence(nb_words=4)
        vitals = generate_vitals()
        # 1-3 distinct conditions and 1-4 distinct medications per visit.
        assessment = random.sample(conditions, random.randint(1, 3))
        prescribed = random.sample(medications, random.randint(1, 4))
        plan = fake.paragraph(nb_sentences=2)

        visits.append(
            {
                "Date": visit_date.strftime("%Y-%m-%d"),
                "Chief Complaint": complaint,
                "Vitals": vitals,
                "Assessment": assessment,
                "Medications": prescribed,
                "Plan": plan,
            }
        )

        # Step back in time for the next (older) visit.
        visit_date = visit_date - timedelta(days=random.randint(30, 180))

    return visits

# Send patient document to Pinecone
def send_to_pinecone(patient_id, document_text):
    """Embed ``document_text`` and upsert the vector into the Pinecone index.

    Args:
        patient_id: Identifier the vector is stored under.
        document_text: Full text of the patient record to embed.

    Only the id and the embedding are sent; the text itself is not attached
    to the vector.
    """
    vector = get_embedding(document_text)
    record = (patient_id, vector)
    pinecone_index.upsert([record])

# Render a patient dict as the text that is stored on disk and embedded.
def _render_patient_record(patient):
    """Return the plain-text medical record for ``patient`` as one string."""
    rule = "-" * 50 + "\n"
    allergy_text = ", ".join(patient["Allergies"]) if patient["Allergies"] else "None"

    parts = [
        f"PATIENT MEDICAL RECORD\n{'=' * 50}\n\n",
        f"Patient ID: {patient['Patient ID']}\n",
        f"Name: {patient['Name']}\n",
        f"DOB: {patient['DOB']}\n",
        f"Gender: {patient['Gender']}\n\n",
        "Contact Information:\n",
    ]
    parts.extend(f"{key}: {value}\n" for key, value in patient["Contact"].items())
    parts.append(f"\nInsurance: {patient['Insurance']}\n")
    parts.append(f"Allergies: {allergy_text}\n\n")
    parts.append("MEDICAL HISTORY\n")
    parts.append(rule)

    for visit in patient["Medical History"]:
        parts.append(f"\nVisit Date: {visit['Date']}\n")
        parts.append(f"Chief Complaint: {visit['Chief Complaint']}\n")
        parts.append("\nVitals:\n")
        parts.extend(f"  {k}: {v}\n" for k, v in visit["Vitals"].items())
        parts.append(f"\nAssessment: {', '.join(visit['Assessment'])}\n")
        parts.append(f"Medications: {', '.join(visit['Medications'])}\n")
        parts.append(f"Plan: {visit['Plan']}\n")
        parts.append(rule)

    return "".join(parts)


# Create patient file and save to Pinecone
def create_patient_file(patient_id):
    """Create a synthetic patient record, save it under ``docs/`` and index it.

    The record is rendered once; the same text is written to
    ``docs/patient_<patient_id>.txt`` and passed to ``send_to_pinecone``.

    Args:
        patient_id: Identifier used in the record, in the file name and as the
            vector id in Pinecone.
    """
    patient = {
        "Patient ID": patient_id,
        "Name": fake.name(),
        "DOB": fake.date_of_birth(minimum_age=18, maximum_age=90).strftime("%Y-%m-%d"),
        "Gender": random.choice(["Male", "Female"]),
        "Contact": {
            "Phone": fake.phone_number(),
            "Email": fake.email(),
            "Address": fake.address(),
        },
        "Insurance": fake.company(),
        # The pool contains a literal "None" placeholder. Drop it from the
        # sample so a record never reads "None" next to real allergens; an
        # empty list is rendered as "None" by the formatter.
        "Allergies": [
            allergen
            for allergen in random.sample(allergies, random.randint(0, 3))
            if allergen != "None"
        ],
        "Medical History": generate_patient_history(),
    }

    document_text = _render_patient_record(patient)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("docs", exist_ok=True)

    # The record contains non-ASCII text (the degree sign in the temperature),
    # so the encoding is fixed instead of depending on the platform default.
    filename = f"docs/patient_{patient_id}.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(document_text)

    # Index exactly what was written; no need to read the file back.
    send_to_pinecone(patient_id, document_text)

# Main function to generate patient records
def main():
    """Generate the patient records, index each one, and print a summary line.

    Patient ids are "P" followed by a six-digit, zero-padded sequence number
    starting at 1 (P000001, P000002, ...).
    """
    num_patients = 10  # Change this number to generate more or fewer patient records
    for sequence in range(1, num_patients + 1):
        create_patient_file(f"P{sequence:06d}")
    print(f"Generated {num_patients} patient records in the 'docs' directory and indexed them in Pinecone.")

if __name__ == "__main__":
    main()

    # Cleanup Pinecone at the end.
    # NOTE(review): the Pinecone client does not appear to expose a ``deinit``
    # function, in which case an unconditional call would end a successful run
    # with AttributeError. Call it only when the installed client provides it.
    _pinecone_deinit = getattr(pinecone, "deinit", None)
    if callable(_pinecone_deinit):
        _pinecone_deinit()


ModuleNotFoundError: No module named 'pinecone'

In [None]:
#agents.py
import os
from dotenv import load_dotenv
from swarms import Agent
from swarm_models import OpenAIChat
from multi_agent_rag.memory import LlamaIndexDB

# Load variables from a .env file before any of them are read.
load_dotenv()

# The chat client below talks to Groq's OpenAI-style endpoint, so the key it
# needs is the Groq one (GROQ_API_KEY), not an OpenAI key.
api_key = os.environ.get("GROQ_API_KEY")

# Language model shared by every agent in this module.
model = OpenAIChat(
    model_name="llama-3.1-70b-versatile",
    temperature=0.1,
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=api_key,
)

# Retrieval memory shared by every agent in this module.
# NOTE(review): the memory.py cell of this notebook defines LlamaPineconeDB, not
# LlamaIndexDB — confirm that multi_agent_rag.memory really exports LlamaIndexDB
# and accepts these keyword arguments.
memory_system = LlamaIndexDB(
    # Directory containing medical documents
    data_dir="docs",
    # Use filenames as document identifiers
    filename_as_id=True,
    # Search subdirectories
    recursive=True,
    # Return top 10 most relevant documents
    similarity_top_k=10,
)

# Initialize specialized medical agents
def _build_medical_agent(agent_name, system_prompt, saved_state_path):
    """Create an Agent with the settings shared by the whole medical team.

    All agents use the module-level ``model`` and ``memory_system`` and the same
    loop, retry, context and output settings; only the three arguments differ.

    Args:
        agent_name: Name of the agent; also used to refer to it in flow strings.
        system_prompt: Role description given to the model.
        saved_state_path: JSON file the agent state is autosaved to.

    Returns:
        Agent: The configured agent.
    """
    return Agent(
        agent_name=agent_name,
        system_prompt=system_prompt,
        llm=model,
        max_loops=1,
        autosave=True,
        verbose=True,
        dynamic_temperature_enabled=True,
        saved_state_path=saved_state_path,
        user_name="medical_team",
        retry_attempts=1,
        context_length=200000,
        output_type="string",
        memory_system=memory_system,  # Attach memory system for retrieval
    )


# Extracts structured clinical information from records.
medical_data_extractor = _build_medical_agent(
    agent_name="Medical-Data-Extractor",
    system_prompt=(
        "You are a specialized medical data extraction expert, trained in processing and analyzing clinical data, lab results, medical imaging reports, and patient records. "
        "Your role is to carefully extract relevant medical information while maintaining strict HIPAA compliance and patient confidentiality. "
        "Focus on identifying key clinical indicators, test results, vital signs, medication histories, and relevant patient history. "
        "Ensure all extracted data maintains proper medical context and terminology."
    ),
    saved_state_path="medical_data_extractor.json",
)

# Produces differential diagnoses from the extracted findings.
diagnostic_specialist = _build_medical_agent(
    agent_name="Diagnostic-Specialist",
    system_prompt=(
        "You are a senior diagnostic physician with extensive experience in differential diagnosis. "
        "Your role is to analyze patient symptoms, lab results, and clinical findings to develop comprehensive diagnostic assessments. "
        "Consider all presenting symptoms, patient history, risk factors, and test results to formulate possible diagnoses. "
        "Prioritize diagnoses based on clinical probability and severity. "
        "Always consider both common and rare conditions that match the symptom pattern. "
        "Recommend additional tests or imaging when needed for diagnostic clarity. "
        "Follow evidence-based diagnostic criteria and current medical guidelines."
    ),
    saved_state_path="diagnostic_specialist.json",
)

# Turns diagnoses into a treatment plan.
treatment_planner = _build_medical_agent(
    agent_name="Treatment-Planner",
    system_prompt=(
        "You are an experienced clinical treatment specialist focused on developing comprehensive treatment plans. "
        "Your expertise covers both acute and chronic condition management, medication selection, and therapeutic interventions. "
        "Consider patient-specific factors including age, comorbidities, allergies, and contraindications when recommending treatments. "
        "Incorporate both pharmacological and non-pharmacological interventions. "
        "Emphasize evidence-based treatment protocols while considering patient preferences and quality of life. "
        "Address potential drug interactions and side effects. "
        "Include monitoring parameters and treatment milestones."
    ),
    saved_state_path="treatment_planner.json",
)

# Adds specialist-level review for complex cases.
specialist_consultant = _build_medical_agent(
    agent_name="Specialist-Consultant",
    system_prompt=(
        "You are a medical specialist consultant with expertise across multiple disciplines including cardiology, neurology, endocrinology, and internal medicine. "
        "Your role is to provide specialized insight for complex cases requiring deep domain knowledge. "
        "Analyze cases from your specialist perspective, considering rare conditions and complex interactions between multiple systems. "
        "Provide detailed recommendations for specialized testing, imaging, or interventions within your domain. "
        "Highlight potential complications or considerations that may not be immediately apparent to general practitioners."
    ),
    saved_state_path="specialist_consultant.json",
)

# Consolidates everything into a care plan the patient can follow.
patient_care_coordinator = _build_medical_agent(
    agent_name="Patient-Care-Coordinator",
    system_prompt=(
        "You are a patient care coordinator specializing in comprehensive healthcare management. "
        "Your role is to ensure holistic patient care by coordinating between different medical specialists, considering patient needs, and managing care transitions. "
        "Focus on patient education, medication adherence, lifestyle modifications, and follow-up care planning. "
        "Consider social determinants of health, patient resources, and access to care. "
        "Develop actionable care plans that patients can realistically follow. "
        "Coordinate with other healthcare providers to ensure continuity of care and proper implementation of treatment plans."
    ),
    saved_state_path="patient_care_coordinator.json",
)


In [None]:
#__init__.py
from .agents import (
    medical_data_extractor,
    diagnostic_specialist,
    treatment_planner,
    specialist_consultant,
    patient_care_coordinator
)
from .swarm_models import OpenAIChat

# Initialize the model or other common components
from .common import initialize_model  # If you have a separate init function for the model

# Configure the root logger at INFO level as soon as this package is imported.
# NOTE(review): calling basicConfig from a package __init__ also affects the
# importing application's logging setup — confirm this is intended rather than
# leaving logging configuration to the entry-point script.
import logging
logging.basicConfig(level=logging.INFO)


In [None]:
#memory.py
import os
import pinecone
from typing import Optional, List
from pathlib import Path
from loguru import logger
from llama_index.core import SimpleDirectoryReader
from llama_index.embeddings import get_embedding

class LlamaPineconeDB:
    """Manage document indexing and querying using Pinecone and LlamaIndex for memory.

    Every document found under ``data_dir`` is embedded and upserted into a
    Pinecone index when the instance is created. The document text is stored as
    ``text`` metadata on each vector so that :meth:`query` can return the
    matching documents rather than only their ids and scores.
    """

    def __init__(
        self,
        data_dir: str = "docs",
        pinecone_api_key: str = "",
        pinecone_env: str = "us-east1-aws",
        index_name: str = "llama-memory-index",
        dimension: int = 768,
    ) -> None:
        """Connect to Pinecone, make sure the index exists and index ``data_dir``.

        Args:
            data_dir: Directory holding the documents to index.
            pinecone_api_key: API key passed to ``pinecone.init``.
            pinecone_env: Pinecone environment passed to ``pinecone.init``.
            index_name: Name of the index to use; created when missing.
            dimension: Vector size used when the index has to be created. It
                must match the output size of ``get_embedding`` (768 is the
                original assumption).

        Raises:
            FileNotFoundError: If ``data_dir`` does not exist.
        """
        self.data_dir = data_dir
        self.index = None

        # Validate the local input first so a bad path fails before any remote
        # call is made or an index is created.
        if not Path(self.data_dir).exists():
            logger.error(f"Directory not found: {self.data_dir}")
            raise FileNotFoundError(f"Directory {self.data_dir} does not exist")

        # Initialize Pinecone with the specified environment
        pinecone.init(api_key=pinecone_api_key, environment=pinecone_env)

        # Check if the index already exists; if not, create it
        if index_name not in pinecone.list_indexes():
            pinecone.create_index(index_name, dimension=dimension)
        self.pinecone_index = pinecone.Index(index_name)

        logger.info("Initialized LlamaPineconeDB")

        self.add_documents()

    def add_documents(self) -> None:
        """Read the documents in ``data_dir`` and upsert them into Pinecone.

        Each vector is stored together with ``{"text": <document text>}`` as
        metadata; without it the text could not be recovered at query time.

        Raises:
            Exception: Any error raised while reading, embedding or upserting
                is logged and re-raised unchanged.
        """
        try:
            documents = SimpleDirectoryReader(self.data_dir).load_data()
            for doc in documents:
                # NOTE(review): LlamaIndex documents are expected to expose their
                # identifier as ``doc_id``; the ``id`` attribute read by the
                # original code is kept as a fallback — confirm against the
                # installed llama_index version.
                doc_id = getattr(doc, "doc_id", None) or doc.id
                embedding = get_embedding(doc.text)  # Get embedding for document text
                # NOTE(review): Pinecone limits metadata size per vector — confirm
                # the documents stay below that limit.
                self.pinecone_index.upsert([(doc_id, embedding, {"text": doc.text})])
            logger.success(f"Documents indexed successfully from {self.data_dir}")
        except Exception as e:
            logger.error(f"Error indexing documents: {str(e)}")
            raise

    @staticmethod
    def _match_text(match) -> Optional[str]:
        """Return the stored document text of one query match, or None if absent."""
        try:
            return match["metadata"]["text"]
        except (KeyError, TypeError, AttributeError):
            # Vector was stored without text metadata (e.g. by an older run).
            return None

    def query(self, query: str, top_k: int = 5) -> List[str]:
        """Retrieve similar documents using Pinecone and provide memory.

        Args:
            query (str): The query string.
            top_k (int): Number of similar documents to retrieve.

        Returns:
            List[str]: Texts of the best matches, at most ``top_k`` of them.
            Matches that carry no stored text are skipped.

        Raises:
            Exception: Any error raised while embedding or querying is logged
                and re-raised unchanged.
        """
        try:
            query_embedding = get_embedding(query)
            # The text lives in the vector metadata, so metadata (not the raw
            # vector values) is what has to be requested.
            results = self.pinecone_index.query(
                query_embedding, top_k=top_k, include_metadata=True
            )

            # Retrieve top documents
            candidate_texts = (self._match_text(match) for match in results["matches"])
            top_docs = [text for text in candidate_texts if text is not None]
            logger.info(f"Retrieved {len(top_docs)} documents for query: {query}")
            return top_docs
        except Exception as e:
            logger.error(f"Error during query: {str(e)}")
            raise

    def close(self):
        """Clean up Pinecone resources.

        NOTE(review): the Pinecone client does not appear to expose ``deinit``;
        it is called only when present so that closing never raises
        AttributeError.
        """
        deinit = getattr(pinecone, "deinit", None)
        if callable(deinit):
            deinit()

# Sample run: index ./docs and answer one example query.
# The key comes from the PINECONE_API_KEY environment variable; without it only
# an error is logged.
# NOTE(review): this code sits at module level, so it also runs whenever the
# module is imported — confirm that is intended.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

if not pinecone_api_key:
    logger.error("Pinecone API key not found. Please set the PINECONE_API_KEY environment variable.")
else:
    llama_pinecone_db = LlamaPineconeDB(
        pinecone_api_key=pinecone_api_key,
        pinecone_env="us-east1-aws",  # Free tier environment
        data_dir="docs",
    )
    response = llama_pinecone_db.query("What is the medical history of patient 1?")
    print(response)
    llama_pinecone_db.close()


In [None]:
#main.py
# Import the AgentRearrange class for coordinating multiple agents
from swarms import AgentRearrange

# Import specialized medical agents for different aspects of patient care
from multi_agent_rag.agents import (
    diagnostic_specialist,  # Agent for diagnostic analysis
    medical_data_extractor,  # Agent for extracting medical data
    patient_care_coordinator,  # Agent for coordinating patient care
    specialist_consultant,  # Agent for specialist consultation
    treatment_planner,  # Agent for treatment planning
)

# Import database class for storing and retrieving medical documents
from multi_agent_rag.memory import LlamaIndexDB

# Agents in the order a case is handed from one to the next.
care_team = [
    medical_data_extractor,  # First agent to extract medical data
    diagnostic_specialist,  # Second agent to analyze and diagnose
    treatment_planner,  # Third agent to plan treatment
    specialist_consultant,  # Fourth agent to provide specialist input
    patient_care_coordinator,  # Final agent to coordinate care plan
]

# Build the AgentRearrange coordinator that runs the agents as one pipeline.
router = AgentRearrange(
    name="medical-diagnosis-treatment-swarm",
    description="Collaborative medical team for comprehensive patient diagnosis and treatment planning",
    # Limit to one iteration through the agent flow
    max_loops=1,
    agents=care_team,
    # Document storage and retrieval system
    memory_system=LlamaIndexDB(
        data_dir="docs",  # Directory containing medical documents
        filename_as_id=True,  # Use filenames as document identifiers
        recursive=True,  # Search subdirectories
        similarity_top_k=10,  # Return top 10 most relevant documents
    ),
    # Sequential flow, written as the agent names joined by " -> "
    flow=" -> ".join(member.agent_name for member in care_team),
)

# Example usage
if __name__ == "__main__":
    # Placeholder case description for patient Lucas Brown.
    case_text = "Patient Lucas Brown's medical data goes here."
    # Send the case through the agents in the configured flow order.
    router.run(case_text)
