## Environment Setup and Installation  

This notebook requires specific dependencies, which are listed in `requirements.txt`.

In [None]:
# Install required packages (uncomment the line below on first run).
# The %pip magic is preferred over !pip because it installs into the
# environment of the running kernel.
# %pip install -r requirements.txt

# Multi-Agent AI for Autonomous Astrophysics Research

This notebook implements a system of AI agents that work together to detect anomalies in astrophysical data, with a focus on gravitational wave events. The system:

1. Ingests data from astrophysics sources (GWOSC, NASA HEASARC)
2. Detects anomalies in the data
3. Generates theoretical models to explain the anomalies
4. Correlates findings with existing research
5. Visualizes the results

In [None]:
# Import necessary libraries
import os
import requests
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dotenv import load_dotenv
from crewai import Agent, Task, Crew, LLM, Process
from langchain.tools import Tool, tool
import time
from datetime import datetime

# Load environment variables from .env file
load_dotenv()

# Access API key; os.getenv returns None when the variable is not set.
# (An Anthropic key can be wired up the same way via ANTHROPIC_API_KEY.)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if not OPENAI_API_KEY:
    print("No OpenAI key found...")
else:
    # Print only a short prefix so the full secret never appears in cell output
    print(OPENAI_API_KEY[:4],"*********************")

    # Set environment variable for crewai. This stays inside the else-branch
    # because os.environ only accepts strings: assigning a missing (None) key
    # would raise TypeError right after the message above.
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

## Data Ingestion Tools

First, we'll create tools to fetch real astrophysics data from various sources.

In [None]:
# Import necessary libraries
import os
import requests
import re
import json
import pandas as pd
from dotenv import load_dotenv
from crewai import Agent, Task, Crew, LLM, Process
from langchain.tools import tool

# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Ensure API key exists BEFORE exporting it: os.environ only accepts strings,
# so assigning a missing (None) key would raise TypeError and hide the
# intended ValueError below.
if not OPENAI_API_KEY:
    raise ValueError("No OpenAI API key found!")

os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [None]:
# Configure the language model: the model name is kept in its own variable,
# and the LLM object is built from it together with the API key loaded earlier.
openai_model = "gpt-4o-mini"
openai_llm = LLM(
    model=openai_model,
    api_key=OPENAI_API_KEY,
)

In [None]:
# TOOL: Fetch GWOSC Events
@tool
def fetch_gwosc_events(limit=5):
    """Fetch gravitational wave events from GWOSC.

    Args:
        limit: Maximum number of events to return (default 5). Values such
            as "5" are converted to int; None returns every event.

    Returns:
        A list of dicts with the keys event_id, mass_1_source,
        mass_2_source, luminosity_distance and network_snr, or an error
        message string when the request fails.
    """
    url = "https://gwosc.org/eventapi/json/allevents/"
    try:
        # Coerce inside the try so a bad limit is reported as text like any
        # other failure instead of raising.
        if limit is not None:
            limit = int(limit)

        # Without a timeout a stalled connection would block the run forever
        response = requests.get(url, timeout=30)

        if response.status_code != 200:
            return f"Error fetching data: {response.status_code}"

        data = response.json()
        events = list(data.get('events', {}).items())[:limit]

        formatted_events = []
        for event_id, event_data in events:
            # NOTE(review): the GWOSC event API appears to publish the SNR as
            # 'network_matched_filter_snr'; fall back to 'network_snr' so
            # either spelling is picked up. Confirm against the API docs.
            snr = event_data.get('network_matched_filter_snr')
            if snr is None:
                snr = event_data.get('network_snr')

            formatted_events.append({
                'event_id': event_id,
                'mass_1_source': event_data.get('mass_1_source'),
                'mass_2_source': event_data.get('mass_2_source'),
                'luminosity_distance': event_data.get('luminosity_distance'),
                'network_snr': snr
            })
        return formatted_events
    except Exception as e:
        # Failures are returned as text rather than raised so the caller
        # always receives a value it can inspect.
        return f"Exception occurred: {str(e)}"

# TOOL: Validate GWOSC Data
@tool
def validate_gwosc_data(events):
    """Validate event IDs and required fields before passing to the workflow.

    Args:
        events: List of event dicts, each holding an 'event_id' plus the
            mass, distance and SNR fields. A JSON string encoding such a
            list is decoded first.

    Returns:
        A dict with three lists: 'valid_events', 'warnings' (valid events
        that lack an optional field) and 'invalid_events' (each with a
        'reason').
    """
    valid_events = []
    invalid_events = []
    warnings = []

    required_fields = ['mass_1_source', 'mass_2_source', 'luminosity_distance']
    optional_fields = ['network_snr']
    # GWYYMMDD, an optional _HHMMSS suffix, then a version tag.
    # Examples: GW150914-v3, GW190521_030229-v1
    event_id_pattern = re.compile(r'^GW\d{6}(?:_\d{6})?-v\d+$')

    # The input may arrive as text (e.g. serialized upstream output or an
    # error message); decode it when it is valid JSON.
    if isinstance(events, str):
        try:
            events = json.loads(events)
        except ValueError:
            events = None

    if not isinstance(events, (list, tuple)):
        invalid_events.append({"event_id": "", "reason": "Input is not a list of events"})
        return {"valid_events": valid_events, "warnings": warnings, "invalid_events": invalid_events}

    for event in events:
        # Entries that are not dicts cannot be inspected field by field
        if not isinstance(event, dict):
            invalid_events.append({"event_id": str(event), "reason": "Invalid format"})
            continue

        # `or ''` also covers an explicit None stored under 'event_id'
        event_id = event.get('event_id') or ''
        missing_fields = [field for field in required_fields if event.get(field) is None]
        missing_optional = [field for field in optional_fields if event.get(field) is None]

        if not isinstance(event_id, str) or not event_id_pattern.match(event_id):
            invalid_events.append({"event_id": event_id, "reason": "Invalid format"})
        elif missing_fields:
            invalid_events.append({"event_id": event_id, "reason": f"Missing required fields: {', '.join(missing_fields)}"})
        elif missing_optional:
            warnings.append({"event_id": event_id, "warning": f"Missing optional fields: {', '.join(missing_optional)}"})
            valid_events.append(event)  # Allow it to pass with warning
        else:
            valid_events.append(event)

    return {"valid_events": valid_events, "warnings": warnings, "invalid_events": invalid_events}


In [None]:
# AGENT: Data Ingestor
# Fetches raw events through the fetch_gwosc_events tool.
data_ingestor = Agent(
    role="Astrophysical Data Ingestor",
    goal="Retrieve astrophysics event data.",
    backstory="You fetch raw event data from public astrophysics sources.",
    verbose=True,
    tools=[fetch_gwosc_events],
    allow_delegation=False,
    # Pass the configured LLM object via `llm`; the previous `llm_model`
    # keyword is not the documented Agent parameter, so the configured
    # model was not being applied.
    llm=openai_llm
)

# AGENT: Data Validator
# Checks the fetched events through the validate_gwosc_data tool.
data_validator = Agent(
    role="Astrophysical Data Validator",
    goal="Validate event IDs and check for missing fields.",
    backstory="You specialize in validating astrophysical event data.",
    verbose=True,
    tools=[validate_gwosc_data],
    allow_delegation=False,
    llm=openai_llm
)

In [None]:
# TASK: Fetch Data
# Assigned to the ingestor agent; runs synchronously.
data_ingestion_task = Task(
    agent=data_ingestor,
    description="Fetch the latest gravitational wave event data.",
    expected_output="A structured dataset of gravitational wave events.",
    async_execution=False,
)

# TASK: Validate Data
# Assigned to the validator agent. The ingestion task is listed as context,
# so this task must run after data ingestion.
data_validation_task = Task(
    agent=data_validator,
    context=[data_ingestion_task],
    description="Validate event IDs and check for missing fields before processing.",
    expected_output="List of valid events and rejected events with reasons.",
    async_execution=False,
)

In [None]:
# CREW: Minimal Workflow
# Two agents and their two tasks, executed one after the other.
astrophysics_crew = Crew(
    agents=[data_ingestor, data_validator],
    tasks=[data_ingestion_task, data_validation_task],
    verbose=True,
    process=Process.sequential
)

# RUN CREW
try:
    print("Starting minimal astrophysics workflow...\n")
    results = astrophysics_crew.kickoff()
    print("\nWorkflow execution completed!")
except Exception as e:
    print(f"Error: {str(e)}")
    # Re-raise after reporting: swallowing the error would leave `results`
    # unbound (or stale from an earlier run), so later cells would either
    # fail with an unrelated NameError or silently report old data.
    raise

In [None]:
# Pull the per-task outputs out of the crew result
task_outputs = results.tasks_output

# Collect the report fragments, starting with the report header
report_sections = ["\n## ANALYSIS REPORT  \n\n"]

# Add one section per task output, titled with that output's `agent` value;
# a placeholder is used when the raw result is empty
for task in task_outputs:
    agent_name = task.agent
    task_result = task.raw or "No result available."
    report_sections.append(
        f"\n\n## {agent_name} Analysis  \n\n{task_result}\n\n"
    )

formatted_results = "".join(report_sections)

# Write the report to a timestamped Markdown file in the working directory
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = f"astrophysics_report_{timestamp}.md"

with open(output_file, "w", encoding="utf-8") as file:
    file.write(formatted_results)

print(f"\nResults saved to: {output_file}")

In [None]:
# Report which model each agent ended up with
agents = [data_ingestor, data_validator]

for agent in agents:
    print("{} is using LLM: {}".format(agent.role, agent.llm.model))