# **Agentic Climate AI Notebook**

### 1.1 Downloading Dependencies


In [None]:
# Install everything with %pip (not !pip) so the packages land in the environment
# of the kernel that is running this notebook.

# Tooling and agent-side libraries (unpinned).
%pip install awscli langchain xarray matplotlib requests zarr cftime langchain_community duckduckgo_search scikit-learn

# Core stack. boto3 and botocore are pinned to the same release and s3fs is pinned
# to 0.4.2; keep these three pins in sync if any of them is changed.
%pip install \
  boto3==1.40.29 \
  langchain \
  langchain-core \
  langchain-text-splitters \
  langsmith \
  pydantic \
  SQLAlchemy \
  PyYAML \
  botocore==1.40.29 \
  s3transfer \
  requests \
  xarray \
  s3fs==0.4.2 \
  numpy \
  pandas \
  matplotlib \
  cftime \
  zarr \
  dask \
  netCDF4 \
  polars

# Bring the numeric stack (and pandas' optional accelerators) up to date.
%pip install --upgrade numpy pandas bottleneck numexpr

# Geospatial plotting libraries.
%pip install cartopy geopandas


### 1.2 Configure AWS

1.   Fill in the necessary details from your AWS Environment
2.   This involves the AWS Access Key ID, Secret Access Key, Region Name, and Output format (json)





In [None]:
# Show the installed versions of the AWS client libraries. %pip queries the
# environment of the running kernel, which is the one the agents will import from.
%pip show boto3 botocore s3fs

In [None]:
# Interactive step: the AWS CLI prompts for the values listed in the instructions
# above (access key ID, secret access key, region name, output format).
# Type them at the prompt; do not paste credentials into a notebook cell.
!aws configure



### 1.3 Mount Your Drive

In [None]:
from google.colab import drive

# Directory under which Google Drive is mounted; the agent folder configured in
# the next cell lives below this path.
DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)



In [None]:
import sys

# PASTE THE PATH OF YOUR AGENT FILES
folder_path = '/content/drive/MyDrive/AgenticAINotebook/'

# Make the agent modules importable. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
if folder_path not in sys.path:
    sys.path.append(folder_path)

### 1.4 Import Functions

In [None]:

# Import the factory function of every agent and of the orchestrator.
# These are project modules; they are expected to be importable through the
# folder appended to sys.path in section 1.3 -- check that path if an import fails.
from nasa_cmr_data_acquisition_agent import get_nasa_cmr_agent
from cesm_obs_comparison_agent import get_cesm_comparison_agent
from cesm_verification_agent import create_verification_agent
from knowledge_graph_agent_bedrock import get_knowledge_graph_agent
from cesm_lens_langchain_agent import get_cesm_lens_agent
from climate_research_orchestrator import create_climate_research_orchestrator

# Instantiate each agent and the orchestrator once; the cells in section 2 reuse these objects.
# NOTE(review): all factories are called without arguments -- what they configure
# internally is not visible from this notebook.
knowledge_graph_agent = get_knowledge_graph_agent()
nasa_cmr_data_agent = get_nasa_cmr_agent()
cesm_lens_agent = get_cesm_lens_agent()
cesm_obs_comp_agent = get_cesm_comparison_agent()
cesm_verif_agent = create_verification_agent()
orchestrator = create_climate_research_orchestrator()

# TODO: accept GRAPH_ID as a parameter (planned, not implemented yet).

## 2. Agentic AI

### 2.1 Summary
There are six agents in total, one per subsection below (2.2–2.7). Each serves a different purpose.

### 2.2 Knowledge Graph Agent

In [None]:
# Question for the Knowledge Graph Agent -- edit the string to ask something else.
prompt_knowledge_graph = "Find me datasets in NYC for rainfall and flooding "

# The prompt is handed to the agent under the "input" key; the raw response is printed.
response_knowledge_graph = knowledge_graph_agent.invoke(
    {"input": prompt_knowledge_graph}
)
print(response_knowledge_graph)

### 2.3 NASA CMR Data Acquisition Agent

In [None]:
# Question for the NASA CMR Data Acquisition Agent -- edit the string to ask something else.
prompt_nasa_cmr = (
    "What is the relationship between NYC rainfall and flooding using the existing data?"
)

# The prompt is handed to the agent under the "input" key; the raw response is printed.
response_nasa_cmr = nasa_cmr_data_agent.invoke(
    {"input": prompt_nasa_cmr}
)
print(response_nasa_cmr)

### 2.4 CESM LENS LangChain Agent

In [None]:
# Question for the CESM LENS LangChain Agent -- edit the string to ask something else.
prompt_cesm_lens = "Climate simulations for rainfall nyc."

# The prompt is handed to the agent under the "input" key; the raw response is printed.
response_cesm_lens = cesm_lens_agent.invoke(
    {"input": prompt_cesm_lens}
)
print(response_cesm_lens)

### 2.5 CESM Observation Comparison Agent

In [None]:
# Question for the CESM Observation Comparison Agent -- edit the string to ask something else.
prompt_cesm_obs_comp = (
    "Compare the CESM model output for precipitation in North America with observational data."
)

# The prompt is handed to the agent under the "input" key; the raw response is printed.
response_cesm_obs_comp = cesm_obs_comp_agent.invoke(
    {"input": prompt_cesm_obs_comp}
)
print(response_cesm_obs_comp)

### 2.6 CESM Verification Agent

In [None]:
# Question for the CESM Verification Agent -- edit the string to ask something else.
# (The variable names keep their existing spelling so that any cell reading them still works.)
prompt_cesm_vertif = "Verify the CESM model's simulation of Arctic sea ice extent."

# The prompt is handed to the agent under the "input" key; the raw response is printed.
response_cesm_vertif = cesm_verif_agent.invoke(
    {"input": prompt_cesm_vertif}
)
print(response_cesm_vertif)

### 2.7 Climate Research Orchestrator

In [None]:
# Question for the Climate Research Orchestrator -- edit the string to ask something else.
prompt_orchestrator = (
    "What are the potential impacts of a 2 degree Celsius global temperature increase?"
)

# The prompt is handed to the orchestrator under the "input" key; the raw response is printed.
response_orchestrator = orchestrator.invoke(
    {"input": prompt_orchestrator}
)
print(response_orchestrator)

# 3. Explore Local Database

## 3.1 Observational Datasets


In [1]:
import sqlite3
import pandas as pd

# Open the knowledge-graph database. The path is relative to the notebook's
# working directory; sqlite3 creates an empty file if none exists there, in which
# case the "No tables found" branch below is taken.
conn = sqlite3.connect('climate_knowledge_graph.db')
try:
    # List the tables that exist in the file.
    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
    table_names = [table[0] for table in tables]
    print("Tables found:", table_names)

    # Only query stored_datasets when that table is really there; the presence
    # of *some* table does not guarantee it.
    if 'stored_datasets' in table_names:
        df = pd.read_sql_query("SELECT * FROM stored_datasets", conn)
        display(df)
    elif tables:
        print("Table 'stored_datasets' not found - the database holds other tables only")
    else:
        print("No tables found - database might be empty")
finally:
    # Release the connection even if the query or the display step fails.
    conn.close()

Tables found: ['stored_datasets', 'dataset_relationships', 'sqlite_sequence']


Unnamed: 0,dataset_id,title,short_name,dataset_properties,dataset_labels,total_relationships,relationship_types,links,created_at,updated_at
0,dataset_UNH_WWRDII_WATBAL_16263,Gridded Fields of Major Water Balance Componen...,UNH_WWRDII_WATBAL,"{""science_keywords"": ""Category EARTH SCIENCE T...","[""Dataset""]",18,"[""hasCESMVariable"", ""hasConsortium"", ""hasConta...","[{""link_type"": ""HTTP"", ""hreflang"": ""enUS"", ""li...",2025-09-21T15:32:54.507844,2025-09-21T15:32:54.507844
1,dataset_comp_runoff_monthly_xdeg_994_18480,ISLSCP II UNHGRDC Composite Monthly Runoff,comp_runoff_monthly_xdeg_994,"{""science_keywords"": ""Category EARTH SCIENCE T...","[""Dataset""]",19,"[""hasConsortium"", ""hasContact"", ""hasDataCatego...","[{""link_type"": ""Earthdata"", ""hreflang"": ""enUS""...",2025-09-21T15:33:12.193605,2025-09-21T15:33:12.193605
2,dataset_UNH_GRDC_GCRDS_47417,UNH GRDC Global Composite Runoff Data Set v10,UNH_GRDC_GCRDS,"{""science_keywords"": ""Category EARTH SCIENCE T...","[""Dataset""]",14,"[""hasCESMVariable"", ""hasConsortium"", ""hasConta...","[{""link_type"": ""HTTP"", ""hreflang"": ""enUS"", ""li...",2025-09-21T15:33:26.066863,2025-09-21T15:33:26.066863


## 3.2 Climate Simulation Datasets

In [None]:
import sqlite3
import pandas as pd

# Open the CESM data registry. The path is relative to the notebook's working
# directory; sqlite3 creates an empty file if none exists there, in which case
# the "No tables found" branch below is taken.
conn = sqlite3.connect('cesm_data_registry.db')
try:
    # List the tables that exist in the file.
    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
    table_names = [table[0] for table in tables]
    print("Tables found:", table_names)

    # Only query cesm_data_paths when that table is really there; the presence
    # of *some* table does not guarantee it.
    if 'cesm_data_paths' in table_names:
        df = pd.read_sql_query("SELECT * FROM cesm_data_paths", conn)
        display(df)
    elif tables:
        print("Table 'cesm_data_paths' not found - the database holds other tables only")
    else:
        print("No tables found - database might be empty")
finally:
    # Release the connection even if the query or the display step fails.
    conn.close()

In [None]:

import sqlite3
import json

def _print_dataset_properties(props_json):
    """Print what the JSON-encoded ``dataset_properties`` value of one dataset holds."""
    if not props_json or props_json == 'None':
        print("    No properties data")
        return

    try:
        props = json.loads(props_json)
    except (TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError; TypeError covers a column value
        # that is not text at all (SQLite columns are dynamically typed).
        print(f"    JSON parse error: {e}")
        return

    if not isinstance(props, dict):
        # Valid JSON, but not an object, so there are no fields to inspect.
        print(f"    Properties JSON is not an object: {type(props).__name__}")
        return

    # Examine the links field in detail
    if 'links' in props:
        links_value = props['links']
        print(f"    Links type: {type(links_value)}")
        print(f"    Links value: {links_value}")
        print(f"    Links repr: {repr(links_value)}")

        # If it's a number, maybe it's referencing something else
        if isinstance(links_value, (int, str)) and str(links_value).isdigit():
            print(f"    This looks like a numeric ID/reference: {links_value}")

    # Look for other fields that might contain actual URLs
    for key, value in props.items():
        if not isinstance(value, str):
            continue
        lowered = value.lower()
        if 'http' in lowered or 'earthdata' in lowered or 's3://' in lowered:
            print(f"    Found URLs in '{key}': {value[:100]}...")
        elif len(value) > 50 and 's3' in value:
            # Long text mentioning "s3" without a full "s3://" URL.
            print(f"    Possible URLs in '{key}': {value[:100]}...")

    # Show all available fields for reference
    print(f"    All fields: {list(props.keys())}")


def _print_relationship_samples(cursor):
    """Print the columns and up to three sample rows of ``dataset_relationships``."""
    print("\n CHECKING DATASET_RELATIONSHIPS TABLE:")
    print("-" * 40)

    cursor.execute("SELECT * FROM dataset_relationships LIMIT 5")
    relationships = cursor.fetchall()

    if not relationships:
        print("    No relationships found")
        return

    # Get column names for relationships table
    cursor.execute("PRAGMA table_info(dataset_relationships)")
    rel_columns = [col[1] for col in cursor.fetchall()]
    print(f"    Relationship columns: {rel_columns}")

    for rel in relationships[:3]:
        rel_dict = dict(zip(rel_columns, rel))
        print(f"    Relationship: {rel_dict}")

        # Check if any relationship contains URL-like data
        for key, value in rel_dict.items():
            if isinstance(value, str) and ('http' in value.lower() or 'earthdata' in value.lower()):
                print(f"       Found URL in {key}: {value}")


def debug_links_data(db_path='climate_knowledge_graph.db'):
    """Debug what's actually stored in the links fields.

    Reads up to ten rows of ``stored_datasets`` and, for each one, prints the
    ``links`` entry of its JSON ``dataset_properties`` (type, value, repr), any
    string fields that look like URLs, and the list of property keys. It then
    prints the columns and up to three sample rows of ``dataset_relationships``.

    Args:
        db_path: Path of the SQLite database to inspect. Defaults to the
            knowledge-graph database in the current working directory.

    Returns:
        None. All findings are printed.

    Database-level failures (for example a missing table) are reported as an
    ``Error:`` line instead of being raised, and the connection is always closed.
    """
    print("=== DEBUGGING LINKS DATA STRUCTURE ===")
    print("=" * 60)

    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Get a few datasets to examine their links structure
        cursor.execute("""
            SELECT dataset_id, title, dataset_properties
            FROM stored_datasets
            LIMIT 10
        """)
        datasets = cursor.fetchall()

        for dataset_id, title, props_json in datasets:
            # A NULL title must not abort the whole run, so fall back to an empty preview.
            title_preview = '' if title is None else str(title)[:50]
            print(f"\n Dataset: {dataset_id}")
            print(f" Title: {title_preview}...")
            print("-" * 40)
            _print_dataset_properties(props_json)

        # Also check if there's a separate links/relationships table
        _print_relationship_samples(cursor)

    except Exception as e:
        # Best-effort diagnostic: report the problem and still finish cleanly.
        print(f" Error: {e}")
    finally:
        # Close the connection on every path, including failures.
        if conn is not None:
            conn.close()

    print("\n🏁 Debug complete!")

# Run the debug analysis; the function prints its findings and returns nothing.
debug_links_data()