# Configuration 

In [7]:
import os
from openai import OpenAI








from dotenv import load_dotenv
import os
from openai import OpenAI

load_dotenv()



AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_DEPLOYMENT = "gpt-4o-mini"   # or whatever your Azure deployment is called

# (Optional) if you want them also in env vars:
os.environ["AZURE_OPENAI_API_KEY"] = AZURE_OPENAI_KEY
os.environ["AZURE_OPENAI_ENDPOINT"] = AZURE_OPENAI_ENDPOINT

# Create client for Azure OpenAI (new SDK style)
client = OpenAI(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    base_url=f"{os.environ['AZURE_OPENAI_ENDPOINT']}/openai/v1/",
)

# Simple test call
resp = client.chat.completions.create(
    model=AZURE_OPENAI_DEPLOYMENT,  # <- deployment name
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Introduce your self in short sentence."},
    ],
)

print(resp.choices[0].message.content)


I’m an AI language model designed to assist with information, answer questions, and provide helpful insights across a variety of topics.


# LLM planner: convert the natural language query to a structured JSON plan


In [8]:
import json

SYSTEM_PROMPT = """
You are a GIS planner agent for a Nordic municipality.
You NEVER run SQL or touch databases yourself.
Instead, you output a JSON plan that another system will execute.

Rules:
- Output ONLY valid JSON. No backticks, no explanations.
- The JSON MUST have exactly these fields:
  - "operation": string, one of ["select_near_river", "select_buffer", "select_by_attribute"]
  - "layer": string (e.g. "buildings", "parcels", "roads")
  - "buffer_meters": number or null
  - "limit": integer or null
  - "where_clause": string with a simple attribute filter (human readable, not strict SQL)
"""

def plan_spatial_query(nl_query: str) -> dict:
    """Ask the LLM to turn a natural-language GIS question into a JSON plan."""
    resp = client.chat.completions.create(
        model=AZURE_OPENAI_DEPLOYMENT,   # your deployment name
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": nl_query},
        ],
        temperature=0.1,
        max_tokens=300,
    )


    #check the api response
    if resp.choices and resp.choices[0].message.content:
        raw = resp.choices[0].message.content
    else:
        raise ValueError("No response from model.")
    
    
    #print("RAW MODEL OUTPUT:\n", raw, "\n")
       

    # If the model accidentally wraps in ```json ... ``` we strip that:
    if raw.strip().startswith("```"):
        raw = raw.strip().strip("`")
        # remove possible "json" at beginning
        if raw.lower().startswith("json"):
            raw = raw[4:].strip()

    plan = json.loads(raw)
    return plan



# Test the LLM planner

In [9]:

query = "Find 100 residential houses within 200 meters of a river in Kristiansand."
plan = plan_spatial_query(query)
plan


{'operation': 'select_near_river',
 'layer': 'buildings',
 'buffer_meters': 200,
 'limit': 100,
 'where_clause': "type = 'residential'"}

# Print a conceptual SQL query

In [18]:
def execute_gis_plan(plan: dict):
    op = plan["operation"]
    layer = plan["layer"]
    buf = plan["buffer_meters"]
    limit = plan["limit"]
    where = plan["where_clause"]

    if op == "select_near_river":
        print(f"""
[GIS ENGINE: select_near_river]
Layer: {layer}
Buffer: {buf} m from all rivers
Filter: {where}
Limit: {limit}

Conceptual PostGIS workflow:

WITH river_buffer AS (
    SELECT ST_Buffer(r.geom, {buf}) AS geom
    FROM rivers r
),
selected AS (
    SELECT b.*
    FROM {layer} b
    JOIN river_buffer rb
      ON ST_Intersects(b.geom, rb.geom)
    -- attribute filter:
    -- WHERE {where}
)
SELECT *
FROM selected
LIMIT {limit};
""")
    elif op == "select_buffer":
        print(f"[GIS ENGINE] Would select features from {layer} within {buf} m of another layer. (Not fully implemented yet.)")
    elif op == "select_by_attribute":
        print(f"[GIS ENGINE] Would select from {layer} WHERE {where} LIMIT {limit}.")
    else:
        print("[GIS ENGINE] Unsupported operation:", op)


# Text execute_gis_plan ()

In [58]:
query = "Find 100 houses near the river in Kristiansand."
plan = plan_spatial_query(query)
print("PARSED PLAN:", plan, "\n")
execute_gis_plan(plan)


PARSED PLAN: {'operation': 'select_near_river', 'layer': 'buildings', 'buffer_meters': 100, 'limit': 100, 'where_clause': "type = 'house' AND municipality = 'Kristiansand'"} 


[GIS ENGINE: select_near_river]
Layer: buildings
Buffer: 100 m from all rivers
Filter: type = 'house' AND municipality = 'Kristiansand'
Limit: 100

Conceptual PostGIS workflow:

WITH river_buffer AS (
    SELECT ST_Buffer(r.geom, 100) AS geom
    FROM rivers r
),
selected AS (
    SELECT b.*
    FROM buildings b
    JOIN river_buffer rb
      ON ST_Intersects(b.geom, rb.geom)
    -- attribute filter:
    -- WHERE type = 'house' AND municipality = 'Kristiansand'
)
SELECT *
FROM selected
LIMIT 100;



# Converts the user’s prompt into an SQL query string.

In [59]:
def plan_to_sql(plan: dict) -> str:
    op = plan["operation"]
    buf = plan["buffer_meters"]
    limit = plan["limit"] or 100
    where = plan["where_clause"] or "TRUE"

    if op == "select_near_river":
        sql = f"""
WITH river_buffer AS (
    SELECT ST_Buffer(r.geom, {buf}) AS geom
    FROM public.flomsoner r
),
selected AS (
    SELECT b.*
    FROM public.buildings b
    JOIN river_buffer rb
      ON ST_Intersects(b.geom, rb.geom)
    WHERE {where}
)
SELECT
    *
FROM selected
LIMIT {limit};
"""
    else:
        raise ValueError(f"Unsupported operation in plan_to_sql: {op}")

    return sql.strip()


# Connect to the PostGIS database and run the SQL query, returning a DataFrame


In [60]:
import psycopg2
import pandas as pd

def run_postgis_query(sql: str) -> pd.DataFrame:
    conn_str = os.environ["PGCONN_STRING"]
    
    #print("Connecting to:", conn_str.split("@")[1]) 
    
    with psycopg2.connect(conn_str) as conn:
        
        with conn.cursor() as cur:
            cur.execute(sql)
            rows = cur.fetchall()
            cols = [desc[0] for desc in cur.description]
    return pd.DataFrame(rows, columns=cols)


# Execute the GIS plan in PostGIS and return the result as a DataFrame


In [61]:
def execute_gis_plan_db(plan: dict):
    sql = plan_to_sql(plan)
    df = run_postgis_query(sql)
    return df

# Ask the GIS agent: NL query -> LLM plan -> SQL -> PostGIS -> DataFrame


In [62]:
def ask_gis_agent(query: str) -> pd.DataFrame:
    """
    Full pipeline:
    NL query -> LLM plan -> SQL -> PostGIS -> result DataFrame
    """
    #print(f"\nUSER QUERY: {query}\n")
    plan = plan_spatial_query(query)
    #print("PLAN:", plan, "\n")
    df = execute_gis_plan_db(plan)
    return df

# Send the query to the GIS agent and display the first results


In [63]:
df = ask_gis_agent("Find 100 residential houses within 200 meters of a river in Kristiansand.")
len(df)


100

In [64]:
df.head()

Unnamed: 0,gid,osm_id,code,fclass,name,type,geom
0,526782,542964436,1500,building,,residential,0106000020E96400000100000001030000000100000005...
1,526782,542964436,1500,building,,residential,0106000020E96400000100000001030000000100000005...
2,526783,542964439,1500,building,,residential,0106000020E96400000100000001030000000100000005...
3,526782,542964436,1500,building,,residential,0106000020E96400000100000001030000000100000005...
4,526783,542964439,1500,building,,residential,0106000020E96400000100000001030000000100000005...


# Simple CLI chat loop that sends user queries to the GIS agent and prints the results


In [66]:
from IPython.display import display




def chat_loop():
    print("GIS agent chat – type 'quit' to stop.\n")
    while True:
        user_q = input("You: ")
        if user_q.strip().lower() in ("quit", "exit", "q"):
            print("Bye ")
            break

        try:
            df = ask_gis_agent(user_q)
            if len(df) == 0:
                print("No results found.\n")
            else:
                display(df)  
        except Exception as e:
            print("Error:", e, "\n")

chat_loop()


GIS agent chat – type 'quit' to stop.



You:  Find 10 residential houses within 200 meters of a river in Drammen


Unnamed: 0,gid,osm_id,code,fclass,name,type,geom
0,526782,542964436,1500,building,,residential,0106000020E96400000100000001030000000100000005...
1,526782,542964436,1500,building,,residential,0106000020E96400000100000001030000000100000005...
2,526783,542964439,1500,building,,residential,0106000020E96400000100000001030000000100000005...
3,526782,542964436,1500,building,,residential,0106000020E96400000100000001030000000100000005...
4,526783,542964439,1500,building,,residential,0106000020E96400000100000001030000000100000005...
5,526783,542964439,1500,building,,residential,0106000020E96400000100000001030000000100000005...
6,526782,542964436,1500,building,,residential,0106000020E96400000100000001030000000100000005...
7,526783,542964439,1500,building,,residential,0106000020E96400000100000001030000000100000005...
8,3620837,1007411114,1500,building,,residential,0106000020E96400000100000001030000000100000005...
9,3621279,1007411556,1500,building,,residential,0106000020E96400000100000001030000000100000009...


You:  Find 10 residential houses within 200 meters of a river in Grimstad


Unnamed: 0,gid,osm_id,code,fclass,name,type,geom
0,3620837,1007411114,1500,building,,residential,0106000020E96400000100000001030000000100000005...
1,3621279,1007411556,1500,building,,residential,0106000020E96400000100000001030000000100000009...
2,3621282,1007411559,1500,building,,residential,0106000020E96400000100000001030000000100000005...
3,3621283,1007411560,1500,building,,residential,0106000020E96400000100000001030000000100000009...
4,3621277,1007411554,1500,building,,residential,0106000020E96400000100000001030000000100000007...
5,3621276,1007411553,1500,building,,residential,0106000020E96400000100000001030000000100000005...
6,3620840,1007411117,1500,building,,residential,0106000020E9640000010000000103000000010000000B...
7,3621280,1007411557,1500,building,,residential,0106000020E96400000100000001030000000100000007...
8,3621281,1007411558,1500,building,,residential,0106000020E96400000100000001030000000100000005...
9,3620837,1007411114,1500,building,,residential,0106000020E96400000100000001030000000100000005...


You:  quit


Bye 
