# Configuration 

In [2]:
import os
from openai import OpenAI



from dotenv import load_dotenv
import os
from openai import OpenAI

load_dotenv()



AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_DEPLOYMENT = "gpt-4o-mini"  

os.environ["AZURE_OPENAI_API_KEY"] = AZURE_OPENAI_KEY
os.environ["AZURE_OPENAI_ENDPOINT"] = AZURE_OPENAI_ENDPOINT


# Create client for Azure OpenAI
client = OpenAI(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    base_url=f"{os.environ['AZURE_OPENAI_ENDPOINT']}/openai/v1/",
)

# Simple test call
resp = client.chat.completions.create(
    model=AZURE_OPENAI_DEPLOYMENT, 
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Introduce your self in short sentence."},
    ],
)

print(resp.choices[0].message.content)


I'm an AI language model designed to assist with a wide range of questions and provide information on various topics.


# Check the language syntax 

In [3]:
def normalize_query(text: str) -> str:
    resp = client.chat.completions.create(
        model=AZURE_OPENAI_DEPLOYMENT,
        messages=[
            {"role": "system", "content": "You understand poorly written Norwegian and English. Correct spelling mistakes, interpret the meaning, and rewrite the sentence clearly."},
            {"role": "user", "content": text},
        ],
        temperature=0.1
    )
    return resp.choices[0].message.content.strip()


# LLM planner: convert the natural language query to a structured JSON plan


In [4]:
import json

SYSTEM_PROMPT = """


You are a GIS planner agent for a Nordic municipality.
You NEVER execute SQL and NEVER touch the database.
You ONLY output a JSON plan that another system will translate to SQL.

------------------------------------------------------------
OUTPUT FORMAT (STRICT)
------------------------------------------------------------
You MUST output ONLY valid JSON with EXACTLY these fields:

{
  "operation": "...",
  "layer": "...",
  "target_layer": "...",
  "buffer_meters": ...,
  "limit": ...,
  "where_clause": "..."
}




Rules:
- No backticks, no explanations, no comments.
- All fields MUST exist, even if null or empty.
- buffer_meters MUST be a number or null.
- limit MUST be a number or null.
- where_clause MUST be either "" or a simple phrase (see rules below).
- layer and target_layer MUST be valid or "".


------------------------------------------------------------
ALLOWED OPERATIONS
------------------------------------------------------------
General:
- "select_limit_only"
- "select_by_attribute"
- "select_buffer"
- "select_intersect"
- "select_nearest"
- "select_within_polygon"

Special (only if user explicitly asks):
- Buildings: "select_buildings_in_floodzone", "select_buildings_near_route", "select_buildings_by_area"
- Flood zones: "select_within_floodzone", "select_intersect_floodzone"
- Bicycle routes: "select_near_bikeroute", "select_intersect_bikeroute"
- Walking routes: "select_near_walkroute", "select_intersect_walkroute"
- Ski routes: "select_near_skiroute", "select_intersect_skiroute"
- Route info points: "select_nearest_rutepoint", "select_points_in_area"



------------------------------------------------------------
ALLOWED LAYERS (MUST MATCH EXACTLY)
------------------------------------------------------------
- "buildings"
- "flomsoner"
- "buildings_sample"
- "arealbruk_skogbonitet_sample"
- "flomsoner_sample"
- "sykkelrute_senterlinje_sample"
- "skiloype_senterlinje"
- "annenrute_senterlinje"
- "annenruteinfo_tabell"
- "arealbruk_skogbonitet"
- "fotrute_senterlinje"
- "fotruteinfo_tabell"
- "ruteinfopunkt_posisjon"
- "skiloypeinfo_tabell"
- "sykkelrute_senterlinje"
- "sykkelruteinfo_tabell"

If no layer is clearly referenced → layer = "".






------------------------------------------------------------
target_layer RULES (VERY IMPORTANT)
------------------------------------------------------------
For ANY spatial operation (buffer, intersect, nearest),
the JSON MUST include a valid "target_layer".

Mapping:
- "near water" → "flomsoner"
- "near river" → "flomsoner"
- "intersect floodzone" → "flomsoner"
- "inside floodzone" → "flomsoner"
- "near bikeroute" → "sykkelrute_senterlinje"
- "intersect bikeroute" → "sykkelrute_senterlinje"
- "near walkroute" → "fotrute_senterlinje"
- "near skiroute" → "skiloype_senterlinje"

If user gives NO spatial relation → target_layer MUST be "".



------------------------------------------------------------
OPERATION SELECTION RULES
------------------------------------------------------------
- If the user says “within X meters” → operation = "select_buffer".
- If the user says “near X” → operation = "select_buffer", unless they say “nearest”.
- If the user says “nearest” or “closest” → operation = "select_nearest".
- If no spatial relation is described → DO NOT choose buffer/intersect/nearest.
- City names (e.g., “Kristiansand”) MUST NOT produce filters.

------------------------------------------------------------
LIMIT RULES
------------------------------------------------------------
- If user gives a number (e.g., “10 buildings”) → use it.
- If user does not specify → limit = null.
- If the user writes exactly "all" → limit = "all".
- If the user says "all buildings", "all houses", etc. → limit = "all".

------------------------------------------------------------
BUFFER RULES
------------------------------------------------------------
- If user says “within X meters”, set buffer_meters = X.
- Otherwise buffer_meters = null.

------------------------------------------------------------
WHERE_CLAUSE RULES
------------------------------------------------------------
Only fill where_clause if the user explicitly says:
- near water
- near river
- near bikeroute
- inside floodzone
- intersect floodzone
- etc.

Otherwise: where_clause = "".

------------------------------------------------------------
FINAL INSTRUCTION
------------------------------------------------------------
Your entire output MUST be a single JSON object following all rules.
No prose. No Markdown. No explanations. Only JSON.

"""


previous_input = None

def plan_spatial_query(nl_query: str) -> dict:
    resp = client.chat.completions.create(
        model=AZURE_OPENAI_DEPLOYMENT,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": nl_query},
        ],
        temperature=0.0,
        max_tokens=300,
    )
    raw = resp.choices[0].message.content.strip()
    if raw.startswith("```"):
        raw = raw.strip("`")
        if raw.lower().startswith("json"):
            raw = raw[4:].strip()
    return json.loads(raw)



# Process user input

In [5]:
FIELD_INFO = {
    # ****************************************************
    "operation": (
        "Type of GIS action.\n"
        "Examples:\n"
        "- select_buffer\n"
        "- select_intersect\n"
        "- select_nearest\n"
        "- select_limit_only\n"
        "- select_by_attribute\n"
        "This tells the GIS engine WHAT to do."
    ),

    # ****************************************************
    "layer": (
        "The PRIMARY dataset you want results from.\n"
        "Must match one of the allowed database layers:\n"
        "- buildings\n"
        "- flomsoner\n"
        "- buildings_sample\n"
        "- arealbruk_skogbonitet_sample\n"
        "- flomsoner_sample\n"
        "- sykkelrute_senterlinje_sample\n"
        "- skiloype_senterlinje\n"
        "- annenrute_senterlinje\n"
        "- annenruteinfo_tabell\n"
        "- arealbruk_skogbonitet\n"
        "- fotrute_senterlinje\n"
        "- fotruteinfo_tabell\n"
        "- ruteinfopunkt_posisjon\n"
        "- skiloypeinfo_tabell\n"
        "- sykkelrute_senterlinje\n"
        "- sykkelruteinfo_tabell"
    ),

    # ****************************************************
    "target_layer": (
        "The SECOND dataset used for spatial relation.\n"
        "ONLY required for operations involving two layers (buffer, intersect, nearest).\n"
        "Examples:\n"
        "- layer = buildings\n"
        "- target_layer = sykkelrute_senterlinje  -> buildings near bike routes\n"
        "- target_layer = flomsoner               -> buildings intersect floodzones\n"
        "\nIf operation does not require a second dataset → target_layer must be null."
    ),

    # ****************************************************
    "buffer_meters": (
        "Distance in meters for spatial proximity.\n"
        "Examples:\n"
        "- 50\n"
        "- 100\n"
        "- 200\n"
        "Used only for operations requiring distance (select_buffer, select_near_...)."
    ),

    # ****************************************************
    "limit": (
        "How many results to return.\n"
        "Examples: 5, 10, 50.\n"
            ),
    


    # ****************************************************
    "where_clause": (
        "Optional simple spatial or attribute filter.\n"
        "Only filled when the user explicitly mentions a real filter:\n"
        "- near river\n"
        "- inside floodzone\n"
        "- intersect bikeroute\n"
        "- type = 'house'\n"
        "\nIf user does NOT specify a filter → where_clause = \"\"."
    )
}



def process_user_input(user_input):
    
    clean_text = normalize_query(user_input)

    plan = plan_spatial_query(clean_text)


    lower_input = user_input.lower()
    
    if "all " in lower_input:
        plan["limit"] = "all"
   
    required_fields = ["operation", "layer", "buffer_meters", "limit", "target_layer"]
    missing = []




    
    limit = plan.get("limit")

    if isinstance(limit, str) and limit.lower() == "all":
        plan["limit"] = 100
    else:
        plan["limit"] = limit  

    # Check missing fields
    for f in required_fields:
        if plan.get(f) is None or plan.get(f) == "":
            missing.append(f)

    # If missing → return helpful message
    if missing:
        explanations = "\n".join([f"- {f}: {FIELD_INFO[f]}" for f in missing])

        return (
            f"\n⚠ Missing required field(s): {', '.join(missing)}\n"
            + "\n----------------------------------------\n"
            + f"{explanations}\n"
            + "----------------------------------------\n"
            + f"Your input:\n  {user_input}\n"
            + "----------------------------------------\n"
        )

        

    return plan


# Test the LLM planner

In [6]:

query = "Find 100 residential houses within 200 meters of a river in Kristiansand."
plan = plan_spatial_query(query)
plan


{'operation': 'select_buffer',
 'layer': 'buildings',
 'target_layer': 'flomsoner',
 'buffer_meters': 200,
 'limit': 100,
 'where_clause': ''}

In [7]:

query = "Find all buildings within 100 m of bicycle routes"
plan = plan_spatial_query(query)
plan


{'operation': 'select_buffer',
 'layer': 'buildings',
 'target_layer': 'sykkelrute_senterlinje',
 'buffer_meters': 100,
 'limit': None,
 'where_clause': ''}

# Converts the user’s prompt into an SQL query string.

In [8]:
def plan_to_sql(plan: dict) -> str:
    op = plan["operation"]
    layer = plan["layer"]
    target = plan.get("target_layer")
    buf = plan.get("buffer_meters")
    limit = plan.get("limit") or 100
    where = plan.get("where_clause") or "TRUE"

    # -------------------------------
    # ALWAYS return WKT geometry
    # -------------------------------
    wkt_select = "ST_AsText(ST_Transform(a.geom, 4326)) AS wkt_geom"

    # SELECT LIMIT ONLY
    if op == "select_limit_only":
        return f"""
SELECT 
    a.*, 
    ST_AsText(ST_Transform(a.geom, 4326)) AS wkt_geom
FROM public.{layer} a
LIMIT {limit};
""".strip()

    # SELECT BY ATTRIBUTE
    if op == "select_by_attribute":
        return f"""
SELECT
    a.*,
    ST_AsText(ST_Transform(a.geom, 4326)) AS wkt_geom
FROM public.{layer} a
WHERE {where}
LIMIT {limit};
""".strip()

    # BUFFER OPERATION
    if op == "select_buffer":
        if not target:
            raise ValueError("select_buffer requires 'target_layer'")
        return f"""
SELECT
    a.*,
    ST_AsText(ST_Transform(a.geom, 4326)) AS wkt_geom
FROM public.{layer} a
JOIN (
    SELECT ST_Buffer(geom, {buf}) AS geom
    FROM public.{target}
) t
ON ST_Intersects(a.geom, t.geom)
WHERE {where}
LIMIT {limit};
""".strip()

    # INTERSECT OPERATION
    if op == "select_intersect":
        if not target:
            raise ValueError("select_intersect requires 'target_layer'")
        return f"""
SELECT
    a.*,
    ST_AsText(ST_Transform(a.geom, 4326)) AS wkt_geom
FROM public.{layer} a
JOIN public.{target} b
  ON ST_Intersects(a.geom, b.geom)
WHERE {where}
LIMIT {limit};
""".strip()

    # NEAREST OPERATION
    if op == "select_nearest":
        if not target:
            raise ValueError("select_nearest requires 'target_layer'")
        return f"""
SELECT
    a.*,
    ST_AsText(ST_Transform(a.geom, 4326)) AS wkt_geom
FROM public.{layer} a
ORDER BY (
    SELECT MIN(ST_Distance(a.geom, b.geom))
    FROM public.{target} b
)
LIMIT {limit};
""".strip()

    # WITHIN POLYGON
    if op == "select_within_polygon":
        polygon = plan["where_clause"]
        return f"""
SELECT
    a.*,
    ST_AsText(ST_Transform(a.geom, 4326)) AS wkt_geom
FROM public.{layer} a
WHERE ST_Within(geom, ST_GeomFromText('{polygon}', 4326))
LIMIT {limit};
""".strip()

    raise ValueError(f"Unsupported operation: {op}")


# Connect to the PostGIS database and run the SQL query, returning a DataFrame


In [9]:
import psycopg2
import pandas as pd

def run_postgis_query(sql: str) -> pd.DataFrame:
    conn_str = os.environ["PGCONN_STRING"]
    
    
    with psycopg2.connect(conn_str) as conn:
        
        with conn.cursor() as cur:
            cur.execute(sql)
            rows = cur.fetchall()
            cols = [desc[0] for desc in cur.description]
    return pd.DataFrame(rows, columns=cols)


In [24]:
query = "SELECT pg_size_pretty(pg_total_relation_size('public.buildings'));"
run_postgis_query(query)

Unnamed: 0,pg_size_pretty
0,1251 MB


In [25]:
df = run_postgis_query("""
SELECT relname AS table_name,
       pg_size_pretty(pg_total_relation_size(relid)) AS total_size
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC;
""")
df


Unnamed: 0,table_name,total_size
0,arealbruk_skogbonitet,3160 MB
1,buildings,1251 MB
2,flomsoner,343 MB
3,fotrute_senterlinje,141 MB
4,flomsoner_sample,54 MB
5,fotruteinfo_tabell,27 MB
6,sykkelrute_senterlinje,18 MB
7,arealbruk_skogbonitet_sample,17 MB
8,sykkelrute_senterlinje_sample,17 MB
9,skiloype_senterlinje,13 MB


In [23]:
query = "SELECT relname AS table_name, pg_size_pretty(pg_total_relation_size(relid)) AS total_size FROM pg_catalog.pg_statio_user_tables ORDER BY pg_total_relation_size(relid) DESC;"
run_postgis_query(query)


Unnamed: 0,table_name,total_size
0,arealbruk_skogbonitet,3160 MB
1,buildings,1251 MB
2,flomsoner,343 MB
3,fotrute_senterlinje,141 MB
4,flomsoner_sample,54 MB
5,fotruteinfo_tabell,27 MB
6,sykkelrute_senterlinje,18 MB
7,arealbruk_skogbonitet_sample,17 MB
8,sykkelrute_senterlinje_sample,17 MB
9,skiloype_senterlinje,13 MB


# Execute the GIS plan in PostGIS and return the result as a DataFrame


In [10]:
def execute_gis_plan_db(plan: dict) -> pd.DataFrame:
    sql = plan_to_sql(plan)
    df = run_postgis_query(sql)
    return df


# Ask the GIS agent: NL query -> LLM plan -> SQL -> PostGIS -> DataFrame


In [11]:
def ask_gis_agent(query: str) :



    
    plan = process_user_input(query)

    # If the agent returned a string → it's an error message
    if isinstance(plan, str):
        print(plan)
        return None  # STOP HERE

    
    #df = execute_gis_plan_db(plan)
    return plan

# Send the query to the GIS agent and display the first results


In [12]:

queryy = "Find 100 residential houses within 200 meters of a river in Kristiansand."
plan = plan_spatial_query(query)
plan


{'operation': 'select_buffer',
 'layer': 'buildings',
 'target_layer': 'sykkelrute_senterlinje',
 'buffer_meters': 100,
 'limit': None,
 'where_clause': ''}

In [13]:

queryy = "Find all buildings within 100 m of bicycle routes."
plan = plan_spatial_query(query)
plan


{'operation': 'select_buffer',
 'layer': 'buildings',
 'target_layer': 'sykkelrute_senterlinje',
 'buffer_meters': 100,
 'limit': None,
 'where_clause': ''}

In [14]:
query = "Find 100 residential houses within 200 meters of a river in Kristiansand."
df = ask_gis_agent(query)
df


{'operation': 'select_buffer',
 'layer': 'buildings',
 'target_layer': 'flomsoner',
 'buffer_meters': 200,
 'limit': 100,
 'where_clause': ''}

In [15]:
 res = ask_gis_agent("c")
res


⚠ Missing required field(s): operation, layer, buffer_meters, limit, target_layer

----------------------------------------
- operation: Type of GIS action.
Examples:
- select_buffer
- select_intersect
- select_nearest
- select_limit_only
- select_by_attribute
This tells the GIS engine WHAT to do.
- layer: The PRIMARY dataset you want results from.
Must match one of the allowed database layers:
- buildings
- flomsoner
- buildings_sample
- arealbruk_skogbonitet_sample
- flomsoner_sample
- sykkelrute_senterlinje_sample
- skiloype_senterlinje
- annenrute_senterlinje
- annenruteinfo_tabell
- arealbruk_skogbonitet
- fotrute_senterlinje
- fotruteinfo_tabell
- ruteinfopunkt_posisjon
- skiloypeinfo_tabell
- sykkelrute_senterlinje
- sykkelruteinfo_tabell
- buffer_meters: Distance in meters for spatial proximity.
Examples:
- 50
- 100
- 200
Used only for operations requiring distance (select_buffer, select_near_...).
- limit: How many results to return.
Examples: 5, 10, 50.

- target_layer: The 

In [16]:

from shapely import wkt
import folium
from shapely.geometry import Polygon, MultiPolygon


def normalize_geom(g):
    if isinstance(g, Polygon):
        return [g]
    if isinstance(g, MultiPolygon):
        return list(g.geoms)
    return []

def showMap(df):
    first_geom = wkt.loads(df["wkt_geom"].iloc[0])
    m = folium.Map(location=[first_geom.centroid.y, first_geom.centroid.x], zoom_start=16)    
    
    for w in df["wkt_geom"]:
        geom = wkt.loads(w)
        for poly in normalize_geom(geom):
            folium.GeoJson(
                poly.__geo_interface__,
                style_function=lambda x: {
                    "color": "red",
                    "weight": 2,
                    "fillColor": "yellow",
                    "fillOpacity": 0.3,
                },
            ).add_to(m)
    
    return m

# Simple CLI chat loop that sends user queries to the GIS agent and prints the results


In [17]:
def chat_loop():
    print("GIS agent chat – type 'quit' to stop.\n")

    while True:
        user_q = input("You: ").strip()
        print("'''''''''''''''''''''''''''''''''''''''''''")

        if user_q.lower() in ("quit", "exit", "q"):
            print("Welcome back")
            break

        plan = ask_gis_agent(user_q)

        if plan is None:
            continue  

        try:
            res = execute_gis_plan_db(plan)
        except Exception as e:
            print("\n SQL/DB error:", e)
            continue

        if isinstance(res, str):
            print(res)
            continue
        
        display(showMap(res))

chat_loop()


GIS agent chat – type 'quit' to stop.



You:  q


'''''''''''''''''''''''''''''''''''''''''''
Welcome back


In [None]:
from shapely import wkt
import folium
from shapely.geometry import Polygon, MultiPolygon


#kan ikke lese binøre fra databasen  
query = "Find 20 buildings near floodzone"

df = execute_gis_plan_db(plan)


# hent data  og 



In [19]:
!ls -la


total 292
drwxrwsr-x  4 matinm users   4096 Dec  1 02:03 .
drwxrwsr-x 24 root   users   4096 Nov 29 13:11 ..
-rw-rw-r--  1 matinm users   1097 Nov 28 16:46 app.py
-rw-rw-r--  1 matinm users    346 Nov 29 15:49 .env
drwxrwsr-x  8 matinm users   4096 Nov 29 16:47 .git
-rw-rw-r--  1 matinm users      5 Nov  5 23:54 .gitignore
drwxrwsr-x  2 matinm users   4096 Nov 28 16:46 .ipynb_checkpoints
-rw-rw-r--  1 matinm users 258857 Dec  1 02:03 Nordkart.ipynb
-rw-rw-r--  1 matinm users   2285 Nov 28 14:33 README.md
-rw-rw-r--  1 matinm users    319 Nov  5 23:23 requirements.txt


In [20]:
plan = ask_gis_agent("Find 30 houses within 200 meters of bike routes in F")
if plan is None:
    print("Invalid plan")
else:
    df = execute_gis_plan_db(plan)


In [21]:
df

Unnamed: 0,gid,osm_id,code,fclass,name,type,geom,wkt_geom
0,2624334,954372038,1500,building,,semidetached_house,0106000020E96400000100000001030000000100000005...,"MULTIPOLYGON(((25.50966 69.4755591,25.50980939..."
1,2624336,954372040,1500,building,,semidetached_house,0106000020E9640000010000000103000000010000000C...,MULTIPOLYGON(((25.507451000000003 69.475189000...
2,2624340,954372044,1500,building,,semidetached_house,0106000020E96400000100000001030000000100000007...,MULTIPOLYGON(((25.505829399999996 69.474532999...
3,2624352,954372056,1500,building,,house,0106000020E96400000100000001030000000100000009...,MULTIPOLYGON(((25.507028999999985 69.474858399...
4,2624371,954372075,1500,building,,house,0106000020E96400000100000001030000000100000007...,"MULTIPOLYGON(((25.509574100000002 69.475193,25..."
5,2624372,954372076,1500,building,,house,0106000020E96400000100000001030000000100000007...,MULTIPOLYGON(((25.51055699999999 69.4751837999...
6,2624373,954372077,1500,building,,garage,0106000020E96400000100000001030000000100000005...,"MULTIPOLYGON(((25.5095884 69.47499720000002,25..."
7,2624379,954372083,1500,building,,house,0106000020E96400000100000001030000000100000009...,"MULTIPOLYGON(((25.510711199999996 69.4759379,2..."
8,2624389,954372093,1500,building,,house,0106000020E96400000100000001030000000100000005...,MULTIPOLYGON(((25.507470099999992 69.475878899...
9,2624390,954372094,1500,building,,house,0106000020E9640000010000000103000000010000000B...,"MULTIPOLYGON(((25.5083742 69.4758046,25.508445..."


In [22]:
plan = ask_gis_agent("Find 100 residential houses within 100 meters of a river")
df   = execute_gis_plan_db(plan)

from shapely import wkt
import folium
from shapely.geometry import Polygon, MultiPolygon


def normalize_geom(g):
    if isinstance(g, Polygon):
        return [g]
    if isinstance(g, MultiPolygon):
        return list(g.geoms)
    return []




def showMap(df):
    first_geom = wkt.loads(df["wkt_geom"].iloc[0])
    m = folium.Map(location=[first_geom.centroid.y, first_geom.centroid.x], zoom_start=16)
    
    for w in df["wkt_geom"]:
        geom = wkt.loads(w)
        for poly in normalize_geom(geom):
            folium.GeoJson(
                poly.__geo_interface__,
                style_function=lambda x: {
                    "color": "red",
                    "weight": 2,
                    "fillColor": "yellow",
                    "fillOpacity": 0.3,
                },
            ).add_to(m)
    
    return m


KeyboardInterrupt: 

In [None]:
showMap(df)