# Let's figure out GIS

## Todo

- [ ] Get property boundaries loaded
- [ ] Get planning layers loaded
- [ ] Get heritage overlays loaded
- [ ] Delete earlier notebook

# Scraping

Here I'm testing different configurations for scraping.

## Request Times

- Sydney Bounds
  - within 2023 with 0.0625 resolution, 41326 items @ 1m 4s
  - within 2023 with 0.1 resolution, 41484 items @ 1m 0s
- NSW Bounds
  - within

In [None]:
from aiohttp import ClientSession
import geopandas as gpd
from IPython.display import clear_output
import math
import matplotlib.pyplot as plt
import time

from lib.gis.bounds import BoundsIterator, SYDNEY_BOUNDS, NSW_BOUNDS, ExpBoundResolution, ConstantBoundResolution
from lib.gis.gis_reader import GisReader, GisProjection, GisReaderError
from lib.gis.schema import ADDR_GIS_SCHEMA, LOT_GIS_SCHEMA


# Resolutions tried against the Sydney bounds.
r1 = ConstantBoundResolution(0.1)
r0625 = ConstantBoundResolution(0.0625)
r_p1_p1_e1 = ExpBoundResolution(0.15, 0.1, 1.0)
r_p02_p005_e75 = ExpBoundResolution(0.02, 0.005, 0.75)

# Resolution intended for the NSW bounds.
# NOTE(review): the arguments are identical to r_p02_p005_e75 even though the
# name suggests a different second value - confirm which one is intended.
r_p02_p00002_e75 = ExpBoundResolution(0.02, 0.005, 0.75)

# Candidate run configurations, each laid out as
# (borders, res, concurrent, print_n, year).
# Only the LAST entry takes effect; move an entry to the end to select it.
run_configs = (
    (SYDNEY_BOUNDS, r_p02_p005_e75, 12, 100, 2019),
    (SYDNEY_BOUNDS, r_p1_p1_e1, 12, 10, 2019),
    (SYDNEY_BOUNDS, r1, 12, 25, 2023),
    (NSW_BOUNDS, r1, 12, 1, 2023),
)
borders, res, concurrent, print_n, year = run_configs[-1]

def splot():
    """Create a 2 x 4 figure containing a single axes.

    Returns the ``(figure, axes)`` pair produced by ``plt.subplots``.
    """
    figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(2, 4))
    return figure, axes

async def read_pages(reader, bounds, proj):
    """Consume every page from ``reader``, plotting it and reporting progress.

    Each page yielded by ``reader.get_pages(bounds, proj)`` is plotted onto one
    shared axes with ``column='Shape__Area'``. After every ``print_n`` pages
    the cell output is cleared and redrawn with the running totals, followed
    by the first row of the most recent page. Once the reader is exhausted a
    final summary is printed.

    Reads the notebook-level names ``res``, ``concurrent`` and ``print_n``.

    Args:
        reader: object whose ``get_pages(bounds, proj)`` is async-iterable.
        bounds: forwarded unchanged to ``reader.get_pages``.
        proj: forwarded unchanged to ``reader.get_pages``.
    """
    _, ax = splot()
    count, items, start_time = 0, 0, time.time()
    # Remains None when the reader yields nothing, so the closing preview can
    # be skipped instead of failing on an unbound loop variable.
    page = None

    def elapsed():
        """Format the whole seconds since the start as '<m>m <s>s'."""
        t = int(time.time() - start_time)
        return f'{t // 60}m {t % 60}s'

    def render():
        """Replace the cell output with the settings, totals and plot."""
        clear_output(wait=True)
        print(res, concurrent)
        print(f'#{count}: {items} items @ {elapsed()}')
        plt.show()

    async for page in reader.get_pages(bounds, proj):
        page.plot(ax=ax, column='Shape__Area')
        items, count = items + len(page), count + 1
        if not count % print_n:
            render()
            display(page.iloc[:1])

    # Always render once more so the totals include pages received after the
    # last periodic refresh.
    render()
    print(f'finished loading GIS @ {elapsed()}')
    if page is not None:
        display(page.iloc[:1])

crs = 4326
bounds = BoundsIterator(borders, resolution=res, epsg_crs=crs)
addr_proj = GisProjection(
    schema=ADDR_GIS_SCHEMA,
    fields=['id', ('assoc', 2), ('data', 2), ('meta', 2), 'geo'],
    epsg_crs=crs,
)
lots_proj = GisProjection(schema=LOT_GIS_SCHEMA, fields='*', epsg_crs=crs)

try:
    async with ClientSession() as session:
        gis_reader = GisReader.create(
            session, 
            max_concurrent=concurrent,
            where_clause=[
                f"lastupdate > DATE '{year}-01-01'",
                f"lastupdate < DATE '{year}-02-01'",
            ],
        )
        await read_pages(gis_reader, bounds, addr_proj)
        # await read_pages(gis_reader, bounds, lots_proj)
except GisReaderError as e:
    print(e)

ConstantBoundResolution(value=0.1) 12
#57: 3354 items @ 0m 14s


Unnamed: 0,RID,propid,principaladdresssiteoid,addressstringoid,propertytype,superlot,housenumber,address,principaladdresstype,createdate,startdate,enddate,lastupdate,shapeuuid,changetype,Shape__Length,Shape__Area,geometry
0,6390469,4457619,4669193,8778602,1,N,81,81 CAMFIELD DRIVE HEATHERBRAE,1,1671921557000,1671921731000,32503680000000,1672737455026,5eed5f10-112e-3371-93ea-1cc1013c0c67,M,286.984203,4575.236128,"POLYGON ((151.74931 -32.78685, 151.74909 -32.7..."
