# Let's figure out GIS

## Todo

- [ ] Get property boundaries loaded
- [ ] Get planning layers loaded
- [ ] Get heritage overlays loaded
- [ ] Delete earlier notebook

# Scraping

Here I'm testing different configurations for scraping.

## Request Times

- Sydney Bounds, `Shape__Area < 50000000`
  - Resolution **0.01** With **1** Concurrent Requests
    - **1** block, **0.8**s 
  - Resolution **0.02** With **8** Concurrent Requests
    - **100** block, **15**s - **17**s - **28**s
    - **200** block, **56**s - **57**s - **218**s
    - **400** block, **519**s - **528**s - **530**s _(8 minutes 50 seconds)_ - **539**s
    - **600** block, **2222**s _(37 minutes 2 seconds)_
    - **800**
      - timed out before 

In [None]:
from aiohttp import ClientSession
import geopandas as gpd
import math
import matplotlib.pyplot as plt
import time

from lib.gis.bounds import BoundsIterator, SYDNEY_BOUNDS
from lib.gis.gis_reader import GisReader, GisProjection, GisReaderError
from lib.gis.schema import ADDR_GIS_SCHEMA, LOT_GIS_SCHEMA

# Tuning knobs for the scraping experiment below.
res = (0.02, 0.005)  # passed to BoundsIterator as `resolution`
concurrent = 12      # passed to GisReader.create as `max_concurrent`
stop_n = 1600        # read_pages stops after this many pages
print_n = 100        # read_pages prints progress every `print_n` pages

async def read_pages(reader, bounds, proj):
    """Stream pages from ``reader``, plotting each one and reporting timing.

    Iterates ``reader.get_pages(bounds, proj)``, draws every page onto one
    shared matplotlib axes coloured by its ``Shape__Area`` column, and prints
    a progress line every ``print_n`` pages. Iteration stops early once
    ``stop_n`` pages have been consumed. Both ``print_n`` and ``stop_n`` are
    read from module scope.

    After the loop the elapsed time is printed, the plot is shown, and the
    column names and head of the last page received are displayed. If the
    reader yields no pages at all, a short notice is printed instead.

    Args:
        reader: Object whose ``get_pages(bounds, proj)`` is async-iterable.
        bounds: Passed through unchanged to ``reader.get_pages``.
        proj: Passed through unchanged to ``reader.get_pages``.
    """
    _, ax = plt.subplots()
    page_count, item_count = 0, 0
    last_page = None
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments during a multi-minute run.
    started = time.perf_counter()

    async for page in reader.get_pages(bounds, proj):
        # presumably a GeoDataFrame (supports .plot/.columns/.head) - confirm
        page.plot(ax=ax, column='Shape__Area')
        last_page = page
        item_count += len(page)
        page_count += 1
        if page_count % print_n == 0:
            t = int(time.perf_counter() - started)
            print(f'#{page_count}: {item_count} items @ {t // 60}m {t % 60}s')
        if page_count >= stop_n:
            break

    elapsed = time.perf_counter() - started
    print(f"GIS loading took: {elapsed} seconds")
    plt.show()

    # The loop variable is never bound when the reader yields nothing, so
    # guard here rather than failing with an unbound-name error.
    if last_page is None:
        print("No pages were returned; nothing to display")
        return
    display(list(last_page.columns))
    display(last_page.head())

crs = 4326
bounds = BoundsIterator(SYDNEY_BOUNDS, resolution=res, epsg_crs=crs)
addr_proj = GisProjection(
    schema=ADDR_GIS_SCHEMA,
    fields=['id', 'geo', ('data', 2), ('meta', 2), ('assoc', 2)],
    epsg_crs=crs,
)
lots_proj = GisProjection(schema=LOT_GIS_SCHEMA, fields='*', epsg_crs=crs)

try:
    async with ClientSession() as session:
        gis_reader = GisReader.create(
            session, 
            max_concurrent=concurrent,
            where_clause=[
                'Shape__Area < 50000000',
                'Shape__Length < 100000',
            ],
        )
        await read_pages(gis_reader, bounds, addr_proj)
        # await read_pages(gis_reader, bounds, lots_proj)
except GisReaderError as e:
    print(e)

#40: 1385 items @ 0m 3s
#80: 4718 items @ 0m 6s
#120: 8343 items @ 0m 12s
#160: 13950 items @ 0m 17s
#200: 17445 items @ 0m 23s
#240: 21918 items @ 0m 26s
#280: 24357 items @ 0m 31s
#320: 26275 items @ 0m 36s
#360: 30739 items @ 0m 43s
#400: 34975 items @ 0m 47s
#440: 44876 items @ 0m 58s
#480: 52571 items @ 1m 8s
#520: 59213 items @ 1m 19s
#560: 76633 items @ 1m 52s
#600: 86118 items @ 2m 16s
#640: 103490 items @ 2m 42s
#680: 115362 items @ 3m 2s
#720: 124124 items @ 3m 18s
#760: 134093 items @ 3m 35s
#800: 142096 items @ 3m 53s
#840: 148263 items @ 4m 9s
#880: 156266 items @ 4m 26s
#920: 169929 items @ 4m 55s
#960: 180256 items @ 5m 14s
#1000: 199478 items @ 5m 47s
#1040: 211807 items @ 6m 36s
