# Hex fill Notebook

Notebook with algorithm to fill missing hexagons created in lower resolutions where there are no nodes

## Import modules

In [2]:
import geopandas as gpd
from geopandas.tools import overlay
import matplotlib.pyplot as plt
import os 
import sys
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

import leafmap
import math
import shapely
import h3
import pandas as pd
import numpy as np


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


## Donwload municipality polygons

In [3]:
df = pd.read_json("../../scripts/Metropolis_CVE.json")
mpos_folder = 'mpos_2020'
hex_folder = 'hex_bins_index_2020'
mun_gdf = gpd.GeoDataFrame()
ageb_gdf = gpd.GeoDataFrame()
hex_gdf = gpd.GeoDataFrame()

In [5]:
#Download agebs, hexgrid and municipality figures
for c in df.columns.unique():
    if c == 'Guadalajara':
        mun_gdf = gpd.GeoDataFrame()
        hex_gdf = gpd.GeoDataFrame()
        ageb_gdf = gpd.GeoDataFrame()
        for i in range(len(df.loc["mpos", c])):
            # Extracts specific municipality code
            m = df.loc["mpos", c][i]
            hex_schema = "hexgrid"
            hex_table = "hexgrid_10_city"
            # Downloads municipality polygon according to code
            query = f"SELECT * FROM marco.{mpos_folder} WHERE \"CVEGEO\" LIKE \'{m}\'"
            mun_gdf = mun_gdf.append(aup.gdf_from_query(query, geometry_col='geometry'))
            query = f"SELECT * FROM {hex_schema}.{hex_table} WHERE \"CVEGEO\" LIKE \'{m}\'"
            hex_gdf = hex_gdf.append(aup.gdf_from_query(query, geometry_col='geometry'))
            query = f"SELECT * FROM marco.ageb_2020 WHERE \"cve_geo_ageb\" LIKE \'{m}%%\'"
            ageb_gdf = ageb_gdf.append(aup.gdf_from_query(query, geometry_col='geometry'))

In [6]:
hex_schema = "raster_analysis"
hex_table = "ndmi_analysis_hex"
c = 'Guadalajara'
n = '11'
query = f'SELECT * FROM {hex_schema}.{hex_table} WHERE \"city\" LIKE \'{c}\' AND \"res\" = {n}'
ndmi_gdl = aup.gdf_from_query(query, geometry_col='geometry')
ndmi_gdl.columns = ndmi_gdl.columns.str.replace('hex_id', 'hex_id_10')
ndmi_gdl.head(10)

Unnamed: 0,hex_id_10,geometry,res,ndmi_mean,ndmi_std,ndmi_median,ndmi_max,ndmi_min,ndmi_diff,ndmi_tend,city
0,8b49ab4b261efff,"POLYGON ((-103.38872 20.75207, -103.38855 20.7...",11,-0.02443,0.088269,-0.04594,0.125081,-0.108787,0.233868,9.9e-05,Guadalajara
1,8b49ab4b2694fff,"POLYGON ((-103.38507 20.75226, -103.38490 20.7...",11,-0.001659,0.048773,-0.011628,0.100045,-0.044546,0.144592,-0.000135,Guadalajara
2,8b49ab4b26eafff,"POLYGON ((-103.39028 20.75465, -103.39011 20.7...",11,0.004639,0.078975,-0.003485,0.129107,-0.105572,0.234678,0.00027,Guadalajara
3,8b49ab4b271dfff,"POLYGON ((-103.38996 20.74874, -103.38979 20.7...",11,0.099768,0.052265,0.098254,0.175194,0.03317,0.142024,-0.000491,Guadalajara
4,8b49ab4b2613fff,"POLYGON ((-103.38830 20.75179, -103.38812 20.7...",11,-0.028204,0.106004,-0.051343,0.154841,-0.123025,0.277866,0.000392,Guadalajara
5,8b49ab4b27a6fff,"POLYGON ((-103.38688 20.74775, -103.38671 20.7...",11,-0.014251,0.055812,-0.03339,0.107957,-0.055972,0.16393,0.000133,Guadalajara
6,8b49ab4b2793fff,"POLYGON ((-103.38532 20.74982, -103.38514 20.7...",11,0.005626,0.044411,-0.002412,0.097986,-0.027236,0.125222,1.4e-05,Guadalajara
7,8b49ab4b265bfff,"POLYGON ((-103.39208 20.75480, -103.39191 20.7...",11,0.143926,0.055587,0.130723,0.241717,0.074088,0.167628,0.000376,Guadalajara
8,8b49ab4b2789fff,"POLYGON ((-103.38797 20.75053, -103.38779 20.7...",11,-0.000564,0.107075,-0.037318,0.16261,-0.089739,0.252348,-0.001059,Guadalajara
9,8b49ab4b2664fff,"POLYGON ((-103.39332 20.75147, -103.39315 20.7...",11,-0.034403,0.052683,-0.047964,0.072917,-0.068646,0.141563,-4.6e-05,Guadalajara


In [7]:
#Define projections
mun_gdf = mun_gdf.set_crs("EPSG:4326")
ageb_gdf = ageb_gdf.set_crs("EPSG:4326")
hex_gdf = hex_gdf.set_crs("EPSG:4326")
ndmi_gdl = ndmi_gdl.set_crs("EPSG:4326")

## Donwload nodes and edges

In [8]:
#Creates wkt for query
gdf_tmp = mun_gdf.copy()
gdf_tmp = gdf_tmp.to_crs("EPSG:6372")
gdf_tmp = gdf_tmp.buffer(1).reset_index().rename(columns={0:'geometry'})
gdf_tmp = gdf_tmp.set_geometry('geometry')
gdf_tmp = gdf_tmp.to_crs("EPSG:4326")
poly_wkt = gdf_tmp.dissolve().geometry.to_wkt()[0]
aup.log("Created wkt based on dissolved polygon")

In [9]:
#Download nodes with distance to denue data
query = f"SELECT * FROM osmnx.edges WHERE ST_Intersects(geometry, \'SRID=4326;{poly_wkt}\')"
edges = aup.gdf_from_query(query, geometry_col='geometry')
aup.log(f"Downloaded {len(edges)} edges from database for {c}")
edges = edges.to_crs("EPSG:4326")
query = f"SELECT * FROM osmnx.nodes WHERE ST_Intersects(geometry, \'SRID=4326;{poly_wkt}\')"
nodes = aup.gdf_from_query(query, geometry_col='geometry')
aup.log(f"Downloaded {len(edges)} edges from database for {c}")
nodes = nodes.to_crs("EPSG:4326")


## Hexagon analysis

In [10]:
#PIP (Point in Polygon). Overlays nodes with hexes to find hexes that have nodes in them and those that do not
pip = gpd.overlay(ndmi_gdl, nodes, how='intersection', keep_geom_type=False)
pip = pip.set_index('hex_id_10')
hex_gdf = hex_gdf.set_index('hex_id_10')
#simplify and keep only relevant columns
pip_idx = pip[['CVEGEO']]
hex_geom = hex_gdf[['geometry']]
#Merge with indicator. Right only means that the hexagon does NOT have any node (first filter)
hex_node = pip_idx.merge(hex_geom, left_index=True, right_index=True, how='outer', indicator=True)
R2 = hex_node[hex_node['_merge']=='right_only']
#(simplify) and plot
R3 = gpd.GeoDataFrame(R2, geometry = 'geometry')
R3 = R3[['geometry']]
R3.reset_index(inplace = True)
#R3.plot()


KeyError: "None of [Index(['CVEGEO'], dtype='object')] are in the [columns]"

In [None]:
#LIP (Line in Polygon). Overlays edges with hexes already filtered by nodes
#  to find hexes that have edges in them and those that do not. In this case
#We want to keep those that do have roads.
lip = gpd.overlay(R3, edges, how='intersection', keep_geom_type=False)
lip = lip.set_index('hex_id_10')
R3 = R3.set_index('hex_id_10')
lip_idx = lip[['osmid']]
R3_geom = R3[['geometry']]
#Merge with indicator. Both means that the hex has a road.
hex_edge = lip_idx.merge(R3_geom, left_index=True, right_index=True, how='outer', indicator=True)
hex_edge
R4 = hex_edge[hex_edge['_merge']=='both']
#Simplify and Plot
R5 = gpd.GeoDataFrame(R4, geometry = 'geometry')
R5 = R5[['geometry']]
R5.reset_index(inplace = True)


In [None]:
#AIP (AGEB in Polygon). Overlays agebs with hexes already filtered by nodes and edges
#  to find hexes that have agebs in them and those that do not. In this case
#We want to keep those that are within an ageb.
aip = gpd.overlay(R5, ageb_gdf, how='intersection', keep_geom_type=False)
aip = aip.set_index('hex_id_10')
R5 = R5.set_index('hex_id_10')
aip_idx = aip[['cve_ageb']]
R5_geom = R5[['geometry']]
#Merge with indicator. Both means that the hex has intersects with an ageb.
hex_ageb = aip_idx.merge(R5_geom, left_index=True, right_index=True, how='outer', indicator=True)
R6 = hex_ageb[hex_ageb['_merge']=='both']
#Simplify and Plot
missing_hex = gpd.GeoDataFrame(R6, geometry = 'geometry')
missing_hex = missing_hex[['geometry']]
missing_hex.reset_index(inplace = True)


In [None]:
missing_hex.plot()

In [None]:
missing_hex = missing_hex.drop_duplicates(subset=['hex_id_10'])

In [None]:
missing_hex

## Fill missing hexagons

In [None]:
## Start part 2, fill missing hexes with average value of neighbours
missing_hex[['v']] = np.nan
missing_hex


In [None]:
########This should be replaced with the hexes with distance information
######## 'v' = distance
## Optional, I am missing the real values
hexval = hex_node[hex_node['_merge']=='both']
hexval = gpd.GeoDataFrame(hexval, geometry = 'geometry')
hexval.reset_index(inplace=True)
rand = pd.DataFrame(np.random.randint(1,100,size=(195461, 1)), columns=list('v'))
hexval= hexval.merge(rand, left_index=True, right_index=True)
hexval.reset_index(inplace=True)
hexval = hexval.drop_duplicates(subset=['hex_id_10'])
hexval = hexval[['hex_id_10', 'geometry', 'v']]

In [None]:
urb_hex = gpd.GeoDataFrame()

In [None]:
missing_hex.reset_index(inplace = True)
hexval.reset_index(inplace = True)
urb_hex = hexval.append(missing_hex)
urb_hex = urb_hex.set_index('hex_id_10')
missing_hex = missing_hex.set_index('hex_id_10')
hexval = hexval.set_index('hex_id_10')
urb_hex

In [None]:
## Start looping
count = 0
iter = 1
urb_hex['v'+ str(count)] = urb_hex['v'].copy()
while urb_hex['v'+str(count)].isna().sum() > 0:
    if count< 5:
        missing = urb_hex[urb_hex['v'+str(count)].isna()]
        urb_hex['v'+ str(iter)] = urb_hex['v'+str(count)].copy()
        for idx,row in missing.iterrows():
            ###Cell 1
            near = pd.DataFrame(h3.k_ring(idx,1))
            near['hex_id_10'] = h3.k_ring(idx,1)
            near['a'] = np.nan
            near= near.set_index('hex_id_10')
            ###Cell 2
            neighbors = near.merge(urb_hex, left_index=True, right_index=True, how='left')
            #Cell 3
            average = neighbors['v'+str(count)].mean()
            urb_hex.at[idx, 'v'+str(iter)] = average
        count = count + 1
        iter = iter + 1
    if count>= 10:
        urb_hex['v'+str(count)] = 1
fill_hex = urb_hex[['geometry']]
fill_hex['v'] = urb_hex['v'+ str(count)].copy()

In [None]:
urb_hex['v'+str(count)].isna().sum()

In [None]:
fill_hex.reset_index(inplace = True)
fill_hex