In [2]:
import time
import folium
import geopandas as gpd #A more flexible package to work with geospatial data in python 
from itertools import product
import json
import networkx as nx # networkx package 
import numpy as np
from osgeo import ogr #GDAL package 
import osmnx as ox 
import pandas as pd #Base package for data analysis and manipulation 
from pyproj import CRS #package for your projection management 
import random
import rasterio as rio
from rasterio import mask
import rasterstats as rs
import requests
import shapely.geometry #python package for basic spatial operation 
import xml.dom.minidom


### INTRODUCTION

The sections of this notebook are in order. 
The OSM settings should always be run.
Some of the operations in this notebook require data that can be downloaded from the data folder in this repository.

Downloading the AHN4 datasets can take quite some time, so make sure everything is set up correctly before you start. The same applies to downloading the networks.

The OSMOSIS section can be ignored for now, but it may be needed later if downloading the networks via OSMnx fails for some reason.

The section on adding grid values is a work in progress.

### OSM settings

In [None]:
# Extra OSM tags that should be kept as attributes on the downloaded edges and nodes.
# THIS CELL MUST RUN BEFORE ANY NETWORK IS DOWNLOADED, otherwise the tags are not retained.
extra_way_tags = ['surface', 'footway', 'cycleway', 'crossing', 'barrier']
extra_node_tags = ['barrier']

# Only append tags that are not registered yet, so re-running this cell does not
# keep adding duplicate entries to the OSMnx settings.
ox.settings.useful_tags_way.extend(
    [tag for tag in extra_way_tags if tag not in ox.settings.useful_tags_way])
ox.settings.useful_tags_node.extend(
    [tag for tag in extra_node_tags if tag not in ox.settings.useful_tags_node])

### NETWORK EXTENT

In [4]:
# Names of the municipalities (gemeenten) covered by the project; they match the
# 'gemeentenaam' values that become the index of `project_gemeenten` below.
g_namen = ['Amsterdam', 'Rotterdam', "Den_Haag", "Utrecht"]

# Load the municipality geometries (derived from the CBS municipal boundaries;
# TODO: describe this in the data section).
# Forward slashes keep this relative path valid on Windows, Linux and macOS,
# consistent with the other data paths in this notebook.
project_gemeenten = gpd.read_file("data/boundaries/UrbanRunner_Areas.geojson")

# Extra geometry column 'extent': the bounding box of each municipality, buffered
# by 2000 and converted to WGS84.
# NOTE(review): the buffer distance is expressed in the units of the file's CRS —
# presumably metres (RD New); confirm.
project_gemeenten["extent"] = project_gemeenten.envelope.buffer(2000).to_crs("EPSG:4326")

# Index by municipality name and reproject the active geometry column to WGS84.
# Done without `inplace` so the cell produces its result in one explicit assignment.
project_gemeenten = project_gemeenten.set_index('gemeentenaam').to_crs("EPSG:4326")

# Display the extents; the column was already converted to WGS84 when it was created.
project_gemeenten['extent']


gemeentenaam
Utrecht      POLYGON ((4.94185 52.02589, 4.94070 52.14183, ...
Amsterdam    POLYGON ((4.70129 52.27695, 4.69890 52.43084, ...
Den_Haag     POLYGON ((4.16227 52.01354, 4.15916 52.12691, ...
Rotterdam    POLYGON ((3.93606 51.83744, 3.93082 51.99951, ...
Name: extent, dtype: geometry

##### plot in folium

In [None]:
# Centre of the bounding box around all municipalities, as an (x, y) tuple.
map_centroid = project_gemeenten.unary_union.envelope.centroid.coords[0]

# folium expects the location as (lat, lon), i.e. (y, x).
m = folium.Map(location = (map_centroid[1], map_centroid[0]))


def _add_outline(shape, fill_colour, label):
    """Draw one geometry on map ``m`` with the given fill colour and a popup label."""
    layer = folium.GeoJson(data=gpd.GeoSeries(shape).to_json(),
                           style_function=lambda x: {'fillColor': fill_colour})
    folium.Popup(label).add_to(layer)
    layer.add_to(m)


# For every municipality draw the buffered extent (blue) first and the original
# municipality geometry (green) on top of it.
for gemeente_label, row in project_gemeenten.iterrows():
    _add_outline(row["extent"], 'blue', gemeente_label)
    _add_outline(row['geometry'], 'green', gemeente_label)

m

### DOWNLOADING AHN

In [None]:
# AHN4 tile index (kaartbladen): tiles carrying download links to the different AHN4
# variants. This project uses AHN4_0.5M_DTM.
# TODO (method section): describe the AHN4 data and explain why this variant is used.
ahn_datavlakken = gpd.read_file(r"data/kaartbladen_AHN4.gpkg").to_crs("EPSG:4326")

# Keep only the tiles that fall within the municipality's extent.
# TODO (method section): describe how the right tiles are selected.
for g in ["Amsterdam"]:
    study_area = project_gemeenten.loc[g].extent.envelope
    globals()[f"{g}_ahnvlakken"] = ahn_datavlakken.clip(study_area)

In [None]:
# Download the AHN4 tiles that overlap each municipality's extent and store them as
# zip files, one folder per municipality. This can take quite some time.
# NOTE(review): the target directory is an absolute path on one specific machine;
# adjust it (and the matching path used when building the DTM lists) before running.
for gemeente in g_namen:
    globals()[f"{gemeente}_ahnvlakken"]=ahn_datavlakken.clip(project_gemeenten.loc[gemeente].extent.envelope)
    tiles = globals()[f"{gemeente}_ahnvlakken"]
    for i in range(tiles.Name_1.count()):
        # Positional columns: 2 is used as the tile name, 3 as the download URL.
        name = tiles.iat[i,2]

        response = requests.get(tiles.iat[i,3])
        # Fail loudly instead of saving an HTTP error page as if it were a zip archive.
        response.raise_for_status()

        target = 'C:\\Users\\danny\\Documents\\persoonlijk\\GIMA\\modules\\module 6\\data\\AHN4_05M_DTM\\'+gemeente + '\\'+ name + '.zip'
        # The context manager guarantees the file is flushed and closed.
        with open(target, "wb") as zip_file:
            zip_file.write(response.content)
        # Report only after the tile is on disk; an f-string avoids the str + int TypeError.
        print(f'done with downloading number {i} of gemeente {gemeente}')

In [None]:
# Build, per municipality, the list of DTM raster paths: one 'M_<tile name>.tif' per
# selected AHN tile, located in the municipality's AHN4 download folder.
for g in ['Amsterdam']:
    tile_table = globals()[f"{g}_ahnvlakken"]
    globals()[f"{g}_DTM_list"] = [
        'C:\\Users\\danny\\Documents\\persoonlijk\\GIMA\\modules\\module 6\\data\\AHN4_05M_DTM\\'+ g + '\\M_'+ tile_table.iat[row, 2] +".tif"
        for row in range(tile_table.Name_1.count())
    ]

### GETTING NETWORK

##### for getting the real deal

In [None]:
# Download the unsimplified walking and cycling networks for every municipality and
# merge them with networkx's compose function.
# TODO (method section): describe that two networks (walk and bike) are fetched and merged.
#
# simplify=False was chosen on purpose. TODO: describe what simplification means, based
# on the OSMnx documentation. Dijkstra's algorithm was tested on two networks, one with
# simplification on and one with it off; there was a difference, but not large enough
# to justify using simplification.
# retain_all=True also needs explaining: two different networks are merged, and the
# unconnected edges are discarded afterwards.
for g in g_namen:
    print('starting with', g)
    area = project_gemeenten.loc[g].extent.envelope

    walk_graph = ox.graph_from_polygon(area, network_type="walk", retain_all=True, simplify=False)
    globals()[f"{g}_network_walk"] = walk_graph
    print('done with walking graph of', g)

    bike_graph = ox.graph_from_polygon(area, network_type="bike", retain_all=True, simplify=False)
    globals()[f"{g}_network_bike"] = bike_graph
    print('done with biking graph of', g)

    globals()[f"{g}_network_both"] = nx.compose(walk_graph, bike_graph)

##### for testing (small network extent within Utrecht)

In [5]:
# Test networks for Utrecht, built twice: first simplified, then unsimplified.
# The merged graph is stored as `<g>_network_both_test_simplified` and
# `<g>_network_both_test` respectively. The `<g>_network_walk_test` and
# `<g>_network_bike_test` names are overwritten by the second pass, so they end up
# holding the unsimplified graphs.
for g in ["Utrecht"]:
    area = project_gemeenten.loc[g].extent.envelope
    for simplify_flag, suffix in ((True, "_simplified"), (False, "")):
        globals()[f"{g}_network_walk_test"] = ox.graph_from_polygon(area, network_type="walk", simplify=simplify_flag)
        print('done with walking network')
        globals()[f"{g}_network_bike_test"] = ox.graph_from_polygon(area, network_type="bike", simplify=simplify_flag)
        print('done with biking network')
        globals()[f"{g}_network_both_test{suffix}"] = nx.compose(
            globals()[f"{g}_network_walk_test"], globals()[f"{g}_network_bike_test"])

done with walking network
done with biking network
done with walking network
done with biking network


### ROUTES

In [6]:
def get_random_XY_in_polygon(poly):
    """Return a random (x, y) coordinate located inside ``poly``.

    Candidate points are drawn uniformly from the bounding box of ``poly`` and
    rejected until one falls inside the polygon.

    Parameters
    ----------
    poly : shapely geometry
        Polygon (or multipolygon) to sample from.

    Returns
    -------
    tuple of float
        The x and y coordinate of the accepted point.

    Raises
    ------
    ValueError
        If ``poly`` is empty or has zero area. No candidate could ever be accepted
        in that case, so sampling would loop forever.
    """
    if poly.is_empty or poly.area == 0:
        raise ValueError("cannot sample a point inside an empty or zero-area geometry")
    minx, miny, maxx, maxy = poly.bounds
    while True:
        p = shapely.geometry.Point(random.uniform(minx, maxx), random.uniform(miny, maxy))
        if poly.contains(p):
            return p.x, p.y

In [12]:
# Number of random points drawn per municipality.
n_nodes = 100

# For every municipality: sample random coordinates inside its boundary and look up
# the nearest node of the merged network for each of them. With return_dist=True the
# result is stored as returned by OSMnx; later cells take element [0] as the nodes.
for g in g_namen:
    boundary = project_gemeenten.loc[g].geometry
    sampled_xy = [get_random_XY_in_polygon(boundary) for _ in range(n_nodes)]
    X_list = [xy[0] for xy in sampled_xy]
    Y_list = [xy[1] for xy in sampled_xy]
    globals()[f"{g}_random_nodes"] = ox.nearest_nodes(
        globals()[f"{g}_network_both"], X_list, Y_list, return_dist=True)

In [None]:
# Split the sampled nodes of the unsimplified network into 50 source and 50 goal nodes.
# NOTE(review): `g` is not set in this cell; it holds whatever value the most recently
# executed loop left behind. Confirm which municipality is intended.
sampled_node_ids = globals()[f"{g}_random_nodes"][0]
source_nodes = sampled_node_ids[:50]
goal_nodes = sampled_node_ids[50:100]

##### test: comparing simplified with nonsimplified

In [None]:
# Draw 100 random coordinates inside the Utrecht municipality boundary. The
# nearest-node lookup on these coordinates is done in the following cells.
for g in ["Utrecht"]:
    boundary = project_gemeenten.loc[g].geometry
    sampled_xy = [get_random_XY_in_polygon(boundary) for _ in range(100)]
    X_list = [xy[0] for xy in sampled_xy]
    Y_list = [xy[1] for xy in sampled_xy]

In [None]:
# Nearest nodes of the unsimplified Utrecht test network for the random coordinates,
# split into 50 source and 50 goal nodes.
# `u_network_both` was never defined in this notebook; the unsimplified merged test
# network built above is `Utrecht_network_both_test`, which is also the graph the
# route calculation below runs on.
random_nodes = ox.nearest_nodes(Utrecht_network_both_test, X_list, Y_list, return_dist=True)
source_nodes = random_nodes[0][:50]
goal_nodes = random_nodes[0][50:100]

In [13]:
# Snap the same random coordinates to both Utrecht test networks and split the
# resulting node lists into a source half and a goal half.
source_half = slice(None, 50)
goal_half = slice(50, 100)

# Unsimplified network
Utrecht_random_nodes = ox.nearest_nodes(
    Utrecht_network_both_test, X_list, Y_list, return_dist=True)
Utrecht_source_nodes = Utrecht_random_nodes[0][source_half]
Utrecht_goal_nodes = Utrecht_random_nodes[0][goal_half]

# Simplified network
Utrecht_random_nodes_simplified = ox.nearest_nodes(
    Utrecht_network_both_test_simplified, X_list, Y_list, return_dist=True)
Utrecht_source_nodes_simplified = Utrecht_random_nodes_simplified[0][source_half]
Utrecht_goal_nodes_simplified = Utrecht_random_nodes_simplified[0][goal_half]

In [14]:
# Time the shortest-path calculation between the source and goal nodes on both
# networks. The elapsed time is measured once, directly after the calculation, so the
# stored value is exactly the value that is printed. perf_counter() is a monotonic
# clock intended for measuring intervals.

# UNSIMPLIFIED
start_time = time.perf_counter()
random_shortest_routes = ox.distance.shortest_path(
    Utrecht_network_both_test, Utrecht_source_nodes, Utrecht_goal_nodes, weight='length', cpus=1)
randomroute_time = time.perf_counter() - start_time
print("gathering random routes for unsimplified network takes %s seconds" % randomroute_time)

# SIMPLIFIED
start_time = time.perf_counter()
random_shortest_routes_simplified = ox.distance.shortest_path(
    Utrecht_network_both_test_simplified, Utrecht_source_nodes_simplified, Utrecht_goal_nodes_simplified, weight='length', cpus=1)
randomroute_time_simplified = time.perf_counter() - start_time
print("gathering random routes for simplified network takes %s seconds" % randomroute_time_simplified)

gathering random routes for unsimplified network takes 31.143508672714233 seconds
gathering random routes for simplified network takes 10.06904125213623 seconds


In [None]:
# Plot the route at index 2 of the unsimplified routes. The routes were calculated on
# `Utrecht_network_both_test`, so that graph is used here (`u_network_both` was never
# defined in this notebook).
ox.folium.plot_route_folium(Utrecht_network_both_test, random_shortest_routes[2], route_map=None, popup_attribute=None, tiles='cartodbpositron', zoom=1, fit_bounds=True)

In [None]:
# Plot the route at index 2 of the simplified routes. The routes were calculated on
# `Utrecht_network_both_test_simplified`, so that graph is used here
# (`u_network_both_simplified` was never defined in this notebook).
ox.folium.plot_route_folium(Utrecht_network_both_test_simplified, random_shortest_routes_simplified[2], route_map=None, popup_attribute=None, tiles='cartodbpositron', zoom=1, fit_bounds=True)

### USING OSMOSIS

#### for ways

#### for greenspaces


Counting greenspaces per type

In [None]:
# Parse the OSM XML extract with the Utrecht greenspaces and collect all <way> elements.
osm_greenspace_file = r"data/OSM/utrecht_greenspaces.osm"
doc = xml.dom.minidom.parse(osm_greenspace_file)
ways = doc.getElementsByTagName('way')

In [None]:
# Sort the ways into three lists according to the tag key ('k' attribute) found on
# their child elements: landuse, leisure or natural.
wayWithLanduse = []
wayWithLeisure = []
wayWithNatural = []
ways_by_tag_key = {
    'landuse': wayWithLanduse,
    'leisure': wayWithLeisure,
    'natural': wayWithNatural,
}
for way in ways:
    for way_child in way.childNodes:
        # Only element nodes have attributes. Skipping every other node type
        # (whitespace text, comments, CDATA, ...) avoids an AttributeError on nodes
        # that lack hasAttribute/getAttribute.
        if way_child.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:
            continue
        # getAttribute returns '' when 'k' is absent, which matches no bucket.
        bucket = ways_by_tag_key.get(way_child.getAttribute('k'))
        if bucket is not None:
            bucket.append(way)

In [None]:
# Number of ways per tag key, printed in the order: leisure, landuse, natural.
way_counts = [len(found) for found in (wayWithLeisure, wayWithLanduse, wayWithNatural)]
print(*way_counts)


### GREENSPACES, WATERTAPS AND OBSTACLES

In [None]:
# Download greenspaces, drinking-water points and obstructions from OSM for the
# extent of each listed municipality.
for g in ["Utrecht"]:
    area = project_gemeenten.loc[g].extent.envelope

    # Greenspaces: keep only the 'way' features and the columns of interest.
    # TODO: describe the selected tags.
    green_tags = {'landuse':['village_green','recreation_ground','grass','forest'], 'leisure':['nature_reserve','park','garden'],'natural':['fell','heath','wood']}
    green_features = ox.geometries_from_polygon(area, tags=green_tags)
    globals()[f"greenspaces_{g}"] = green_features.loc['way'][["geometry","natural","leisure","landuse"]]

    # Water points: buffer by 50 in EPSG:28992, make the buffer the active geometry
    # and convert it back to WGS84.
    # TODO: justify the 50 m buffer. For the discussion: water taps are tricky,
    # because a runner does not want to pass one after only 100 m.
    water_taps = ox.geometries_from_polygon(area, tags={'amenity':'drinking_water'})
    water_taps = water_taps.to_crs('EPSG:28992')
    water_taps['buffered'] = water_taps.buffer(50)
    water_taps = water_taps.set_geometry('buffered', crs="EPSG:28992")
    water_taps = water_taps.to_crs('EPSG:4326')
    globals()[f"waterpoints_{g}"] = water_taps[['amenity','buffered']]

    # Obstructions: barriers on nodes, crossings on ways.
    # TODO: describe the barriers. Traffic signals are used as well, but those are
    # already inherent in the network.
    obstacles = ox.geometries_from_polygon(area, tags={'barrier':['gate', 'stile'], 'footway':'crossing', 'cycleway':'crossing'})
    globals()[f"obstruction_{g}"] = obstacles
    globals()[f"obstruction_nodes_{g}"] = obstacles.loc['node']['barrier']
    globals()[f"obstruction_ways_{g}"] = obstacles.loc['way'][['footway','cycleway']]

##### adding to node and edges

In [None]:
# Spatially join the greenspace, waterpoint and obstruction layers onto the edges and
# nodes of every municipality (left join, so every edge/node is kept).
# NOTE(review): this loop runs over all of g_namen, while the greenspace/waterpoint/
# obstruction layers are only built for Utrecht in the cell above, and the
# `<g>_edges` / `<g>_nodes` frames are only loaded further down in the notebook
# (for Utrecht and Den_Haag). Confirm the intended execution order and cities.
for g in g_namen:
    globals()[f"{g}_edges"] = gpd.sjoin(globals()[f"{g}_edges"], globals()[f"greenspaces_{g}"], how = 'left')
    globals()[f"{g}_edges"] = gpd.sjoin(globals()[f"{g}_edges"], globals()[f"waterpoints_{g}"], how = 'left')
    # NOTE(review): this line is identical to the previous one, so the waterpoints are
    # joined twice — possibly the obstruction ways were meant here; confirm.
    globals()[f"{g}_edges"] = gpd.sjoin(globals()[f"{g}_edges"], globals()[f"waterpoints_{g}"], how = 'left')
    # NOTE(review): `obstruction_nodes_<g>` is created by selecting only the 'barrier'
    # column, so it may not carry a geometry for the spatial join — verify.
    globals()[f"{g}_nodes"] = gpd.sjoin(globals()[f"{g}_nodes"], globals()[f"obstruction_nodes_{g}"], how = 'left')

### GETTING EDGE CLIMB FROM NODE ELEVATION (WORK IN PROGRESS)

### SAVING NETWORKS TO GEOPACKAGE

In [None]:
#saving networks
def saving_networks(network):
    ox.save_graph_geopackage(network, filepath='data\\graphs\\' + g + '_Graph_complete.gpkg')

### ADDING HEAT DATA

In [None]:
# Bounding box given in EPSG:28992 coordinates, displayed as GeoJSON.
left, right, bottom, top = 185250, 197000, 438500, 447500
bbox_polygon = shapely.geometry.box(left, bottom, right, top)
bbox_frame = gpd.GeoDataFrame({'geometry': bbox_polygon}, index=[0], crs="EPSG:28992")
bbox_frame.to_json()

In [None]:
# Utrecht extent as GeoJSON in EPSG:28992.
# The 'extent' column is stored in WGS84; reprojecting the whole frame first only
# transforms the active geometry column and leaves 'extent' untouched, so that extra
# reprojection was wasted work and has been dropped. The extent is labelled as
# EPSG:4326 here and then converted.
utrecht_extent = project_gemeenten.loc['Utrecht'].extent.envelope
gpd.GeoDataFrame({"geometry": utrecht_extent}, index=[0], crs="EPSG:4326").to_crs("EPSG:28992").to_json()

In [None]:
def make_grid_update(polygon, edge_size):
    """Build a grid of square cells covering ``polygon``.

    The bounding box of the polygon is padded equally on both sides so that its
    width and height become a whole number of cells; only cells that intersect the
    polygon are returned.

    Parameters
    ----------
    polygon : shapely.geometry
        Area that the grid has to cover.
    edge_size : number
        Edge length of a grid cell, in the units of the polygon's coordinates.

    Returns
    -------
    geopandas.GeoSeries
        The square cells that intersect ``polygon``.
    """
    min_x, min_y, max_x, max_y = polygon.bounds
    half_cell = edge_size / 2

    # Padding needed to stretch each dimension to a multiple of edge_size.
    pad_x = edge_size - (max_x - min_x) % edge_size
    pad_y = edge_size - (max_y - min_y) % edge_size

    # Cell centres along each axis, starting half a cell inside the padded box.
    centres_x = np.arange(min_x - pad_x / 2 + half_cell, max_x + pad_x / 2, edge_size)
    centres_y = np.arange(min_y - pad_y / 2 + half_cell, max_y + pad_y / 2, edge_size)
    centres = np.array(list(product(centres_x, centres_y)))

    # Buffering a point with square caps turns it into a square cell.
    cells = gpd.points_from_xy(centres[:, 0], centres[:, 1]).buffer(half_cell, cap_style=3)
    overlapping = cells.intersects(polygon)
    return gpd.GeoSeries(cells[overlapping])

In [22]:
# Preprocess the PET map: clip the raster to the Utrecht extent and write the result
# to 'hittedata.tif'.

# Utrecht extent as a GeoJSON geometry in EPSG:28992. The 'extent' column is stored in
# WGS84 and is not affected by reprojecting the frame, so it is converted directly.
utrecht_extent = project_gemeenten.loc['Utrecht'].extent.envelope
bboxshape = [json.loads(gpd.GeoDataFrame({"geometry": utrecht_extent},
    index=[0], crs="EPSG:4326").to_crs("EPSG:28992").to_json())['features'][0]['geometry']]

# Open the source raster in a context manager so the file handle is always released.
with rio.open('data/RIVM_R88_20170621_gm_actueelUHI.tif') as rastfile:
    petmap, petmap_transform = mask.mask(rastfile, shapes=bboxshape, crop=True)

# NOTE(review): nodata=255 and dtype='uint8' are hard-coded rather than taken from the
# source raster — presumably they match it; confirm.
with rio.open('hittedata.tif', 'w', driver="GTiff",
                   height=petmap.shape[1], width=petmap.shape[2],
                   transform=petmap_transform, crs=CRS.from_epsg(28992),
                   nodata=255, dtype='uint8', count= 1) as file:
    file.write(petmap)

In [21]:
# Shape of the clipped raster array; the writing cell uses index 1 as the height and
# index 2 as the width.
petmap.shape

(1, 1705, 1953)

In [None]:
def EnrichEdgesWithRasterInfo(edges, raster, statsprefix):
    """Add zonal raster statistics to every edge.

    Parameters
    ----------
    edges : GeoDataFrame
        Edges with a 'geometry' column. The frame passed in is left untouched.
    raster :
        Raster handed to ``rasterstats.zonal_stats``.
    statsprefix : str
        Prefix put in front of the names of the statistics columns.

    Returns
    -------
    GeoDataFrame
        A copy of ``edges`` with a 'UID' helper column and the prefixed statistics
        columns joined on.
    """
    # Work on a copy so the helper column does not leak into the caller's frame.
    edges = edges.copy()
    edges['UID'] = range(len(edges))  # unique ID used to join the statistics back
    e = edges.loc[:, ['UID', 'geometry']]  # only the columns zonal_stats needs
    # With geojson_out=True every result is a feature whose 'properties' hold the
    # UID together with the computed statistics.
    e_zonalstats = rs.zonal_stats(e, raster, prefix=statsprefix, geojson_out=True)
    e_propsdf = pd.DataFrame([feature['properties'] for feature in e_zonalstats])
    edges_updated = edges.join(other=e_propsdf.set_index('UID'), on='UID')
    return edges_updated

### GRAPH FROM GEOPACKAGE

In [None]:
# Load the node and edge layers of the saved graphs into `<g>_nodes` / `<g>_edges`,
# indexed the way OSMnx expects (nodes by osmid, edges by u, v, key).
for g in ["Utrecht", "Den_Haag"]:
    fp = f'data/graphs_team/{g}_Graph.gpkg'
    for layer_name, index_columns in (('nodes', 'osmid'), ('edges', ['u', 'v', 'key'])):
        globals()[f"{g}_{layer_name}"] = gpd.read_file(fp, layer=layer_name).set_index(index_columns)

In [None]:
# Write the Rotterdam nodes and edges as two layers of one GeoPackage.
# NOTE(review): `Rotterdam_nodes` / `Rotterdam_edges` are not created in the cell above
# (it only loads Utrecht and Den_Haag) — confirm where they come from.
rotterdam_gpkg = "data/graphs/reprojected/Rotterdam_Graph.gpkg"
Rotterdam_nodes.to_file(rotterdam_gpkg, layer="nodes", driver="GPKG")
Rotterdam_edges.to_file(rotterdam_gpkg, layer="edges", driver="GPKG")

In [None]:
# Show the CRS of the Utrecht and Den_Haag node layers side by side.
print(*(frame.crs for frame in (Utrecht_nodes, Den_Haag_nodes)))

In [None]:
# Re-label the Den_Haag layers as EPSG:28992 without transforming any coordinates.
# set_crs(..., allow_override=True) is the explicit way to replace CRS metadata;
# assigning to `.crs` directly is discouraged when a CRS is already set.
# NOTE(review): this assumes the stored coordinates really are EPSG:28992 — confirm.
Den_Haag_edges = Den_Haag_edges.set_crs("EPSG:28992", allow_override=True)
Den_Haag_nodes = Den_Haag_nodes.set_crs("EPSG:28992", allow_override=True)

In [None]:
# Reproject the Den_Haag layers to WGS84.
Den_Haag_edges = Den_Haag_edges.to_crs("EPSG:4326")
Den_Haag_nodes = Den_Haag_nodes.to_crs("EPSG:4326")

In [None]:
# Write the node and edge layers of Utrecht and Den_Haag to one GeoPackage each.
for gemeente_label, node_frame, edge_frame in (
        ("Utrecht", Utrecht_nodes, Utrecht_edges),
        ("Den_Haag", Den_Haag_nodes, Den_Haag_edges)):
    gpkg_path = f"data/graphs/reprojected/{gemeente_label}_Graph.gpkg"
    node_frame.to_file(gpkg_path, layer="nodes", driver="GPKG")
    edge_frame.to_file(gpkg_path, layer="edges", driver="GPKG")

In [None]:
def graph_from_geopackage(gemeente):
    """Rebuild an OSMnx graph from the GeoPackage saved for one municipality.

    Parameters
    ----------
    gemeente : str
        Municipality name; the graph is read from ``data/graphs/<gemeente>_Graph.gpkg``.

    Returns
    -------
    The MultiDiGraph built by ``ox.graph_from_gdfs`` from the 'nodes' and 'edges' layers.

    Raises
    ------
    AssertionError
        If the node index (osmid) or the edge index (u, v, key) is not unique.
    """
    # Use the argument, not the notebook-level loop variable `g`: previously the
    # parameter was ignored and the file read depended on leftover global state.
    fp = 'data/graphs/' + gemeente + '_Graph.gpkg'
    gdf_nodes = gpd.read_file(fp, layer='nodes').set_index('osmid')
    gdf_edges = gpd.read_file(fp, layer='edges').set_index(['u', 'v', 'key'])
    assert gdf_nodes.index.is_unique and gdf_edges.index.is_unique
    # convert the node/edge GeoDataFrames to a MultiDiGraph
    # NOTE(review): the graph is flagged as simplified, whereas the full networks in
    # this notebook are downloaded with simplify=False — confirm this attribute.
    graph_attrs = {'crs': 'epsg:4326', 'simplified': True}
    G = ox.graph_from_gdfs(gdf_nodes, gdf_edges, graph_attrs)
    return G

In [None]:
# Reduce each saved graph to its largest component and write it back.
# The graph is read from 'data/graphs/<g>_Graph.gpkg', so the result is written to the
# same forward-slash path; the former backslash path only pointed at that file on
# Windows. Note that this overwrites the file that was just read.
for g in ["Amsterdam", "Rotterdam", "Den_Haag"]:
    G1 = graph_from_geopackage(g)
    G2 = ox.utils_graph.get_largest_component(G1)
    ox.save_graph_geopackage(G2, filepath='data/graphs/' + g + '_Graph.gpkg')

In [None]:
# Replace the Utrecht graph by the result of ox.utils_graph.get_largest_component.
# NOTE(review): `Utrecht_Graph` is not created anywhere in the visible notebook, so
# this cell depends on state from another session or a removed cell — confirm.
Utrecht_Graph = ox.utils_graph.get_largest_component(Utrecht_Graph)


In [None]:
# Save the reduced Utrecht graph. A forward-slash path is used so the file lands in
# data/graphs/ on every operating system, consistent with the paths used for reading.
ox.save_graph_geopackage(Utrecht_Graph, filepath='data/graphs/Utrecht_Graph.gpkg')

In [None]:
# Compare the node counts of two graphs.
# NOTE(review): `G3` is not defined anywhere in the visible notebook, and `G2` holds
# the graph of the last municipality processed by the loop above — confirm.
print(len(G2.nodes), len(G3.nodes))

In [None]:
# Number of nodes in the Utrecht graph.
len(Utrecht_Graph.nodes)