In [1]:
import json
import os
import sys

sys.path.insert(0,'../tools')  # must run before the project-local imports below
from calcDistributions import loadData
import utils

import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import contextily as cx
from shapely.geometry import Point
from scipy.stats import entropy
import sqlalchemy
from routingpy import Graphhopper
%matplotlib qt

In [2]:
# Prerequisite: a GraphHopper server must already be running locally and be
# reachable at the URL below before any routing request is made.
routingEngine = Graphhopper(base_url='http://localhost:8989')
def getRoutedDistances(startLocs, stopLocs):
    """Route every start/stop pair by car and collect the resulting distances.

    Parameters
    ----------
    startLocs, stopLocs : array-like of point geometries
        Trip origins and destinations in EPSG:3857. Both inputs are paired by
        position, so they must have the same length.

    Returns
    -------
    distances : list
        ``route.distance`` of every routed pair, in input order (the notebook
        later divides these by 1000 to obtain kilometres).
    routes : list
        The objects returned by ``routingEngine.directions``, in input order.

    Raises
    ------
    ValueError
        If ``startLocs`` and ``stopLocs`` differ in length.
    """
    # The routing request is built from lon/lat pairs, so reproject to EPSG:4326.
    starts = gpd.GeoSeries(startLocs, crs=3857).to_crs(4326)
    stops = gpd.GeoSeries(stopLocs, crs=3857).to_crs(4326)
    if len(starts) != len(stops):
        raise ValueError(
            "startLocs and stopLocs must have the same length "
            "(got {} and {})".format(len(starts), len(stops))
        )

    distances, routes = [], []
    # Iterate over the raw coordinate arrays: `series[i]` would be a label
    # lookup and fails as soon as the inputs carry a non-default index.
    coordinates = zip(starts.x.values, starts.y.values, stops.x.values, stops.y.values)
    for startLon, startLat, stopLon, stopLat in coordinates:
        route = routingEngine.directions(locations=[[startLon, startLat], [stopLon, stopLat]], profile='car')
        routes.append(route)
        distances.append(route.distance)
    return distances, routes

# Load data

In [4]:
# Building locations: the CSV provides one x/y coordinate pair per row, which
# is interpreted as EPSG:3857.
buildings = pd.read_csv("../Buildings.csv")
# Build all points in one vectorised call instead of a Python-level loop.
geometry = gpd.points_from_xy(buildings['x'], buildings['y'])
buildings = gpd.GeoDataFrame(buildings, geometry=geometry, crs=3857)

# Admin-level-9 polygons that intersect the admin-level-6 polygon named
# 'München', read from the OSM PostGIS database.
# The connection URL can be supplied through the OSM_DB_URL environment
# variable so that real credentials never have to live in the notebook; the
# fallback is the local development database used so far.
dbUrl = os.environ.get("OSM_DB_URL", 'postgresql://postgres:password@localhost/OSM_Ger')
engine = sqlalchemy.create_engine(dbUrl)
try:
    with engine.connect() as conn:
        sql = ("SELECT b.way, b.way_area FROM planet_osm_polygon as a JOIN planet_osm_polygon as b ON ST_Intersects(a.way, b.way)"
               " AND a.admin_level='6' AND a.name='München' AND b.admin_level='9'")
        adminAreas = gpd.GeoDataFrame.from_postgis(sql, conn, geom_col="way")
finally:
    # Closing the connection alone leaves the engine's pool open.
    engine.dispose()

# INSPIRE Grid
usecols = ['OBJECTID', 'id', 'geometry']

# Root folder that contains one sub-folder per grid resolution. Override it
# with the INSPIRE_GRID_DIR environment variable on other machines; the
# default is the location used so far.
inspireDir = os.environ.get("INSPIRE_GRID_DIR", "C:/Users/strobel/Projekte/esmregio/Daten/INSPIRE_Grids")


def loadInspireGrid(resolution):
    """Load the INSPIRE grid of one resolution, restricted to ``adminAreas``.

    Parameters
    ----------
    resolution : str
        Resolution label as used in the folder and file names
        ("500m", "1km" or "5km").

    Returns
    -------
    GeoDataFrame
        The grid cells selected by the ``adminAreas`` mask, reprojected to
        EPSG:3857, reduced to the ``usecols`` columns and indexed by "id".
    """
    path = "{root}/{res}/geogitter/DE_Grid_ETRS89-LAEA_{res}.gpkg".format(root=inspireDir, res=resolution)
    grid = gpd.read_file(path, mask=adminAreas).to_crs(epsg=3857)
    return grid[usecols].set_index("id")


inspire500 = loadInspireGrid("500m")
inspire1k = loadInspireGrid("1km")
inspire5k = loadInspireGrid("5km")

# MID
# One-letter location codes -> the long activity names used in the rest of
# the notebook.
lngNameMap = {"W": "WORK", "B": "BUSINESS", "S": "SCHOOL", "P": "SHOPPING", "O": "OTHER", "H": "HOME"}
trips, persons = loadData()


def _toLongName(code):
    """Translate a one-letter location code; ``None`` is passed through."""
    if code is None:
        return None
    return lngNameMap[code]


# Both trip ends use the same coding, so translate them the same way.
for _locColumn in ("StartLoc", "DestLoc"):
    trips[_locColumn] = trips[_locColumn].apply(_toLongName)

In [15]:
# Read the GAMG result file. JSON is UTF-8 by specification, so do not rely on
# the platform default encoding (cp1252 on Windows).
with open("../out.json", 'r', encoding="utf-8") as f:
    gamgOut = json.load(f)

In [16]:
# Flatten the per-agent activity profiles into one row per activity.
agentID, activities, dwellTimes = [], [], []
xCoords, yCoords = [], []
for person in gamgOut:
    iD = person['id']
    for activity in person['profile']:
        agentID.append(iD)
        activities.append(activity['type'])
        xCoords.append(activity['x'])
        yCoords.append(activity['y'])
        dwellTimes.append(activity["stayTime"])

# Build the geometry column directly as a GeoSeries. Passing a list of shapely
# Points through a plain pandas DataFrame forces an object-array conversion,
# which is what raised the deprecation warning under this cell.
locations = gpd.GeoSeries(gpd.points_from_xy(xCoords, yCoords))
gamgOutput = gpd.GeoDataFrame(
    {"AgentID": agentID, "StartLoc": activities, "location": locations, "dwellTimes": dwellTimes},
    geometry="location",
    crs=3857,
)

  arr = construct_1d_object_array_from_listlike(values)


---

# Trip-Starts
## Visual

In [17]:
# Grid per resolution. The keys double as name suffixes: they are appended to
# 'GITTER_SO_' to pick the matching MID column and to the names of the
# generated count columns.
resolutions = {"500m": inspire500, "1km": inspire1k, "5km": inspire5k}

In [18]:
# TODO: one cell has too many trips -> investigate. Donhasuer says it is probably an error; he deleted it.
# erroneousCell = '500mN27830E44360'
activities = ["ALL", "HOME", "WORK", "SHOPPING", "BUSINESS", "OTHER", "SCHOOL"]
startSources = {"MID": trips, "GAMG": gamgOutput}


def countStartsPerCell(name, df, mask, inspire, resolution):
    """Count the selected rows of ``df`` per grid cell of ``inspire``.

    Parameters
    ----------
    name : str
        "MID" selects the id-based count; any other value the spatial join.
    df : DataFrame or GeoDataFrame
        The rows to count.
    mask : boolean Series
        Row selection on ``df`` (same index as ``df``).
    inspire : GeoDataFrame
        Grid cells, indexed by cell id.
    resolution : str
        Resolution label; for MID it selects the column
        'GITTER_SO_' + resolution, which holds grid cell ids (presumably the
        cell a trip starts in - TODO confirm).

    Returns
    -------
    Series
        Count per cell id. Cells without any row are absent.
    """
    if name == "MID":
        midCol = 'GITTER_SO_' + resolution
        inGrid = df[midCol].isin(inspire.index)
        return df[inGrid & mask].groupby(midCol)["W_ID"].count()

    selected = df[mask]
    try:
        joined = gpd.sjoin(inspire, selected, predicate='contains')
    except TypeError:
        # Older geopandas releases only know the former keyword name `op`.
        joined = gpd.sjoin(inspire, selected, op='contains')
    return joined.groupby(level=0).index_right.count()


for activity in activities:
    for name, df in startSources.items():
        # The row selection does not depend on the resolution, so build it once.
        if activity == "ALL":
            mask = pd.Series(True, index=df.index)
        else:
            mask = df.StartLoc == activity

        for resolution, inspire in resolutions.items():
            column = name + "StartCount" + activity + resolution
            counts = countStartsPerCell(name, df, mask, inspire, resolution)
            # Assign the filled result explicitly: a chained
            # `inspire[column].fillna(0, inplace=True)` only modifies a
            # temporary once pandas runs in Copy-on-Write mode.
            inspire[column] = counts.reindex(inspire.index).fillna(0)
            inspire[column + "_Perc"] = inspire[column] / inspire[column].sum()

In [20]:
keys = ["MIDStartCountALL", "GAMGStartCountALL", "MIDStartCountWORK", "GAMGStartCountWORK"]
# Upper bound of the logarithmic colour scale, one entry per row of the figure
# (i.e. per entry of `resolutions`, in the same order).
logUB = [0.01, 0.01, 1]

# One row per resolution, one column per key. The matplotlib backend is
# already selected in the import cell, so it is not set again here.
f, ax = plt.subplots(len(resolutions), len(keys), figsize=(10, 10), squeeze=False)
f.tight_layout()
for i, key in enumerate(keys):
    # Only the right-most column gets a colour bar.
    legend = i == len(keys) - 1
    for j, (resolution, inspire) in enumerate(resolutions.items()):
        column = key + resolution + "_Perc"
        axis = ax[j, i]
        # Cells with a zero share cannot be placed on a logarithmic scale.
        visibleCells = inspire[inspire[column] > 0]
        visibleCells.plot(column=column, ax=axis, legend=legend, norm=matplotlib.colors.LogNorm(0.001, logUB[j]))
        # Styling
        # NOTE(review): newer xyzservices/contextily releases no longer ship
        # the Stamen providers - confirm this source still resolves in the
        # target environment.
        cx.add_basemap(axis, source=cx.providers.Stamen.TonerLite)
        axis.set_axis_off()
    ax[0, i].set_title(key)
        

---

# Metrics:

- Trip starts (i.e. Activity locations) distributions
- OD-matrix RMSE
- With Routing: 
  - Daily driven distance
  - Street usage?
- Time accuracy:
  - Activity profiles: compare as in the paper

## Routing

In [117]:
# Every activity is routed to the location of the row that follows it, so
# shift the location column up by one row (the last row has no successor).
nextLocations = list(gamgOutput["location"].values[1:])
nextLocations.append(np.nan)
gamgOutput["stopLocation"] = nextLocations

# Rows without a dwell time get no destination and are not routed.
isLastActivity = gamgOutput.dwellTimes.isna()
gamgOutput.loc[isLastActivity, "stopLocation"] = np.nan
notLastActivity = ~isLastActivity

routedRows = gamgOutput[notLastActivity]
startLocs = routedRows.location.values
stopLocs = routedRows.stopLocation.values
distances, _ = getRoutedDistances(startLocs, stopLocs)
gamgOutput.loc[notLastActivity, "distance"] = distances

  arr = construct_1d_object_array_from_listlike(values)


## Daily driven distance

In [124]:
data = {"MID" : trips, "GAMG": gamgOutput}
metrics = pd.DataFrame(columns=data.keys())

# MID: person kilometres of the persons whose 500 m cell lies inside the grid.
# Values at or above the threshold are dropped (presumably implausible
# outliers - TODO confirm the cut-off).
maxPersonKm = 2000
dists = persons[persons.GITTER_500m.isin(inspire500.index)].perskm2
dists = dists[dists < maxPersonKm]

# GAMG: routed distances summed per agent and converted to kilometres.
# Computed once instead of once per statistic.
gamgPersonKm = gamgOutput.groupby('AgentID')['distance'].sum() / 1000

for name, personKm in {"MID": dists, "GAMG": gamgPersonKm}.items():
    metrics.loc['Person-Kilometer 25% Quantile', name] = personKm.quantile(0.25)
    metrics.loc['Person-Kilometer Median', name] = personKm.quantile(0.50)
    metrics.loc['Person-Kilometer 75% Quantile', name] = personKm.quantile(0.75)
    metrics.loc['Person-Kilometer Mean', name] = personKm.mean()

In [125]:
# Display the person-kilometre statistics of MID and GAMG side by side.
metrics

Unnamed: 0,MID,GAMG
Person-Kilometer 25% Quantile,4.3025,4.5345
Person-Kilometer Median,13.72,14.9965
Person-Kilometer 75% Quantile,31.0875,26.62975
Person-Kilometer Mean,38.780979,17.929259


## 5km-5km cell flow RMSE