## Built environment of individual home grids
1. Number of jobs within 15 km (by car)
2. Number of transit stations within 800 m (by walking)

In [1]:
%load_ext autoreload
%autoreload 2
%cd D:\mobi-social-segregation-se

D:\mobi-social-segregation-se


In [2]:
# Load libs
import pandas as pd
import geopandas as gpd
from pyrosm import OSM
import preprocess
import sqlalchemy

  shapely_geos_version, geos_capi_version_string


In [3]:
# Data location
user = preprocess.keys_manager['database']['user']
password = preprocess.keys_manager['database']['password']
port = preprocess.keys_manager['database']['port']
db_name = preprocess.keys_manager['database']['name']
engine = sqlalchemy.create_engine(f'postgresql://{user}:{password}@localhost:{port}/{db_name}')

## 1. Number of jobs within 15 km by car

In [4]:
cmd_car = """
osmosis --read-pbf  sweden-latest.osm.pbf --tf accept-ways highway=motorway,motorway_link,trunk,trunk_link,primary,primary_link,secondary_link,secondary,tertiary,motorway_junction --used-node --write-pbf sweden_major.osm.pbf
"""
osm_file_car = "dbs/geo/sweden_major.osm.pbf"
distance_threshold_car = 15000  # m

### 1.1 Load driving network as a pandana graph

In [5]:
osm = OSM(osm_file_car)
nodes, edges = osm.get_network(network_type="driving", nodes=True)
G = osm.to_graph(nodes, edges, graph_type="pandana")

### 1.2 Get job centroids

In [6]:
jobs = gpd.GeoDataFrame.from_postgis("""SELECT zone, job, geom FROM grids;""", con=engine)
# Ensure all jobs are represented as points
jobs["geometry"] = jobs.centroid
jobs = jobs.drop(columns=['geom'])

In [7]:
jobs = gpd.GeoDataFrame(jobs, geometry=jobs['geometry'], crs=3006).to_crs(4326)

In [8]:
jobs["lat"] = jobs["geometry"].y
jobs["lon"] = jobs["geometry"].x
jobs = jobs.dropna(subset=["lon", "lat"])
jobs = jobs.loc[jobs.job > 0, ['zone', 'job', 'lat', 'lon']]
jobs.head()

Unnamed: 0,zone,job,lat,lon
1,3952506385500,33.0,57.60087,13.249291
2,5070006231750,29.0,56.231713,15.11493
4,7070006642000,5.0,59.867831,18.706639
5,4190006188000,6.0,55.835333,13.71469
9,5155006491250,3.0,58.562553,15.268567


### 1.3 Find the closest graph nodes of jobs

In [9]:
# Find the closest node-id for each job
node_ids = G.get_node_ids(jobs.lon, jobs.lat)
# Add employee counts to the graph
G.set(node_ids, variable=jobs.job, name="job_cnt")

### 1.4 Load home locations and find their closest graph nodes

In [10]:
df_home = pd.read_sql("""SELECT uid, lat, lng, zone FROM home_p;""", con=engine)
# Find the closest node-id for each individual
home_node_ids = G.get_node_ids(df_home.lng, df_home.lat).to_frame(name="node")
df_home.loc[:, 'node'] = home_node_ids['node']

### 1.5 Calculate cumulative number of jobs from each node

In [11]:
result = G.aggregate(distance_threshold_car, type="sum", decay="linear", name="job_cnt")
result = result.to_frame(name="cum_jobs").reset_index().rename(columns={'index': 'node'})

In [12]:
df_home = pd.merge(df_home, result, on='node', how='left')
df_home.head()

Unnamed: 0,uid,lat,lng,zone,node,cum_jobs
0,00008608-f79e-414d-bf1c-25632d6bc059,56.174205,12.569499,3490006228000,2795563187,4813.309325
1,0000c837-ef82-4dfd-b2a5-00bdc8680b0b,59.280962,18.006834,6712506575000,2280453583,251052.005647
2,0000cd68-c931-4e3c-96f6-7c5837f59b08,59.267604,15.862095,5490006570000,8975440739,65.054466
3,0000f6ad-ffa4-4af2-9c2a-49d6dc86ec3a,57.89,16.39,5822506417250,3196663385,945.954997
4,000115f0-937a-4716-8d8b-09b1ed54c5ce,59.284523,17.792851,6590006575000,56506258,5956.498001


## 2. Number of transit stations within 800 walking distance

In [13]:
cmd_pt = """
osmosis --read-pbf  sweden-latest.osm.pbf --tf accept-ways highway=living_street,footway,pedestrian --used-node --write-pbf sweden_pedestrian.osm.pbf
"""
osm_file_pt = "dbs/geo/sweden_pedestrian.osm.pbf"
distance_threshold_pt = 800  # m

### 2.1 Load Walking network as a pandana graph

In [14]:
osm = OSM(osm_file_car)
nodes, edges = osm.get_network(network_type="walking", nodes=True)
G = osm.to_graph(nodes, edges, graph_type="pandana")

### 2.2 Get stop locations and their closest nodes

In [15]:
stops = gpd.GeoDataFrame.from_postgis(sql="""SELECT stop_id, stop_geom as geom FROM gtfs.stops;""", con=engine)
stops["lat"] = stops["geom"].y
stops["lon"] = stops["geom"].x
stops = stops.dropna(subset=["lon", "lat"])
stops.loc[:, 'stop_cnt'] = 1

In [16]:
# Find the closest node-id for each job
node_ids = G.get_node_ids(stops.lon, stops.lat)
# Add employee counts to the graph
G.set(node_ids, variable=stops.stop_cnt, name="stop_cnt")

### 2.3 Load home locations and find their closest graph nodes

In [22]:
df_home_s = pd.read_sql("""SELECT uid, lat, lng, zone FROM home_p;""", con=engine)
# Find the closest node-id for each individual
home_node_ids = G.get_node_ids(df_home_s.lng, df_home_s.lat).to_frame(name="node")
df_home_s.loc[:, 'node'] = home_node_ids['node']

### 2.4 Calculate cumulative number of stops from each node

In [23]:
result_s = G.aggregate(distance_threshold_pt, type="count", decay="linear", name="stop_cnt")
result_s = result_s.to_frame(name="cum_stops").reset_index().rename(columns={'index': 'node'})

In [24]:
df_home_s = pd.merge(df_home_s, result_s, on='node', how='left')
df_home_s.head()

Unnamed: 0,uid,lat,lng,zone,node,cum_stops
0,00008608-f79e-414d-bf1c-25632d6bc059,56.174205,12.569499,3490006228000,2795563187,3.0
1,0000c837-ef82-4dfd-b2a5-00bdc8680b0b,59.280962,18.006834,6712506575000,441895702,487.0
2,0000cd68-c931-4e3c-96f6-7c5837f59b08,59.267604,15.862095,5490006570000,8975440739,0.0
3,0000f6ad-ffa4-4af2-9c2a-49d6dc86ec3a,57.89,16.39,5822506417250,3196663385,8.0
4,000115f0-937a-4716-8d8b-09b1ed54c5ce,59.284523,17.792851,6590006575000,1886742812,82.0


## 3. Combine data and save

In [25]:
df_home = pd.merge(df_home[['uid', 'zone', 'cum_jobs']],
                   df_home_s[['uid', 'cum_stops']],
                   on='uid', how='outer')
df_home.fillna(0, inplace=True)
df_home.to_sql(name='access_grid', schema='built_env', con=engine, index=False,
               method='multi', if_exists='replace', chunksize=1000)