# Accessibility to jobs by public transit - data prep

In [1]:
# Reload edited project modules automatically so changes under lib/ are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# NOTE(review): machine-specific absolute path. Later cells use paths relative to this
# directory (e.g. 'dbs/geo/...', 'dbs/accessibility/...'), so the working directory must be the project root.
%cd D:\mobi-social-segregation-se

D:\mobi-social-segregation-se


In [2]:
# Load libs
import os
os.environ['USE_PYGEOS'] = '0'
import pandas as pd
import r5py
import matplotlib.pyplot as plt
import geopandas as gpd
import sqlalchemy
from lib import preprocess as preprocess
from lib import routing_helpers as rh
from tqdm.notebook import tqdm
import datetime
import time

In [3]:
# Data location
# Connection settings are read from the project's key store (preprocess.keys_manager).
db_keys = preprocess.keys_manager['database']
user = db_keys['user']
password = db_keys['password']
port = db_keys['port']
db_name = db_keys['name']
# Build the URL from its components instead of interpolating into a string:
# URL.create() escapes special characters (e.g. '@', '/', ':') in the user name
# and password, which would otherwise corrupt a hand-formatted connection URL.
db_url = sqlalchemy.engine.URL.create(
    drivername='postgresql',
    username=user,
    password=password,
    host='localhost',
    port=port,
    database=db_name,
    query={'gssencmode': 'disable'},
)
engine = sqlalchemy.create_engine(db_url)

## 0. Update GTFS data

In [None]:
# Regional operators whose GTFS feeds are fetched one by one.
regional_operators = [
    "blekinge", "dt", "dintur", "gotland", "halland", "jlt", "klt", "krono", "jamtland",
    "norrbotten", "vasterbotten", "orebro", "skane", "sl", "sormland", "ul", "vastmanland",
    "varm", "vt", "xt", "otraf", "sj",
]
progress = tqdm(regional_operators, desc='Downloading GTFS data')
for n_done, rg in enumerate(progress, start=1):
    rh.gtfs_downloader(
        region='sweden',
        user='yuan',
        region_operator=rg,
        skip_country=True,
        api_version=3,
    )
    # Pause for a minute after every 10th download
    # (presumably to stay within the API's request quota - TODO confirm).
    if n_done % 10 == 0:
        time.sleep(60)

## 1. Reduce the extent of OSM
Create a `.poly` boundary file for each county and clip the national OSM extract to it.

In [4]:
# Load the zones with their geometry and derive a county key from the
# first two characters of each DeSO code.
gdf_z = gpd.GeoDataFrame.from_postgis(
    sql="""SELECT deso, geom FROM zones;""",
    con=engine,
)
gdf_z.loc[:, 'deso_2'] = gdf_z['deso'].map(lambda code: code[:2])
counties = gdf_z['deso_2'].unique()

In [5]:
# Work on a single county: the first two-character code found in the zones table.
# Every later cell (OSM extract, origins, destinations, output file names) is keyed on this value.
county = counties[0]

In [None]:
def osm_extent(county, root_dir="D:/mobi-social-segregation-se/"):
    """Write a buffered boundary for one county and clip the national OSM extract to it.

    The boundary is the convex hull of all zones in ``gdf_z`` whose ``deso_2``
    equals ``county``, buffered by 40000 CRS units (presumably metres - confirm
    against the CRS of the zones table). It is written as a ``.poly`` file and
    passed to ``rh.osm_country2region`` together with the national ``.osm.pbf``.

    Parameters
    ----------
    county : str
        Two-character county code, matched against ``gdf_z.deso_2``.
    root_dir : str, optional
        Project root used to build the absolute paths handed to the OSM
        extraction. Defaults to the location previously hard-coded here.

    Returns
    -------
    geopandas.GeoDataFrame
        One-row frame holding the buffered hull in the CRS of ``gdf_z``, so
        callers can reuse it (e.g. to select destinations) without rebuilding it.
    """
    gdf_c = gdf_z.loc[gdf_z.deso_2 == county, :]
    hull_geom = gdf_c.unary_union.convex_hull.buffer(40000)
    convex_hull = gpd.GeoDataFrame(geometry=[hull_geom], crs=gdf_c.crs)
    # .area is a one-element Series; print the scalar instead of the Series repr.
    print(f"Area for {county}", convex_hull.area.iloc[0] / 10**6)
    rh.gdf2poly(geodata=convex_hull, targetfile=f'dbs/geo/sweden_bounding_{county}.poly', buffer=0)

    # Process data
    osm_file = os.path.join(root_dir, 'dbs/geo/sweden-latest.osm.pbf')
    # 'terget_file' (sic) is the keyword name expected by rh.osm_country2region.
    terget_file = os.path.join(root_dir, f'dbs/accessibility/sweden-{county}.osm.pbf')
    poly_file = os.path.join(root_dir, f'dbs/geo/sweden_bounding_{county}.poly')
    osmosis_path = 'osmosis'
    rh.osm_country2region(osm_file=osm_file,
                          terget_file=terget_file,
                          poly_file=poly_file,
                          osmosis_path=osmosis_path)
    return convex_hull

In [8]:
# Build the county boundary and clip the national OSM extract in one step.
# osm_extent() writes the .poly file that the extraction reads, so calling it here
# (rather than repeating only the extraction half of its body) keeps a fresh-kernel
# run working and removes the copy-pasted path setup.
# The trailing semicolon suppresses any cell output from the call.
osm_extent(county);

## 2. Prepare origins and destinations

In [15]:
# Load grids: zone id, population, job count and geometry of every grid cell.
gdf_g = gpd.read_postgis(
    sql="""SELECT zone, pop, job, geom FROM grids;""",
    con=engine,
)

In [16]:
# Refine destinations: keep grid cells with at least one job that fall within
# the buffered hull of the selected county.
# `convex_hull` was only ever a local variable inside osm_extent(), so it is rebuilt
# here to keep this cell runnable on a fresh kernel.
# NOTE(review): uses the same recipe as osm_extent() (county convex hull buffered by
# 40000 CRS units) - confirm this is the hull the destinations were originally clipped to.
gdf_c = gdf_z.loc[gdf_z.deso_2 == county, :]
convex_hull = gpd.GeoDataFrame(
    geometry=[gdf_c.unary_union.convex_hull.buffer(40000)],
    crs=gdf_c.crs,
)
gdf_d = gpd.sjoin(gdf_g.loc[gdf_g.job > 0, :], convex_hull)
gdf_d = gdf_d.drop(columns=['index_right']).rename(columns={'geom': 'geometry'}).set_geometry('geometry')

In [17]:
# Refine origins: grid cells that contain at least one home location
# belonging to the selected county.
df = pd.read_sql(sql="""SELECT uid, zone, deso FROM home_p;""", con=engine)
# County key = first two characters of the DeSO code.
df.loc[:, 'deso_2'] = df['deso'].map(lambda code: code[:2])
in_county = df['deso_2'] == county
df = df.loc[in_county, :]
is_home_cell = gdf_g['zone'].isin(df['zone'])
gdf_o = (
    gdf_g.loc[is_home_cell, :]
    .copy()
    .rename(columns={'geom': 'geometry'})
    .set_geometry('geometry')
)

In [18]:
# Report how many origin and destination cells were selected.
print(f"Length of origins: {len(gdf_o)} Length of destinations: {len(gdf_d)}")

Length of origins: 7949 Length of destinations: 15460


In [19]:
# Collapse every grid cell to its centroid (computed before reprojecting),
# convert to EPSG:4326, then expose the coordinates as lon/lat columns.
gdf_o = gdf_o.assign(geometry=gdf_o.geometry.centroid).to_crs(4326)
gdf_d = gdf_d.assign(geometry=gdf_d.geometry.centroid).to_crs(4326)
gdf_o = gdf_o.assign(lon=gdf_o.geometry.x, lat=gdf_o.geometry.y)
gdf_d = gdf_d.assign(lon=gdf_d.geometry.x, lat=gdf_d.geometry.y)

In [20]:
# Plain tables for routing: zone id (renamed to 'id') plus coordinates;
# destinations also carry the job count and are limited to cells with jobs.
origin_cols = ['zone', 'lon', 'lat']
destination_cols = ['zone', 'lon', 'lat', 'job']
has_jobs = gdf_d.job > 0
origins = gdf_o.loc[:, origin_cols].rename(columns={'zone': 'id'})
destinations = gdf_d.loc[has_jobs, destination_cols].rename(columns={'zone': 'id'})
destinations.iloc[0]

id     7070006642000
lon        18.706639
lat        59.867831
job              5.0
Name: 4, dtype: object

In [21]:
# Persist the county's origin and destination tables as CSV (without the index).
origins_csv = f"dbs/accessibility/data/origins_{county}.csv"
destinations_csv = f"dbs/accessibility/data/destinations_{county}.csv"
origins.to_csv(origins_csv, index=False)
destinations.to_csv(destinations_csv, index=False)

## 3. Accessibility estimation using r5r and r5py

In [8]:
# Point rpy2 at the R installation. This is set before rpy2 is imported,
# which is why these imports live here rather than in the top import cell.
# Raw string: the Windows path contains backslashes ("\P", "\R") that a normal
# string literal treats as (invalid) escape sequences.
os.environ['R_HOME'] = r"C:\Program Files\R\R-4.0.2"
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import conversion, default_converter, pandas2ri
with conversion.localconverter(default_converter):
    # Request a 48 GB Java heap before the r5r package is loaded.
    # NOTE(review): the saved output of the next cell reports "Another VM is running
    # already", so this option may not take effect in this kernel - confirm.
    ro.r('''options(java.parameters = "-Xmx48G")''')
    r_r5r = importr('r5r')

In [None]:
# Hand the origin/destination tables to R and build the r5r routing core
# from the files found in `data_path`.
data_path = "dbs/accessibility"
with conversion.localconverter(default_converter + pandas2ri.converter):
    to_r = ro.conversion.get_conversion().py2rpy
    r_origins = to_r(origins)
    r_destinations = to_r(destinations)
    ro.r.assign('data_path', data_path)
    ro.r('''r5r_core <- setup_r5(data_path = data_path)''')

ERROR: Java exception occurred during rJava bootstrap - see stderr for Java stack trace.

R[write to console]: Error in rJava::.jinit() : Unable to create a Java class loader.

R[write to console]: In addition: 

R[write to console]: 1: 
R[write to console]: In rJava::.jinit() :
R[write to console]: 
 
R[write to console]:  Another VM is running already and the VM did not allow me to append paths to the class path.

R[write to console]: 2: 
R[write to console]: In rJava::.jinit() :
R[write to console]: 
 
R[write to console]:  Cannot set VM parameters, because VM is running already.



In [21]:
# Build the r5py network from the county OSM extract and the GTFS feed.
osm_pbf = f"dbs/accessibility/sweden-{county}.osm.pbf"
gtfs_feeds = ["dbs/accessibility/sweden_fixed.zip"]
transport_network = r5py.TransportNetwork(osm_pbf, gtfs_feeds)

In [None]:
# Compute accessibility on the r5py network with a linear decay (decay_value=30),
# departing 2022-02-22 08:30, using transit plus walking.
# NOTE(review): this cell has no execution count and cannot run as written:
#   - `origin` and `points` are not defined anywhere in this notebook
#     (possibly meant to be the origin/destination point sets built above - confirm);
#   - `TransitMode` and `LegMode` are never imported;
#   - confirm that the installed r5py version actually provides `AccessComputer`.
access_computer = r5py.AccessComputer(
    transport_network,
    origins=origin,  # NOTE(review): undefined name
    destinations=points,  # NOTE(review): undefined name
    decay_function='linear',
    decay_value=30,
    departure=datetime.datetime(2022,2,22,8,30),
    transport_modes=[TransitMode.TRANSIT, LegMode.WALK]  # NOTE(review): names not imported
)

access_computer.compute_access()