# Accessibility to jobs by public transit - data prep

In [1]:
# Enable autoreload so edits to imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Work from the project root so the relative 'dbs/...' paths used further down resolve.
# NOTE(review): machine-specific absolute path — adjust when running on another machine.
%cd D:\mobi-social-segregation-se

D:\mobi-social-segregation-se


In [2]:
# Load libs
import os
os.environ['USE_PYGEOS'] = '0'
import pandas as pd
import geopandas as gpd
import sqlalchemy
from lib import preprocess as preprocess
from lib import routing_helpers as rh
from tqdm.notebook import tqdm
import time

In [3]:
# Data location: build the PostgreSQL engine from the credentials held by the project's key manager.
db_keys = preprocess.keys_manager['database']
user, password = db_keys['user'], db_keys['password']
port, db_name = db_keys['port'], db_keys['name']
engine = sqlalchemy.create_engine(
    f'postgresql://{user}:{password}@localhost:{port}/{db_name}?gssencmode=disable'
)

## 0. Update GTFS data

In [None]:
# Operators whose GTFS feeds are downloaded, one request per operator.
regional_operators = [
    "blekinge", "dt", "dintur", "gotland", "halland", "jlt", "klt", "krono", "jamtland",
    "norrbotten", "vasterbotten", "orebro", "skane", "sl", "sormland", "ul", "vastmanland",
    "varm", "vt", "xt", "otraf", "sj",
]

# Pause for 60 s after every 10th download.
# NOTE(review): presumably to stay within an API rate limit — confirm against the provider's quota.
progress = tqdm(regional_operators, desc='Downloading GTFS data')
for n_done, rg in enumerate(progress, start=1):
    rh.gtfs_downloader(
        region='sweden',
        user='yuan',
        region_operator=rg,
        skip_country=True,
        api_version=3,
    )
    if n_done % 10 == 0:
        time.sleep(60)

## 1. Reduce the extent of OSM
Create a `.poly` boundary file for each county, processing one county per run (selected by `county` below).

In [22]:
# Load the zone geometries together with their DeSO codes.
gdf_z = gpd.GeoDataFrame.from_postgis(sql="""SELECT deso, geom FROM zones;""", con=engine)
# The first two characters of the DeSO code serve as the county key.
# Vectorised .str slicing replaces the per-row lambda and yields NaN instead of
# raising a TypeError if a code is missing.
gdf_z['deso_2'] = gdf_z['deso'].str[:2]
# All distinct county keys present in the zones table.
counties = gdf_z['deso_2'].unique()

In [23]:
# Two-character county key processed in this run; it is matched against the
# `deso_2` prefix derived from the DeSO codes and embedded in the output file names.
county = '14'

In [24]:
# Build the boundary for this county: convex hull of its zones, grown by 20000 CRS units.
# NOTE(review): presumably metres (a 20 km margin) — confirm the zones use a projected CRS.
gdf_c = gdf_z.loc[gdf_z.deso_2==county, :]
convex_hull = gdf_c.unary_union.convex_hull.buffer(20000)
# Wrap the single polygon in a GeoDataFrame so it carries the zones' CRS.
convex_hull = gpd.GeoDataFrame(geometry=[convex_hull], crs=gdf_c.crs)
# Area is divided by 10**6 (m² -> km² if the CRS unit is metres); prints a one-row Series.
print(f"Area for {county}", convex_hull.area / 10**6)
rh.gdf2poly(geodata=convex_hull, targetfile=f'dbs/geo/sweden_bounding_{county}.poly', buffer=0)

# Process data
# NOTE(review): absolute, machine-specific project root — keep in sync with the working directory.
ROOT_dir = "D:/mobi-social-segregation-se/"
osm_file = ROOT_dir + 'dbs/geo/sweden-latest.osm.pbf'
# The spelling `terget_file` matches the keyword expected by rh.osm_country2region.
terget_file = ROOT_dir + f'dbs/accessibility/c_{county}/sweden-{county}.osm.pbf'
poly_file = ROOT_dir + f'dbs/geo/sweden_bounding_{county}.poly'
osmosis_path = 'osmosis'
# Each county writes into its own folder; create it up front so a first run for a
# new county does not fail on a missing directory.
os.makedirs(os.path.dirname(terget_file), exist_ok=True)
rh.osm_country2region(osm_file=osm_file,
                      terget_file=terget_file,
                      poly_file=poly_file,
                      osmosis_path=osmosis_path)

Area for 14 0    51699.950158
dtype: float64


## 2. Prepare origins and destinations

In [15]:
# Load grids: zone id, population, job count and geometry for every grid cell.
grid_query = """SELECT zone, pop, job, geom FROM grids;"""
gdf_g = gpd.read_postgis(sql=grid_query, con=engine)

In [25]:
# Refine destinations: grid cells that hold at least one job and spatially join
# (geopandas sjoin defaults) with the buffered county hull.
grids_with_jobs = gdf_g[gdf_g['job'] > 0]
gdf_d = (
    gpd.sjoin(grids_with_jobs, convex_hull)
    .drop(columns=['index_right'])
    .rename(columns={'geom': 'geometry'})
    .set_geometry('geometry')
)

In [26]:
# Refine origins: grid cells that contain the home location of at least one
# individual whose DeSO zone belongs to the selected county.
df = pd.read_sql(sql="""SELECT uid, zone, deso FROM home_p;""", con=engine)
# County key = first two characters of the DeSO code. Vectorised .str slicing
# replaces the per-row lambda and gives NaN (filtered out below) for missing codes
# instead of raising a TypeError.
df['deso_2'] = df['deso'].str[:2]
df = df.loc[df['deso_2'] == county, :]
# Keep only the grid cells referenced by those homes and expose the geometry
# under the conventional 'geometry' column name.
gdf_o = (
    gdf_g.loc[gdf_g['zone'].isin(df['zone']), :]
    .copy()
    .rename(columns={'geom': 'geometry'})
    .set_geometry('geometry')
)

In [27]:
# Sanity check: how many origin and destination cells were selected.
n_origins, n_destinations = len(gdf_o), len(gdf_d)
print("Length of origins:", n_origins, "Length of destinations:", n_destinations)

Length of origins: 8973 Length of destinations: 19085


In [28]:
# Origins: collapse each cell to its centroid in the source CRS, reproject to
# EPSG:4326, then expose the point coordinates as lon/lat columns.
gdf_o["geometry"] = gdf_o.geometry.centroid
gdf_o = gdf_o.to_crs(4326)
gdf_o = gdf_o.assign(lon=gdf_o.geometry.x, lat=gdf_o.geometry.y)

# Destinations: same treatment.
gdf_d["geometry"] = gdf_d.geometry.centroid
gdf_d = gdf_d.to_crs(4326)
gdf_d = gdf_d.assign(lon=gdf_d.geometry.x, lat=gdf_d.geometry.y)

In [29]:
# Plain coordinate tables for routing: the grid `zone` becomes the point `id`;
# destinations additionally carry their job count.
origin_cols = ['zone', 'lon', 'lat']
destination_cols = ['zone', 'lon', 'lat', 'job']
origins = gdf_o[origin_cols].rename(columns={'zone': 'id'})
destinations = gdf_d[destination_cols].rename(columns={'zone': 'id'})
# Peek at the first destination record.
destinations.iloc[0]

id     3952506385500
lon        13.249291
lat         57.60087
job             33.0
Name: 1, dtype: object

In [30]:
# Write the per-county origin/destination tables. pandas' to_csv does not create
# missing parent folders, so make sure the output directory exists first.
out_dir = "dbs/accessibility/data"
os.makedirs(out_dir, exist_ok=True)
origins.to_csv(f"{out_dir}/origins_{county}.csv", index=False)
destinations.to_csv(f"{out_dir}/destinations_{county}.csv", index=False)