# Input data preprocessing

In [1]:
# Load IPython's autoreload extension and set it to mode 2, which reloads
# imported modules before each cell executes so that edits to project code
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import os
import subprocess
import sys
def get_repo_root():
    """Get the root directory of the repo.

    Runs ``git rev-parse --show-toplevel`` from the current working
    directory and returns what git prints, with trailing whitespace removed.

    Returns:
        str: Path of the top-level directory of the enclosing git work tree.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status
            (for example when the working directory is not inside a repo).
        OSError: If the ``git`` executable cannot be started.
    """
    # The previous ``os.path.dirname(os.path.abspath('__file__'))`` used a
    # quoted string, not the ``__file__`` variable, so it only ever resolved
    # to the working directory. Ask for that directly.
    dir_in_repo = os.getcwd()
    return subprocess.check_output(['git', 'rev-parse', '--show-toplevel'],
                                   cwd=dir_in_repo,
                                   universal_newlines=True).rstrip()
# Resolve the repo root once (each call spawns a git subprocess) and reuse it.
ROOT_dir = get_repo_root()
# Make modules under the repo root importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if ROOT_dir not in sys.path:
    sys.path.append(ROOT_dir)

In [2]:
import pandas as pd
import geopandas as gpd

# City whose road network is processed, and the folder inside the repo that
# holds the OSM shapefiles.
city = 'dublin'
osm_folder = f'{ROOT_dir}/dbs/osm/'
# Full path of the shapefile for the selected city.
network = f'{osm_folder}{city}.shp'

## 1. Load data

In [7]:
# Road network shapefile; osm_id is cast to int because it is used below as
# the key for joining against the flow table.
gdf = gpd.read_file(network)
gdf['osm_id'] = gdf['osm_id'].astype(int)

# Flow/speed table, without the two unnamed columns present in the CSV.
flow_csv = ROOT_dir + '/dbs/flow3.csv'
df = pd.read_csv(flow_csv).drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
df.columns

Index(['Time', 'Site_name', 'Here_segmentID', 'Flow_Direction_0',
       'Flow_Direction_1', 'Speed_direction_0', 'Speed_direction_1',
       'Speed_segment_name_dir_0', 'Speed_segment_name_dir_1', 'OSM_ID_dir_0',
       'OSM_Max_Speed_dir_0', 'OSM_ID_dir_1', 'OSM_Max_Speed_dir_1',
       'Total_Lane_count', 'Max_Flow_Direction_0', 'Max_Flow_Direction_1',
       '95th_Flow_Direction_0', '95th_Flow_Direction_1',
       'Speed_segment_dir_0_FFS', 'Speed_segment_dir_1_FFS'],
      dtype='object')

### 1.1 For flow3.csv, separate the directions

In [8]:
def direction_name_mapping(direction):
    """Return the rename map for one direction of the flow3.csv table.

    Args:
        direction: Direction suffix used in the raw column names (0 or 1).

    Returns:
        dict: Raw column name -> unified column name. The key order is also
        the column order used when selecting from the raw table.
    """
    return {
        'Time': 'time',
        'Site_name': 'sensor',
        'Here_segmentID': 'HERE_segID',
        f'Flow_Direction_{direction}': 'flow',
        f'Speed_direction_{direction}': 'speed',
        f'Speed_segment_dir_{direction}_FFS': 'speed_ff',
        f'OSM_ID_dir_{direction}': 'osm_id',
    }


def extract_direction(records, direction):
    """Select and rename the columns of one direction and tag the rows.

    Args:
        records: Raw flow table holding the per-direction columns.
        direction: Direction suffix to extract (0 or 1).

    Returns:
        pandas.DataFrame: New frame with columns time, sensor, HERE_segID,
        flow, speed, speed_ff, osm_id and a constant ``direction`` column.
    """
    mapping = direction_name_mapping(direction)
    return (records.loc[:, list(mapping)]
            .rename(columns=mapping)
            .assign(direction=direction))


# Direction 0
name_mapping0 = direction_name_mapping(0)
df0 = extract_direction(df, 0)

# Direction 1
name_mapping1 = direction_name_mapping(1)
df1 = extract_direction(df, 1)

# NOTE: `df` is rebound from the raw wide table to the stacked long table, so
# the cell that loads flow3.csv must be re-run before re-running this cell
# (the raw column names no longer exist afterwards).
# ignore_index gives the stacked frame unique row labels instead of repeating
# every label once per direction.
df = pd.concat([df0, df1], ignore_index=True)
df.head()

Unnamed: 0,time,sensor,HERE_segID,flow,speed,speed_ff,osm_id,direction
0,2018-02-01 00:00:00,513,2_10462,705,8.72,15.0,37865178,0
1,2018-02-01 00:00:00,513,2_8332,705,20.57,28.0,4385840,0
2,2018-02-01 00:00:00,513,2_4190,705,20.03,27.0,4385840,0
3,2018-02-01 00:00:00,513,2_18258,705,18.64,31.0,4385840,0
4,2018-02-01 00:00:00,513,2_18259,705,21.07,27.0,4385840,0


In [9]:
# Attach the OSM attributes to every flow record. The right-hand table is
# reduced to one row per osm_id first, so the left join cannot multiply rows;
# validate= makes pandas check that assumption.
osm_attrs = gdf.loc[:, ['osm_id', 'oneway', 'lanes']].drop_duplicates(subset=['osm_id'])
df_n = pd.merge(df, osm_attrs, on='osm_id', how='left', validate='many_to_one')

# Missing lanes are assumed to be small roads with lane number = 1.
# Any value other than '1'..'4' (including unmatched osm_ids) also becomes 1.
# The column is replaced outright (not written through .loc[:, ...]) so it
# ends up with an integer dtype regardless of the pandas version.
valid_lanes = df_n['lanes'].isin(['4', '3', '2', '1'])
df_n['lanes'] = df_n['lanes'].where(valid_lanes, '1').astype(int)

# Missing values are assumed to be small roads with oneway = 1.
# Any value other than 0/1 also becomes 1.
valid_oneway = df_n['oneway'].isin([0, 1])
df_n['oneway'] = df_n['oneway'].where(valid_oneway, 1).astype(int)
df_n.head()

Unnamed: 0,time,sensor,HERE_segID,flow,speed,speed_ff,osm_id,direction,oneway,lanes
0,2018-02-01 00:00:00,513,2_10462,705,8.72,15.0,37865178,0,0,1
1,2018-02-01 00:00:00,513,2_8332,705,20.57,28.0,4385840,0,0,1
2,2018-02-01 00:00:00,513,2_4190,705,20.03,27.0,4385840,0,0,1
3,2018-02-01 00:00:00,513,2_18258,705,18.64,31.0,4385840,0,0,1
4,2018-02-01 00:00:00,513,2_18259,705,21.07,27.0,4385840,0,0,1


In [10]:
# Number of records for every (oneway, lanes) combination.
group_sizes = df_n.groupby(by=['oneway', 'lanes']).size()
group_sizes

oneway  lanes
0       1        552814
        2        586333
        3        201158
        4        185189
1       1        355506
        2        252212
        3        154713
        4         38233
dtype: int64

In [11]:
# Count distinct HERE segment ids (a missing id, if any, counts as one value).
n_segments = df_n['HERE_segID'].nunique(dropna=False)
print('Number of road segments:', n_segments)

Number of road segments: 557


## 2. Define theoretical capacity

In [12]:
# Capacity lookup keyed by (lanes, oneway); the cell that adds the `capacity`
# column indexes it with (row['lanes'], row['oneway']).
# NOTE(review): units are not stated in the notebook - presumably vehicles
# per hour; confirm.
capacity_dict = {
    (1, 1): 1200,
    (1, 0): 700,
    (2, 1): 2400,
    (2, 0): 1500,
    (3, 1): 3600,
    (3, 0): 2800,
    (4, 1): 4800,
    (4, 0): 4000,
}

In [13]:
# Look up the capacity of every record from its (lanes, oneway) pair.
# Iterating the two columns directly avoids building a Series per row, which
# DataFrame.apply(axis=1) does; an unknown pair still raises KeyError.
df_n['capacity'] = [capacity_dict[key]
                    for key in zip(df_n['lanes'], df_n['oneway'])]
# Persist the enriched table without the row index.
df_n.to_csv(ROOT_dir + '/dbs/flow3m.csv', index=False)

## 3. Test the simple form of the BPR function

In [9]:
# Rebinds df_n to the table stored in flow2m.csv.
# NOTE(review): this reads flow2m.csv, not the flow3m.csv written earlier in
# this notebook - confirm that this is the intended input.
flow2m_path = ROOT_dir + '/dbs/flow2m.csv'
df_n = pd.read_csv(flow2m_path)
def capacity_estimation(a=None, L=None, u=None, RT=None, l=None):
    """Evaluate ``1000 * l / (0.278 * RT + u / (26 * a) + L)``.

    All five arguments must be supplied: the ``None`` defaults are only
    placeholders and make the arithmetic raise ``TypeError``. The function is
    plain arithmetic, so scalars and array-like numeric inputs both work.

    NOTE(review): the parameter meanings are not documented in this notebook.
    Presumably ``a`` is a deceleration, ``L`` a vehicle length, ``u`` a speed,
    ``RT`` a reaction time and ``l`` a lane count - confirm, and also confirm
    whether the ``L`` term is meant to be divided by ``u``.

    Returns:
        The value of the expression above for the given inputs.
    """
    denominator = 0.278 * RT + u / (26 * a) + L
    return 1000 * l / denominator
# capacity_estimation is plain arithmetic, so it is evaluated on the whole
# columns at once instead of once per row through DataFrame.apply(axis=1).
df_n['capacity_est'] = capacity_estimation(a=2.5, L=4.5,
                                           u=df_n['OSM_Max_Speed'],
                                           RT=1.1, l=df_n['lanes'])

In [12]:
# Coefficients used in the expression below.
alpha, beta = 0.15, 4
# Here_FFS
# With ratio = Here_FFS / Here_Speed_uncap, the assignment below is the
# solution of  ratio = 1 + alpha * (flow / capacity) ** beta  for flow.
speed_ratio = df_n['Here_FFS'] / df_n['Here_Speed_uncap']
saturation = ((speed_ratio - 1) / alpha) ** (1 / beta)
df_n.loc[:, 'flow'] = df_n['capacity'] * saturation