# Events filtering
1. Create data with feasible agents;
2. Reproject events to make sure link_osm and link_id are consistent.

In [1]:
%load_ext autoreload
%autoreload 2
%cd D:\sysmo-data
%matplotlib inline

D:\sysmo-data


In [2]:
from tqdm.notebook import tqdm
import pandas as pd
import pickle
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd

## 1. Load feasible agent IDs

In [3]:
df_id = pd.read_csv('dbs/results/combined_Sweden_all_unique_agents.csv')
feasible_agents = df_id.loc[df_id.agent_status == 'feasible', 'PId'].unique()

In [5]:
df_pop_s = pd.read_pickle("dbs/scenarios/syn_pop_s.pkl")
df_pop = pd.read_pickle('dbs/syn_pop_all.pkl')
df_pop = pd.merge(df_pop, df_id, on='PId', how='left')
df_pop.fillna('non_car_agent', inplace=True)
tqdm.pandas()
df_pop.loc[:, 'agent_status'] = df_pop.progress_apply(lambda row: 'stationary' if row['PId'] in df_pop_s.PId else row['agent_status'], axis=1)
df_pop.iloc[0]

  0%|          | 0/10203820 [00:00<?, ?it/s]

PId                           1
Deso                  0115A0040
kommun                     0115
marital                  couple
sex                           1
age                          22
HId                           1
HHtype                   couple
HHsize                        2
num_babies                    0
employment                    1
studenthood                   0
income_class                  1
num_cars                      0
HHcars                        0
pot_car_driver         0.279179
region            non_car_agent
agent_status      non_car_agent
Name: 0, dtype: object

In [9]:
df_pop.groupby('agent_status').size() / len(df_pop) * 100

agent_status
feasible         26.246602
infeasible        2.667864
non_car_agent    63.265307
stationary        7.820228
dtype: float64

In [12]:
df_pop.to_parquet('dbs/results/syn_pop_all.parquet', index=False)

## 2. Filter plans

In [36]:
def process_plans(df=None):
    df_r_h = df.loc[(df['act_id'] == 0) & (df['src']=='input'), :]
    home_dict = dict(zip(df_r_h.PId, 
                         list(df_r_h[['POINT_X', 'POINT_Y']].itertuples(index=False, name=None))))
    df = df.loc[df['src'] == 'output', :]
    df.loc[:, 'POINT_X'] = df.apply(lambda row: home_dict[row['PId']][0] if row['act_id'] == 0 else row['POINT_X'],
                                        axis=1)
    df.loc[:, 'POINT_Y'] = df.apply(lambda row: home_dict[row['PId']][1] if row['act_id'] == 0 else row['POINT_Y'],
                                        axis=1)
    return df.drop(columns=['src'])

In [37]:
list_df = []
for region in ('palt20', 'palt21', 'palt22', 'palt23', 'palt24', 'palt25',
               'vast13', 'vast14', 'vast17',
                'samm', 'skane', 'sydost'):
    print(f'Process {region}...')
    df_r = pd.read_csv(f'dbs/output/plans_{region}.csv.gz', compression='gzip')
    df_r = process_plans(df_r)
    df_r = df_r.loc[df_r.PId.isin(feasible_agents), :]
    df_r.loc[:, 'region'] = region
    list_df.append(df_r)
df = pd.concat(list_df)

Process palt20...
Process palt21...
Process palt22...
Process palt23...
Process palt24...
Process palt25...
Process vast13...
Process vast14...
Process vast17...
Process samm...
Process skane...
Process sydost...


In [38]:
df.to_parquet('dbs/results/plans.parquet', index=False)

## 3. Filter events in line with the updated road network

In [4]:
gdf_network = gpd.read_file('dbs/network/network_o.shp')
gdf_network.head()

Unnamed: 0,length,freespeed,capacity,permlanes,oneway,modes,link_osm,from_node,to_node,region,link_id,geometry
0,226.510087,4.166667,600.0,1.0,1,car,10000487120005f,2663465841,1212743975,palt20,0,"LINESTRING (563992.768 6747623.594, 563953.829..."
1,63.598364,4.166667,600.0,1.0,1,car,2122785310003f,1212743850,1212743975,palt20,1,"LINESTRING (563901.883 6747365.117, 563953.829..."
2,69.926807,4.166667,600.0,1.0,1,car,4621445410001r,3342856683,1212743975,palt20,2,"LINESTRING (563941.478 6747331.704, 563953.829..."
3,598.005977,4.166667,600.0,1.0,1,car,9940199710025r,2663465841,9183582768,palt20,3,"LINESTRING (563992.768 6747623.594, 564578.040..."
4,226.510087,4.166667,600.0,1.0,1,car,10000487120005r,1212743975,2663465841,palt20,4,"LINESTRING (563953.829 6747400.496, 563992.768..."


In [13]:
link_dict_rgs = dict()
for region in ('palt20', 'palt21', 'palt22', 'palt23', 'palt24', 'palt25',
               'vast13', 'vast14', 'vast17',
                'samm', 'skane', 'sydost'):
    d = gdf_network.loc[gdf_network.region == region, :]
    link_dict = dict(
        zip(d['link_osm'],
            list(d[['from_node', 'to_node']].itertuples(index=False, name=None)))
    )
    link_dict_rgs[region] = link_dict

In [19]:
link_dict = dict(
    zip(gdf_network['link_osm'],
        list(gdf_network[['from_node', 'to_node']].itertuples(index=False, name=None)))
)

In [25]:
def process_events(events=None, region=None):
    df = events.loc[events.person.isin(feasible_agents), :].copy()
    df.drop(columns=['link_id'], inplace=True)
    df.loc[:, 'from_node'] = df['link'].apply(lambda x: 
                                              link_dict[x][0] if x not in link_dict_rgs[region]
                                              else link_dict_rgs[region][x][0])
    df.loc[:, 'to_node'] = df['link'].apply(lambda x: 
                                            link_dict[x][1] if x not in link_dict_rgs[region] 
                                            else link_dict_rgs[region][x][1])
    df.loc[:, 'region'] = region
    return df

In [26]:
# Data location
for region in ('palt20', 'palt21', 'palt22', 'palt23', 'palt24', 'palt25',
               'vast13', 'vast14', 'vast17',
                'samm', 'skane', 'sydost'):
    print(f'Process {region}...')
    for batch in tqdm(range(0, 7), desc='Reading batches'):
        events_file = f'dbs/events/{region}_events_batch{batch}.csv.gz'
        df_events = pd.read_csv(events_file, compression='gzip')
        to_file = f'dbs/results/events/{region}_events_batch{batch}.parquet'
        df_proc = process_events(events=df_events, region=region)
        df_proc.to_parquet(to_file, index=False)

Process palt20...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process palt21...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process palt22...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process palt23...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process palt24...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process palt25...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process vast13...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process vast14...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process vast17...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process samm...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process skane...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]

Process sydost...


Reading batches:   0%|          | 0/7 [00:00<?, ?it/s]