# MATSim input data preparation
Prepare road network and synthetic population.

In [1]:
# Reload imported modules automatically before each cell is executed,
# so edits to local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): hard-coded absolute project root. The relative 'dbs/...' paths
# used further down only resolve after this directory change succeeds.
%cd D:\synthetic-sweden
%matplotlib inline

D:\synthetic-sweden


In [2]:
# Load libs
import math
import pandas as pd
import numpy as np
import geopandas as gpd
from tqdm import tqdm
import dataworkers as dw
import matsim
import gzip
import matplotlib.pyplot as plt

## 1. Network preparation

### Load boundary
Create the boundary polygon in QGIS, covering the Västra Götaland Region (VGR).

### Prepare command line
#### .osm.pbf -> .xml.gz

In [3]:
# Folder of the MATSim code-examples checkout that holds the network example classes.
# NOTE(review): hard-coded absolute local path; adjust to your own checkout.
file_java = 'D:\\matsim-code-examples-13.x\\src\\main\\java\\org\\matsim\\codeexamples\\network'
print(file_java)

D:\matsim-code-examples-13.x\src\main\java\org\matsim\codeexamples\network


In [4]:
# Parameters for the .osm.pbf -> MATSim .xml.gz network conversion.
# NOTE(review): `input` and `filter` shadow Python builtins for the rest of the
# kernel session. `filter` is read by the shp2poly cell below, so any rename
# must be applied to both cells together.
# NOTE(review): hard-coded absolute local paths.
Epsg = "EPSG:3006"  # target coordinate reference system
input = "D:\\synthetic-sweden\\dbs\\network\\sweden-latest.osm.pbf"
filter = "D:\\synthetic-sweden\\dbs\\network\\vgr_filter.shp" # Created in QGIS
output = "D:\\synthetic-sweden\\dbs\\network\\vgr.xml.gz"

### Convert the boundary shapefile into a .poly file for Osmosis filtering

In [7]:
# Convert the boundary shapefile into a .poly file using the project helper dw.shp2poly.
# `filter` is the shapefile path string assigned in the cell above, not the Python builtin.
dw.shp2poly(filter, targetfile="D:\\synthetic-sweden\\dbs\\network\\vgr_filter.poly")

## 2. Population preparation

In [3]:
# Numeric activity-purpose codes -> short labels used in the rest of the notebook:
# h = Home, w = Work, o = Other, s = School.
# Applied to df_plan['act_purpose'] right after the activity plans are loaded.
activity_purpose_dict = {1: 'h', 4: 'w', 10: 'o', 6: 's'}

### Load agents who live in VGR

In [4]:
# Synthetic population table (one row per person, keyed by PId).
# The path is relative to the working directory set by %cd in the first cell.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load files from a trusted source.
df_pop = pd.read_pickle('dbs/agents/syn_pop_vgr.pkl')
df_pop.head(3)

Unnamed: 0,PId,Deso,kommun,marital,sex,age,HId,HHtype,HHsize,num_babies,employment,studenthood,income_class,num_cars,HHcars,pot_car_driver
1044304,1044305,0643C1030,643,couple,0,22,322442,couple,3,1,1,0,3,1,2,0.799575
1044305,1044306,0643C1030,643,couple,1,20,322442,couple,3,1,0,0,1,1,2,0.576646
1044306,1044307,0643C1030,643,child,0,0,322442,couple,3,1,0,0,0,0,2,0.0


In [5]:
# Activity plans: one row per (agent, activity). The path is relative to the
# working directory set by %cd in the first cell.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load files from a trusted source.
df_plan = pd.read_pickle('dbs/agents/df_act_plan_vgr.pkl')

# Translate the numeric purpose codes into the short labels h/w/o/s.
purpose_labels = df_plan['act_purpose'].map(activity_purpose_dict)

# Series.map yields NaN for any code missing from the dict. Fail here with a
# clear message instead of letting the NaN surface later as a KeyError while
# the plans are written.
unmapped_codes = df_plan.loc[purpose_labels.isna(), 'act_purpose'].unique()
assert len(unmapped_codes) == 0, (
    f"Activity purpose codes missing from activity_purpose_dict: {unmapped_codes}"
)

df_plan['act_purpose'] = purpose_labels
df_plan.head(3)

Unnamed: 0,PId,act_id,joint_PId_actid,act_purpose,act_start,act_end,mode,building_ID,building_type,Deso,Reg_model,New_Zone_ID,POINT_X_sweref99,POINT_Y_sweref99,POINT_X,POINT_Y
4421287,1044305,0,1044305;0,h,3.0,7.863121,,353594,130,0643C1030,National,1986231,446134.3136,6418626.0,14.091069,57.906175
4421288,1044305,1,1044305;1,w,7.954329,11.771308,Car,358719,699,0643C1010,National,1986231,447177.8607,6417934.0,14.108829,57.900082
4421289,1044305,2,1044305;2,o,11.862517,12.056344,Car,379199,499,0642C1010,National,1986221,434141.2081,6419646.0,13.888454,57.913722


In [6]:
# Sanity check: list the distinct purpose labels present after the code -> label mapping.
print(df_plan['act_purpose'].unique())

['h' 'w' 'o' 's']


In [7]:
# List the distinct travel modes. Bracket access is required here because
# `df_plan.mode` would resolve to the DataFrame.mode method, not the column.
print(df_plan['mode'].unique())

['' 'Car' 'Walking' 'CarPassenger' 'Bike' 'PublicTransport']


### Sample agents for simulation
Create a subset of car users (agents with at least one activity reached by `Car` or `CarPassenger`) from the whole VGR population.

In [18]:
# Rows whose mode is car driver or car passenger.
car_leg_rows = df_plan['mode'].isin(['Car', 'CarPassenger'])
# IDs of agents that have at least one such row.
agents_car_users = df_plan.loc[car_leg_rows, 'PId'].unique()
# Keep the complete plans (all rows, all modes) of those agents.
df_plan_sim = df_plan[df_plan['PId'].isin(agents_car_users)]

#### Dump the agents' activity plans into MATSim format
Agents are excluded if they

1) do not move (stay home), or
2) have different first and last activities (checked by activity purpose, not by coordinates).

In [15]:
# Short purpose labels (h/o/w/s) -> activity type names written to the plans file.
purpose_dict = {'h': 'home', 'o': 'other', 'w': 'work', 's': 'school'}
# Mode labels found in df_plan['mode'] -> leg modes written to the plans file.
# 'Car' and 'CarPassenger' both map to 'car'.
# NOTE(review): this presumably simulates every passenger as a separate car trip;
# confirm that is intended. The empty mode '' has no entry and raises KeyError if looked up.
mode_dict = {'Car': 'car', 'CarPassenger': 'car', 'Bike': 'bike',
             'Walking': 'walk', 'PublicTransport': 'pt'}

In [16]:
def data2xml(data):
    """Write one agent's activity plan through the module-level ``writer``.

    Meant to be used as a ``groupby('PId').apply`` callback, so ``data`` holds
    the activity rows of a single agent (assumed to be in chronological order).

    Reads these module-level names: ``writer`` (an open population writer),
    ``purpose_dict`` and ``mode_dict``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``PId``, ``act_purpose``, ``mode``, ``act_end``
        (hours), ``POINT_X_sweref99`` and ``POINT_Y_sweref99``. ``mode`` in
        row ``k`` is taken as the mode used to reach activity ``k``; the value
        in the first row is ignored.

    Returns
    -------
    None
        Nothing is written for agents with fewer than three activities or
        whose first and last activity purposes differ.

    Raises
    ------
    KeyError
        If a purpose or a leg mode has no entry in ``purpose_dict`` /
        ``mode_dict``.
    """
    num_activities = len(data)
    # Agents with at most two activities do not move and are skipped. Returning
    # here also keeps the [-2] lookup below from failing on a one-row group.
    if num_activities < 3:
        return None

    # Modes between activities: mode_array[i] is the leg from activity i to i + 1.
    mode_array = data['mode'].to_numpy()[1:]
    act_purpose_array = data['act_purpose'].to_numpy()
    X_array = data['POINT_X_sweref99'].to_numpy()
    Y_array = data['POINT_Y_sweref99'].to_numpy()
    act_end_array = data['act_end'].to_numpy()

    # The second-to-last activity ends after midnight (before hour 3, which is
    # presumably where the source data starts its day - TODO confirm). Rebuild
    # the plan so that activity comes first, followed by the original
    # activities up to and including itself; the original last row is dropped.
    if act_end_array[-2] < 3:
        order = [num_activities - 2, *range(num_activities - 1)]
        act_purpose_array = act_purpose_array[order]
        X_array = X_array[order]
        Y_array = Y_array[order]
        act_end_array = act_end_array[order]  # fancy indexing returns a copy
        act_end_array[-1] = 23.99  # kept from the original; never written below
        # Legs must rotate with the activities: the new first leg is the
        # original last leg, and every other leg shifts back by one position.
        mode_array = np.roll(mode_array, 1)

    # Skip plans that do not start and end with the same activity purpose.
    if act_purpose_array[0] != act_purpose_array[-1]:
        return None

    writer.start_person(person_id=data['PId'].to_numpy()[0])
    writer.start_plan(selected=True)
    last = num_activities - 1
    for i in range(last):
        writer.add_activity(type=purpose_dict[act_purpose_array[i]],
                            x=X_array[i],
                            y=Y_array[i],
                            end_time=act_end_array[i] * 3600)  # hours -> seconds
        writer.add_leg(mode=mode_dict[mode_array[i]])
    # The closing activity is written without an end time.
    writer.add_activity(type=purpose_dict[act_purpose_array[last]],
                        x=X_array[last],
                        y=Y_array[last])
    writer.end_plan()
    writer.end_person()
    return None

In [19]:
# Register `progress_apply` on pandas objects so the export shows a progress bar.
tqdm.pandas()
with gzip.open("dbs/agents/plans_cars.xml.gz", 'wb+') as plans_file:
    # `writer` has to stay a module-level name: data2xml() looks it up as a global.
    writer = matsim.writers.PopulationWriter(plans_file)
    writer.start_population()
    # data2xml() is applied per agent and appends that agent's plan to the file.
    plans_by_agent = df_plan_sim.groupby('PId')
    plans_by_agent.progress_apply(data2xml)
    writer.end_population()

100%|██████████| 839765/839765 [06:46<00:00, 2065.17it/s]
