# Output summary

In [1]:
# Reload edited modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Switch to the data root; the relative 'dbs/...' paths in later cells are resolved from here.
# NOTE(review): absolute, machine-specific path - adjust when running on another machine.
%cd D:\sysmo-data
# Render matplotlib figures inline in the cell output.
%matplotlib inline

D:\sysmo-data


In [2]:
from tqdm.notebook import tqdm
import pandas as pd
import matsim
import datetime
from lib import workers as workers

## 1. Load input data

In [3]:
# Read the input plans and convert the first 99 persons into one sample table.
input_file = 'dbs/scenarios/palt23/plans_cars.xml.gz'
plans = matsim.plan_reader(input_file)
list_df_tst = []
tst = 0  # number of persons converted so far
for tst, (person, plan) in enumerate(plans, start=1):
    list_df_tst.append(workers.personplan2df(person, plan, output=False, experienced=False))
    # Stop once the sample holds 99 persons.
    if tst == 99:
        break
df_tst = pd.concat(list_df_tst)
df_tst.head()

Unnamed: 0,act_purpose,PId,act_end,act_id,mode,POINT_X,POINT_Y,dep_time,src,score
0,home,2744886,06:42:29,0,,585619.0985000003,6981072.204500001,0,input,0
1,other,2744886,09:45:11,1,car,583755.7174000004,6987753.562399998,06:42:29,input,0
2,other,2744886,12:47:53,2,car,584572.7879999997,6987384.729499999,09:45:11,input,0
3,other,2744886,15:50:35,3,car,582742.5109999999,6989346.078499999,12:47:53,input,0
4,home,2744886,23:59:59,4,car,585619.0985000003,6981072.204500001,15:50:35,input,0


### 1.1 Fill in trav_time in the input plans

In [4]:
# Load the planned activities, keep only the timing columns, and order the
# rows by person and activity sequence.
# NOTE(review): pd.read_pickle can execute arbitrary code - only load trusted files.
df_plan = (
    pd.read_pickle('dbs/df_act_plan.pkl')
    .loc[:, ['PId', 'act_id', 'act_start', 'act_end']]
    .sort_values(by=['PId', 'act_id'], ascending=True)
)
# Person ids are cast to strings after sorting.
df_plan['PId'] = df_plan['PId'].astype(str)

In [5]:
def trav_time_cal(data):
    """Add a ``trav_time_min`` column holding the gap before each activity.

    For every row after the first, the value is that row's ``act_start`` minus
    the previous row's ``act_end``; the first row gets ``0.0``. The column is
    written onto ``data`` itself, and the same object is returned.

    NOTE(review): the gap is in whatever unit ``act_start``/``act_end`` use;
    despite the ``_min`` suffix, no unit conversion happens in this function.
    """
    previous_ends = data['act_end'].values[:-1]
    next_starts = data['act_start'].values[1:]
    gaps = [start - end for end, start in zip(previous_ends, next_starts)]
    data.loc[:, 'trav_time_min'] = [0.0] + gaps
    return data

def digi2string(delta_time):
    """Convert a duration in decimal hours to a clock-style string.

    Parameters
    ----------
    delta_time : float
        Duration in hours, e.g. ``1.5`` for one and a half hours.

    Returns
    -------
    str
        ``str(datetime.timedelta)`` of the duration with any leading day
        component removed, e.g. ``'1:30:00'``. Durations outside [0, 24) hours
        therefore wrap onto a 24-hour clock (``25.0`` -> ``'1:00:00'``,
        ``-0.5`` -> ``'23:30:00'``).
    """
    # Split into whole hours, minutes and seconds; int() truncates toward zero,
    # so any sub-second remainder is dropped.
    hours = int(delta_time)
    minutes = int((delta_time - hours) * 60)
    seconds = int((delta_time - hours - minutes / 60) * 3600)
    time_delta = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
    formatted_time = str(time_delta)

    # str(timedelta) prefixes "N days, " but uses the singular "1 day, " /
    # "-1 day, " for a single day, so test for the separator instead of the
    # literal word "days" to strip the prefix in every case.
    if ', ' in formatted_time:
        formatted_time = formatted_time.split(', ')[-1]
    return formatted_time

In [None]:
# Apply trav_time_cal to every person's activities in the full plan table,
# with a progress bar, then flatten the result back to a plain row index.
# NOTE(review): this cell has no execution count in the saved notebook, and the
# following cell recomputes the same column for the sampled persons only -
# it looks optional; confirm before running it on the full table.
tqdm.pandas()
df_plan = df_plan.groupby('PId').progress_apply(trav_time_cal).reset_index(drop=True)

In [6]:
# Restrict the planned activities to the persons present in the input sample.
df_plan_trav = df_plan.loc[df_plan.PId.isin(df_tst.PId.unique()), :]
# Register `progress_apply` once; the registration serves both calls below.
tqdm.pandas()
df_plan_trav = df_plan_trav.groupby('PId').progress_apply(trav_time_cal).reset_index(drop=True)

# Build the clock-style string before the x60 conversion: digi2string
# interprets its argument as hours.
df_plan_trav.loc[:, 'trav_time'] = df_plan_trav.loc[:, 'trav_time_min'].progress_apply(digi2string)
df_plan_trav.loc[:, 'trav_time_min'] *= 60
# Activity duration (x60, same conversion as above). When the end is not later
# than the start, the activity is treated as running past midnight and 24 is
# added. Vectorised equivalent of the former row-wise apply.
df_plan_trav.loc[:, 'act_time'] = (
    60 * (df_plan_trav['act_end'] - df_plan_trav['act_start'])
).where(
    df_plan_trav['act_end'] > df_plan_trav['act_start'],
    60 * (df_plan_trav['act_end'] + 24 - df_plan_trav['act_start']),
)
df_plan_trav.head()

  0%|          | 0/99 [00:00<?, ?it/s]

  0%|          | 0/434 [00:00<?, ?it/s]

Unnamed: 0,PId,act_id,act_start,act_end,trav_time_min,trav_time,act_time
0,2744886,0,3.0,6.70831,0.0,0:00:00,222.498571
1,2744886,1,6.787101,9.753229,4.727474,0:04:43,177.967666
2,2744886,2,9.83202,12.798148,4.727474,0:04:43,177.967666
3,2744886,3,12.876939,15.843067,4.727474,0:04:43,177.967666
4,2744886,4,15.921858,3.0,4.727474,0:04:43,664.688535


In [7]:
# Attach the planned travel/activity times to the input sample. A left join
# keeps every input row even when no matching planned record exists.
df_tst = df_tst.merge(
    df_plan_trav[['PId', 'act_id', 'trav_time', 'trav_time_min', 'act_time']],
    how='left',
    on=['PId', 'act_id'],
)
df_tst.head()

Unnamed: 0,act_purpose,PId,act_end,act_id,mode,POINT_X,POINT_Y,dep_time,src,score,trav_time,trav_time_min,act_time
0,home,2744886,06:42:29,0,,585619.0985000003,6981072.204500001,0,input,0,0:00:00,0.0,222.498571
1,other,2744886,09:45:11,1,car,583755.7174000004,6987753.562399998,06:42:29,input,0,0:04:43,4.727474,177.967666
2,other,2744886,12:47:53,2,car,584572.7879999997,6987384.729499999,09:45:11,input,0,0:04:43,4.727474,177.967666
3,other,2744886,15:50:35,3,car,582742.5109999999,6989346.078499999,12:47:53,input,0,0:04:43,4.727474,177.967666
4,home,2744886,23:59:59,4,car,585619.0985000003,6981072.204500001,15:50:35,input,0,0:04:43,4.727474,664.688535


## 2. Load output data

In [8]:
output_file = 'dbs/scenarios/palt23/output/output_experienced_plans.xml.gz'
# Open a reader over the experienced plans stored in the output file.
plans = matsim.plan_reader(output_file)
# Convert each (person, plan) pair to a DataFrame (with a progress bar), stack
# all of them into one table, and pass that table through workers.plans_summary.
df_output = workers.plans_summary(
    pd.concat([workers.personplan2df(person, plan, output=True, experienced=True)
               for person, plan in
               tqdm(plans, desc='Processing individual plan')]))

Processing individual plan: 0it [00:00, ?it/s]

In [9]:
# Cast person ids to strings, then keep only the output rows whose person
# appears in the input sample.
df_output['PId'] = df_output['PId'].astype(str)
df_tst_o = df_output[df_output['PId'].isin(df_tst['PId'].unique())]
df_tst_o.head()

Unnamed: 0,act_purpose,PId,act_end,act_id,mode,POINT_X,POINT_Y,dep_time,trav_time,distance,score,trav_time_min,speed,act_time,src
0,home,2744886,6.899722,0,,0.0,0.0,0.0,00:00:00,0.0,123.286245,0.0,,413.983333,output
1,other,2744886,9.485278,1,car,583755.7174000004,6987753.562399998,6.899722,00:09:55,10.070987,123.286245,9.916667,60.933706,145.216667,output
2,other,2744886,12.255556,2,car,584572.7879999997,6987384.729499999,9.485278,00:02:41,1.265812,123.286245,2.683333,28.303874,163.533333,output
3,other,2744886,14.558889,3,car,582742.5109999999,6989346.078499999,12.255556,00:04:36,3.052609,123.286245,4.6,39.816642,133.6,output
4,home,2744886,23.999722,4,car,585619.0985000003,6981072.204500001,14.558889,00:19:13,11.981365,123.286245,19.216667,37.409293,547.233333,output


### 2.1 Fill in distance and speed back to the input plans

In [11]:
# Bring the distance from the output rows onto the matching input rows.
# Inner join: input rows without a matching (PId, act_id) in the output are dropped.
df_tst = df_tst.merge(
    df_tst_o[['PId', 'act_id', 'distance']],
    how='inner',
    on=['PId', 'act_id'],
)
df_tst.head()

Unnamed: 0,act_purpose,PId,act_end,act_id,mode,POINT_X,POINT_Y,dep_time,src,score,trav_time,trav_time_min,act_time,distance
0,home,2744886,06:42:29,0,,585619.0985000003,6981072.204500001,0,input,0,0:00:00,0.0,222.498571,0.0
1,other,2744886,09:45:11,1,car,583755.7174000004,6987753.562399998,06:42:29,input,0,0:04:43,4.727474,177.967666,10.070987
2,other,2744886,12:47:53,2,car,584572.7879999997,6987384.729499999,09:45:11,input,0,0:04:43,4.727474,177.967666,1.265812
3,other,2744886,15:50:35,3,car,582742.5109999999,6989346.078499999,12:47:53,input,0,0:04:43,4.727474,177.967666,3.052609
4,home,2744886,23:59:59,4,car,585619.0985000003,6981072.204500001,15:50:35,input,0,0:04:43,4.727474,664.688535,11.981365


In [12]:
# Speed in km/h: distance divided by the travel time converted from minutes to hours.
df_tst['speed'] = df_tst['distance'].div(df_tst['trav_time_min'] / 60)

## 3. Combine input and output

In [13]:
# Stack the input-side rows (df_tst) on top of the output-side rows (df_tst_o).
# ignore_index is left at its default, so each part keeps its own row labels.
df = pd.concat([df_tst, df_tst_o])
df.head()

Unnamed: 0,act_purpose,PId,act_end,act_id,mode,POINT_X,POINT_Y,dep_time,src,score,trav_time,trav_time_min,act_time,distance,speed
0,home,2744886,06:42:29,0,,585619.0985000003,6981072.204500001,0,input,0.0,0:00:00,0.0,222.498571,0.0,
1,other,2744886,09:45:11,1,car,583755.7174000004,6987753.562399998,06:42:29,input,0.0,0:04:43,4.727474,177.967666,10.070987,127.818632
2,other,2744886,12:47:53,2,car,584572.7879999997,6987384.729499999,09:45:11,input,0.0,0:04:43,4.727474,177.967666,1.265812,16.065394
3,other,2744886,15:50:35,3,car,582742.5109999999,6989346.078499999,12:47:53,input,0.0,0:04:43,4.727474,177.967666,3.052609,38.743007
4,home,2744886,23:59:59,4,car,585619.0985000003,6981072.204500001,15:50:35,input,0.0,0:04:43,4.727474,664.688535,11.981365,152.064702


In [14]:
# Inspect the column dtypes of the combined table.
df.dtypes

act_purpose       object
PId               object
act_end           object
act_id             int64
mode              object
POINT_X           object
POINT_Y           object
dep_time          object
src               object
score            float64
trav_time         object
trav_time_min    float64
act_time         float64
distance         float64
speed            float64
dtype: object