# Tram data

In [None]:
# Import libraries

import os
import sys
import json
import time
from datetime import date, datetime

# numerical libraries
import pandas as pd
import numpy as np

# plotting libraries
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams

In [None]:
# global variables
cutting_date = "2019-05-01"  # remove trips and data published before this date
# directory that holds transport_mode.csv
meta_data_path = "../../data-campaigns/meta-data/"
# directory of the input .pkl files; it is used as a plain string prefix
# (input_path + file name), so the trailing "/" is required
input_path = "../../2019-12-16.out/"

In [None]:
# input files
legs = "all_legs_merged_no_outlier_0.01.pkl"
trips_users = "trips_users_df.pkl"
trips = "trips_df.pkl"

# read datasets
# NOTE(review): unpickling can execute arbitrary code -- only load files that
# come from a trusted source.
# The file names are appended to input_path by plain string concatenation.
legs_df = pd.read_pickle(input_path + legs)
trips_users_df = pd.read_pickle(input_path + trips_users)
trips_df = pd.read_pickle(input_path + trips)

In [None]:
legs_df.head(3)

In [None]:
trips_users_df.head(3)

In [None]:
trips_df.head(3)

In [None]:
import csv

transport_modes_file = os.path.join(meta_data_path, "transport_mode.csv")

# lookup tables between the numeric mode id (first CSV column) and the
# mode name (second CSV column)
transport_modes_id2name = {}
transport_modes_name2id = {}
# newline="" lets the csv module do its own line-ending handling, as its
# documentation requires for file objects passed to csv.reader
with open(transport_modes_file, newline="") as tmfp:
    reader = csv.reader(tmfp, delimiter=";")

    # skip header (the default avoids StopIteration on an empty file)
    next(reader, None)

    for row in reader:
        # csv.reader yields [] for blank lines; ignore them
        if not row:
            continue
        modeid = int(row[0])
        modename = row[1]
        transport_modes_id2name[modeid] = modename
        transport_modes_name2id[modename] = modeid

transport_modes_id2name

In [None]:
tramid = transport_modes_name2id["tram"]

In [None]:
[col for col in legs_df.columns if "mo" in col.lower()]

In [None]:
# legs whose corrected transport mode is "tram"
tram_legs = legs_df[legs_df["correctedModeOfTransport_str"].eq("tram")]

In [None]:
tram_legs.head()

In [None]:
len(tram_legs)

In [None]:
legs_df.columns

In [None]:
# tram legs recorded with country code "ESP"
tram_legs_ESP = tram_legs[tram_legs["country"].eq("ESP")]

In [None]:
len(tram_legs_ESP)

In [None]:
len(tram_legs_ESP["userid"].unique())

In [None]:
# take only values in 1-5 (strictly positive and at most 5)
# .copy() detaches the filtered rows, so the column assignment below writes to
# an independent frame instead of a slice of tram_legs_ESP
# (avoids pandas' SettingWithCopyWarning)
tram_legs_ESP_tmp = tram_legs_ESP[
    (tram_legs_ESP["wastedTime"] > 0) & (tram_legs_ESP["wastedTime"] <= 5)
].copy()

# convert to integer; note that int() truncates toward zero, it does not round
tram_legs_ESP_tmp["wastedTime"] = tram_legs_ESP_tmp["wastedTime"].apply(int)

print("useful legs:", len(tram_legs_ESP_tmp))
# number of tram legs per wastedTime value
tram_wt = tram_legs_ESP_tmp.groupby("wastedTime").size().reset_index(name="count")

In [None]:
# legs whose corrected transport mode is "bus"
bus_legs = legs_df[legs_df["correctedModeOfTransport_str"].eq("bus")]

In [None]:
# bus legs recorded with country code "ESP"
bus_legs_ESP = bus_legs[bus_legs["country"].eq("ESP")]

In [None]:
len(bus_legs_ESP)

In [None]:
# take only values in 1-5 (strictly positive and at most 5)
# .copy() detaches the filtered rows, so the column assignment below writes to
# an independent frame instead of a slice of bus_legs_ESP
# (avoids pandas' SettingWithCopyWarning)
bus_legs_ESP_tmp = bus_legs_ESP[
    (bus_legs_ESP["wastedTime"] > 0) & (bus_legs_ESP["wastedTime"] <= 5)
].copy()

# convert to integer; note that int() truncates toward zero, it does not round
bus_legs_ESP_tmp["wastedTime"] = bus_legs_ESP_tmp["wastedTime"].apply(int)

print("useful legs:", len(bus_legs_ESP_tmp))
# number of bus legs per wastedTime value
bus_wt = bus_legs_ESP_tmp.groupby("wastedTime").size().reset_index(name="count")

In [None]:
# Outer merge: keep every wastedTime level that occurs for either mode. A left
# merge would silently drop the tram count of any level absent from the bus
# legs. Both inputs have a "count" column, so pandas suffixes them:
# count_x = bus, count_y = tram; a level missing for one mode gets NaN there.
foo_wt = bus_wt.merge(tram_wt, on="wastedTime", how="outer")

In [None]:
# fillna returns a new frame and leaves foo_wt untouched, so assign the result
# back to keep the zeros, then display it
foo_wt = foo_wt.fillna(0)
foo_wt

In [None]:
# share of each wastedTime level within its mode (count_x = bus, count_y = tram);
# dividing the whole column computes each total once instead of once per row
foo_wt["rel_bus"] = foo_wt["count_x"] / foo_wt["count_x"].sum()
foo_wt["rel_tram"] = foo_wt["count_y"] / foo_wt["count_y"].sum()

In [None]:
# a level that is missing for one of the modes carries NaN counts/frequencies
# after the merge; treat those as zero, then display the table
foo_wt = foo_wt.fillna(0)
foo_wt

In [None]:
# Put the actual wastedTime value on the x axis: the default positional index
# (0, 1, 2, ...) mislabels the bars whenever a rating level is missing.
foo_wt.plot(x="wastedTime", y=["rel_bus", "rel_tram"], kind="bar")
plt.title("Evaluation of bus and tram (trips legs from ESP)")
# NOTE(review): label assumes the retained ratings are the integers 1-5 -- confirm
plt.xlabel("Evaluation (1=lowest, 5=highest)")
plt.ylabel("Relative frequency")
plt.savefig("bus_tram.png")

### Table 

In [None]:
)
print(all_gen_act.legid.nunique())


In [None]:
# read data
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files
all_gen_act = pd.read_pickle(input_path + 'all_gen_act.pkl')

# rename values in column 'code': drop the first 10 characters of every code
all_gen_act['code'] = all_gen_act['code'].apply(lambda x: x[10:])

# add transport category and wastedTime of each leg
# (inner merge: rows whose legid is not in legs_df are dropped)
all_gen_act = all_gen_act.merge(legs_df[['legid', 'transp_category', 'wastedTime']], on='legid')

# filter useful values of wt and round them to the nearest whole number.
# .copy() detaches the filtered rows so that the column assignment below
# writes to an independent frame rather than to a slice
# (avoids pandas' SettingWithCopyWarning).
all_gen_act = all_gen_act[
    (all_gen_act['wastedTime'] > 0) & (all_gen_act['wastedTime'] < 6)
].copy()
all_gen_act['wastedTime'] = all_gen_act['wastedTime'].apply(np.round)

# add values from trip (rows whose valueFromTrip is 'Unknown' are discarded)
values_from_trip = pd.read_pickle(input_path + 'values_from_trip.pkl')
values_from_trip = values_from_trip[values_from_trip['valueFromTrip'] != 'Unknown']

# reshape to one row per legid and one column per valueFromTrip
# (pd.pivot raises if a legid/valueFromTrip pair occurs more than once)
tmp = values_from_trip[["legid", "value", "valueFromTrip"]]
values_from_trip_pivot = pd.pivot(
    data=tmp, index="legid", columns="valueFromTrip", values="value").reset_index()

# Merge Paid_work and Personal_tasks into Productivity taking the **maximum** value
values_from_trip_pivot["Productivity"] = values_from_trip_pivot[
    ["Paid_work", "Personal_tasks"]
].max(axis=1)
values_from_trip_pivot = values_from_trip_pivot.drop(columns=["Paid_work", "Personal_tasks"])

# attach the per-leg values (inner merge on legid) and remove exact duplicate rows
all_gen_act = all_gen_act.merge(values_from_trip_pivot, on='legid').drop_duplicates()
print('shape', all_gen_act.shape)
print('unique legs', all_gen_act.legid.nunique())
all_gen_act.head()

In [None]:
# top5 generic activities for each tc
# count rows per (transport category, activity code), sorted so that the most
# frequent activities of each category come first
gen_act_by_tc = (
    all_gen_act.groupby(['transp_category', 'code'])
    .size()
    .reset_index(name='count')
    .sort_values(by=['transp_category', 'count'], ascending=False)
)

# denominator for the relative count: number of distinct legs per tc
# (not the sum of the activity counts)
tc_total_legs = dict(all_gen_act.groupby(['transp_category'])['legid'].nunique())

# add relative count, rounded to 2 decimals; building the column from a list
# avoids DataFrame.apply(axis=1), which returns a DataFrame (not a Series) on
# an empty frame and then cannot be assigned to a single column
gen_act_by_tc['rel_count'] = [
    np.round(count / tc_total_legs[tc], 2)
    for count, tc in zip(gen_act_by_tc['count'], gen_act_by_tc['transp_category'])
]

# first 5 rows of every category, i.e. its 5 most frequent activities
gen_act_by_tc_top5 = gen_act_by_tc.groupby('transp_category').head(5).reset_index(drop=True)

gen_act_by_tc_top5


In [None]:
# mean scores per transport category, rounded to 2 decimals.
# Several columns must be selected with a list: the bare tuple form
# groupby(...)['a', 'b'] was deprecated in pandas 1.0 and removed in 2.0.
score_cols = ['wastedTime', 'Enjoyment', 'Fitness', 'Productivity']
final_table = all_gen_act.groupby('transp_category')[score_cols].mean().round(decimals=2)

# position of every row inside its transport category, in the row order of
# gen_act_by_tc_top5 (0 = first row of the category)
rank_in_tc = gen_act_by_tc_top5.groupby('transp_category').cumcount()

for i in range(5):
    # i-th activity of every category together with its relative frequency.
    # Selecting by rank keeps 'transp_category' as a regular column on every
    # pandas version; GroupBy.nth stopped returning the group key as index in
    # pandas 2.0, which would break the merge below.
    ith_rows = gen_act_by_tc_top5.loc[
        rank_in_tc == i, ['transp_category', 'code', 'rel_count']
    ].rename(columns={'code': 'activity_' + str(i + 1),
                      'rel_count': 'rel_freq_' + str(i + 1)})
    # inner merge: a category with fewer than i+1 activities drops out of the table
    final_table = final_table.merge(ith_rows, on='transp_category')

# rename columns
final_table.rename(columns = {'wastedTime':'worthwhile_time', 'Enjoyment':'enjoyment',
                              'Productivity':'productivity', 'Fitness':'fitness'}, inplace=True)
# save
final_table.to_csv('info_by_transp_cat.csv', index=False)

final_table