### Report Preprocessing

Ex `AnalysisForUX.0.1.ipynb` 

**!!** The user selected for a check is `pKGhRs0mGJgJuATlf6mIKxgkhfK2`

## Analysis

- [Time spent in travel](#time_travel)
- [Time spent in travel (percentage)](#time_travel_perc)
- [Time spent in travel (percentage considering the main mode)](#time_travel_perc_main)
- [Worthwhileness satisfaction per mode of transport](#worth_satisfaction)
- [Worthwhileness satisfaction for short, medium and long distance](#worth_satisfaction_distance)
- [Average distance per transport mode](#avg_distance_transport_mode)
- [Cumulative distance per transport mode](#cum_distance_transport_mode)
- [CO2 per transport mode (total and percentage)](#co2)
- [Percentage of total distance per transport mode](#total_distance_transp_mode)
- [Percentage of distance (considering the main mode)](#perc_distance_transp_mode_main)
- [Average legs travel time per mode](#avg_travel_time_permode)
- [Average assessment per mode - lousiness vs greatness](#avg_assessment_permode)
- [Worthwhileness Index](#worth_index)

In [None]:
#Import libraries
import os
import json
import pandas as pd
import numpy as np
import time
from datetime import date, datetime
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams
import sys

# Widen the notebook container so wide tables and figures stay readable.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# The closing tag must be `</style>`; the previous `</style"` left the element unterminated.
display(HTML("<style>.container { width:95% !important; }</style>"))

from docx import Document
from docx.shared import Inches

# Default matplotlib styling for every figure in this notebook:
# extra padding between axes and title, and a larger base font.
rcParams.update({
    'axes.titlepad': 45,
    'font.size': 16,
})

In [None]:
# Global variables
cutting_date = '2019-05-01' # remove trips and data published before this date
meta_data_path = '../../data-campaigns/meta-data/'

# Input pickle with the legs and the folders/files used for reading and writing.
legs = 'all_legs_merged_no_outlier_0.01.pkl'
input_path = '../../2019-12-16.out/'
out_path = '../../2019-12-16.out/reports/'
img_path = '../../2019-12-16.out/reports/img_merged_nooutliers/'
report_name = 'Results_01.05_16.12_merged_nooutliers_0.01.docx'

# Create the image folder if it is missing. `exist_ok=True` makes the call
# idempotent and avoids the check-then-create race of `os.path.exists`.
os.makedirs(img_path, exist_ok=True)
    

**Read data**

In [None]:
# Load the pickled inputs: the legs table plus the two trip tables.
all_legs = pd.read_pickle(input_path + legs)
trips_users_df, trips_df = (
    pd.read_pickle(input_path + pickle_name)
    for pickle_name in ('trips_users_df.pkl', 'trips_df.pkl')
)

# Quick size summary: number of legs, distinct trips and distinct users.
# `dropna=False` keeps the count identical to `len(series.unique())`.
print('Legs:', len(all_legs.index))
print('Trips: ', all_legs.tripid.nunique(dropna=False))
print('Users:', all_legs.userid.nunique(dropna=False))


**Start document**

In [None]:
## Initial information

def add_bullet(doc, text):
    """Append `text` to `doc` as a 'List Bullet' paragraph and echo it to stdout."""
    bullet = doc.add_paragraph(style='List Bullet')
    bullet.add_run().add_text(text)
    print(text)


# Reporting period: earliest and latest leg start. Only the first 10 characters
# of the string form are kept (presumably the 'YYYY-MM-DD' part — TODO confirm).
start_date = str(all_legs['startDate_formated'].min())[0:10]
end_date = str(all_legs['startDate_formated'].max())[0:10]

subtitle = 'Data from ' + start_date + ' to ' + end_date + ' Post-processing legs merged - Outlier removed'
subtitle_1 = (' Lower and Upper bound computed considering only main/longest (in terms of distance) legs. For '
              'each transport mode we are considering as outliers the 1% with the lowest and highest values')

# Create the report and its title block.
document = Document()
document.add_heading('Data Analysis')
document.add_heading(subtitle, level=2)
document.add_heading(subtitle_1, level=3)

# One bullet per headline figure. The previous version also emitted a leading
# bullet with no text, which showed up as an empty list item in the report.
add_bullet(document, 'Total number of legs: ' + str(all_legs.shape[0]))
add_bullet(document, 'Total number of trip: ' + str(len(all_legs['tripid'].unique())))
add_bullet(document, 'Total number of users: ' + str(len(all_legs['userid'].unique())))

# Empty paragraph after the bullet list (kept from the original layout).
p = document.add_paragraph()
r = p.add_run()

In [None]:
## Trips with starting and final address

# Share of trips whose `startAddress` is present, as a percentage rounded to 2 decimals.
# NOTE(review): only `startAddress` is inspected even though the label mentions the
# final address as well — confirm whether a final-address column should also be checked.
trips_with_address = int(trips_df['startAddress'].notna().sum())
address_pct = round(trips_with_address / trips_df.shape[0] * 100, 2)

print('Percentage of trips with starting and final address: ', address_pct)

p = document.add_paragraph()
p.style = 'List Bullet'
r = p.add_run()
r.add_text('Percentage of trips with starting and final address: ' + str(address_pct))

**Analysis**

In [None]:
# Identifier of the single user selected for spot checks (see the note at the top of the notebook).
usn = 'pKGhRs0mGJgJuATlf6mIKxgkhfK2'

<a id='time_travel'></a>
### Time spent in travel

Total travel time (in minutes) per transport mode.

In [None]:
# Total inferred leg duration per corrected transport mode, largest first.
transport_mode_share = (
    all_legs
    .groupby('correctedModeOfTransport_str')['inferred_leg_duration_min']
    .sum()
    .reset_index()
    .sort_values(by='inferred_leg_duration_min', ascending=False)
    .rename(columns={
        'correctedModeOfTransport_str': 'transportMode',
        'inferred_leg_duration_min': 'tot_duration',
    })
)
transport_mode_share.head()

In [None]:
# Bar chart of the total travel time per transport mode, saved as PNG and PDF.
# The style is set before the axes are created so that it applies to this figure too.
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=transport_mode_share, x="transportMode", y='tot_duration', ax=ax)
ax.set(xlabel='Transport mode', ylabel='time (min)')
ax.set_title('Total travel time minutes per mode - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value above it (vertical text, nudged upwards).
for bar in ax.patches:
    ax.annotate("%.0f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()+100),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: previously tight_layout ran after savefig, so the
# files on disk (and therefore the report) never received the adjusted layout.
fig.tight_layout()
fig.savefig(img_path + "total_trav_time_allusers.png")
fig.savefig(img_path + "total_trav_time_allusers.pdf")

In [None]:
## add to report
# Section heading, 'All Users' sub-heading, then the saved chart at 7 inches wide.
for heading_text, heading_level in (('Total travel time minutes per mode', 1), ('All Users', 2)):
    document.add_heading(heading_text, level=heading_level)

picture_file = img_path + 'total_trav_time_allusers.png'
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=Inches(7.0))

<a id='time_travel_perc'></a>
### Time spent in travel (percentage)

In [None]:
# remove unknown
transport_mode_share = transport_mode_share[transport_mode_share['transportMode'] != 'unknown']
transport_mode_share['frel'] = transport_mode_share['tot_duration']/transport_mode_share['tot_duration'].sum() *100

In [None]:
# Bar chart of the percentage of total travel time per mode, saved as PNG and PDF.
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=transport_mode_share, x="transportMode", y='frel', ax=ax)
ax.set(xlabel='Transport mode', ylabel='Percentage of time')
ax.set_title('Percentage of total travel time per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Label every bar with its percentage (two decimals).
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# tight_layout must run before savefig, otherwise the saved files miss the adjustment.
fig.tight_layout()
fig.savefig(img_path + "percentage_trav_time_allusers.png")
fig.savefig(img_path + "percentage_trav_time_allusers.pdf")

In [None]:
# Add the percentage-of-travel-time section (heading, sub-heading, chart) to the report.
for heading_text, heading_level in (('Percentage of total travel time per mode', 1), ('All Users', 2)):
    document.add_heading(heading_text, level=heading_level)

picture_file = img_path + 'percentage_trav_time_allusers.png'
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=Inches(7.0))

**Group all transport modes with percentage < 1 in a unique set named "other" and all train types in "Train"**

In [None]:
# Step 1: fold the two rail variants into 'train' and re-aggregate.
transport_mode_share['transportMode'] = transport_mode_share['transportMode'].replace(
    {'intercityTrain': 'train', 'highSpeedTrain': 'train'}
)
transport_mode_share = (
    transport_mode_share.groupby('transportMode').sum()
    .reset_index()
    .sort_values('frel', ascending=False)
)

# Step 2: every mode whose share is below 1% becomes 'other', then re-aggregate again.
share_at_least_one = transport_mode_share['frel'] >= 1
transport_mode_share['transportMode'] = transport_mode_share['transportMode'].where(share_at_least_one, "other")
transport_mode_share = (
    transport_mode_share.groupby('transportMode').sum()
    .reset_index()
    .sort_values('frel', ascending=False)
)
transport_mode_share

In [None]:
# Pie chart of the grouped travel-time shares.
transport_mode_share2 = transport_mode_share.set_index('transportMode')

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
transport_mode_share2['frel'].plot(kind='pie', ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
ax.set_title('Percentage of total travel time per mode  - all users', y=1.)

# Fit the layout before saving so the file on disk gets it as well.
fig.tight_layout()
# The pie gets its own file name: it used to be written to
# "percentage_trav_time_allusers_group_transp.png", which the bar chart in the
# next cell overwrites, so the pie image was silently lost.
fig.savefig(img_path + "percentage_trav_time_allusers_group_transp_pie.png")

In [None]:
# Bar chart of the grouped travel-time shares ('train' merged, small modes in 'other').
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=transport_mode_share, x="transportMode", y='frel', ax=ax)
ax.set(xlabel='Transport mode', ylabel='Percentage of time')
ax.set_title('Percentage of total travel time per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Percentage label above each bar.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Layout first, then save; the reverse order leaves the saved files unadjusted.
fig.tight_layout()
fig.savefig(img_path + "percentage_trav_time_allusers_group_transp.png")
fig.savefig(img_path + "percentage_trav_time_allusers_group_transp.pdf")

In [None]:
# Sub-section for the grouped chart: level-2 heading followed by the picture.
section_title = ' Group all transport modes with percentage < 1 in "other" and all train types in "Train"'
picture_file = img_path + 'percentage_trav_time_allusers_group_transp.png'

document.add_heading(section_title, level=2)
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=Inches(7.0))

<a id='time_travel_perc_main'></a>
### Time spent in travel (percentage considering the main mode) 

Percentage of time spent in travel considering only the longest ("main") mode, i.e. for every trip with more than one leg, we add one count to the mode of the leg that took the longest time.

In [None]:
# Show the columns available in the legs table (inspection only; nothing is modified).
all_legs.columns

In [None]:
# Rank the legs of every trip by duration (1 = longest) and keep the top-ranked one.
# NOTE(review): the filter keeps every row with class == 'Leg'; it does not restrict
# to trips with more than one leg, despite the variable name — confirm intent.
# `.copy()` detaches the selection from `all_legs`, so adding the 'rank' column
# below no longer writes to a slice (SettingWithCopyWarning).
legs_in_multi_legs_trips = all_legs[all_legs['class'] == 'Leg'].copy()
legs_in_multi_legs_trips = legs_in_multi_legs_trips.sort_values(
    ['tripid', 'inferred_leg_duration_min'], ascending=[True, False])
legs_in_multi_legs_trips['rank'] = legs_in_multi_legs_trips.groupby(['tripid']).cumcount()+1

# take the longest legs
longest_legs = legs_in_multi_legs_trips[legs_in_multi_legs_trips['rank']==1]

In [None]:
# Count how many trips have each mode as their longest leg, most frequent first.
leg_counts = longest_legs.groupby('correctedModeOfTransport_str')['legid'].size()
transport_mode_share = leg_counts.reset_index().sort_values(by='legid', ascending=False)
transport_mode_share.columns = ['transportMode', '#legs']

# Relative frequency in percent; the printed sum is a sanity check.
total_legs = transport_mode_share['#legs'].sum()
transport_mode_share['frel'] = transport_mode_share['#legs'] / total_legs * 100
print(transport_mode_share['frel'].sum())
transport_mode_share.head()

In [None]:
# Bar chart of the mode share counting only each trip's longest leg.
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=transport_mode_share, x="transportMode", y='frel', ax=ax)
ax.set(xlabel='Transport mode', ylabel='Percentage of legs')
ax.set_title('Percentage mode share based on the total count for each mode\ncounting only the longest “main” mode  - all users', y=1.)
plt.xticks(rotation=90)

# Percentage label above each bar.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Apply the layout before saving so the PNG used by the report is not cropped.
fig.tight_layout()
fig.savefig(img_path + "mode_share_longest_leg_time.png")

In [None]:
# Report section for the main-mode share: heading, 'All Users' sub-heading and chart.
main_heading = 'Percentage mode share based on the total count for each mode\ncounting only the longest (in terms of time) “main” mode '
for heading_text, heading_level in ((main_heading, 1), ('All Users', 2)):
    document.add_heading(heading_text, level=heading_level)

picture_file = img_path + 'mode_share_longest_leg_time.png'
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=Inches(7.0))

**Group all transport modes with percentage < 1 in a unique set named "other" and all train types in "Train"**

In [None]:
# Merge the rail variants into 'train' and sum the counts/shares per mode.
rail_to_train = {'intercityTrain': 'train', 'highSpeedTrain': 'train'}
transport_mode_share['transportMode'] = transport_mode_share['transportMode'].replace(rail_to_train)
transport_mode_share = (
    transport_mode_share.groupby('transportMode').sum()
    .reset_index()
    .sort_values('frel', ascending=False)
)

# Relabel every mode with a share below 1% as 'other' and aggregate once more.
keep_own_label = transport_mode_share['frel'] >= 1
transport_mode_share['transportMode'] = transport_mode_share['transportMode'].where(keep_own_label, "other")
transport_mode_share = (
    transport_mode_share.groupby('transportMode').sum()
    .reset_index()
    .sort_values('frel', ascending=False)
)

In [None]:
# Bar chart of the grouped main-mode share ('train' merged, small modes in 'other').
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=transport_mode_share, x="transportMode", y='frel', ax=ax)
ax.set(xlabel='Transport mode', ylabel='Percentage of legs')
ax.set_title('Percentage mode share based on the total count for each mode\ncounting only the longest “main” mode  - all users', y=1.)
plt.xticks(rotation=90)

# Percentage label above each bar.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Layout has to be applied before the figure is written to disk.
fig.tight_layout()
fig.savefig(img_path + "mode_share_longest_leg_time_group.png")


In [None]:
# Sub-section for the grouped main-mode chart.
section_title = 'All transport modes < 1 => "other" and all train types in "Train"'
picture_file = img_path + 'mode_share_longest_leg_time_group.png'

document.add_heading(section_title, level=2)
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=Inches(7.0))

<a id='worth_satisfaction' ></a>
### Worthwhileness satisfaction per mode of transport

For each mode of transport produce an average assessment of the variable `wastedTime`

In [None]:
# filtering only values between 1 and 5
all_legs_wt = all_legs[(all_legs['wastedTime'] > 0) & (all_legs['wastedTime'] <= 5)]
all_legs_wt['wastedTime'] = pd.to_numeric(all_legs_wt['wastedTime'])
wasted_x_transp = all_legs_wt.groupby('correctedModeOfTransport_str')['wastedTime'].mean().reset_index()
wasted_x_transp.sort_values(by='wastedTime', ascending=False, inplace=True)
wasted_x_transp.head()

In [None]:
# Bar chart of the mean `wastedTime` rating per transport mode.
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=wasted_x_transp, x="correctedModeOfTransport_str", y='wastedTime', ax=ax)
ax.set(xlabel='Transport mode', ylabel='Average assessment ')
ax.set_title('Average assessment per mode of wasted vs worthwhileness  - All users', y=1.)
plt.xticks(rotation=90)

# Mean rating label above each bar.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# tight_layout before savefig: otherwise the saved PNG does not get the layout.
fig.tight_layout()
fig.savefig(img_path + "avg_ass_mode_allusers.png")

In [None]:
# Report section for the per-mode assessment: heading, 'All Users' sub-heading and chart.
for heading_text, heading_level in (('Average assessment per mode of wasted vs worthwhileness', 1), ('All Users', 2)):
    document.add_heading(heading_text, level=heading_level)

picture_file = img_path + 'avg_ass_mode_allusers.png'
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=Inches(7.0))

**Group all transport modes with relative frequency < 1 into "other" and all train types into "Train"**

In [None]:
# Number of legs per transport mode and the corresponding percentage of all legs.
legs_x_mode = (
    all_legs.groupby('correctedModeOfTransport_str')['legid'].count()
    .reset_index(name='nlegs')
    .sort_values('nlegs', ascending=False)
)
legs_x_mode['perc_legs'] = legs_x_mode['nlegs']/legs_x_mode['nlegs'].sum()*100

# train: fold both rail variants into a single 'train' row
legs_x_mode['correctedModeOfTransport_str'] = legs_x_mode['correctedModeOfTransport_str'].replace(
    {'intercityTrain': 'train', 'highSpeedTrain': 'train'}
)
legs_x_mode = (
    legs_x_mode.groupby('correctedModeOfTransport_str').sum()
    .reset_index()
    .sort_values('perc_legs', ascending=False)
)

# group: every mode below 1% of the legs is pooled into 'other'
frequent_enough = legs_x_mode['perc_legs'] >= 1
legs_x_mode['correctedModeOfTransport_str'] = legs_x_mode['correctedModeOfTransport_str'].where(frequent_enough, "other")
legs_x_mode = (
    legs_x_mode.groupby('correctedModeOfTransport_str').sum()
    .reset_index()
    .sort_values('perc_legs', ascending=False)
)




In [None]:
# Select the WT mean with the grouped transport mode
all_legs_tmp = all_legs.copy()
all_legs_tmp['gr_mode'] = all_legs_tmp['correctedModeOfTransport_str'].apply(lambda x: 'train' if x in (['intercityTrain','highSpeedTrain']) else x)
all_legs_tmp['gr_mode'] = all_legs_tmp['gr_mode'].apply(lambda x: x if x in list(legs_x_mode['correctedModeOfTransport_str']) else 'other')

# filtering only values between 1 and 5
all_legs_wt_grouped = all_legs_tmp[(all_legs_tmp['wastedTime'] > 0) & (all_legs_tmp['wastedTime'] <= 5)]
all_legs_wt_grouped['wastedTime'] = pd.to_numeric(all_legs_wt_grouped['wastedTime'])
wasted_x_transp_grouped = all_legs_wt_grouped.groupby('gr_mode')['wastedTime'].mean().reset_index()
wasted_x_transp_grouped.sort_values(by='wastedTime', ascending=False, inplace=True)
#wasted_x_transp_grouped.head()

In [None]:
# Bar chart of the mean `wastedTime` rating per grouped transport mode.
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=wasted_x_transp_grouped, x="gr_mode", y='wastedTime', ax=ax)
ax.set(xlabel='Transport mode', ylabel='Average assessment ')
ax.set_title('Average assessment per mode of wasted vs worthwhileness  - All users', y=1.)
plt.xticks(rotation=90)

# Mean rating label above each bar.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Apply the layout first so the saved PNG includes it.
fig.tight_layout()
fig.savefig(img_path + "avg_ass_mode_allusers_grouped.png")

In [None]:
# Sub-section for the grouped assessment chart.
section_title = 'All transport modes < 1 => "other" and all train types in "Train"'
picture_file = img_path + 'avg_ass_mode_allusers_grouped.png'

document.add_heading(section_title, level=2)
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=Inches(7.0))

<a id='worth_satisfaction_distance' ></a>
### Worthwhileness satisfaction for short, medium and long distance 

(quantiles 0.33 and 0.66)

In [None]:
# Find the quantiles for each transport mode
dist_segs = all_legs.groupby('correctedModeOfTransport_str')['legDistance'].quantile([0.33,0.66]).reset_index()
dist_segs = pd.pivot_table(dist_segs, values='legDistance', 
                     index=['correctedModeOfTransport_str'], 
                     columns='level_1').reset_index()
dist_segs.columns = ['correctedModeOfTransport_str','0.33','0.66']
#dist_segs

# add the info to all_legs and classify into short, medium, long distance
all_legs_tmp = pd.merge(all_legs, dist_segs, on='correctedModeOfTransport_str', how='left' )
all_legs_tmp['dist_seg'] = all_legs_tmp.apply(lambda x: 'short' if x['legDistance']<=x['0.33'] 
                                              else 'long' if x['legDistance']>=x['0.66'] else 'medium', axis=1)

# select only wasted time and the distance category
all_legs_tmp_wt_dist_seg = all_legs_tmp[(all_legs_tmp['wastedTime'] > 0) & (all_legs_tmp['wastedTime'] <= 5)]
all_legs_tmp_wt_dist_seg['wastedTime'] = pd.to_numeric(all_legs_tmp_wt_dist_seg['wastedTime'])

wasted_x_transp_dist_seg = all_legs_tmp_wt_dist_seg.groupby(['correctedModeOfTransport_str','dist_seg'])['wastedTime'].mean().reset_index()
wasted_x_transp_dist_seg.sort_values(by='wastedTime', ascending=False, inplace=True)
wasted_x_transp_dist_seg.sort_values(['dist_seg','wastedTime'], ascending=[False,False], inplace=True)

# take the top 10 modes of transport 
top_10_modes = all_legs.groupby('correctedModeOfTransport_str')['legid'].count().reset_index()
top_10_modes = top_10_modes.sort_values('legid', ascending=False).head(10)

In [None]:
# Restrict the per-segment means to the ten most frequent transport modes.
wasted_x_transp_dist_seg = wasted_x_transp_dist_seg[wasted_x_transp_dist_seg['correctedModeOfTransport_str'].isin(top_10_modes['correctedModeOfTransport_str'])]

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

# One bar chart per distance segment, each saved as "<segment>_avg_ass_mode_dist_segs.png".
for dist_seg in wasted_x_transp_dist_seg['dist_seg'].unique():
    wasted_x_transp_dist_seg_1 = wasted_x_transp_dist_seg[wasted_x_transp_dist_seg['dist_seg'] == dist_seg]

    fig, ax = plt.subplots(figsize=(12,12))
    sns.barplot(data=wasted_x_transp_dist_seg_1, x="correctedModeOfTransport_str", y='wastedTime', ax=ax)
    ax.set(xlabel='Transport mode', ylabel='Average assessment ')
    ax.set_title('Average assessment per mode of wasted vs worthwhileness for ' + dist_seg +' legs\nAll users and top 10 mode of transport', y=1.)
    plt.xticks(rotation=90)

    # Mean rating label above each bar.
    for bar in ax.patches:
        ax.annotate("%.2f" % bar.get_height(),
                    (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                    ha='center', va='center', fontsize=14, color='black', rotation=90,
                    xytext=(0, 20), textcoords='offset points')

    # Layout must be applied before saving, otherwise the file misses it.
    fig.tight_layout()
    fig.savefig(img_path + dist_seg+"_avg_ass_mode_dist_segs.png")
    # Render the figure now and release it so figures do not pile up across iterations.
    plt.show()
    plt.close(fig)

In [None]:
# The section heading is added once; it used to sit inside the loop, which
# repeated the identical level-2 heading for every distance segment.
document.add_heading('Worthwhileness satisfaction for different distance segments: short, medium and long (quantiles 0.33 and 0.66) ', level=2)

# One level-3 heading and picture per distance segment.
for dist_seg in wasted_x_transp_dist_seg['dist_seg'].unique():
    document.add_heading('Average assessment per mode of wasted vs worthwhileness for ' + dist_seg +' legs  - All users', level=3)
    p = document.add_paragraph()
    r = p.add_run()
    r.add_picture(img_path + dist_seg+"_avg_ass_mode_dist_segs.png",width=Inches(7.0))


**Group all transportMode with freq<1 and all trains**

In [None]:
# NOTE: this cell relies on `all_legs_tmp` still being the merged frame that
# carries the 'dist_seg' column, and on `legs_x_mode` holding the kept mode labels.

# Grouped mode per leg: rail variants become 'train'; modes that are not among
# the labels kept in `legs_x_mode` become 'other'. `isin` replaces a lambda that
# rebuilt the list of kept labels for every row.
grouped_mode = all_legs_tmp['correctedModeOfTransport_str'].replace(
    {'intercityTrain': 'train', 'highSpeedTrain': 'train'})
kept_modes = legs_x_mode['correctedModeOfTransport_str']
all_legs_tmp['gr_mode'] = grouped_mode.where(grouped_mode.isin(kept_modes), 'other')

# Keep ratings in (0, 5]; convert before comparing and copy the selection so that
# no column is assigned on a slice.
all_legs_tmp['wastedTime'] = pd.to_numeric(all_legs_tmp['wastedTime'])
valid_rating = (all_legs_tmp['wastedTime'] > 0) & (all_legs_tmp['wastedTime'] <= 5)
all_legs_wt = all_legs_tmp[valid_rating].copy()

# Mean rating per (grouped mode, distance segment); the former single-key sort
# was redundant because this two-key sort followed it immediately.
wasted_x_transp_group_dist_seg = all_legs_wt.groupby(['gr_mode','dist_seg'])['wastedTime'].mean().reset_index()
wasted_x_transp_group_dist_seg = wasted_x_transp_group_dist_seg.sort_values(['dist_seg','wastedTime'], ascending=[False,False])

In [None]:
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

# One bar chart per distance segment for the grouped modes, each saved as
# "<segment>_avg_ass_mode_group_dist_segs.png".
for dist_seg in wasted_x_transp_group_dist_seg['dist_seg'].unique():
    wasted_x_transp_group_dist_seg_1 = wasted_x_transp_group_dist_seg[wasted_x_transp_group_dist_seg['dist_seg'] == dist_seg]

    fig, ax = plt.subplots(figsize=(12,12))
    sns.barplot(data=wasted_x_transp_group_dist_seg_1, x="gr_mode", y='wastedTime', ax=ax)
    ax.set(xlabel='Transport mode', ylabel='Average assessment ')
    # The title previously said "top 10 mode of transport", but this chart shows the
    # grouped modes (no top-10 filter is applied to this frame).
    ax.set_title('Average assessment per mode of wasted vs worthwhileness for ' + dist_seg +' legs  - All users, grouped transport modes', y=1.)
    plt.xticks(rotation=90)

    # Mean rating label above each bar.
    for bar in ax.patches:
        ax.annotate("%.2f" % bar.get_height(),
                    (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                    ha='center', va='center', fontsize=14, color='black', rotation=90,
                    xytext=(0, 20), textcoords='offset points')

    # Layout must be applied before saving, otherwise the file misses it.
    fig.tight_layout()
    fig.savefig(img_path + dist_seg+"_avg_ass_mode_group_dist_segs.png")
    # Render the figure now and release it so figures do not pile up across iterations.
    plt.show()
    plt.close(fig)

In [None]:
# Section headings are added once; they used to be repeated for every segment.
document.add_heading('Worthwhileness satisfaction for different distance segments: short, medium and long (quantiles 0.33 and 0.66) ', level=2)
document.add_heading('Group all transportMode with freq<1 and all trains', level=2)

# Iterate over the GROUPED frame: the "_avg_ass_mode_group_dist_segs.png" images
# are produced from it, whereas the previous loop took its segments from the
# ungrouped (top-10 filtered) frame.
for dist_seg in wasted_x_transp_group_dist_seg['dist_seg'].unique():
    document.add_heading('Average assessment per mode of wasted vs worthwhileness for ' + dist_seg +' legs  - All users', level=3)
    p = document.add_paragraph()
    r = p.add_run()
    r.add_picture(img_path + dist_seg+"_avg_ass_mode_group_dist_segs.png",width=Inches(7.0))


<a id='avg_distance_transport_mode'></a>
### Average distance per transport mode

In [None]:
# Mean leg distance per transport mode (largest first), plus the same value divided by 1000
# for the kilometre axis of the chart.
avg_leg_dist = (
    all_legs
    .groupby('correctedModeOfTransport_str')['legDistance']
    .mean()
    .reset_index()
    .sort_values('legDistance', ascending=False)
    .assign(legDistance_km=lambda frame: frame['legDistance'] / 1000)
)

In [None]:
# Bar chart of the average leg distance (km) per transport mode.
rcParams['font.size'] = 16
sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=avg_leg_dist, x="correctedModeOfTransport_str", y='legDistance_km', ax=ax)
ax.set(xlabel='Transport mode', ylabel='Average distance (Km) ')
ax.set_title('Average distance per trip leg per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Distance label above each bar.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Apply the layout before saving so the PNG used by the report includes it.
fig.tight_layout()
fig.savefig(img_path + "avg_dist_leg_mode_allusers.png")

In [None]:
# Report section for the average leg distance: heading, 'All Users' sub-heading and chart.
for heading_text, heading_level in (('Average distance per trip leg per mode', 1), ('All Users', 2)):
    document.add_heading(heading_text, level=heading_level)

picture_file = img_path + 'avg_dist_leg_mode_allusers.png'
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=Inches(7.0))

<a id='cum_distance_transport_mode'></a>
### Cumulative distance per transport mode

In [None]:
# legs_x_bin
document.add_heading('Cumulative distribution function of leg distances')
document.add_heading('All Users', level=2)

In [None]:
# Upper edges (km) of the distance bins; the first edge (-1) only opens the first bin.
newbins = [-1,1,2,5,10,20, 50,100,200,500,1000, 100000]
x_bins = newbins[1:]

sns.set_style("whitegrid")

# For every transport mode: cumulative percentage of legs per distance bin,
# drawn as a bar chart, saved as PNG and appended to the report.
for tr in all_legs['correctedModeOfTransport_str'].unique():
    # Independent copy of this mode's distances, so the 'binned' column is not
    # assigned on a slice of `all_legs`.
    mode_legs = all_legs.loc[all_legs['correctedModeOfTransport_str'] == tr, ['legDistance']].copy()
    mode_legs['binned'] = pd.cut(mode_legs['legDistance']/1000, newbins)

    # observed=False keeps empty bins in the result. The assignment of `x_bins`
    # below needs exactly one row per bin, so this must not depend on the
    # library's default for `observed`.
    legs_x_bin = mode_legs.groupby('binned', observed=False).size().reset_index()
    legs_x_bin.columns = ['bin','#legs']
    legs_x_bin['frel'] = legs_x_bin['#legs']/legs_x_bin['#legs'].sum() *100
    legs_x_bin['x_bin'] = x_bins
    legs_x_bin['cum_legs'] = legs_x_bin['frel'].cumsum()

    fig, ax = plt.subplots(figsize=(10,8))
    sns.barplot(data=legs_x_bin, x="x_bin", y='cum_legs', palette="Blues", ax=ax)
    ax.set(xlabel='Km', ylabel='Percentage of legs')
    title = 'Cumulative distribution function of leg distances for ' + tr +  ' - All users'
    ax.set_title(title, y=1., fontsize=14)
    plt.xticks(rotation=90)

    # Cumulative percentage label above each bar.
    for bar in ax.patches:
        ax.annotate("%.2f" % bar.get_height(),
                    (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                    ha='center', va='center', fontsize=14, color='black', rotation=90,
                    xytext=(0, 20), textcoords='offset points')

    # Layout before saving, so the file inserted into the report includes it.
    img_name = img_path + "cum_dist_all_users_" +tr + ".png"
    fig.tight_layout()
    fig.savefig(img_name)
    # Render and release the figure: one figure per mode would otherwise stay open.
    plt.show()
    plt.close(fig)

    p = document.add_paragraph()
    r = p.add_run()
    r.add_picture(img_name,width=Inches(7.0))

<a id='co2'></a>
### co2 per transport mode (total and percentage)

Multiply the km travelled by the average co2 / km travelled - per day

In [None]:
# Per-mode parameters as (name, max_speed, co2) tuples. The position in the list
# is the numeric transport-mode code, which becomes the string key of the dict.
# NOTE(review): 99999999 looks like a placeholder for modes without an emission
# factor, and co2 is presumably grams per km (totals are later divided by 1000
# and labelled Kg) — confirm both.
_MODE_SPECS = [
    ('vehicle', 200, 120),
    ('bicycle', 100, 0),
    ('onfoot', 12, 0),
    ('still', 0, 99999999),
    ('unknown', 100, 99999999),
    ('tilting', 100, 99999999),
    ('inexistent', 100, 99999999),
    ('walking', 12, 0),
    ('running', 20, 0),
    ('car', 250, 120),
    ('train', 350, 14),
    ('tram', 100, 14),
    ('subway', 100, 14),
    ('ferry', 200, 256.5),
    ('plane', 7000, 285),
    ('bus', 150, 68),
    ('electricBike', 50, 6),
    ('bikeSharing', 50, 0),
    ('microScooter', 50, 12),
    ('skate', 20, 0),
    ('motorcycle', 300, 80),
    ('moped', 80, 60),
    ('carPassenger', 250, 80),
    ('taxi', 250, 100),
    ('rideHailing', 100, 120),
    ('carSharing', 250, 120),
    ('carpooling', 250, 120),
    ('busLongDistance', 150, 68),
    ('highSpeedTrain', 350, 25),
    ('other', 100, 0),
    ('otherPublic', 300, 70),
    ('otherActive', 30, 0),
    ('otherPrivate', 250, 90),
    ('intercityTrain', 300, 14),
    ('wheelChair', 10, 0),
    ('cargoBike', 30, 0),
    ('carSharingPassenger', 250, 80),
    ('electricWheelchair', 30, 15),
]

transp_mode_dict = {
    str(mode_code): {'name': mode_name, 'max_speed': mode_max_speed, 'co2': mode_co2}
    for mode_code, (mode_name, mode_max_speed, mode_co2) in enumerate(_MODE_SPECS)
}

In [None]:
# CO2 per leg = leg distance / 1000 times the factor of the leg's transport-mode code.
all_legs_tmp = all_legs.copy()

# Look the factor up once per leg and multiply column-wise instead of running a
# Python lambda over every row. An unknown code still raises KeyError.
# NOTE(review): 'still', 'tilting' and 'inexistent' carry the same 99999999 factor
# as 'unknown', but only 'unknown' is removed below — confirm whether legs with
# those modes can occur, since they would dominate the totals.
co2_factor = np.asarray(
    [transp_mode_dict[str(int(mode_code))]['co2'] for mode_code in all_legs_tmp['correctedModeOfTransport']],
    dtype=float,
)
all_legs_tmp['co2'] = (all_legs_tmp['legDistance']/1000) * co2_factor

# Total per mode, largest first.
tot_co2_mode = all_legs_tmp.groupby('correctedModeOfTransport_str')['co2'].sum().reset_index().sort_values('co2', ascending=False)
# remove unknown
# `.copy()` so the column updates below are not made on a slice.
tot_co2_mode = tot_co2_mode[tot_co2_mode['correctedModeOfTransport_str'] != 'unknown'].copy()
# Scale by 1/1000 (the charts label this column 'CO2 (Kg)') and add each mode's share in percent.
tot_co2_mode['co2'] = tot_co2_mode['co2']/1000
tot_co2_mode['co2_perc'] = tot_co2_mode['co2']/tot_co2_mode['co2'].sum()*100
tot_co2_mode.head()

In [None]:
# Bar chart of the total CO2 per transport mode.
sns.set_style("whitegrid")

fig, ax = plt.subplots(figsize=(12,12))
sns.barplot(data=tot_co2_mode, x="correctedModeOfTransport_str", y='co2', ax=ax)
# Axis label typo fixed ('Tranport' -> 'Transport') to match the other charts.
ax.set(xlabel='Transport mode', ylabel='CO2 (Kg)')
ax.set_title('Total Co2 per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Total label above each bar (no decimals).
for bar in ax.patches:
    ax.annotate("%.0f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Apply the layout before saving so the PNG used by the report includes it.
fig.tight_layout()
fig.savefig(img_path + "tot_co2_mode_allusers.png")

In [None]:
# Report: open the CO2 chapter (level-1 heading), then the total-CO2 section.
document.add_heading('Co2 per mode', level=1)
document.add_heading('Total CO2 - All Users', level=2)

# Embed the saved chart, 7 inches wide, in a run of its own paragraph.
picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'tot_co2_mode_allusers.png', width=Inches(7.0))

In [None]:
# Bar chart: percentage of the total CO2 per transport mode, all users.
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")

sns.barplot(data=tot_co2_mode, x="correctedModeOfTransport_str", y='co2_perc', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Percentage CO2 '
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Percentage of Co2 per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the image embedded in the report never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "perc_co2_mode_allusers.png")

In [None]:
# Report: percentage-CO2 section with its chart (7 inches wide).
document.add_heading('Percentage CO2 - All Users', level=2)
picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'perc_co2_mode_allusers.png', width=Inches(7.0))

**Group all transport modes with a share below 1% into a single set named "other", and the train types (intercityTrain, highSpeedTrain) into "train"**

In [None]:
# Step 1: relabel both train variants as "train" and merge their rows.
tot_co2_mode['correctedModeOfTransport_str'] = [
    "train" if mode in ['intercityTrain', 'highSpeedTrain'] else mode
    for mode in tot_co2_mode['correctedModeOfTransport_str']
]
tot_co2_mode = (
    tot_co2_mode.groupby('correctedModeOfTransport_str')
    .sum()
    .reset_index()
    .sort_values('co2_perc', ascending=False)
)

# Step 2: every mode whose share is below 1% is relabelled "other" and merged.
tot_co2_mode['correctedModeOfTransport_str'] = [
    mode if share >= 1 else "other"
    for mode, share in zip(tot_co2_mode['correctedModeOfTransport_str'], tot_co2_mode['co2_perc'])
]
tot_co2_mode = (
    tot_co2_mode.groupby('correctedModeOfTransport_str')
    .sum()
    .reset_index()
    .sort_values('co2_perc', ascending=False)
)

In [None]:
# Bar chart: total CO2 per (grouped) transport mode, all users.
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()
sns.set_style("whitegrid")

sns.barplot(data=tot_co2_mode, x="correctedModeOfTransport_str", y='co2', ax=ax).set(
    xlabel='Transport mode',
    ylabel='CO2 (Kg)'
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Total Co2 per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.0f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the image embedded in the report never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "tot_co2_mode_alluser_group.png")

In [None]:
# Report: grouped total-CO2 section with its chart (7 inches wide).
document.add_heading(
    'Total Co2 per mode  - all users - All transport modes < 1 => "other" and all train types in "Train"',
    level=2,
)
picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'tot_co2_mode_alluser_group.png', width=Inches(7.0))

In [None]:
# Bar chart: percentage of the total CO2 per (grouped) transport mode, all users.
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")

sns.barplot(data=tot_co2_mode, x="correctedModeOfTransport_str", y='co2_perc', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Percentage CO2 '
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Percentage of Co2 per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the image embedded in the report never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "perc_co2_mode_alluser_group.png")

In [None]:
# Report: grouped percentage-CO2 section with its chart (7 inches wide).
document.add_heading(
    'Percentage Co2 per mode  - all users - All transport modes < 1 => "other" and all train types in "Train"',
    level=2,
)
picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'perc_co2_mode_alluser_group.png', width=Inches(7.0))

<a id='total_distance_transp_mode'></a>
### Percentage of total distance per transport mode 


In [None]:
# Total travelled distance per transport mode (legs of 'unknown' mode are
# excluded), largest first, plus each mode's share of the total in percent.
transport_mode_share = (
    all_legs[~all_legs['correctedModeOfTransport_str'].isin(['unknown'])]
    .groupby('correctedModeOfTransport_str')['legDistance']
    .sum()
    .reset_index()
    .sort_values(by='legDistance', ascending=False)
    .rename(columns={'correctedModeOfTransport_str': 'transportMode',
                     'legDistance': 'tot_distance'})
)
transport_mode_share['frel'] = (
    transport_mode_share['tot_distance'] / transport_mode_share['tot_distance'].sum() * 100
)


In [None]:
# Bar chart: percentage of the total travelled distance per transport mode.
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

sns.barplot(data=transport_mode_share, x="transportMode", y='frel', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Percentage of distance'
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Percentage of total travel distance per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the image embedded in the report never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "perc_trav_dist_mode_allusers.png")

In [None]:
# Report: open the distance-share chapter (level-1 heading), then the
# all-users section with its chart (7 inches wide).
document.add_heading('Percentage of total travel distance per mode', level=1)
document.add_heading('All Users', level=2)

picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'perc_trav_dist_mode_allusers.png', width=Inches(7.0))

**Group all transport modes with a share below 1% into a single set named "other", and the train types (intercityTrain, highSpeedTrain) into "train"**

In [None]:
# Step 1: relabel both train variants as "train" and merge their rows.
transport_mode_share['transportMode'] = [
    "train" if mode in ['intercityTrain', 'highSpeedTrain'] else mode
    for mode in transport_mode_share['transportMode']
]
transport_mode_share = (
    transport_mode_share.groupby('transportMode')
    .sum()
    .reset_index()
    .sort_values('frel', ascending=False)
)

# Step 2: every mode whose share is below 1% is relabelled "other" and merged.
transport_mode_share['transportMode'] = [
    mode if share >= 1 else "other"
    for mode, share in zip(transport_mode_share['transportMode'], transport_mode_share['frel'])
]
transport_mode_share = (
    transport_mode_share.groupby('transportMode')
    .sum()
    .reset_index()
    .sort_values('frel', ascending=False)
)
transport_mode_share

In [None]:
# Bar chart: percentage of the total travelled distance per (grouped) mode.
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

sns.barplot(data=transport_mode_share, x="transportMode", y='frel', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Percentage of distance'
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Percentage of total travel distance per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the saved PNG/PDF never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "perc_dist_mode_allusers.png")
plt.savefig(img_path + "perc_dist_mode_allusers.pdf")

In [None]:
# Report: grouped variant of the distance-share chart.
# Only a level-2 heading is added here: the level-1 chapter heading
# 'Percentage of total travel distance per mode' is already written by the
# all-users cell of this chapter, and repeating it produced a duplicate
# chapter in the report (the other "grouped" sections add level 2 only).
document.add_heading('All transport modes < 1 => "other" and all train types in "Train"', level=2)

# Embed the saved chart, 7 inches wide, in a run of its own paragraph.
p = document.add_paragraph()
r = p.add_run()
r.add_picture(img_path + 'perc_dist_mode_allusers.png', width=Inches(7.0))

<a id='perc_distance_transp_mode_main'></a>
### Percentage of distance (considering the main mode)

In [None]:
# Keep rows of class 'Leg' and order them so that, within each trip, the
# longest leg (by legDistance) comes first.
legs_in_multi_legs_trips = (
    all_legs[all_legs['class'] == 'Leg']
    .sort_values(['tripid', 'legDistance'], ascending=[True, False])
)
# 1-based position of each leg inside its trip, following the order above.
legs_in_multi_legs_trips['rank'] = legs_in_multi_legs_trips.groupby(['tripid']).cumcount() + 1

# Rank 1 is the longest leg of the trip.
longest_legs = legs_in_multi_legs_trips[legs_in_multi_legs_trips['rank'] == 1]

# Number of longest legs per transport mode, largest first, and the share of
# each mode in percent.
transport_mode_share = (
    longest_legs.groupby('correctedModeOfTransport_str')['legid']
    .size()
    .reset_index()
    .sort_values(by='legid', ascending=False)
)
transport_mode_share.columns = ['transportMode', '#legs']
transport_mode_share['frel'] = transport_mode_share['#legs'] / transport_mode_share['#legs'].sum() * 100

In [None]:
# Bar chart: mode share counting only the longest leg of each trip.
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

sns.barplot(data=transport_mode_share, x="transportMode", y='frel', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Percentage of legs'
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Percentage mode share based on the total count for each mode\ncounting only the longest (in distance) “main” mode - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the saved PNG/PDF never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "mode_share_longest_dist_allusers.png")
plt.savefig(img_path + "mode_share_longest_dist_allusers.pdf")


In [None]:
# Report: open the main-mode share chapter (level-1 heading), then the
# all-users section with its chart (7 inches wide).
document.add_heading(
    'Percentage mode share based on the total count for each mode\ncounting only the longest (in distance) “main” mode',
    level=1,
)
document.add_heading('All Users', level=2)

picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'mode_share_longest_dist_allusers.png', width=Inches(7.0))

**Group all transport modes with a share below 1% into a single set named "other", and the train types (intercityTrain, highSpeedTrain) into "train"**

In [None]:
# Step 1: relabel both train variants as "train" and merge their rows.
transport_mode_share['transportMode'] = [
    "train" if mode in ['intercityTrain', 'highSpeedTrain'] else mode
    for mode in transport_mode_share['transportMode']
]
transport_mode_share = (
    transport_mode_share.groupby('transportMode')
    .sum()
    .reset_index()
    .sort_values('frel', ascending=False)
)

# Step 2: every mode whose share is below 1% is relabelled "other" and merged.
transport_mode_share['transportMode'] = [
    mode if share >= 1 else "other"
    for mode, share in zip(transport_mode_share['transportMode'], transport_mode_share['frel'])
]
transport_mode_share = (
    transport_mode_share.groupby('transportMode')
    .sum()
    .reset_index()
    .sort_values('frel', ascending=False)
)

In [None]:
# Bar chart: grouped mode share counting only the longest leg of each trip.
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

sns.barplot(data=transport_mode_share, x="transportMode", y='frel', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Percentage of legs'
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Percentage mode share based on the total count for each mode\ncounting only the longest (in distance) “main” mode  - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the saved PNG/PDF never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "mode_share_longest_dist_allusers_group.png")
plt.savefig(img_path + "mode_share_longest_dist_allusers_group.pdf")

In [None]:
# Report: grouped main-mode share section with its chart (7 inches wide).
document.add_heading('All transport modes < 1 => "other" and all train types in "Train"', level=2)
picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'mode_share_longest_dist_allusers_group.png', width=Inches(7.0))

<a id='avg_travel_time_permode'></a>
### Average legs travel time per mode

In [None]:
# Mean inferred leg duration (minutes) per transport mode, longest first.
avg_leg_time = (
    all_legs.groupby('correctedModeOfTransport_str')['inferred_leg_duration_min']
    .mean()
    .reset_index()
    .sort_values('inferred_leg_duration_min', ascending=False)
)

In [None]:
# Bar chart: average duration of a trip leg per transport mode, all users.
rcParams['font.size'] = 16
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

sns.barplot(data=avg_leg_time, x="correctedModeOfTransport_str", y='inferred_leg_duration_min', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Time (min) '
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Average time per trip leg per mode  - all users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the saved PNG/PDF never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "avg_time_leg_mode_all_users.png")
plt.savefig(img_path + "avg_time_leg_mode_all_users.pdf")


In [None]:
# Report: open the average-leg-time chapter (level-1 heading), then the
# all-users section with its chart (7 inches wide).
document.add_heading('Average time per trip leg per mode', level=1)
document.add_heading('All Users', level=2)

picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'avg_time_leg_mode_all_users.png', width=Inches(7.0))

<a id='avg_assessment_permode' ></a>
###  Average assessment per mode – lousiness vs greatness

The variable `overallScore` is based on the evaluation of the whole trip, not of a single leg.
<br> We therefore ascribe a main mode to each trip, based on its longest leg (by distance).

In [None]:
# Keep rows of class 'Leg' and order them so that, within each trip, the
# longest leg (by legDistance) comes first.
legs_in_multi_legs_trips = (
    all_legs[all_legs['class'] == 'Leg']
    .sort_values(['tripid', 'legDistance'], ascending=[True, False])
)
# 1-based position of each leg inside its trip, following the order above.
legs_in_multi_legs_trips['rank'] = legs_in_multi_legs_trips.groupby(['tripid']).cumcount() + 1

# Rank 1 is the longest leg of the trip; its mode stands for the whole trip.
longest_legs = legs_in_multi_legs_trips[legs_in_multi_legs_trips['rank'] == 1]
ovscore_trip = trips_df[['tripid', 'overallScore']]

# Attach the trip-level score to each longest leg and keep positive scores only.
longest_legs_1 = longest_legs.merge(ovscore_trip, on='tripid', how='left')
longest_legs_1 = longest_legs_1[longest_legs_1['overallScore'] > 0]

# Mean overall score per transport mode, best first.
overallScore_x_transp = (
    longest_legs_1.groupby('correctedModeOfTransport_str')['overallScore']
    .mean()
    .reset_index()
    .sort_values(by='overallScore', ascending=False)
)
overallScore_x_transp.head()

In [None]:
# Bar chart: average overall trip assessment per (main) transport mode.
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

sns.barplot(data=overallScore_x_transp, x="correctedModeOfTransport_str", y='overallScore', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Average assessment '
)

plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title('Average assessment per mode - feeling of lousiness vs greatness  - All users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=14, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the saved PNG/PDF never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "avg_ass_mode_lous_great_allusers.png")
plt.savefig(img_path + "avg_ass_mode_lous_great_allusers.pdf")

In [None]:
# Report: open the average-assessment chapter (level-1 heading), then the
# all-users section with its chart (7 inches wide).
document.add_heading(
    'Average assessment per mode - feeling of lousiness vs greatness (main legs considering distance)',
    level=1,
)
document.add_heading('All Users', level=2)
picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'avg_ass_mode_lous_great_allusers.png', width=Inches(7.0))

<a id='worth_index' ></a> 
###  Worthwhileness Index

In [None]:
### Value from trip
# Load the per-leg "value from trip" ratings and keep the columns used below.
value_columns = ['tripid', 'legid', 'valueFromTrip', 'value']
all_values_from_trip = pd.read_pickle(input_path + 'values_from_trip.pkl')[value_columns]

# Attach each leg's transport mode (inner join on trip and leg ids).
all_legs_tmp = all_legs[['tripid', 'legid', 'correctedModeOfTransport_str']]
values_from_trip = all_values_from_trip.merge(all_legs_tmp, on=['tripid', 'legid'])

# Mean rating per (mode, value type), without the 'Unknown' value type.
avg_value_from_trip = (
    values_from_trip.groupby(['correctedModeOfTransport_str', 'valueFromTrip'])['value']
    .mean()
    .reset_index()
)
avg_value_from_trip = avg_value_from_trip[avg_value_from_trip['valueFromTrip'] != 'Unknown']


In [None]:
# Grouped bar chart: average rating of each value type per transport mode.
fig = plt.figure(figsize=(15, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

sns.barplot(data=avg_value_from_trip, x="correctedModeOfTransport_str", y='value',
            hue='valueFromTrip', ax=ax).set(
    xlabel='Transport mode',
    ylabel='Average assessment '
)

# Legend is anchored outside the axes, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.gcf().subplots_adjust(bottom=0.2)  # fallback space for the rotated labels
plt.title(' Paid Work, Personal tasks, Enjoyment, and Fitness values per mode on average  - All users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for bar in ax.patches:
    ax.annotate("%.2f" % bar.get_height(),
                (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                ha='center', va='center', fontsize=10, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the saved PNG/PDF never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "ww_values_allusers.png")
plt.savefig(img_path + "ww_values_allusers.pdf")

In [None]:
# Report: open the worthwhileness-values chapter (level-1 heading), then the
# all-users section with its chart (7 inches wide).
document.add_heading('Paid Work, Personal tasks, Enjoyment, and Fitness values per mode on average', level=1)
document.add_heading('All Users', level=2)

picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'ww_values_allusers.png', width=Inches(7.0))

In [None]:
# Heatmap: transport modes (rows) x value types (columns), mean rating per cell.
result = avg_value_from_trip.pivot(index='correctedModeOfTransport_str', columns='valueFromTrip', values='value')

# Rows in descending order of the mode name.
result.sort_index(level=0, ascending=False, inplace=True)

# Bigger than normal fonts
rcParams['font.size'] = 18

fig = plt.figure(figsize=(18, 15))
ax = plt.gca()

plt.gcf().subplots_adjust(bottom=0.2, left=0.2)  # fallback space for labels

sns.heatmap(result, annot=True, fmt=".2f", cmap="YlGnBu", ax=ax).set(
    xlabel='worthwhileness elements',
    ylabel='Transport mode'
)

plt.xticks(rotation=90)
plt.yticks(rotation=0)

plt.title('Paid Work, Personal tasks, Enjoyment, and Fitness values per mode on average  - All users',  y=1)

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the saved PNG/PDF never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "heat_ww_values_allusers.png")
plt.savefig(img_path + "heat_ww_values_allusers.pdf")


In [None]:
# Report: heatmap section with its image (7 inches wide).
document.add_heading('All Users heatmap', level=2)
picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'heat_ww_values_allusers.png', width=Inches(7.0))

In [None]:
# Same heatmap as above, but aggregated by transport category instead of mode.
all_legs_tmp = all_legs[['tripid','legid','transp_category']]
values_from_trip = pd.merge(all_values_from_trip, all_legs_tmp, on=['tripid','legid'])

# Mean rating per (category, value type), without the 'Unknown' value type.
avg_value_from_trip = values_from_trip.groupby(['transp_category','valueFromTrip'])['value'].mean().reset_index()
avg_value_from_trip = avg_value_from_trip[avg_value_from_trip['valueFromTrip'] != 'Unknown']

# Categories as rows (descending by name), value types as columns.
result = avg_value_from_trip.pivot(index='transp_category', columns='valueFromTrip', values='value')
result.sort_index(level=0, ascending=False, inplace=True)

# Bigger than normal fonts
rcParams['font.size'] = 18
fig = plt.figure(figsize=(18, 15))
ax = plt.gca()

plt.gcf().subplots_adjust(bottom=0.2, left=0.2)  # fallback space for labels

sns.heatmap(result, annot=True, fmt=".2f", cmap="YlGnBu", ax=ax).set(
    xlabel='worthwhileness elements',
    ylabel='Transport mode'
)

plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.title('Paid Work, Personal tasks, Enjoyment, and Fitness values per mode on average  - All users',  y=1)

# Fit the layout BEFORE saving: tight_layout() used to run after savefig(),
# so the saved PNG/PDF never received the fitted margins.
plt.tight_layout()
plt.savefig(img_path + "heat_ww_values_allusers_cat.png")
plt.savefig(img_path + "heat_ww_values_allusers_cat.pdf")

In [None]:
# Report: per-category heatmap section with its image (7 inches wide).
document.add_heading('All Users heatmap by category', level=2)
picture_run = document.add_paragraph().add_run()
picture_run.add_picture(img_path + 'heat_ww_values_allusers_cat.png', width=Inches(7.0))

**Considering only the most common transport modes (walking, car, bicycle, bus)**

In [None]:
# Rebuild the per-mode averages (the previous cell overwrote
# avg_value_from_trip with the per-category version).
all_legs_tmp = all_legs[['tripid', 'legid', 'correctedModeOfTransport_str']]
values_from_trip = all_values_from_trip.merge(all_legs_tmp, on=['tripid', 'legid'])
avg_value_from_trip = (
    values_from_trip.groupby(['correctedModeOfTransport_str', 'valueFromTrip'])['value']
    .mean()
    .reset_index()
)
avg_value_from_trip = avg_value_from_trip[avg_value_from_trip['valueFromTrip'] != 'Unknown']

# Select transport modes
is_selected_mode = avg_value_from_trip['correctedModeOfTransport_str'].isin(['walking', 'car', 'bicycle', 'bus'])
avg_value_from_trip_1 = avg_value_from_trip[is_selected_mode]

fig = plt.figure(figsize=(15, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams['figure.figsize'] = 12,8

g = sns.barplot(
    data=avg_value_from_trip_1,
    x="correctedModeOfTransport_str",
    y='value',
    hue='valueFromTrip',
).set(xlabel='Transport mode', ylabel='Average assessment ')

# Legend is anchored outside the axes, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title(' Paid Work, Personal tasks, Enjoyment, and Fitness values per mode on average  - All users', y=1.)
plt.xticks(rotation=90)

# Write each bar's value as vertical text 20 points above the bar top.
for p in ax.patches:
    bar_top = p.get_height()
    bar_center = p.get_x() + p.get_width() / 2.
    ax.annotate("%.2f" % bar_top, (bar_center, bar_top),
                ha='center', va='center', fontsize=10, color='black', rotation=90,
                xytext=(0, 20), textcoords='offset points')

plt.tight_layout()

### Save

In [None]:
# Write the assembled Word report to disk at out_path + report_name.
document.save(out_path + report_name)

In [None]:
# Show the path of the saved report as the cell output.
out_path + report_name