# H15

**Obj:** Travel comfort factors
<br> To explore how VTT is influenced by the perceived comfort of the locations visited and of the journey itself.

## Questions

- [Q1](#Q1): What are the top positive and negative experience factors?
- [Q2](#Q2): What is the ranking of 'today's weather' as an experience factor among all other factors?
- [Q3](#Q3): What is the distribution of top satisfaction/dissatisfaction factors related to Crowdedness and Seating availability in public transport modes?
- [Q4](#Q4): What is the distribution of top satisfaction/dissatisfaction factors related to Perceived Safety in cycling modes and public transport modes?
- [Q5](#Q5): What is the distribution of top satisfaction/dissatisfaction factors related to Traffic congestion in car modes?
- [Q6](#Q6): What are the main enabling factors for activities ?
- [Q7](#Q7): What is the correlation between worthwhileness assessments and satisfaction factors?
- [Q8](#Q8): What is the correlation between worthwhileness assessments and ‘today’s weather’ as an experience factor?
- [Q9](#Q9): What is the correlation between worthwhileness assessments and satisfaction factors and activities?
- [Q13](#Q13): What is the distribution of worthwhileness and mood ratings among different weather scenarios?
- [Q14](#Q14): What is the distribution of transport modes and worthwhileness ratings among different weather scenarios?

**Note:** every analysis should be done for all users and also broken down by gender.

**VTT: "Value of Travel Time"**

In [None]:
import os
import sys
import json
import math
import datetime
import importlib
import itertools
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn.metrics as metrics
import matplotlib.pyplot as plt

from pprint import pprint
from pandas.io.json import json_normalize
from matplotlib import rcParams

%matplotlib inline

from IPython.core.display import display, HTML

# Widen the notebook container. The <style> element must be closed with a
# well-formed "</style>" tag, otherwise the browser has to guess where it ends.
display(HTML("<style>.container { width:95% !important; }</style>"))

In [None]:
# --- Input / output locations ------------------------------------------------
meta_data_path = "../../data-campaigns/meta-data/"

input_path = "../../2019-12-16.out/"
# File name of the pickled legs table, read relative to input_path.
legs = "all_legs_merged_no_outlier_0.01.pkl"

# Tables and figures currently share the same output folder.
out_path = "../../2019-12-16.out/hypothesis/H15/"
img_path = "../../2019-12-16.out/hypothesis/H15/"

# --- Plot defaults -----------------------------------------------------------
rcParams.update(
    {
        "axes.titlepad": 45,
        "font.size": 16,
        "figure.figsize": (12, 8),
    }
)
sns.set_style("whitegrid")

In [None]:
# Create every folder this notebook writes into. The Q1 tables go to the
# "gender/", "country/" and "transp_category/" sub-folders of out_path and
# DataFrame.to_csv does not create missing directories, so they must exist
# up front. dict.fromkeys removes duplicates (img_path may equal out_path)
# while keeping the order.
output_dirs = dict.fromkeys(
    [
        out_path,
        img_path,
        out_path + "gender/",
        out_path + "country/",
        out_path + "transp_category/",
    ]
)
for target_dir in output_dirs:
    try:
        os.makedirs(os.path.abspath(target_dir))
    except FileExistsError:
        print("Directory '{}' already exists".format(target_dir), file=sys.stderr)

In [None]:
# Leg-level and trip-level tables.
all_legs = pd.read_pickle(input_path + legs)
trips_df = pd.read_pickle(input_path + "trips_df.pkl")

# Transport categories: each key of the JSON maps to an iterable of modes.
with open(input_path + "category_transp_mode_dict.json", "r") as f:
    category_transp_mode_dict = json.load(f)

# Reverse lookup: transport mode -> the category it is listed under.
inverted_category_transp_mode_dict = {
    mode: category
    for category, modes in category_transp_mode_dict.items()
    for mode in modes
}

# TODO: decide whether "unknown" should be removed as a transport category.

# Size of the data set.
print("Legs:", len(all_legs))
print("Trips: ", len(all_legs.tripid.unique()))
print("Users:", len(all_legs.userid.unique()))
print()

In [None]:
### read experience factors
all_factors = pd.read_pickle(input_path + "all_factors.pkl")

# Keep only records where exactly one of the two flags is set:
#   - minus=F and plus=F: neither flag set
#   - minus=T and plus=T: both flags set (3% of obs)
neither_flag = (all_factors["minus"] == False) & (all_factors["plus"] == False)
both_flags = (all_factors["minus"] == True) & (all_factors["plus"] == True)
all_factors = all_factors[~(neither_flag | both_flags)]

# select only useful cols
all_factors = all_factors[
    [
        "correctedModeOfTransport_str",
        "legid",
        "minus",
        "plus",
        "tripid",
        "factor",
        "legStartDay",
    ]
]

# add leg-level info (default inner join: factors whose legid is not in
# all_legs are dropped)
all_factors = all_factors.merge(
    all_legs[
        ["legid", "wastedTime", "gender", "age", "onCampaigns", "transp_category"]
    ],
    on="legid",
)

# Keep legs with a usable wastedTime (strictly between 0 and 6) and a known
# transport category. The explicit .copy() makes all_factors an independent
# frame, so the column assignments made on it here and in later cells do not
# trigger pandas' SettingWithCopyWarning.
valid_rating = (all_factors.wastedTime > 0) & (all_factors.wastedTime < 6)
has_category = all_factors.transp_category.notna()
all_factors = all_factors[valid_rating & has_category].copy()
all_factors["wastedTime"] = all_factors["wastedTime"].round(0)

# checks
print("all records:", len(all_factors))
xx = all_factors[(all_factors["minus"] == False) & (all_factors["plus"] == True)]
print("only plus: ", len(xx))
xx = all_factors[(all_factors["minus"] == True) & (all_factors["plus"] == False)]
print("only minus: ", len(xx))

# The two possible impact labels of a factor record.
impact_lst = ["plus", "minus"]

all_factors.head()

In [None]:
# define for plots
age_range = list(all_legs.age.unique())


def _che_to_other(campaign):
    """Map the 'CHE' campaign onto the class Other ('AAA')."""
    return "AAA" if campaign == "CHE" else campaign


# assign 'CHE' to the class Other (AAA)
all_legs["onCampaigns"] = all_legs["onCampaigns"].apply(_che_to_other)

# all_factors took its onCampaigns column from all_legs *before* this
# relabelling, so it has to be relabelled as well. Otherwise its 'CHE' rows
# match no entry of top10 and silently disappear from every by-country
# table and plot.
all_factors = all_factors.assign(
    onCampaigns=all_factors["onCampaigns"].apply(_che_to_other)
)

top10 = list(all_legs.onCampaigns.unique())

# transp_category list
tc_lst = all_factors.transp_category.unique()

# gender list
gender_lst = ["Male", "Female"]

<a id='Q1' ></a>
### Q1:   What are the top positive and negative experience factors?

The top positive and negative experience factors are already available in **tables attached to H2**

In [None]:
# Split the factor records by sign. Both comparisons are spelled out so that
# only rows with one flag True and the other flag False are selected.
plus_set = all_factors["plus"] == True
plus_unset = all_factors["plus"] == False
minus_set = all_factors["minus"] == True
minus_unset = all_factors["minus"] == False

plus_factors = all_factors.loc[minus_unset & plus_set]
minus_factors = all_factors.loc[minus_set & plus_unset]

In [None]:
### BY GENDER
path = out_path + "gender/"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(path, exist_ok=True)

for g in gender_lst:
    # Same table for the positive and the negative factors: number of
    # records per factor, most frequent first.
    for sign, factors_df in (("plus", plus_factors), ("minus", minus_factors)):
        tmp = (
            factors_df[factors_df.gender == g]
            .groupby("factor")
            .size()
            .sort_values(ascending=False)
            .reset_index(name="nlegs")
        )
        # save
        tmp.to_csv(
            path + "h15_q1_table_" + sign + "_" + g.lower() + ".csv", index=False
        )

In [None]:
### BY COUNTRY
path = out_path + "country/"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(path, exist_ok=True)

for c in top10:
    # Same table for the positive and the negative factors: number of
    # records per factor, most frequent first.
    for sign, factors_df in (("plus", plus_factors), ("minus", minus_factors)):
        tmp = (
            factors_df[factors_df.onCampaigns == c]
            .groupby("factor")
            .size()
            .sort_values(ascending=False)
            .reset_index(name="nlegs")
        )
        # save
        tmp.to_csv(path + "h15_q1_table_" + sign + "_" + c + ".csv", index=False)

In [None]:
### BY TRANSPORT CATEGORY
path = out_path + "transp_category/"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(path, exist_ok=True)

for tc in tc_lst:
    # Same table for the positive and the negative factors: number of
    # records per factor, most frequent first.
    for sign, factors_df in (("plus", plus_factors), ("minus", minus_factors)):
        tmp = (
            factors_df[factors_df.transp_category == tc]
            .groupby("factor")
            .size()
            .sort_values(ascending=False)
            .reset_index(name="nlegs")
        )
        # save
        tmp.to_csv(path + "h15_q1_table_" + sign + "_" + tc + ".csv", index=False)

<a id='Q2' ></a>
### Q2: What is the ranking of 'today's weather' as an experience factor among all other factors?

**ALREADY ANSWERED**

<a id='Q3' ></a>
### Q3: What is the distribution of top satisfaction/dissatisfaction factors related to Crowdedness and Seating availability in public transport modes?

To explore comfort travel factors in public transport.

In [None]:
# Records of the "Crowdedness_Seating" factor, excluding the private
# motorized category (crowdedness reported for a car). .copy() gives an
# independent frame so the new column below is not written to a slice.
crowdness = all_factors[
    (all_factors.factor == "Crowdedness_Seating")
    & (all_factors.transp_category != "private_motorized")
].copy()

# create a column with the impact: "plus" / "minus" when exactly that flag is
# set, NaN otherwise. Boolean masks replace the row-by-row iterrows loop, and
# the column is created with object dtype so that strings can be stored
# without a dtype conflict.
plus_only = (crowdness["minus"] == False) & (crowdness["plus"] == True)
minus_only = (crowdness["minus"] == True) & (crowdness["plus"] == False)

crowdness["impact"] = pd.Series(np.nan, index=crowdness.index, dtype="object")
crowdness.loc[plus_only, "impact"] = "plus"
crowdness.loc[minus_only, "impact"] = "minus"

crowdness.head()

In [None]:
# all and gender
mode_lst = [
    "bus",
    "bus_long",
    "ferry",
    "high_speed_train",
    "intercity_train",
    "subway",
    "train",
    "tram",
]
count_mode_dict = dict(all_legs.groupby("correctedModeOfTransport_str").size())


fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 7), sharey=True)
axes = axes.ravel()

# One panel for all users, then one panel per gender.
panels = [("All", crowdness)]
panels += [(g, crowdness[crowdness.gender == g]) for g in gender_lst]

for ax, (title, panel_df) in zip(axes, panels):
    tmp = (
        panel_df.groupby(["correctedModeOfTransport_str", "impact"])
        .size()
        .reset_index(name="count")
    )
    # order / hue_order pin the bar positions and colours, so every panel
    # shows the same modes in the same place even when a subset lacks some
    # of them. Overwriting the tick labels by hand would mislabel the bars
    # in that case.
    sns.barplot(
        x="correctedModeOfTransport_str",
        y="count",
        hue="impact",
        order=mode_lst,
        hue_order=["minus", "plus"],
        data=tmp,
        ax=ax,
    )
    ax.set_title(title)
    ax.tick_params(axis="x", rotation=90)
    ax.set_xlabel("Mode of transport")

# Keep a single legend, on the first panel.
axes[0].legend(loc="best", fontsize="x-small")
for ax in axes[1:]:
    if ax.get_legend() is not None:
        ax.get_legend().remove()

plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q3_abs_all_gender.png", bbox_inches="tight")

In [None]:
# all and gender
mode_lst = [
    "bus",
    "bus_long",
    "ferry",
    "high_speed_train",
    "intercity_train",
    "subway",
    "train",
    "tram",
]
# Number of legs per transport mode, used as the denominator below.
count_mode_dict = dict(all_legs.groupby("correctedModeOfTransport_str").size())


fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 7), sharey=True)
axes = axes.ravel()

# One panel for all users, then one panel per gender.
panels = [("All", crowdness)]
panels += [(g, crowdness[crowdness.gender == g]) for g in gender_lst]

for ax, (title, panel_df) in zip(axes, panels):
    tmp = (
        panel_df.groupby(["correctedModeOfTransport_str", "impact"])
        .size()
        .reset_index(name="count")
    )
    # Count relative to all legs travelled with that mode.
    tmp["rel_count"] = tmp["count"] / tmp["correctedModeOfTransport_str"].map(
        count_mode_dict
    )

    # order / hue_order pin the bar positions and colours, so every panel
    # shows the same modes in the same place even when a subset lacks some
    # of them. Overwriting the tick labels by hand would mislabel the bars
    # in that case.
    sns.barplot(
        x="correctedModeOfTransport_str",
        y="rel_count",
        hue="impact",
        order=mode_lst,
        hue_order=["minus", "plus"],
        data=tmp,
        ax=ax,
    )
    ax.set_title(title)
    ax.tick_params(axis="x", rotation=90)
    ax.set_xlabel("Mode of transport")

# Keep a single legend, on the first panel.
axes[0].legend(loc="best", fontsize="x-small")
for ax in axes[1:]:
    if ax.get_legend() is not None:
        ax.get_legend().remove()

plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q3_all_gender.png", bbox_inches="tight")

In [None]:
### BY COUNTRY

# Five panels per row; the number of rows follows the number of campaigns so
# that no campaign is left without an axis.
ncols = 5
nrows = max(1, math.ceil(len(top10) / ncols))
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 10), sharex=True)
axes = axes.ravel()

for i, (ax, country) in enumerate(zip(axes, top10)):

    tmp = crowdness[crowdness.onCampaigns == country]
    val_count = (
        tmp.groupby(["correctedModeOfTransport_str", "impact"])
        .size()
        .reset_index(name="count")
    )

    # order / hue_order pin the bar positions and colours, so a campaign
    # without some of the modes still gets correctly labelled bars.
    sns.barplot(
        x="correctedModeOfTransport_str",
        y="count",
        hue="impact",
        order=mode_lst,
        hue_order=["minus", "plus"],
        data=val_count,
        ax=ax,
    )
    ax.set_title(country)
    ax.tick_params(axis="x", rotation=90)
    ax.set_xlabel("Mode of transport")

    # Keep a single legend, on the first panel.
    if i == 0:
        ax.legend(loc="best", fontsize="x-small")
    elif ax.get_legend() is not None:
        ax.get_legend().remove()

# Hide the panels of the grid that received no campaign.
for ax in axes[len(top10):]:
    ax.set_visible(False)

plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q3_country.png", bbox_inches="tight")

<a id='Q4' ></a>
### Q4: What is the distribution of top satisfaction/dissatisfaction factors related to Perceived Safety in cycling modes and public transport modes?

To explore factors related to Perceived Safety in cycling ('Road/path availability and safety', 'Traffic Signals/Crossings' and 'Cars/Other vehicles') and public transport ('Security and Safety').

In [None]:
# Factors taken as "perceived safety" for cycling and for public transport.
safety_lst_cycl = [
    "Road_Path_Availability_And_Safety",
    "Traffic_Signals_Crossings",
    "Cars_Other_Vehicles",
]
safety_lst_publ = ["Security_And_Safety"]

# Row selectors for the two groups of transport categories.
is_cycling = all_factors.transp_category == "cycling_emerging_micromobility"
is_public = all_factors.transp_category.isin(
    ["public_transp_long_dist", "public_transp_short_dist"]
)

# Safety-related records of each group.
safety_cycl = all_factors.loc[all_factors.factor.isin(safety_lst_cycl) & is_cycling]
safety_publ = all_factors.loc[all_factors.factor.isin(safety_lst_publ) & is_public]


def find_impact(plus, minus):
    """Label a factor record from its two flags.

    Returns "plus" when `plus` is True and `minus` is False, "minus" when
    `minus` is True and `plus` is False, and None for any other combination.
    """
    plus_only = (plus == True) & (minus == False)
    minus_only = (plus == False) & (minus == True)

    if plus_only:
        return "plus"
    if minus_only:
        return "minus"
    return None


# Add the impact label ("plus" / "minus" / None) to both safety subsets.
# safety_cycl and safety_publ are filtered selections of all_factors, so the
# column is added with assign(), which returns a new frame, instead of being
# written into the slice (which raises SettingWithCopyWarning).
safety_cycl = safety_cycl.assign(
    impact=[
        find_impact(plus, minus)
        for plus, minus in zip(safety_cycl["plus"], safety_cycl["minus"])
    ]
)
safety_publ = safety_publ.assign(
    impact=[
        find_impact(plus, minus)
        for plus, minus in zip(safety_publ["plus"], safety_publ["minus"])
    ]
)

In [None]:
### ALL AND GENDER - CYCLING
# all and gender

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 7))
axes = axes.ravel()

# One panel for all users, then one panel per gender.
panels = [("All", safety_cycl)]
panels += [(g, safety_cycl[safety_cycl.gender == g]) for g in gender_lst]

for ax, (title, panel_df) in zip(axes, panels):
    tmp = (
        panel_df.groupby(["correctedModeOfTransport_str", "impact"])
        .size()
        .reset_index(name="count")
    )
    # hue_order keeps the minus/plus colours identical in every panel, even
    # when a subset contains only one of the two impacts.
    sns.barplot(
        x="correctedModeOfTransport_str",
        y="count",
        hue="impact",
        hue_order=["minus", "plus"],
        data=tmp,
        ax=ax,
    )
    ax.set_title(title)
    ax.tick_params(axis="x", rotation=90)
    ax.set_xlabel("Mode of transport")

# Keep a single legend, on the first panel.
axes[0].legend(loc="best", fontsize="x-small")
for ax in axes[1:]:
    if ax.get_legend() is not None:
        ax.get_legend().remove()

plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q4_all_gender_cycl.png", bbox_inches="tight")

In [None]:
### ALL AND GENDER - PUBLIC
# all and gender

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 7))
axes = axes.ravel()

# One panel for all users, then one panel per gender.
panels = [("All", safety_publ)]
panels += [(g, safety_publ[safety_publ.gender == g]) for g in gender_lst]

for ax, (title, panel_df) in zip(axes, panels):
    tmp = (
        panel_df.groupby(["correctedModeOfTransport_str", "impact"])
        .size()
        .reset_index(name="count")
    )
    # hue_order keeps the minus/plus colours identical in every panel, even
    # when a subset contains only one of the two impacts.
    sns.barplot(
        x="correctedModeOfTransport_str",
        y="count",
        hue="impact",
        hue_order=["minus", "plus"],
        data=tmp,
        ax=ax,
    )
    ax.set_title(title)
    ax.tick_params(axis="x", rotation=90)
    ax.set_xlabel("Mode of transport")

# Keep a single legend, on the first panel.
axes[0].legend(loc="best", fontsize="x-small")
for ax in axes[1:]:
    if ax.get_legend() is not None:
        ax.get_legend().remove()

plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q4_all_gender_publ.png", bbox_inches="tight")

<a id='Q5' ></a>
### Q5: What is the distribution of top satisfaction/dissatisfaction factors related to Traffic congestion in car modes?

<a id='Q6' ></a>
### Q6: What are the main enabling factors for activities ?

When people report an activity, which positive factors do they report most often?

In [None]:
# read data - ACTIVITIES
all_gen_act = pd.read_pickle(input_path + "all_gen_act.pkl")

# Attach the leg-level attributes needed later (wastedTime, gender, campaign).
leg_info = all_legs[["legid", "wastedTime", "gender", "onCampaigns"]]
all_gen_act = all_gen_act.merge(leg_info, on="legid")


def find_impact(plus, minus):
    """Return the impact label encoded by the `plus` / `minus` flags.

    "plus" if only `plus` is True, "minus" if only `minus` is True, and None
    when the flags are both True, both False, or not comparable to a boolean.
    """
    if (plus == True) & (minus == False):
        label = "plus"
    elif (plus == False) & (minus == True):
        label = "minus"
    else:
        label = None
    return label


# Label every factor record with its impact and keep the positive ones.
# all_factors is the result of earlier boolean filtering, so the column is
# added with assign(), which returns a new frame, rather than being written
# in place (which can raise SettingWithCopyWarning).
all_factors = all_factors.assign(
    impact=[
        find_impact(plus, minus)
        for plus, minus in zip(all_factors["plus"], all_factors["minus"])
    ]
)
pos_impact = all_factors[all_factors.impact == "plus"]

In [None]:
# Activity codes present in the data.
act_lst = all_gen_act.code.unique()

# For every activity: the 5 most frequent positive factors among the legs
# where the activity was reported, outer-merged into one table with one
# column per activity.
for pos, act in enumerate(act_lst):
    legs_with_act = all_gen_act.loc[all_gen_act.code == act, "legid"]

    factor_counts = (
        pos_impact.loc[pos_impact.legid.isin(legs_with_act)].groupby("factor").size()
    )
    tmp = factor_counts.sort_values(ascending=False).reset_index(name=act).head(5)

    if pos == 0:
        heatmap_df = tmp
    else:
        heatmap_df = heatmap_df.merge(tmp, on="factor", how="outer").drop_duplicates()

# A factor outside an activity's top 5 gets a count of 0 for that activity.
heatmap_df = heatmap_df.fillna(0).set_index("factor").astype(int)
heatmap_df

In [None]:
# Heatmap of the top positive factors (rows) per activity (columns).
plt.figure(figsize=(15, 8))
sns.heatmap(heatmap_df, annot=True, fmt="d")
plt.title("All users")
plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q6_all.png", bbox_inches="tight")

In [None]:
### BY GENDER

In [None]:
### BY TC

<a id='Q7' ></a>
### Q7: What is the correlation between worthwhileness assessments and satisfaction factors?

Study whether the impact of these experience factors changes according to mode, purpose, territory/country, and weekday/weekend.

**SATISFACTION**

In [None]:
nrows = 1
ncols = 2
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 5))
axes = axes.ravel()

# Left: number of positive factor records per wastedTime value, all users.
tmp = plus_factors.groupby("wastedTime").size().reset_index(name="count")
sns.barplot(x="wastedTime", y="count", data=tmp, ax=axes[0])
axes[0].set_title("All")

# Right: the same counts split by gender ("Other" is left out).
tmp = plus_factors.groupby(["wastedTime", "gender"]).size().reset_index(name="count")
tmp = tmp[tmp.gender != "Other"]
sns.barplot(x="wastedTime", y="count", hue="gender", data=tmp, ax=axes[1])
axes[1].set_title("By gender")

fig.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q7_all_gender_plus.png", bbox_inches="tight")

In [None]:
#### BY COUNTRIES AND TRANSP_CATEGORY
nrows = 1
ncols = 2
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 6), sharey=True)
axes = axes.ravel()

# Left: mean wastedTime of the positive factor records per campaign.
tmp = plus_factors.groupby("onCampaigns")["wastedTime"].mean().reset_index(name="avg")
axes[0].scatter(x=tmp.onCampaigns, y=tmp.avg, lw=6)
axes[0].set_title("countries")

# Right: the same mean per transport category.
tmp = (
    plus_factors.groupby("transp_category")["wastedTime"].mean().reset_index(name="avg")
)
axes[1].scatter(x=tmp.transp_category, y=tmp.avg, lw=6, marker="v")
axes[1].set_title("transport categories")

# Short tick labels are derived from the categories actually plotted, so
# ticks and labels stay aligned whatever set of categories is present.
# Categories without a short name keep their full name.
short_tc_names = {
    "cycling_emerging_micromobility": "cycling",
    "private_motorized": "private",
    "public_transp_long_dist": "public_long",
    "public_transp_short_dist": "public_short",
}
axes[1].set_xticks(range(len(tmp)))
axes[1].set_xticklabels([short_tc_names.get(tc, tc) for tc in tmp.transp_category])

fig.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q7_country_tc_plus.png", bbox_inches="tight")

**DISSATISFACTION**

In [None]:
nrows = 1
ncols = 2
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 5))
axes = axes.ravel()

# Left: number of negative factor records per wastedTime value, all users.
tmp = minus_factors.groupby("wastedTime").size().reset_index(name="count")
sns.barplot(x="wastedTime", y="count", data=tmp, ax=axes[0])
axes[0].set_title("All")

# Right: the same counts split by gender ("Other" is left out).
tmp = minus_factors.groupby(["wastedTime", "gender"]).size().reset_index(name="count")
tmp = tmp[tmp.gender != "Other"]
sns.barplot(x="wastedTime", y="count", hue="gender", data=tmp, ax=axes[1])
axes[1].set_title("By gender")

fig.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q7_all_gender_minus.png", bbox_inches="tight")

In [None]:
#### BY COUNTRIES AND TRANSP_CATEGORY
nrows = 1
ncols = 2
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 6), sharey=True)
axes = axes.ravel()

# Left: mean wastedTime of the negative factor records per campaign.
tmp = minus_factors.groupby("onCampaigns")["wastedTime"].mean().reset_index(name="avg")
axes[0].scatter(x=tmp.onCampaigns, y=tmp.avg, lw=6)
axes[0].set_title("countries")

# Right: the same mean per transport category.
tmp = (
    minus_factors.groupby("transp_category")["wastedTime"]
    .mean()
    .reset_index(name="avg")
)
axes[1].scatter(x=tmp.transp_category, y=tmp.avg, lw=6, marker="v")
axes[1].set_title("transport categories")

# Short tick labels are derived from the categories actually plotted, so
# ticks and labels stay aligned whatever set of categories is present.
# Categories without a short name keep their full name.
short_tc_names = {
    "cycling_emerging_micromobility": "cycling",
    "private_motorized": "private",
    "public_transp_long_dist": "public_long",
    "public_transp_short_dist": "public_short",
}
axes[1].set_xticks(range(len(tmp)))
axes[1].set_xticklabels([short_tc_names.get(tc, tc) for tc in tmp.transp_category])

fig.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q7_country_tc_minus.png", bbox_inches="tight")

<a id='Q8' ></a>
### Q8: What is the correlation between worthwhileness assessments and ‘today’s weather’ as an experience factor?

Does the weather influence the perceived value of travel time?

In [None]:
# select only today's weather
# NOTE(review): this cell and the Q8 question are about "today's weather",
# but the filter below selects the "Reliability_Of_Travel_Time" factor. This
# looks like a copy-paste leftover - confirm the factor code used for the
# weather in all_factors and replace the string if so.
today_weather = all_factors[all_factors["factor"] == "Reliability_Of_Travel_Time"]
print("legs with today weather", len(today_weather))

# today_weather is a filtered selection of all_factors, so the impact label
# is added with assign(), which returns a new frame, instead of being
# written into the slice (which raises SettingWithCopyWarning).
today_weather = today_weather.assign(
    impact=[
        find_impact(plus, minus)
        for plus, minus in zip(today_weather["plus"], today_weather["minus"])
    ]
)
today_weather.head()

In [None]:
## ALL + GENDER

nrows = 1
ncols = 3
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 5), sharey=True)
axes = axes.ravel()

gender_lst = ["Male", "Female"]

# One panel for all users, then one panel per gender.
panels = [("All", today_weather)]
panels += [(g, today_weather[today_weather.gender == g]) for g in gender_lst]

for i, (ax, (title, panel_df)) in enumerate(zip(axes, panels)):
    tmp = panel_df.groupby(["wastedTime", "impact"]).size().reset_index(name="count")

    # Share of each wastedTime value within its own impact class: the "plus"
    # bars sum to 1 and so do the "minus" bars.
    tmp["rel_count"] = tmp["count"] / tmp.groupby("impact")["count"].transform("sum")

    # hue_order keeps the minus/plus colours identical in every panel.
    sns.barplot(
        x="wastedTime",
        y="rel_count",
        hue="impact",
        hue_order=["minus", "plus"],
        data=tmp,
        ax=ax,
    )
    ax.set_title(title, fontsize=14)

    if i == 0:
        # Single legend, on the first panel.
        ax.legend(fontsize="x-small")
    else:
        if ax.get_legend() is not None:
            ax.get_legend().remove()
        ax.set_ylabel("")

plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q8_all_gender.png", bbox_inches="tight")

In [None]:
### BY TRANSPORT CATEGORY

# Three panels per row; the number of rows follows the number of transport
# categories so that none is left without an axis.
ncols = 3
nrows = max(1, math.ceil(len(tc_lst) / ncols))
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 8), sharey=True)
axes = axes.ravel()

for idx, (ax, c) in enumerate(zip(axes, tc_lst)):

    tmp = today_weather[today_weather.transp_category == c]
    tmp = tmp.groupby(["wastedTime", "impact"]).size().reset_index(name="count")

    # Share of each wastedTime value within its own impact class: the "plus"
    # bars sum to 1 and so do the "minus" bars.
    tmp["rel_count"] = tmp["count"] / tmp.groupby("impact")["count"].transform("sum")

    # hue_order keeps the minus/plus colours identical in every panel.
    sns.barplot(
        x="wastedTime",
        y="rel_count",
        hue="impact",
        hue_order=["minus", "plus"],
        data=tmp,
        ax=ax,
    )
    ax.set_title(c, fontsize=14)
    ax.set_ylabel("relative count", fontsize=12)
    ax.set_xlabel("worthwhileness ratings", fontsize=12)
    ax.tick_params(axis="both", labelsize=12)

    # Single legend, on the first panel.
    if idx == 0:
        ax.legend(fontsize="x-small", loc="upper left")
    elif ax.get_legend() is not None:
        ax.get_legend().remove()

# Hide the panels of the grid that received no category.
for ax in axes[len(tc_lst):]:
    ax.set_visible(False)

plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q8_tc.png", bbox_inches="tight")

In [None]:
### BY COUNTRY

# Five panels per row; the number of rows follows the number of campaigns so
# that no campaign is left without an axis.
ncols = 5
nrows = max(1, math.ceil(len(top10) / ncols))
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 8), sharey=True)
axes = axes.ravel()

for idx, (ax, c) in enumerate(zip(axes, top10)):

    tmp = today_weather[today_weather.onCampaigns == c]
    tmp = tmp.groupby(["wastedTime", "impact"]).size().reset_index(name="count")

    # Share of each wastedTime value within its own impact class: the "plus"
    # bars sum to 1 and so do the "minus" bars.
    tmp["rel_count"] = tmp["count"] / tmp.groupby("impact")["count"].transform("sum")

    # hue_order keeps the minus/plus colours identical in every panel.
    sns.barplot(
        x="wastedTime",
        y="rel_count",
        hue="impact",
        hue_order=["minus", "plus"],
        data=tmp,
        ax=ax,
    )
    ax.set_title(c, fontsize=14)
    ax.set_ylabel("relative count", fontsize=12)
    ax.set_xlabel("worthwhileness ratings", fontsize=12)
    ax.tick_params(axis="both", labelsize=12)

    # Single legend, on the first panel.
    if idx == 0:
        ax.legend(fontsize="x-small", loc="upper left")
    elif ax.get_legend() is not None:
        ax.get_legend().remove()

# Hide the panels of the grid that received no campaign.
for ax in axes[len(top10):]:
    ax.set_visible(False)

plt.tight_layout()
# savefig has no bbox_to_anchor argument; bbox_inches="tight" does the cropping.
plt.savefig(img_path + "h15_q8_country.png", bbox_inches="tight")

<a id='Q9' ></a>
### Q9: What is the correlation between worthwhileness assessments and satisfaction factors and activities?

We would like to explore if any specific combination of satisfaction factors and activities is associated with particularly high or low worthwhileness assessments. Is that possible (perhaps to be examined in pairs)?

    --- ONLY WT=1 AND WT=5

In [None]:
# Reload the activities table.
all_gen_act = pd.read_pickle(input_path + "all_gen_act.pkl")

# Attach the leg-level attributes (wastedTime, gender, campaign).
leg_info = all_legs[["legid", "wastedTime", "gender", "onCampaigns"]]
all_gen_act = all_gen_act.merge(leg_info, on="legid")

# Round wastedTime, then keep the two extremes only: wt=1 and wt=5.
all_gen_act["wastedTime"] = np.round(all_gen_act["wastedTime"])
all_gen_act_wt1 = all_gen_act.loc[all_gen_act["wastedTime"] == 1]
all_gen_act_wt5 = all_gen_act.loc[all_gen_act["wastedTime"] == 5]

all_gen_act_wt1.head()

In [None]:
# Factor records of the legs rated wt=1, and how many of those legs also
# have at least one reported activity.
all_factors_wt1 = all_factors.loc[all_factors.wastedTime == 1]
n_common_wt1 = len(
    set(all_factors_wt1.legid.unique()) & set(all_gen_act_wt1.legid.unique())
)
print("common legs with wt1:", n_common_wt1)

# Same for the legs rated wt=5.
all_factors_wt5 = all_factors.loc[all_factors.wastedTime == 5]
n_common_wt5 = len(
    set(all_factors_wt5.legid.unique()) & set(all_gen_act_wt5.legid.unique())
)
print("common legs with wt5:", n_common_wt5)

**WT=1 - PLUS**

In [None]:
def find_impact(plus, minus):
    """Classify a factor from its two flags.

    Returns "plus" when `plus` is set and `minus` is not, "minus" in the
    mirrored case, and None when both or neither flag is set.
    """
    # The flags are compared with == rather than by truthiness on purpose:
    # a value that equals neither True nor False matches neither case.
    only_plus = (plus == True) & (minus == False)
    if only_plus:
        return "plus"

    only_minus = (plus == False) & (minus == True)
    if only_minus:
        return "minus"

    return None

In [None]:
# Select only the plus factors.
# `assign` builds a new frame instead of writing a column into the filtered
# `all_factors_wt1` subset, which can trigger pandas' SettingWithCopyWarning.
all_factors_wt1 = all_factors_wt1.assign(
    impact=all_factors_wt1.apply(
        lambda row: find_impact(row["plus"], row["minus"]), axis=1
    )
)
all_factors_wt1_plus = all_factors_wt1[all_factors_wt1.impact == "plus"]


# Count, over the wt=1 legs that have both factors and activities, how many
# legs show each (plus factor, activity) combination.
factor_activity_dict_wt1 = {}
wt1_common_legs = set(all_factors_wt1.legid.unique()).intersection(
    all_gen_act_wt1.legid.unique()
)

for lid in wt1_common_legs:
    factor_lst = all_factors_wt1_plus.loc[
        all_factors_wt1_plus.legid == lid, "factor"
    ].unique()
    act_lst = all_gen_act_wt1.loc[all_gen_act_wt1.legid == lid, "code"].unique()

    # every distinct factor of the leg paired with every distinct activity
    for comb in itertools.product(factor_lst, act_lst):
        factor_activity_dict_wt1[comb] = factor_activity_dict_wt1.get(comb, 0) + 1

# One row per (factor, activity) pair with the number of legs showing it.
factor_activity_df_wt1 = pd.DataFrame(
    [
        [factor, activity, count]
        for (factor, activity), count in factor_activity_dict_wt1.items()
    ],
    columns=["factor", "activity", "count"],
)
factor_activity_df_wt1.head()

In [None]:
# Factor x activity matrix of leg counts; combinations never seen count as 0.
heatmap_df = factor_activity_df_wt1.pivot(
    index="factor", columns="activity", values="count"
).fillna(0)
# Order the rows by their counts, column after column, highest first.
heatmap_df = heatmap_df.sort_values(
    by=list(heatmap_df.columns), axis=0, ascending=False
)

plt.figure(figsize=(18, 16))
# fmt=".0f" prints the counts as whole numbers, like the wt=5 heatmaps do
# (the default annotation format switches to scientific notation for
# three-digit counts).
sns.heatmap(heatmap_df, annot=True, fmt=".0f")
# Heatmap row i spans [i, i + 1] on the y axis, so its label goes at i + 0.5.
plt.yticks(np.arange(len(heatmap_df)) + 0.5, heatmap_df.index)

plt.tight_layout()
# `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
# savefig keywords are rejected by recent matplotlib releases.
plt.savefig(img_path + "h15_q9_heatmap_wt1_plus.png", bbox_inches="tight")

**WT=1 - MINUS**

In [None]:
# Select only the minus factors.
# `assign` builds a new frame instead of writing a column into the filtered
# `all_factors_wt1` subset, which can trigger pandas' SettingWithCopyWarning.
all_factors_wt1 = all_factors_wt1.assign(
    impact=all_factors_wt1.apply(
        lambda row: find_impact(row["plus"], row["minus"]), axis=1
    )
)
all_factors_wt1_minus = all_factors_wt1[all_factors_wt1.impact == "minus"]


# Count, over the wt=1 legs that have both factors and activities, how many
# legs show each (minus factor, activity) combination.
factor_activity_dict_wt1 = {}
wt1_common_legs = set(all_factors_wt1.legid.unique()).intersection(
    all_gen_act_wt1.legid.unique()
)

for lid in wt1_common_legs:
    factor_lst = all_factors_wt1_minus.loc[
        all_factors_wt1_minus.legid == lid, "factor"
    ].unique()
    act_lst = all_gen_act_wt1.loc[all_gen_act_wt1.legid == lid, "code"].unique()

    # every distinct factor of the leg paired with every distinct activity
    for comb in itertools.product(factor_lst, act_lst):
        factor_activity_dict_wt1[comb] = factor_activity_dict_wt1.get(comb, 0) + 1

# One row per (factor, activity) pair with the number of legs showing it.
factor_activity_df_wt1 = pd.DataFrame(
    [
        [factor, activity, count]
        for (factor, activity), count in factor_activity_dict_wt1.items()
    ],
    columns=["factor", "activity", "count"],
)
factor_activity_df_wt1.head()

In [None]:
# Factor x activity matrix of leg counts; combinations never seen count as 0.
heatmap_df = factor_activity_df_wt1.pivot(
    index="factor", columns="activity", values="count"
).fillna(0)
# Order the rows by their counts, column after column, highest first.
heatmap_df = heatmap_df.sort_values(
    by=list(heatmap_df.columns), axis=0, ascending=False
)

plt.figure(figsize=(18, 16))
# fmt=".0f" prints the counts as whole numbers, like the wt=5 heatmaps do
# (the default annotation format switches to scientific notation for
# three-digit counts).
sns.heatmap(heatmap_df, annot=True, fmt=".0f")
# Heatmap row i spans [i, i + 1] on the y axis, so its label goes at i + 0.5.
plt.yticks(np.arange(len(heatmap_df)) + 0.5, heatmap_df.index)

plt.tight_layout()
# `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
# savefig keywords are rejected by recent matplotlib releases.
plt.savefig(img_path + "h15_q9_heatmap_wt1_minus.png", bbox_inches="tight")

**WT=5 - PLUS**

In [None]:
# Select only the plus factors.
# `assign` builds a new frame instead of writing a column into the filtered
# `all_factors_wt5` subset, which can trigger pandas' SettingWithCopyWarning.
all_factors_wt5 = all_factors_wt5.assign(
    impact=all_factors_wt5.apply(
        lambda row: find_impact(row["plus"], row["minus"]), axis=1
    )
)
all_factors_wt5_plus = all_factors_wt5[all_factors_wt5.impact == "plus"]


# Count, over the wt=5 legs that have both factors and activities, how many
# legs show each (plus factor, activity) combination.
factor_activity_dict_wt5 = {}
wt5_common_legs = set(all_factors_wt5.legid.unique()).intersection(
    all_gen_act_wt5.legid.unique()
)

for lid in wt5_common_legs:
    factor_lst = all_factors_wt5_plus.loc[
        all_factors_wt5_plus.legid == lid, "factor"
    ].unique()
    act_lst = all_gen_act_wt5.loc[all_gen_act_wt5.legid == lid, "code"].unique()

    # every distinct factor of the leg paired with every distinct activity
    for comb in itertools.product(factor_lst, act_lst):
        factor_activity_dict_wt5[comb] = factor_activity_dict_wt5.get(comb, 0) + 1

# One row per (factor, activity) pair with the number of legs showing it.
factor_activity_df_wt5 = pd.DataFrame(
    [
        [factor, activity, count]
        for (factor, activity), count in factor_activity_dict_wt5.items()
    ],
    columns=["factor", "activity", "count"],
)
factor_activity_df_wt5.head()

In [None]:
# Factor x activity matrix of leg counts; combinations never seen count as 0.
heatmap_df = factor_activity_df_wt5.pivot(
    index="factor", columns="activity", values="count"
).fillna(0)
# Order the rows by their counts, column after column, highest first.
heatmap_df = heatmap_df.sort_values(
    by=list(heatmap_df.columns), axis=0, ascending=False
)
# `astype` has no in-place mode and returns a new frame, so the result has to
# be assigned back for the integer conversion to take effect.
heatmap_df = heatmap_df.astype(np.int64)

plt.figure(figsize=(18, 16))
sns.heatmap(heatmap_df, annot=True, fmt=".0f")
# Heatmap row i spans [i, i + 1] on the y axis, so its label goes at i + 0.5.
plt.yticks(np.arange(len(heatmap_df)) + 0.5, heatmap_df.index)

plt.tight_layout()
# `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
# savefig keywords are rejected by recent matplotlib releases.
plt.savefig(img_path + "h15_q9_heatmap_wt5_plus.png", bbox_inches="tight")

**WT=5 - MINUS**

In [None]:
# Select only the minus factors.
# `assign` builds a new frame instead of writing a column into the filtered
# `all_factors_wt5` subset, which can trigger pandas' SettingWithCopyWarning.
all_factors_wt5 = all_factors_wt5.assign(
    impact=all_factors_wt5.apply(
        lambda row: find_impact(row["plus"], row["minus"]), axis=1
    )
)
all_factors_wt5_minus = all_factors_wt5[all_factors_wt5.impact == "minus"]


# Count, over the wt=5 legs that have both factors and activities, how many
# legs show each (minus factor, activity) combination.
factor_activity_dict_wt5 = {}
wt5_common_legs = set(all_factors_wt5.legid.unique()).intersection(
    all_gen_act_wt5.legid.unique()
)

for lid in wt5_common_legs:
    factor_lst = all_factors_wt5_minus.loc[
        all_factors_wt5_minus.legid == lid, "factor"
    ].unique()
    act_lst = all_gen_act_wt5.loc[all_gen_act_wt5.legid == lid, "code"].unique()

    # every distinct factor of the leg paired with every distinct activity
    for comb in itertools.product(factor_lst, act_lst):
        factor_activity_dict_wt5[comb] = factor_activity_dict_wt5.get(comb, 0) + 1

# One row per (factor, activity) pair with the number of legs showing it.
factor_activity_df_wt5 = pd.DataFrame(
    [
        [factor, activity, count]
        for (factor, activity), count in factor_activity_dict_wt5.items()
    ],
    columns=["factor", "activity", "count"],
)
factor_activity_df_wt5.head()

In [None]:
# Factor x activity matrix of leg counts; combinations never seen count as 0.
heatmap_df = factor_activity_df_wt5.pivot(
    index="factor", columns="activity", values="count"
).fillna(0)
# Order the rows by their counts, column after column, highest first.
heatmap_df = heatmap_df.sort_values(
    by=list(heatmap_df.columns), axis=0, ascending=False
)


plt.figure(figsize=(18, 16))
sns.heatmap(heatmap_df, annot=True, fmt=".0f")
# Heatmap row i spans [i, i + 1] on the y axis, so its label goes at i + 0.5.
plt.yticks(np.arange(len(heatmap_df)) + 0.5, heatmap_df.index)

plt.tight_layout()
# `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
# savefig keywords are rejected by recent matplotlib releases.
plt.savefig(img_path + "h15_q9_heatmap_wt5_minus.png", bbox_inches="tight")

<a id='Q13' ></a>
### Q13: What is the distribution of worthwhileness and mood ratings among different weather scenarios?

Information to be extracted following a combination of weather statistics during trips and user profiles.

In [None]:
# Weather data; the cells below read its `legid` and `weather_scenario` columns.
weather_df = pd.read_pickle(input_path + "weather_final_with_legs_df.pkl")

In [None]:
weather_df.head(3)

In [None]:
# Weather-scenario metadata is read from a JSON file under `meta_data_path`.
weather_scenarios_metadata_filename = "weather_scenarios.json"
weather_scenarios_metadata_path = os.path.join(
    meta_data_path, weather_scenarios_metadata_filename
)
# The encoding is given explicitly so that decoding does not depend on the
# platform's default text encoding.
with open(weather_scenarios_metadata_path, "r", encoding="utf-8") as infp:
    weather_scenarios = json.load(infp)

In [None]:
pprint(weather_scenarios)

In [None]:
# Bar colour (hex code) of each weather scenario; the plotting cells look a
# scenario up here by its name (`cws[ws]`).
cws = {
    "neutral/good": "#BAFFC9",
    "cold": "#2C85B1",
    "warm": "#FE6D6D",
    "uncomfortable temperature": "#F78003",
    "rainy/snowy": "#AFAFAF",
    "cloudy": "#BAE1FF",
    "windy": "#D4C2E1",
}

In [None]:
# Legs table; `legs` is presumably the pickle file name defined with the
# other global variables -- TODO confirm.
legs_df = pd.read_pickle(input_path + legs)

In [None]:
# Weather scenario information of each leg.
all_ws = weather_df[["legid", "weather_scenario"]]

# Trip id and worthwhileness rating of the legs that have weather data.
all_ws_legids = set(all_ws.legid.values)
has_weather = legs_df["legid"].isin(all_ws_legids)
all_ws_legs = legs_df.loc[has_weather][["legid", "tripid", "wastedTime"]].astype(
    {"wastedTime": "int32"}
)
# Mood rating (overallScore) of each trip.
all_ws_trips = trips_df[["tripid", "overallScore"]]

# Both merges join on the columns the two frames have in common.
all_ws = all_ws.merge(all_ws_legs).merge(all_ws_trips)

# Keep only mood ratings strictly between 0 and 6.
valid_mood = (all_ws.overallScore > 0) & (all_ws.overallScore < 6)
all_ws = all_ws[valid_mood]

In [None]:
all_ws.head(2)

In [None]:
# Rows of `all_ws` grouped by weather scenario name.
ws_data = {}
for ws in weather_scenarios:
    print("ws:", ws)

    # Row selection through a function of a column, see:
    #   https://stackoverflow.com/a/56703848/2377454
    # A row is kept when `ws` is contained in its `weather_scenario` value
    # (presumably a list of scenario names -- TODO confirm).
    in_scenario = all_ws["weather_scenario"].apply(lambda wslist: ws in wslist)
    ws_data[ws] = all_ws.loc[in_scenario]

In [None]:
# Best way to find the intersection of multiple sets?
# See:
#   https://stackoverflow.com/a/2541814/2377454
def intersect_ws(weather_scenarios):
    """Return the scenarios shared by every entry of `weather_scenarios`.

    Args:
        weather_scenarios: iterable of collections of scenario labels
            (one collection per row being aggregated).

    Returns:
        A list with the labels present in all the collections, in no
        particular order. An empty input yields an empty list.
    """
    wssets = [set(ws) for ws in weather_scenarios]

    # set.intersection() needs at least one set to work on; with no entries
    # there is nothing in common to report.
    if not wssets:
        return []

    return list(set.intersection(*wssets))

In [None]:
def select_ws(weather_scenarios):
    """Tell whether every entry holds exactly the same set of scenarios.

    Returns True when all the entries, compared as sets, are equal to their
    common intersection, and also when there are no entries at all.
    """
    wssets = [set(ws) for ws in list(weather_scenarios)]

    if not wssets:
        return True

    common = set.intersection(*wssets)
    return all(candidate == common for candidate in wssets)


# Per trip, check with `select_ws` whether all of its rows carry the same set
# of weather scenarios; `nonselected` keeps the trips where they differ.
# Row selection through a function of a column, see:
#   https://stackoverflow.com/a/56703848/2377454
selected = all_ws.groupby("tripid")["weather_scenario"].agg(select_ws)
nonselected = selected.loc[selected == False]

print("# of nonselected:", len(nonselected))

In [None]:
nonselected.head(5)

In [None]:
all_ws.loc[all_ws["tripid"] == "#33:9960"]

In [None]:
all_ws.loc[all_ws["tripid"] == "#33:9960"].groupby("tripid")["weather_scenario"].agg(
    intersect_ws
).reset_index()

In [None]:
all_ws.loc[all_ws["tripid"] == "#130:14450"]

In [None]:
all_ws.loc[all_ws["tripid"] == "#130:14450"].groupby("tripid")["weather_scenario"].agg(
    intersect_ws
).reset_index()

In [None]:
all_ws.loc[all_ws["tripid"] == "#30:13084"]

In [None]:
all_ws.loc[all_ws["tripid"] == "#30:13084"].groupby("tripid")["weather_scenario"].agg(
    intersect_ws
).reset_index()

In [None]:
# Trip-level worthwhileness: mean of the trip's ratings that lie strictly
# between 0 and 6, rounded to an integer.
valid_wt = all_ws.loc[(all_ws.wastedTime > 0) & (all_ws.wastedTime < 6)]
tmp_wastedtime = valid_wt.groupby("tripid")["wastedTime"].mean().reset_index()
tmp_wastedtime["wastedTime"] = [
    int(round(avg, 0)) for avg in tmp_wastedtime["wastedTime"]
]
tmp_wastedtime.head(3)

In [None]:
# One row per trip: the weather scenarios common to all of its rows and the
# mean mood rating. "mean" is passed by name, which is the form pandas
# recommends over handing `np.mean` to `agg`.
all_ws_composite = (
    all_ws.groupby("tripid")[["weather_scenario", "overallScore"]]
    .agg({"weather_scenario": [intersect_ws], "overallScore": ["mean"]})
    .reset_index()
)
# The list-valued aggregation spec yields two-level column labels; naming the
# columns directly flattens them. (The former `droplevel` call returned a new
# frame that was never used, so it has been dropped.)
all_ws_composite.columns = ["tripid", "weather_scenario", "overallScore"]

# Add the trip-level worthwhileness rating and the number of common scenarios.
all_ws_composite = all_ws_composite.merge(tmp_wastedtime, on="tripid")
all_ws_composite["wscount"] = all_ws_composite["weather_scenario"].apply(len)

In [None]:
all_ws_composite.head(3)

In [None]:
all_ws_composite.loc[all_ws_composite["tripid"] == "#30:13084"]

In [None]:
all_ws_composite.loc[all_ws_composite["tripid"] == "#33:9960"]

In [None]:
all_ws_composite.loc[all_ws_composite["tripid"] == "#130:14450"]

In [None]:
all_ws_composite.loc[all_ws_composite["tripid"] == "#30:13084"]

In [None]:
all_ws_composite.groupby("wscount").count()

In [None]:
# Trips per number of scenarios: after `count()` the "weather_scenario" column
# holds how many trips have `wscount` scenarios, so the products add up to
# the total number of (trip, scenario) pairs.
foo = all_ws_composite.groupby("wscount").count().reset_index()
scenarios_per_group = foo["wscount"] * foo["weather_scenario"]
print("Total number of scenarios:", sum(scenarios_per_group))

In [None]:
# Trip-level rows of `all_ws_composite` grouped by weather scenario name.
ws_composite_data = {}
for ws in weather_scenarios:
    print("ws:", ws)

    # Row selection through a function of a column, see:
    #   https://stackoverflow.com/a/56703848/2377454
    # A trip is kept when `ws` is among its common scenarios.
    has_scenario = all_ws_composite["weather_scenario"].apply(
        lambda wslist: ws in wslist
    )
    ws_composite_data[ws] = all_ws_composite.loc[has_scenario]

In [None]:
# Number of trips in each scenario, then the overall total.
ws_sum = 0
for ws in weather_scenarios:
    ws_count = len(ws_composite_data[ws])
    print("  * {}:{}".format(ws, ws_count))
    ws_sum = ws_sum + ws_count

print("Total number of scenarios:", ws_sum)

In [None]:
ws_composite_data["cold"].head(3)

In [None]:
### Plot each distribution for each weather scenario
# One panel per weather scenario: absolute number of trips per mood rating.

nrows = 2
ncols = 4
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 8), sharey=True)
fig.suptitle(
    "Distributions (absolute counts) of mood ratings by weather scenario",
    size=16,
    y=1.12,
)
axes = axes.ravel()

for idx, ws in enumerate(weather_scenarios):
    print("(idx, ws): ({}, {})".format(idx, ws))

    # trips per mood rating within this scenario
    tmp = ws_composite_data[ws]
    tmp = tmp.groupby(["overallScore"]).size().reset_index(name="count")

    sns.barplot(x="overallScore", y="count", data=tmp, color=cws[ws], ax=axes[idx])
    axes[idx].legend("")
    axes[idx].set_title(ws, fontsize=14)
    axes[idx].set_ylabel("count", fontsize=12)
    axes[idx].set_xlabel("mood ratings", fontsize=12)
    axes[idx].tick_params(axis="both", labelsize=12)

plt.tight_layout()
# `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
# savefig keywords are rejected by recent matplotlib releases.
plt.savefig(img_path + "h15_q13.png", bbox_inches="tight")

In [None]:
### Plot each distribution for each weather scenario
# One panel per weather scenario: share of trips per mood rating.

nrows = 2
ncols = 4
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 8), sharey=True)
fig.suptitle(
    "Distributions (relative counts) of mood ratings by weather scenario",
    size=16,
    y=1.12,
)
axes = axes.ravel()

for idx, ws in enumerate(weather_scenarios):
    print("(idx, ws): ({}, {})".format(idx, ws))

    # trips per mood rating within this scenario
    tmp = ws_composite_data[ws]
    tmp = tmp.groupby(["overallScore"]).size().reset_index(name="count")

    # share of the scenario's trips; a column-wise division replaces the
    # row-by-row `apply`
    tot = tmp["count"].sum()
    tmp["rel_count"] = tmp["count"] / tot

    sns.barplot(x="overallScore", y="rel_count", data=tmp, color=cws[ws], ax=axes[idx])
    axes[idx].legend("")
    axes[idx].set_title(ws, fontsize=14)
    axes[idx].set_ylabel("relative count", fontsize=12)
    axes[idx].set_xlabel("mood ratings", fontsize=12)
    axes[idx].tick_params(axis="both", labelsize=12)

plt.tight_layout()
# `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
# savefig keywords are rejected by recent matplotlib releases.
plt.savefig(img_path + "h15_q13_relcount.png", bbox_inches="tight")

<a id='Q14' ></a>
### Q14: What is the distribution of transport modes and worthwhileness ratings among different weather scenarios?

Information to be extracted following a combination of weather statistics during trips and user profiles.

In [None]:
### Plot each distribution for each weather scenario
# One panel per weather scenario: absolute number of trips per
# worthwhileness rating.

nrows = 2
ncols = 4
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 8), sharey=True)
fig.suptitle(
    "Distributions (absolute counts) of worhthwhileness ratings by weather scenario",
    size=16,
    y=1.12,
)
axes = axes.ravel()

for idx, ws in enumerate(weather_scenarios):
    print("(idx, ws): ({}, {})".format(idx, ws))

    # trips per worthwhileness rating within this scenario
    tmp = ws_composite_data[ws]
    tmp = tmp.groupby(["wastedTime"]).size().reset_index(name="count")

    sns.barplot(x="wastedTime", y="count", data=tmp, color=cws[ws], ax=axes[idx])
    axes[idx].legend("")
    axes[idx].set_title(ws, fontsize=14)
    axes[idx].set_ylabel("count", fontsize=12)
    axes[idx].set_xlabel("worthwhileness ratings", fontsize=12)
    axes[idx].tick_params(axis="both", labelsize=12)

plt.tight_layout()
# `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
# savefig keywords are rejected by recent matplotlib releases.
plt.savefig(img_path + "h15_q14.png", bbox_inches="tight")

In [None]:
### Plot each distribution for each weather scenario
# One panel per weather scenario: share of trips per worthwhileness rating.
# (A stray `c` token in front of the header comment has been removed; it only
# evaluated a variable left over from an earlier cell.)

nrows = 2
ncols = 4
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 8), sharey=True)
fig.suptitle(
    "Distributions (relative counts) of worhthwhileness ratings by weather scenario",
    size=16,
    y=1.12,
)
axes = axes.ravel()

for idx, ws in enumerate(weather_scenarios):
    print("(idx, ws): ({}, {})".format(idx, ws))

    # trips per worthwhileness rating within this scenario
    tmp = ws_composite_data[ws]
    tmp = tmp.groupby(["wastedTime"])["tripid"].size().reset_index(name="count")

    # share of the scenario's trips; a column-wise division replaces the
    # row-by-row `apply`
    tot = tmp["count"].sum()
    tmp["rel_count"] = tmp["count"] / tot

    sns.barplot(x="wastedTime", y="rel_count", data=tmp, color=cws[ws], ax=axes[idx])
    axes[idx].legend("")
    axes[idx].set_title(ws, fontsize=14)
    axes[idx].set_ylabel("relative count", fontsize=12)
    axes[idx].set_xlabel("worthwhileness ratings", fontsize=12)
    axes[idx].tick_params(axis="both", labelsize=12)

plt.tight_layout()
# `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
# savefig keywords are rejected by recent matplotlib releases.
plt.savefig(img_path + "h15_q14_relcount.png", bbox_inches="tight")

In [None]:
# Same per-scenario trip tables, extended with the transport category.
# The merge joins on the columns both frames share.
# NOTE(review): `legs_df` looks like a per-leg table, so a trip would appear
# once per leg here -- confirm that counting per leg is intended.
ws_composite_data_tc = {}
leg_categories = legs_df[["tripid", "transp_category"]]
for ws in weather_scenarios:
    print("ws:", ws)

    ws_composite_data_tc[ws] = ws_composite_data[ws].merge(leg_categories)

In [None]:
ws_composite_data_tc["cold"].head(3)

In [None]:
### BY TRANSPORT CATEGORY


def q14_count_plot(tc):
    """Plot absolute counts of worthwhileness ratings for one transport category.

    Draws a 2x4 grid with one panel per weather scenario, using the rows of
    `ws_composite_data_tc` whose `transp_category` equals `tc`, and saves the
    figure as "h15_q14_<tc>.png" under `img_path`.

    Args:
        tc: transport category value to select.
    """
    nrows = 2
    ncols = 4
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 8), sharey=True)
    fig.suptitle(
        "Distributions (absolute counts) of worhthwhileness ratings by weather scenario "
        'per transport category "{}"'.format(tc),
        size=16,
        y=1.12,
    )
    axes = axes.ravel()

    for idx, ws in enumerate(weather_scenarios):
        print("(idx, ws): ({}, {})".format(idx, ws))

        # rows of this scenario that belong to the requested category
        tmp = ws_composite_data_tc[ws].loc[
            ws_composite_data_tc[ws]["transp_category"] == tc
        ]
        tmp = tmp.groupby(["wastedTime"]).size().reset_index(name="count")

        sns.barplot(x="wastedTime", y="count", data=tmp, color=cws[ws], ax=axes[idx])
        axes[idx].legend("")
        axes[idx].set_title(ws, fontsize=14)
        axes[idx].set_ylabel("count", fontsize=12)
        axes[idx].set_xlabel("worthwhileness ratings", fontsize=12)
        axes[idx].tick_params(axis="both", labelsize=12)

    plt.tight_layout()
    filename = "h15_q14_{}.png".format(tc)
    # `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
    # savefig keywords are rejected by recent matplotlib releases.
    plt.savefig(img_path + filename, bbox_inches="tight")


def q14_relcount_plot(tc):
    """Plot relative counts of worthwhileness ratings for one transport category.

    Draws a 2x4 grid with one panel per weather scenario, using the rows of
    `ws_composite_data_tc` whose `transp_category` equals `tc`; each panel
    shows the share of those rows per rating. The figure is saved as
    "h15_q14_relcount_<tc>.png" under `img_path`.

    Args:
        tc: transport category value to select.
    """
    nrows = 2
    ncols = 4
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 8), sharey=True)
    fig.suptitle(
        "Distributions (relative counts) of worhthwhileness ratings by weather scenario "
        'per transport category "{}"'.format(tc),
        size=16,
        y=1.12,
    )
    axes = axes.ravel()

    for idx, ws in enumerate(weather_scenarios):
        print("(idx, ws): ({}, {})".format(idx, ws))

        # rows of this scenario that belong to the requested category
        tmp = ws_composite_data_tc[ws].loc[
            ws_composite_data_tc[ws]["transp_category"] == tc
        ]
        tmp = tmp.groupby(["wastedTime"])["tripid"].size().reset_index(name="count")

        # share of the selected rows; a column-wise division replaces the
        # row-by-row `apply` and also copes with a selection that is empty
        tot = tmp["count"].sum()
        tmp["rel_count"] = tmp["count"] / tot

        sns.barplot(
            x="wastedTime", y="rel_count", data=tmp, color=cws[ws], ax=axes[idx]
        )
        axes[idx].legend("")
        axes[idx].set_title(ws, fontsize=14)
        axes[idx].set_ylabel("relative count", fontsize=12)
        axes[idx].set_xlabel("worthwhileness ratings", fontsize=12)
        axes[idx].tick_params(axis="both", labelsize=12)

    plt.tight_layout()
    filename = "h15_q14_relcount_{}.png".format(tc)
    # `bbox_to_anchor` is a legend-placement option, not a savefig one; unknown
    # savefig keywords are rejected by recent matplotlib releases.
    plt.savefig(img_path + filename, bbox_inches="tight")

In [None]:
# Distinct transport categories found in the legs, leaving out None entries.
distinct_categories = set(legs_df.transp_category.values)
transport_categories = [tc for tc in distinct_categories if tc is not None]
print("transport_categories:", transport_categories)

In [None]:
# Draw and save the absolute-count figure of every transport category first,
# then the relative-count ones.
for tc in transport_categories:
    q14_count_plot(tc)

for tc in transport_categories:
    q14_relcount_plot(tc)