# H16

**Obj:** Jerkiness as a proxy for comfort
<br> To explore how comfort while travelling is influenced by vibration, jerkiness and shocks.

## Questions

- [Q1](#Q1): How are experience factors related to 'Smoothness' correlated to worthwhileness? (and by whom?)
- [Q2](#Q2): When factors on smoothness are negatively rated, what are the activities that are carried out while on the move (and how does that compare to the 'normal' list of activities when this factor is not mentioned)?

**Smoothness:** road path quality and road path directness in Active Transport, and vehicle ride smoothness in both Public and Private Transport.



In [None]:
import os
import sys
import pandas as pd
import numpy as np
import importlib
import itertools
from pandas.io.json import json_normalize
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from matplotlib import rcParams
import json
import math

%matplotlib inline

# `display` and `HTML` belong to the public `IPython.display` API; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import HTML, display

# Widen the notebook container so that wide figures and tables fit on screen.
# The <style> element has to be properly closed for the rule to be valid HTML.
display(HTML("<style>.container { width:95% !important; }</style>"))

In [None]:
# --- Locations of the input data and of the generated outputs ---------------
meta_data_path = "../../data-campaigns/meta-data/"
input_path = "../../2019-12-16.out/"
legs = "all_legs_merged_no_outlier_0.01.pkl"  # file name of the pickled legs table

# Tables and figures of this hypothesis are written to the same folder.
out_path = "../../2019-12-16.out/hypothesis/H16/"
img_path = out_path

# --- Matplotlib / seaborn defaults used by every figure ----------------------
rcParams.update(
    {
        "axes.titlepad": 45,
        "font.size": 16,
        "figure.figsize": (12, 8),
    }
)
sns.set_style("whitegrid")

In [None]:
# Create the output folder; when it already exists this is only reported on
# stderr and the notebook carries on.
try:
    os.makedirs(os.path.abspath(out_path))
except FileExistsError:
    print(f"Directory '{out_path}' already exists", file=sys.stderr)

In [None]:
# Load the legs table and the trips table.
all_legs = pd.read_pickle(input_path + legs)
trips_df = pd.read_pickle(input_path + "trips_df.pkl")
# NOTE: trips_users_df.pkl is not loaded, and trips_df is not restricted to
# the trips that appear in all_legs.

# transport categories: mapping category -> collection of transport modes
with open(input_path + "category_transp_mode_dict.json", "r") as fh:
    category_transp_mode_dict = json.load(fh)

# Reverse lookup: transport mode -> category.
inverted_category_transp_mode_dict = {
    mode: category
    for category, modes in category_transp_mode_dict.items()
    for mode in modes
}

# TODO: remove "unknown" as transport category (?)

# Size of the data set.
n_legs = all_legs.shape[0]
n_trips = len(all_legs["tripid"].unique())
n_users = len(all_legs["userid"].unique())
print("Legs:", n_legs)
print("Trips: ", n_trips)
print("Users:", n_users)
print()

In [None]:
### read experience factors
all_factors = pd.read_pickle(input_path + "all_factors.pkl")

# Keep only unambiguous ratings: drop the records flagged neither as minus nor
# as plus, and the records flagged as both (3% of obs).  The explicit
# comparisons with True/False are kept on purpose so that any other value
# (e.g. NaN) is treated exactly as before.
neither_flag = (all_factors["minus"] == False) & (all_factors["plus"] == False)  # noqa: E712
both_flags = (all_factors["minus"] == True) & (all_factors["plus"] == True)  # noqa: E712
all_factors = all_factors[~neither_flag & ~both_flags]

# select only useful cols
all_factors = all_factors[
    [
        "correctedModeOfTransport_str",
        "legid",
        "minus",
        "plus",
        "tripid",
        "factor",
        "legStartDay",
    ]
]

# add info
all_factors = all_factors.merge(
    all_legs[
        ["legid", "wastedTime", "gender", "age", "onCampaigns", "transp_category"]
    ],
    on="legid",
)

# select useful wastedTime (strictly between 0 and 6) and
# remove legs with "None" transport category
valid_rows = (
    (all_factors["wastedTime"] > 0)
    & (all_factors["wastedTime"] < 6)
    & all_factors["transp_category"].notna()
)
# .copy() detaches the filtered rows from the merged frame, so the column
# assignment below writes to an independent frame (no SettingWithCopyWarning).
all_factors = all_factors[valid_rows].copy()
all_factors["wastedTime"] = all_factors["wastedTime"].round(0)

# select only "SMOOTHNESS"
lst = [
    "Road_Path_Quality",
    "Road_Path_Directness",
    "Vehicle_Ride_Smoothness",
    "Road_Quality_Vehicle_Ride_Smoothness",
]
# Columns are added to / rewritten in `smooth_factors` further down, so it has
# to be an independent copy rather than a slice of `all_factors`.
smooth_factors = all_factors[all_factors["factor"].isin(lst)].copy()


# checks: total number of smoothness records, and how many of them are rated
# only as a plus / only as a minus
print("all records:", len(smooth_factors))
for label, plus_flag, minus_flag in (
    ("only plus: ", True, False),
    ("only minus: ", False, True),
):
    xx = smooth_factors.loc[
        (smooth_factors["minus"] == minus_flag) & (smooth_factors["plus"] == plus_flag)
    ]
    print(label, len(xx))


def find_impact(plus, minus):
    """Translate the (plus, minus) flags of a rating into an impact label.

    Returns "plus" when only `plus` is set, "minus" when only `minus` is set,
    and None for every other combination.
    """
    # Equality with True/False (not truthiness) decides the outcome, so a
    # value such as NaN matches neither case and yields None.
    only_plus = (plus == True) and (minus == False)  # noqa: E712
    only_minus = (minus == True) and (plus == False)  # noqa: E712

    if only_plus:
        return "plus"
    return "minus" if only_minus else None


# Label every record with its impact ("plus" / "minus").
# The labels are built column-wise: a row-wise DataFrame.apply(axis=1) is slow
# and does not return a column of labels when the frame is empty.  `assign`
# returns a new frame, so nothing is written into a slice of `all_factors`.
smooth_factors = smooth_factors.assign(
    impact=[
        find_impact(plus, minus)
        for plus, minus in zip(smooth_factors["plus"], smooth_factors["minus"])
    ]
)

smooth_factors.head()

In [None]:
# Value lists used to split the plots and tables below.


def _merge_che_into_other(campaign):
    """Return "AAA" for the "CHE" campaign and any other value unchanged."""
    if campaign == "CHE":
        return "AAA"
    return campaign


# age values found in the legs table
age_range = list(all_legs["age"].unique())

# assign 'CHE' to the class Other (AAA)
smooth_factors["onCampaigns"] = smooth_factors["onCampaigns"].apply(
    _merge_che_into_other
)
top10 = list(smooth_factors["onCampaigns"].unique())

# transport categories present among the smoothness records
tc_lst = smooth_factors["transp_category"].unique()

# genders and impacts that are compared
gender_lst = ["Male", "Female"]
impact_lst = ["plus", "minus"]

<a id='Q1' ></a>
### Q1: How are experience factors related to 'Smoothness' correlated to worthwhileness? (and by whom?)

In [None]:
# all + gender
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(18, 7))
axes = axes.ravel()

# Tick label of every smoothness factor.  The factors are plotted at fixed
# numeric positions in this order, so each label is guaranteed to sit under
# its own factor (plotting the factor names directly orders the axis by first
# appearance, which does not match a hard-coded label list).
smooth_labels = {
    "Road_Path_Quality": "Road_Quality",
    "Road_Path_Directness": "Road_Directness",
    "Vehicle_Ride_Smoothness": "Vehicle_Smooth",
    "Road_Quality_Vehicle_Ride_Smoothness": "Road_Smooth",
}
factor_order = list(smooth_labels)
smooth_names = list(smooth_labels.values())
x_pos = list(range(len(factor_order)))

# (legend label, gender filter or None for everybody, colour, marker)
groups = (
    ("All", None, "black", "x"),
    ("Male", "Male", "b", "s"),
    ("Female", "Female", "r", "d"),
)

for ax, impact in zip(axes, impact_lst):

    # select plus or minus
    impact_df = smooth_factors[smooth_factors.impact == impact]

    for label, gender, colour, marker in groups:
        subset = impact_df if gender is None else impact_df[impact_df.gender == gender]
        # Average rating per factor, aligned to `factor_order`; a factor
        # without records becomes NaN and is simply not drawn.
        tmp = (
            subset.groupby("factor")["wastedTime"]
            .mean()
            .reindex(factor_order)
            .rename_axis("factor")
            .reset_index(name="avg_wt")
        )
        ax.scatter(x_pos, tmp.avg_wt, c=colour, marker=marker, label=label)

    ax.set_xlabel("Smoothness factors")
    ax.set_ylabel("Average worthwhileness rating")
    ax.set_title("Smoothness factors rated as a " + impact)
    ax.legend(loc="best")
    ax.set_xticks(x_pos)
    ax.set_xticklabels(smooth_names)
    ax.set_xlim(-0.5, len(x_pos) - 0.5)

plt.tight_layout()
# `bbox_to_anchor` is a legend option, not a savefig option, so it is dropped.
plt.savefig(img_path + "h16_q1_all_gender.png", bbox_inches="tight")

In [None]:
### BY COUNTRY - PLUS AND MINUS

# Short tick label of every smoothness factor.  The factors are plotted at
# fixed numeric positions in this order, so each label sits under its own
# factor regardless of which factors a campaign actually has.
smooth_labels = {
    "Road_Path_Quality": "RQ",
    "Road_Path_Directness": "RD",
    "Vehicle_Ride_Smoothness": "VS",
    "Road_Quality_Vehicle_Ride_Smoothness": "RS",
}
factor_order = list(smooth_labels)
smooth_names = list(smooth_labels.values())
x_pos = list(range(len(factor_order)))

# One panel per campaign, 5 per row.  The number of rows follows the number of
# campaigns found in the data instead of assuming exactly ten.
n_cols = 5
n_rows = max(1, math.ceil(len(top10) / n_cols))
fig, axes = plt.subplots(
    nrows=n_rows, ncols=n_cols, sharey=True, figsize=(18, 3.5 * n_rows)
)
axes = np.ravel(axes)

for i, (ax, campaign) in enumerate(zip(axes, top10)):

    campaign_df = smooth_factors[smooth_factors.onCampaigns == campaign]

    for impact in impact_lst:
        # Average rating per factor, aligned to `factor_order`; a factor
        # without records becomes NaN and is simply not drawn.
        tmp = (
            campaign_df[campaign_df.impact == impact]
            .groupby("factor")["wastedTime"]
            .mean()
            .reindex(factor_order)
            .rename_axis("factor")
            .reset_index(name="avg_wt")
        )
        ax.scatter(x_pos, tmp.avg_wt, lw=2, label=impact)

    # a single legend (first panel) is enough
    if i == 0:
        ax.legend(fontsize="x-small")
    ax.set_title(campaign)
    ax.set_xticks(x_pos)
    ax.set_xticklabels(smooth_names)
    ax.set_xlim(-0.5, len(x_pos) - 0.5)

# hide the panels of the grid that have no campaign
for ax in axes[len(top10):]:
    ax.set_visible(False)

plt.tight_layout()
# `bbox_to_anchor` is a legend option, not a savefig option, so it is dropped.
plt.savefig(img_path + "h16_q1_country.png", bbox_inches="tight")

In [None]:
# Inspect which smoothness factor names actually occur in the data.
smooth_factors.factor.unique()

In [None]:
# Debug aid: display whatever `tmp` currently holds (it is set by a
# previously executed cell).
tmp

In [None]:
### BY TC - PLUS AND MINUS

smooth_names_dict = {
    "Road_Path_Quality": "RQ",
    "Road_Path_Directness": "RD",
    "Vehicle_Ride_Smoothness": "VS",
    "Road_Quality_Vehicle_Ride_Smoothness": "RS",
}
factor_order = list(smooth_names_dict)

# One panel per transport category, 3 per row.  The number of rows follows
# the number of categories found in the data instead of assuming at most six.
n_cols = 3
n_rows = max(1, math.ceil(len(tc_lst) / n_cols))
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(18, 4.5 * n_rows))
axes = np.ravel(axes)

for i, (ax, tc) in enumerate(zip(axes, tc_lst)):

    tc_df = smooth_factors[
        (smooth_factors.transp_category == tc)
        & smooth_factors.impact.isin(impact_lst)
    ]

    # Only the factors rated for this transport category are shown, always in
    # the same order.  Ticks and points share these numeric positions, so the
    # labels cannot drift away from the points they describe.
    rated = set(tc_df.factor.unique())
    tc_factors = [name for name in factor_order if name in rated]
    ticks_lst = [smooth_names_dict[name] for name in tc_factors]
    x_pos = list(range(len(tc_factors)))

    for impact in impact_lst:
        # Average rating per factor; a factor without records for this impact
        # becomes NaN and is simply not drawn.
        tmp = (
            tc_df[tc_df.impact == impact]
            .groupby("factor")["wastedTime"]
            .mean()
            .reindex(tc_factors)
            .rename_axis("factor")
            .reset_index(name="avg_wt")
        )
        ax.scatter(x_pos, tmp.avg_wt, lw=3, label=impact)

    # a single legend (first panel) is enough
    if i == 0:
        ax.legend(fontsize="x-small")
    ax.set_title(tc)
    ax.set_ylabel("avg worthwhileness")
    ax.set_xticks(x_pos)
    ax.set_xticklabels(ticks_lst)
    if x_pos:
        ax.set_xlim(-0.5, len(x_pos) - 0.5)

# hide the panels of the grid that have no transport category
for ax in axes[len(tc_lst):]:
    ax.set_visible(False)

plt.tight_layout()
# Saving this figure is currently disabled; uncomment the next line to write it.
# plt.savefig(img_path + "h16_q1_tc.png", bbox_inches="tight")

<a id='Q2' ></a>
### Q2: When factors on smoothness are negatively rated, what are the activities that are carried out while on the move (and how does that compare to the 'normal' list of activities when this factor is not mentioned)?

In [None]:
# Select legs with negative impact
neg_smooth = smooth_factors[smooth_factors.impact == "minus"]
neg_smooth_legid_lst = neg_smooth.legid.unique()

# Read activities
all_gen_act = pd.read_pickle(input_path + "all_gen_act.pkl")

# add info
leg_info_cols = ["legid", "wastedTime", "gender", "onCampaigns"]
# The activities are later split by `transp_category` as well; take it from
# the legs table unless the activities table already provides it.
if "transp_category" not in all_gen_act.columns:
    leg_info_cols.append("transp_category")
all_gen_act = all_gen_act.merge(all_legs[leg_info_cols], on="legid")

# assign 'CHE' to the class Other (AAA), exactly as done for the smoothness
# records, so that the campaign labels match the ones listed in `top10`
all_gen_act["onCampaigns"] = all_gen_act["onCampaigns"].apply(
    lambda x: "AAA" if x == "CHE" else x
)

# select only activities of legs with negative smoothness
all_gen_act_neg_smooth = all_gen_act[all_gen_act.legid.isin(neg_smooth_legid_lst)]

all_gen_act_neg_smooth.head()

In [None]:
# create a table with the list of activities for all the legs (first col) and for legs with negative smoothness + filters
table = (
    all_gen_act.groupby("code")
    .size()
    .sort_values(ascending=False)
    .reset_index(name="count_all")
)

# Subsets of the negative-smoothness activities, one output column each:
# all users first, then by gender.
subsets = {"NS_count_all": all_gen_act_neg_smooth}
for gender in gender_lst:
    subsets["NS_count_" + gender] = all_gen_act_neg_smooth[
        all_gen_act_neg_smooth.gender == gender
    ]

# Every activity of the full list keeps its row: an activity that never occurs
# in a subset gets a count of 0 instead of being dropped from the table (which
# is what an inner merge on "code" would do).
for column, subset in subsets.items():
    counts = subset.groupby("code").size()
    table[column] = table["code"].map(counts).fillna(0).astype(int)

# save
table.to_csv(out_path + "h16_q2_all_gender.csv", index=False)
table

In [None]:
# create a table with the list of activities for all the legs (first col) and for legs with negative smoothness + filters
table = (
    all_gen_act.groupby("code")
    .size()
    .sort_values(ascending=False)
    .reset_index(name="count_all")
)

# `top10` lists the campaigns with 'CHE' assigned to the class Other (AAA);
# apply the same recoding here so that the labels being compared match.
act_campaign = all_gen_act_neg_smooth["onCampaigns"].apply(
    lambda x: "AAA" if x == "CHE" else x
)

# by country
for campaign in top10:
    counts = all_gen_act_neg_smooth[act_campaign == campaign].groupby("code").size()
    # Every activity keeps its row: one that never occurs in this campaign gets
    # a count of 0 instead of being dropped (as an inner merge would do).
    table[f"NS_count_{campaign}"] = table["code"].map(counts).fillna(0).astype(int)

# save
table.to_csv(out_path + "h16_q2_country.csv", index=False)
table

In [None]:
# by transport category

# create a table with the list of activities for all the legs (first col) and for legs with negative smoothness + filters
table = (
    all_gen_act.groupby("code")
    .size()
    .sort_values(ascending=False)
    .reset_index(name="count_all")
)

# Transport category of every negative-smoothness leg, looked up by legid in
# `neg_smooth`.  The activities table is not guaranteed to carry a
# `transp_category` column, whereas `neg_smooth` always does and is also the
# source of `tc_lst`.
leg_category = neg_smooth.drop_duplicates("legid").set_index("legid")[
    "transp_category"
]
act_category = all_gen_act_neg_smooth["legid"].map(leg_category)

# by transport category
for tc in tc_lst:
    counts = all_gen_act_neg_smooth[act_category == tc].groupby("code").size()
    # Every activity keeps its row: one that never occurs in this category gets
    # a count of 0 instead of being dropped (as an inner merge would do).
    table[f"NS_count_{tc}"] = table["code"].map(counts).fillna(0).astype(int)

# save
table.to_csv(out_path + "h16_q2_tc.csv", index=False)
table