# Report Gender Analysis

## Analysis

Each analysis will be done for both women and men

- [worthiness values per transport modes based on top 10 transport modes](#worth_values_top10_transpmode)
- [Worthwhileness satisfaction for different distance segments: short, medium and long](#wasted_time_bydistance)
- [the percentage of CO2 per mode](#co2)
- [average assessment per mode of wasted vs worthwhileness](#avg_assessment_wt)

In [None]:
# Import libraries
import os
import json
import pandas as pd
import numpy as np
import time
from datetime import date, datetime
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams
import sys

from IPython.core.display import display, HTML

# Set the notebook's .container element to 95% width.
# The closing tag used to be "</style" (missing ">"), which is malformed HTML.
display(HTML("<style>.container { width:95% !important; }</style>"))

from docx import Document
from docx.shared import Inches

# Default matplotlib settings used by the figures below: title padding and font size
rcParams.update({"axes.titlepad": 45, "font.size": 16})

In [None]:
# Global variables
cutting_date = "2019-05-01"  # remove trips and data published before this date
meta_data_path = "../data-campaigns/meta-data/"

# Input pickle with the legs, and the folders the report and its figures are written to
legs = "all_legs_merged_no_outlier_0.01.pkl"
input_path = "../../2019-12-16.out/"
out_path = "../../2019-12-16.out/reports/"
img_path = "../../2019-12-16.out/reports/gender_analysis/"
report_name = "Results_01.05_16.12_gender_analysis.docx"

# Create the image folder (and any missing parents). exist_ok=True avoids the
# check-then-create race of testing os.path.exists() before calling makedirs().
os.makedirs(img_path, exist_ok=True)

**Read data**

In [None]:
# Load the merged legs table.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
all_legs = pd.read_pickle(input_path + legs)
# Drop legs whose corrected transport mode is 'unknown'
known_mode_mask = all_legs["correctedModeOfTransport_str"] != "unknown"
all_legs = all_legs[known_mode_mask]

trips_users_df = pd.read_pickle(input_path + "trips_users_df.pkl")
trips_df = pd.read_pickle(input_path + "trips_df.pkl")

print("Legs:", len(all_legs))
print("Trips: ", len(all_legs["tripid"].unique()))
print("Users:", len(all_legs["userid"].unique()))
print()

## Divide between male and female users
# 'unknown' modes were already removed from all_legs above, so the per-gender
# frames need no further mode filtering.
all_legs_M = all_legs[all_legs["gender"] == "Male"]
print("Legs of male users:", len(all_legs_M))
print("Male users:", len(all_legs_M["userid"].unique()))
print()

all_legs_F = all_legs[all_legs["gender"] == "Female"]
print("Legs of female users:", len(all_legs_F))
print("Female users:", len(all_legs_F["userid"].unique()))

**Start document**

In [None]:
## Initial information

# First 10 characters of the earliest / latest start timestamp
# (presumably the YYYY-MM-DD part -- TODO confirm the column format)
start_date = str(all_legs["startDate_formated"].min())[:10]
end_date = str(all_legs["startDate_formated"].max())[:10]

subtitle = f'Data from {start_date} to {end_date} Post-processing legs merged - Outlier removed - Transport category "unknown" removed'

document = Document()
document.add_heading("Data Analysis")
document.add_heading(subtitle, level=2)

# An empty bullet precedes the summary lines.
# NOTE(review): possibly a leftover placeholder -- kept to preserve the report layout.
p = document.add_paragraph()
p.style = "List Bullet"
r = p.add_run()

# One bullet per summary figure; each line is also echoed to stdout
summary_lines = [
    f"Total number of legs: {all_legs.shape[0]}",
    f"Total number of trip: {len(all_legs['tripid'].unique())}",
    f"Total number of users: {len(all_legs['userid'].unique())}",
    f"Total number of Male users: {len(all_legs_M['userid'].unique())}",
    f"Total number of Female users: {len(all_legs_F['userid'].unique())}",
    f"Total number of Legs from Male users: {len(all_legs_M)}",
    f"Total number of Legs from Female users: {len(all_legs_F)}",
]
for summary_line in summary_lines:
    p = document.add_paragraph()
    p.style = "List Bullet"
    r = p.add_run()
    r.add_text(summary_line)
    print(summary_line)

# Trailing empty paragraph after the bullet list
p = document.add_paragraph()
r = p.add_run()

<a id='worth_values_top10_transpmode' ></a>
### worthiness values per transport modes based on top 10 transport modes

**MALE**

In [None]:
# Ten transport modes with the most legs among male users ('unknown' excluded)
legs_per_mode = all_legs_M.groupby("correctedModeOfTransport_str")["legid"].count()
top_10_modes = legs_per_mode.reset_index()
top_10_modes = top_10_modes[top_10_modes["correctedModeOfTransport_str"] != "unknown"]
top_10_modes = top_10_modes.sort_values("legid", ascending=False).head(10)

### Value from trip
# Keep only the columns needed to attach a transport mode to each rated value
value_columns = ["tripid", "legid", "valueFromTrip", "value"]
all_values_from_trip = pd.read_pickle(input_path + "values_from_trip.pkl")
all_values_from_trip = all_values_from_trip[value_columns]

all_legs_tmp = all_legs_M[["tripid", "legid", "correctedModeOfTransport_str"]]
values_from_trip = all_values_from_trip.merge(all_legs_tmp, on=["tripid", "legid"])

# Restrict to the top-10 modes
top_10_names = top_10_modes["correctedModeOfTransport_str"].tolist()
in_top_10 = values_from_trip["correctedModeOfTransport_str"].isin(top_10_names)
values_from_trip_top10 = values_from_trip[in_top_10]

# Mean value per (mode, value category), then drop the "Unknown" category
mode_and_category = ["correctedModeOfTransport_str", "valueFromTrip"]
avg_value_from_trip = (
    values_from_trip_top10.groupby(mode_and_category)["value"].mean().reset_index()
)
avg_value_from_trip = avg_value_from_trip[
    avg_value_from_trip["valueFromTrip"] != "Unknown"
]

In [None]:
# Grouped bar chart for male users: mean "value" per transport mode, one hue per valueFromTrip
fig = plt.figure(figsize=(15, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams["figure.figsize"] = (12, 8)

bar_axes = sns.barplot(
    x="correctedModeOfTransport_str",
    y="value",
    hue="valueFromTrip",
    data=avg_value_from_trip,
)
g = bar_axes.set(xlabel="Transport mode", ylabel="Average assessment ")

plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title(
    "Average of Paid Work, Personal tasks, Enjoyment, and Fitness values \n Top 10 mode of transport - MALE users",
    y=1.0,
)
plt.xticks(rotation=90)

# Write each bar's height (two decimals), rotated, 20 points above the bar top
label_style = dict(
    ha="center",
    va="center",
    fontsize=10,
    color="black",
    rotation=90,
    xytext=(0, 20),
    textcoords="offset points",
)
for p in ax.patches:
    bar_height = p.get_height()
    bar_center_x = p.get_x() + p.get_width() / 2.0
    ax.annotate(f"{bar_height:.2f}", (bar_center_x, bar_height), **label_style)

# Same figure saved as PNG (embedded in the report) and as PDF
for extension in ("png", "pdf"):
    plt.savefig(
        img_path + "ww_values_allusers_M." + extension, dpi=600, bbox_inches="tight"
    )

plt.tight_layout()

In [None]:
# Report section: top-level heading, then the MALE sub-heading followed by its figure
section_title = "Average of Paid Work, Personal tasks, Enjoyment, and Fitness values, Top 10 mode of transport"
document.add_heading(section_title)

document.add_heading("MALE Users", level=2)
picture_path = img_path + "ww_values_allusers_M.png"
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_path, width=Inches(7.0))

**FEMALE**

In [None]:
# Ten transport modes with the most legs among female users ('unknown' excluded)
legs_per_mode = all_legs_F.groupby("correctedModeOfTransport_str")["legid"].count()
top_10_modes = legs_per_mode.reset_index()
top_10_modes = top_10_modes[top_10_modes["correctedModeOfTransport_str"] != "unknown"]
top_10_modes = top_10_modes.sort_values("legid", ascending=False).head(10)

### Value from trip
# Keep only the columns needed to attach a transport mode to each rated value
value_columns = ["tripid", "legid", "valueFromTrip", "value"]
all_values_from_trip = pd.read_pickle(input_path + "values_from_trip.pkl")
all_values_from_trip = all_values_from_trip[value_columns]

all_legs_tmp = all_legs_F[["tripid", "legid", "correctedModeOfTransport_str"]]
values_from_trip = all_values_from_trip.merge(all_legs_tmp, on=["tripid", "legid"])

# Restrict to the top-10 modes
top_10_names = top_10_modes["correctedModeOfTransport_str"].tolist()
in_top_10 = values_from_trip["correctedModeOfTransport_str"].isin(top_10_names)
values_from_trip_top10 = values_from_trip[in_top_10]

# Mean value per (mode, value category), then drop the "Unknown" category
mode_and_category = ["correctedModeOfTransport_str", "valueFromTrip"]
avg_value_from_trip = (
    values_from_trip_top10.groupby(mode_and_category)["value"].mean().reset_index()
)
avg_value_from_trip = avg_value_from_trip[
    avg_value_from_trip["valueFromTrip"] != "Unknown"
]

In [None]:
# Grouped bar chart for female users: mean "value" per transport mode, one hue per valueFromTrip
fig = plt.figure(figsize=(15, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams["figure.figsize"] = (12, 8)

bar_axes = sns.barplot(
    x="correctedModeOfTransport_str",
    y="value",
    hue="valueFromTrip",
    data=avg_value_from_trip,
)
g = bar_axes.set(xlabel="Transport mode", ylabel="Average assessment ")

plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title(
    "Average of Paid Work, Personal tasks, Enjoyment, and Fitness values \n Top 10 mode of transport - FEMALE users",
    y=1.0,
)
plt.xticks(rotation=90)

# Write each bar's height (two decimals), rotated, 20 points above the bar top
label_style = dict(
    ha="center",
    va="center",
    fontsize=10,
    color="black",
    rotation=90,
    xytext=(0, 20),
    textcoords="offset points",
)
for p in ax.patches:
    bar_height = p.get_height()
    bar_center_x = p.get_x() + p.get_width() / 2.0
    ax.annotate(f"{bar_height:.2f}", (bar_center_x, bar_height), **label_style)

# Same figure saved as PNG (embedded in the report) and as PDF
for extension in ("png", "pdf"):
    plt.savefig(
        img_path + "ww_values_allusers_F." + extension, dpi=600, bbox_inches="tight"
    )

plt.tight_layout()

In [None]:
# Report section: top-level heading, then the FEMALE sub-heading followed by its figure
section_title = "Average of Paid Work, Personal tasks, Enjoyment, and Fitness values, Top 10 mode of transport"
document.add_heading(section_title)

document.add_heading("FEMALE Users", level=2)
picture_path = img_path + "ww_values_allusers_F.png"
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_path, width=Inches(7.0))

<a id='wasted_time_bydistance' ></a>
### Worthwhileness satisfaction for different distance segments: short, medium and long

**MALE**

In [None]:
# Find the 0.33 and 0.66 legDistance quantiles for each transport mode (male users)
dist_segs = (
    all_legs_M.groupby("correctedModeOfTransport_str")["legDistance"]
    .quantile([0.33, 0.66])
    .reset_index()
)
# One row per mode, one column per quantile level
dist_segs = pd.pivot_table(
    dist_segs,
    values="legDistance",
    index=["correctedModeOfTransport_str"],
    columns="level_1",
).reset_index()
dist_segs.columns = ["correctedModeOfTransport_str", "0.33", "0.66"]

# add the info to all_legs and classify into short, medium, long distance
all_legs_tmp = pd.merge(
    all_legs_M, dist_segs, on="correctedModeOfTransport_str", how="left"
)
# Vectorised form of the former row-wise apply: the first matching condition wins,
# and rows where neither comparison holds (including NaN) fall back to "medium".
all_legs_tmp["dist_seg"] = np.select(
    [
        (all_legs_tmp["legDistance"] <= all_legs_tmp["0.33"]).values,
        (all_legs_tmp["legDistance"] >= all_legs_tmp["0.66"]).values,
    ],
    ["short", "long"],
    default="medium",
)

# select only legs with a wastedTime rating in (0, 5].
# .copy() makes the filtered frame independent, so the assignment below does not
# write to a slice of all_legs_tmp (pandas SettingWithCopyWarning).
rated_mask = (all_legs_tmp["wastedTime"] > 0) & (all_legs_tmp["wastedTime"] <= 5)
all_legs_tmp_wt_dist_seg = all_legs_tmp[rated_mask].copy()
all_legs_tmp_wt_dist_seg["wastedTime"] = pd.to_numeric(
    all_legs_tmp_wt_dist_seg["wastedTime"]
)

# Mean rating per (mode, distance segment)
wasted_x_transp_dist_seg = (
    all_legs_tmp_wt_dist_seg.groupby(["correctedModeOfTransport_str", "dist_seg"])[
        "wastedTime"
    ]
    .mean()
    .reset_index()
)
# Order by segment, then by mean rating, both descending. The former extra sort
# on wastedTime alone was immediately overridden by this one and is dropped.
wasted_x_transp_dist_seg.sort_values(
    ["dist_seg", "wastedTime"], ascending=[False, False], inplace=True
)

# take the top 10 modes of transport (by number of legs)
top_10_modes = (
    all_legs_M.groupby("correctedModeOfTransport_str")["legid"].count().reset_index()
)
top_10_modes = top_10_modes.sort_values("legid", ascending=False).head(10)

In [None]:
# Keep only rows whose mode is one of the ten most frequent modes
in_top_10 = wasted_x_transp_dist_seg["correctedModeOfTransport_str"].isin(
    top_10_modes["correctedModeOfTransport_str"]
)
wasted_x_transp_dist_seg = wasted_x_transp_dist_seg[in_top_10]

# Shared styling of the value labels written above each bar
label_style = dict(
    ha="center",
    va="center",
    fontsize=14,
    color="black",
    rotation=90,
    xytext=(0, 20),
    textcoords="offset points",
)

# One bar chart (and one PNG) per distance segment
for dist_seg in wasted_x_transp_dist_seg["dist_seg"].unique():

    fig = plt.figure(figsize=(12, 12))
    ax = plt.gca()
    segment_mask = wasted_x_transp_dist_seg["dist_seg"] == dist_seg
    wasted_x_transp_dist_seg_1 = wasted_x_transp_dist_seg[segment_mask]
    sns.set_style("whitegrid")
    rcParams["figure.figsize"] = (12, 8)

    segment_axes = sns.barplot(
        x="correctedModeOfTransport_str",
        y="wastedTime",
        data=wasted_x_transp_dist_seg_1,
    )
    g = segment_axes.set(xlabel="Transport mode", ylabel="Average assessment ")
    plt.gcf().subplots_adjust(bottom=0.3)  # make space for labels
    plt.title(
        f"Average assessment per mode of wasted vs worthwhileness for {dist_seg} legs\n MALE users and top 10 mode of transport",
        y=1.0,
    )
    plt.xticks(rotation=90)

    for p in ax.patches:
        bar_height = p.get_height()
        bar_center_x = p.get_x() + p.get_width() / 2.0
        ax.annotate(f"{bar_height:.2f}", (bar_center_x, bar_height), **label_style)

    plt.savefig(
        f"{img_path}{dist_seg}_avg_ass_mode_dist_segs_M.png",
        dpi=600,
        bbox_inches="tight",
    )
    plt.tight_layout()

In [None]:
# Add one section per distance segment to the report: the shared level-2 heading
# (repeated on every iteration), a level-3 heading naming the segment, and its figure.
segments_heading = "Worthwhileness satisfaction for different distance segments: short, medium and long (quantiles 0.33 and 0.66) "
for dist_seg in wasted_x_transp_dist_seg["dist_seg"].unique():
    document.add_heading(segments_heading, level=2)
    document.add_heading(
        f"Average assessment per mode of wasted vs worthwhileness for {dist_seg} legs  - MALE users",
        level=3,
    )
    picture_path = f"{img_path}{dist_seg}_avg_ass_mode_dist_segs_M.png"
    p = document.add_paragraph()
    r = p.add_run()
    r.add_picture(picture_path, width=Inches(7.0))

**FEMALE**

In [None]:
# Find the 0.33 and 0.66 legDistance quantiles for each transport mode (female users)
dist_segs = (
    all_legs_F.groupby("correctedModeOfTransport_str")["legDistance"]
    .quantile([0.33, 0.66])
    .reset_index()
)
# One row per mode, one column per quantile level
dist_segs = pd.pivot_table(
    dist_segs,
    values="legDistance",
    index=["correctedModeOfTransport_str"],
    columns="level_1",
).reset_index()
dist_segs.columns = ["correctedModeOfTransport_str", "0.33", "0.66"]

# add the info to all_legs and classify into short, medium, long distance
all_legs_tmp = pd.merge(
    all_legs_F, dist_segs, on="correctedModeOfTransport_str", how="left"
)
# Vectorised form of the former row-wise apply: the first matching condition wins,
# and rows where neither comparison holds (including NaN) fall back to "medium".
all_legs_tmp["dist_seg"] = np.select(
    [
        (all_legs_tmp["legDistance"] <= all_legs_tmp["0.33"]).values,
        (all_legs_tmp["legDistance"] >= all_legs_tmp["0.66"]).values,
    ],
    ["short", "long"],
    default="medium",
)

# select only legs with a wastedTime rating in (0, 5].
# .copy() makes the filtered frame independent, so the assignment below does not
# write to a slice of all_legs_tmp (pandas SettingWithCopyWarning).
rated_mask = (all_legs_tmp["wastedTime"] > 0) & (all_legs_tmp["wastedTime"] <= 5)
all_legs_tmp_wt_dist_seg = all_legs_tmp[rated_mask].copy()
all_legs_tmp_wt_dist_seg["wastedTime"] = pd.to_numeric(
    all_legs_tmp_wt_dist_seg["wastedTime"]
)

# Mean rating per (mode, distance segment)
wasted_x_transp_dist_seg = (
    all_legs_tmp_wt_dist_seg.groupby(["correctedModeOfTransport_str", "dist_seg"])[
        "wastedTime"
    ]
    .mean()
    .reset_index()
)
# Order by segment, then by mean rating, both descending. The former extra sort
# on wastedTime alone was immediately overridden by this one and is dropped.
wasted_x_transp_dist_seg.sort_values(
    ["dist_seg", "wastedTime"], ascending=[False, False], inplace=True
)

# take the top 10 modes of transport (by number of legs)
top_10_modes = (
    all_legs_F.groupby("correctedModeOfTransport_str")["legid"].count().reset_index()
)
top_10_modes = top_10_modes.sort_values("legid", ascending=False).head(10)

In [None]:
# Keep only rows whose mode is one of the ten most frequent modes
in_top_10 = wasted_x_transp_dist_seg["correctedModeOfTransport_str"].isin(
    top_10_modes["correctedModeOfTransport_str"]
)
wasted_x_transp_dist_seg = wasted_x_transp_dist_seg[in_top_10]

# Shared styling of the value labels written above each bar
label_style = dict(
    ha="center",
    va="center",
    fontsize=14,
    color="black",
    rotation=90,
    xytext=(0, 20),
    textcoords="offset points",
)

# One bar chart (and one PNG) per distance segment
for dist_seg in wasted_x_transp_dist_seg["dist_seg"].unique():

    fig = plt.figure(figsize=(12, 12))
    ax = plt.gca()
    segment_mask = wasted_x_transp_dist_seg["dist_seg"] == dist_seg
    wasted_x_transp_dist_seg_1 = wasted_x_transp_dist_seg[segment_mask]
    sns.set_style("whitegrid")
    rcParams["figure.figsize"] = (12, 8)

    segment_axes = sns.barplot(
        x="correctedModeOfTransport_str",
        y="wastedTime",
        data=wasted_x_transp_dist_seg_1,
    )
    g = segment_axes.set(xlabel="Transport mode", ylabel="Average assessment ")
    plt.gcf().subplots_adjust(bottom=0.3)  # make space for labels
    plt.title(
        f"Average assessment per mode of wasted vs worthwhileness for {dist_seg} legs\n FEMALE users and top 10 mode of transport",
        y=1.0,
    )
    plt.xticks(rotation=90)

    for p in ax.patches:
        bar_height = p.get_height()
        bar_center_x = p.get_x() + p.get_width() / 2.0
        ax.annotate(f"{bar_height:.2f}", (bar_center_x, bar_height), **label_style)

    plt.savefig(
        f"{img_path}{dist_seg}_avg_ass_mode_dist_segs_F.png",
        dpi=600,
        bbox_inches="tight",
    )
    plt.tight_layout()

In [None]:
# Add one section per distance segment to the report: the shared level-2 heading
# (repeated on every iteration), a level-3 heading naming the segment, and its figure.
segments_heading = "Worthwhileness satisfaction for different distance segments: short, medium and long (quantiles 0.33 and 0.66) "
for dist_seg in wasted_x_transp_dist_seg["dist_seg"].unique():
    document.add_heading(segments_heading, level=2)
    document.add_heading(
        f"Average assessment per mode of wasted vs worthwhileness for {dist_seg} legs  - FEMALE users",
        level=3,
    )
    picture_path = f"{img_path}{dist_seg}_avg_ass_mode_dist_segs_F.png"
    p = document.add_paragraph()
    r = p.add_run()
    r.add_picture(picture_path, width=Inches(7.0))

<a id='co2' ></a>
### percentage of CO2 per mode

In [None]:
# Transport-mode table as (name, max_speed, co2) rows; a row's position is its
# numeric mode code. NOTE(review): units are not stated here (co2 is presumably
# an emission factor per km -- confirm), and 99999999 looks like a sentinel for
# modes without a meaningful factor -- confirm.
_MODE_ROWS = (
    ("vehicle", 200, 120),
    ("bicycle", 100, 0),
    ("onfoot", 12, 0),
    ("still", 0, 99999999),
    ("unknown", 100, 99999999),
    ("tilting", 100, 99999999),
    ("inexistent", 100, 99999999),
    ("walking", 12, 0),
    ("running", 20, 0),
    ("car", 250, 120),
    ("train", 350, 14),
    ("tram", 100, 14),
    ("subway", 100, 14),
    ("ferry", 200, 256.5),
    ("plane", 7000, 285),
    ("bus", 150, 68),
    ("electricBike", 50, 6),
    ("bikeSharing", 50, 0),
    ("microScooter", 50, 12),
    ("skate", 20, 0),
    ("motorcycle", 300, 80),
    ("moped", 80, 60),
    ("carPassenger", 250, 80),
    ("taxi", 250, 100),
    ("rideHailing", 100, 120),
    ("carSharing", 250, 120),
    ("carpooling", 250, 120),
    ("busLongDistance", 150, 68),
    ("highSpeedTrain", 350, 25),
    ("other", 100, 0),
    ("otherPublic", 300, 70),
    ("otherActive", 30, 0),
    ("otherPrivate", 250, 90),
    ("intercityTrain", 300, 14),
    ("wheelChair", 10, 0),
    ("cargoBike", 30, 0),
    ("carSharingPassenger", 250, 80),
    ("electricWheelchair", 30, 15),
)

# Keyed by the mode code as a string ("0" .. "37")
transp_mode_dict = {
    str(code): {"name": name, "max_speed": max_speed, "co2": co2}
    for code, (name, max_speed, co2) in enumerate(_MODE_ROWS)
}

**MALE**

In [None]:
def _co2_for_leg(leg):
    """Return legDistance / 1000 multiplied by the "co2" factor of the leg's mode code."""
    scaled_distance = leg["legDistance"] / 1000
    mode_code = str(int(leg["correctedModeOfTransport"]))
    return scaled_distance * transp_mode_dict[mode_code]["co2"]


# Per-leg CO2 for male users, computed on a copy so all_legs_M is left untouched
all_legs_tmp = all_legs_M.copy()
all_legs_tmp["co2"] = all_legs_tmp.apply(_co2_for_leg, axis=1)

# Total CO2 per transport mode, largest first
co2_by_mode = all_legs_tmp.groupby("correctedModeOfTransport_str")["co2"].sum()
tot_co2_mode = co2_by_mode.reset_index().sort_values("co2", ascending=False)
# remove unknown
tot_co2_mode = tot_co2_mode[tot_co2_mode["correctedModeOfTransport_str"] != "unknown"]
# Rescale by 1000, then express each mode as a percentage of the total
tot_co2_mode["co2"] = tot_co2_mode["co2"].div(1000)
tot_co2_mode["co2_perc"] = tot_co2_mode["co2"].div(tot_co2_mode["co2"].sum()).mul(100)
tot_co2_mode.head()

In [None]:
# Bar chart of the CO2 share (co2_perc) per transport mode for male users
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()
sns.set_style("whitegrid")

# x label previously read "Tranport mode"; spelling fixed to match the other figures
g = sns.barplot(data=tot_co2_mode, x="correctedModeOfTransport_str", y="co2_perc").set(
    xlabel="Transport mode", ylabel="Percentage CO2 "
)

plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title("Percentage of Co2 per mode  - MALE users", y=1.0)
plt.xticks(rotation=90)

# Write each bar's height (two decimals), rotated, 20 points above the bar top
for p in ax.patches:
    ax.annotate(
        "%.2f" % p.get_height(),
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha="center",
        va="center",
        fontsize=14,
        color="black",
        rotation=90,
        xytext=(0, 20),
        textcoords="offset points",
    )

plt.savefig(img_path + "perc_co2_mode_allusers_M.png")
# NOTE(review): tight_layout() runs after savefig, so it only affects the inline figure
plt.tight_layout()

In [None]:
# Append the male CO2-share figure to the report under its own level-2 heading
document.add_heading("Percentage CO2 - MALE Users", level=2)
picture_path = img_path + "perc_co2_mode_allusers_M.png"
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_path, width=Inches(7.0))

**Group all transport modes with a CO2 share below 1% into a single "other" category and all train types into "train"**

In [None]:
# Step 1: fold the train sub-types into "train" and re-aggregate
train_modes = ["intercityTrain", "highSpeedTrain"]
tot_co2_mode["correctedModeOfTransport_str"] = tot_co2_mode[
    "correctedModeOfTransport_str"
].map(lambda mode: "train" if mode in train_modes else mode)
merged_trains = tot_co2_mode.groupby("correctedModeOfTransport_str").sum()
tot_co2_mode = merged_trains.reset_index().sort_values("co2_perc", ascending=False)

# Step 2: relabel every mode whose share is below 1 (percent) as "other" and re-aggregate
tot_co2_mode["correctedModeOfTransport_str"] = tot_co2_mode[
    "correctedModeOfTransport_str"
].where(tot_co2_mode["co2_perc"] >= 1, "other")
merged_small = tot_co2_mode.groupby("correctedModeOfTransport_str").sum()
tot_co2_mode = merged_small.reset_index().sort_values("co2_perc", ascending=False)

In [None]:
# Bar chart of the grouped CO2 share (co2_perc) per transport mode for male users
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()


sns.set_style("whitegrid")

# x label previously read "Tranport mode"; spelling fixed to match the other figures
g = sns.barplot(data=tot_co2_mode, x="correctedModeOfTransport_str", y="co2_perc").set(
    xlabel="Transport mode", ylabel="Percentage CO2 "
)


plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title("Percentage of Co2 per mode  - MALE users", y=1.0)
plt.xticks(rotation=90)

# Write each bar's height (two decimals), rotated, 20 points above the bar top
for p in ax.patches:
    ax.annotate(
        "%.2f" % p.get_height(),
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha="center",
        va="center",
        fontsize=14,
        color="black",
        rotation=90,
        xytext=(0, 20),
        textcoords="offset points",
    )
plt.savefig(img_path + "perc_co2_mode_alluser_group_M.png")
# NOTE(review): tight_layout() runs after savefig, so it only affects the inline figure
plt.tight_layout()

In [None]:
# Append the grouped male CO2-share figure to the report under its own level-2 heading
grouped_heading = 'Percentage Co2 per mode  - MALE users - All transport modes < 1 => "other" and all train types in "Train"'
document.add_heading(grouped_heading, level=2)
picture_path = img_path + "perc_co2_mode_alluser_group_M.png"
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_path, width=Inches(7.0))

**FEMALE**

In [None]:
def _co2_for_leg(leg):
    """Return legDistance / 1000 multiplied by the "co2" factor of the leg's mode code."""
    scaled_distance = leg["legDistance"] / 1000
    mode_code = str(int(leg["correctedModeOfTransport"]))
    return scaled_distance * transp_mode_dict[mode_code]["co2"]


# Per-leg CO2 for female users, computed on a copy so all_legs_F is left untouched
all_legs_tmp = all_legs_F.copy()
all_legs_tmp["co2"] = all_legs_tmp.apply(_co2_for_leg, axis=1)

# Total CO2 per transport mode, largest first
co2_by_mode = all_legs_tmp.groupby("correctedModeOfTransport_str")["co2"].sum()
tot_co2_mode = co2_by_mode.reset_index().sort_values("co2", ascending=False)
# remove unknown
tot_co2_mode = tot_co2_mode[tot_co2_mode["correctedModeOfTransport_str"] != "unknown"]
# Rescale by 1000, then express each mode as a percentage of the total
tot_co2_mode["co2"] = tot_co2_mode["co2"].div(1000)
tot_co2_mode["co2_perc"] = tot_co2_mode["co2"].div(tot_co2_mode["co2"].sum()).mul(100)
tot_co2_mode.head()

In [None]:
# Bar chart of the CO2 share (co2_perc) per transport mode for female users
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()
sns.set_style("whitegrid")

# x label previously read "Tranport mode"; spelling fixed to match the other figures
g = sns.barplot(data=tot_co2_mode, x="correctedModeOfTransport_str", y="co2_perc").set(
    xlabel="Transport mode", ylabel="Percentage CO2 "
)

plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title("Percentage of Co2 per mode  - FEMALE users", y=1.0)
plt.xticks(rotation=90)

# Write each bar's height (two decimals), rotated, 20 points above the bar top
for p in ax.patches:
    ax.annotate(
        "%.2f" % p.get_height(),
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha="center",
        va="center",
        fontsize=14,
        color="black",
        rotation=90,
        xytext=(0, 20),
        textcoords="offset points",
    )

plt.savefig(img_path + "perc_co2_mode_allusers_F.png")
# NOTE(review): tight_layout() runs after savefig, so it only affects the inline figure
plt.tight_layout()

In [None]:
# Append the female CO2-share figure to the report under its own level-2 heading
document.add_heading("Percentage CO2 - FEMALE Users", level=2)
picture_path = img_path + "perc_co2_mode_allusers_F.png"
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_path, width=Inches(7.0))

**Group all transport modes with a CO2 share below 1% into a single "other" category and all train types into "train"**

In [None]:
# Step 1: fold the train sub-types into "train" and re-aggregate
train_modes = ["intercityTrain", "highSpeedTrain"]
tot_co2_mode["correctedModeOfTransport_str"] = tot_co2_mode[
    "correctedModeOfTransport_str"
].map(lambda mode: "train" if mode in train_modes else mode)
merged_trains = tot_co2_mode.groupby("correctedModeOfTransport_str").sum()
tot_co2_mode = merged_trains.reset_index().sort_values("co2_perc", ascending=False)

# Step 2: relabel every mode whose share is below 1 (percent) as "other" and re-aggregate
tot_co2_mode["correctedModeOfTransport_str"] = tot_co2_mode[
    "correctedModeOfTransport_str"
].where(tot_co2_mode["co2_perc"] >= 1, "other")
merged_small = tot_co2_mode.groupby("correctedModeOfTransport_str").sum()
tot_co2_mode = merged_small.reset_index().sort_values("co2_perc", ascending=False)

In [None]:
# Bar chart of the grouped CO2 share (co2_perc) per transport mode for female users
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()


sns.set_style("whitegrid")

# x label previously read "Tranport mode"; spelling fixed to match the other figures
g = sns.barplot(data=tot_co2_mode, x="correctedModeOfTransport_str", y="co2_perc").set(
    xlabel="Transport mode", ylabel="Percentage CO2 "
)


plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title("Percentage of Co2 per mode  - FEMALE users", y=1.0)
plt.xticks(rotation=90)

# Write each bar's height (two decimals), rotated, 20 points above the bar top
for p in ax.patches:
    ax.annotate(
        "%.2f" % p.get_height(),
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha="center",
        va="center",
        fontsize=14,
        color="black",
        rotation=90,
        xytext=(0, 20),
        textcoords="offset points",
    )
plt.savefig(img_path + "perc_co2_mode_alluser_group_F.png")
# NOTE(review): tight_layout() runs after savefig, so it only affects the inline figure
plt.tight_layout()

In [None]:
# Append the grouped female CO2-share figure to the report under its own level-2 heading
grouped_heading = 'Percentage Co2 per mode  - FEMALE users - All transport modes < 1 => "other" and all train types in "Train"'
document.add_heading(grouped_heading, level=2)
picture_path = img_path + "perc_co2_mode_alluser_group_F.png"
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_path, width=Inches(7.0))

<a id='avg_assessment_wt' ></a>
### average assessment per mode of wasted vs worthwhileness

For each mode of transport produce an average assessment of the variable `wastedTime`

**MALE**

In [None]:
# Keep only male legs with a wastedTime rating in (0, 5].
# .copy() makes the filtered frame independent of all_legs_M, so the column
# assignment below does not write to a slice (pandas SettingWithCopyWarning).
rated_mask = (all_legs_M["wastedTime"] > 0) & (all_legs_M["wastedTime"] <= 5)
all_legs_wt = all_legs_M[rated_mask].copy()
all_legs_wt["wastedTime"] = pd.to_numeric(all_legs_wt["wastedTime"])

# Mean rating per transport mode, highest first
wasted_x_transp = (
    all_legs_wt.groupby("correctedModeOfTransport_str")["wastedTime"]
    .mean()
    .reset_index()
)
wasted_x_transp.sort_values(by="wastedTime", ascending=False, inplace=True)
wasted_x_transp.head()

In [None]:
# Bar chart of the mean wastedTime rating per transport mode for male users
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams["figure.figsize"] = (12, 8)

mode_axes = sns.barplot(
    x="correctedModeOfTransport_str", y="wastedTime", data=wasted_x_transp
)
g = mode_axes.set(xlabel="Transport mode", ylabel="Average assessment ")

# The original issued this identical adjustment twice; once has the same effect
plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title(
    "Average assessment per mode of wasted vs worthwhileness  - MALE users", y=1.0
)
plt.xticks(rotation=90)

# Write each bar's height (two decimals), rotated, 20 points above the bar top
label_style = dict(
    ha="center",
    va="center",
    fontsize=14,
    color="black",
    rotation=90,
    xytext=(0, 20),
    textcoords="offset points",
)
for p in ax.patches:
    bar_height = p.get_height()
    bar_center_x = p.get_x() + p.get_width() / 2.0
    ax.annotate(f"{bar_height:.2f}", (bar_center_x, bar_height), **label_style)

plt.savefig(img_path + "avg_ass_mode_allusers_M.png")
plt.tight_layout()

In [None]:
# Report section: top-level heading, then the MALE sub-heading followed by its figure
document.add_heading("Average assessment per mode of wasted vs worthwhileness")

document.add_heading("MALE Users", level=2)
picture_path = img_path + "avg_ass_mode_allusers_M.png"
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_path, width=Inches(7.0))

**Group all transport modes with a relative frequency below 1% into "other" and all train types into "train"**

In [None]:
# Number and percentage of male legs per transport mode
legs_x_mode = (
    all_legs_M.groupby("correctedModeOfTransport_str")["legid"].count().reset_index()
)
legs_x_mode.columns = ["correctedModeOfTransport_str", "nlegs"]
legs_x_mode.sort_values("nlegs", ascending=False, inplace=True)
legs_x_mode["perc_legs"] = legs_x_mode["nlegs"] / legs_x_mode["nlegs"].sum() * 100

train_modes = ("intercityTrain", "highSpeedTrain")

# train: fold the train sub-types into "train" and re-aggregate
legs_x_mode["correctedModeOfTransport_str"] = legs_x_mode[
    "correctedModeOfTransport_str"
].apply(lambda mode: "train" if mode in train_modes else mode)
legs_x_mode = (
    legs_x_mode.groupby("correctedModeOfTransport_str")
    .sum()
    .reset_index()
    .sort_values("perc_legs", ascending=False)
)
# group: relabel every mode with less than 1 (percent) of the legs as "other"
legs_x_mode["correctedModeOfTransport_str"] = legs_x_mode[
    "correctedModeOfTransport_str"
].where(legs_x_mode["perc_legs"] >= 1, "other")
legs_x_mode = (
    legs_x_mode.groupby("correctedModeOfTransport_str")
    .sum()
    .reset_index()
    .sort_values("perc_legs", ascending=False)
)

# Select the WT mean with the grouped transport mode
all_legs_tmp = all_legs_M.copy()
all_legs_tmp["gr_mode"] = all_legs_tmp["correctedModeOfTransport_str"].apply(
    lambda mode: "train" if mode in train_modes else mode
)
# Build the set of retained mode names once; the original rebuilt a list from
# the column for every single row inside the lambda.
kept_modes = set(legs_x_mode["correctedModeOfTransport_str"])
all_legs_tmp["gr_mode"] = all_legs_tmp["gr_mode"].apply(
    lambda mode: mode if mode in kept_modes else "other"
)

# Keep only legs with a wastedTime rating in (0, 5].
# .copy() makes the filtered frame independent of all_legs_tmp, so the column
# assignment below does not write to a slice (pandas SettingWithCopyWarning).
rated_mask = (all_legs_tmp["wastedTime"] > 0) & (all_legs_tmp["wastedTime"] <= 5)
all_legs_wt_grouped = all_legs_tmp[rated_mask].copy()
all_legs_wt_grouped["wastedTime"] = pd.to_numeric(all_legs_wt_grouped["wastedTime"])

# Mean rating per grouped mode, highest first
wasted_x_transp_grouped = (
    all_legs_wt_grouped.groupby("gr_mode")["wastedTime"].mean().reset_index()
)
wasted_x_transp_grouped.sort_values(by="wastedTime", ascending=False, inplace=True)

In [None]:
# Bar chart of the mean wastedTime rating per grouped transport mode for male users
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

sns.set_style("whitegrid")
rcParams["figure.figsize"] = (12, 8)

grouped_axes = sns.barplot(x="gr_mode", y="wastedTime", data=wasted_x_transp_grouped)
g = grouped_axes.set(xlabel="Transport mode", ylabel="Average assessment ")

plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title(
    "Average assessment per mode of wasted vs worthwhileness  - MALE users", y=1.0
)
plt.xticks(rotation=90)

# Write each bar's height (two decimals), rotated, 20 points above the bar top
label_style = dict(
    ha="center",
    va="center",
    fontsize=14,
    color="black",
    rotation=90,
    xytext=(0, 20),
    textcoords="offset points",
)
for p in ax.patches:
    bar_height = p.get_height()
    bar_center_x = p.get_x() + p.get_width() / 2.0
    ax.annotate(f"{bar_height:.2f}", (bar_center_x, bar_height), **label_style)

# Same figure saved as PNG (embedded in the report) and as PDF
for extension in ("png", "pdf"):
    plt.savefig(
        img_path + "avg_ass_mode_allusers_grouped_M." + extension,
        dpi=600,
        bbox_inches="tight",
    )
plt.tight_layout()

In [None]:
# Append the grouped-mode chart for male users to the Word report:
# two level-2 headings followed by a paragraph holding the PNG.
document.add_heading(
    text='All transport modes < 1 => "other" and all train types in "Train"',
    level=2,
)
document.add_heading(text="MALE Users", level=2)

picture_file = img_path + "avg_ass_mode_allusers_grouped_M.png"
picture_width = Inches(7.0)
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=picture_width)

**FEMALE**

In [None]:
# Mean wastedTime rating per transport mode (female users).
# Keep only legs with a rating in the range 0 < wastedTime <= 5.
# .copy() makes the filtered frame independent, so the column assignment below
# does not raise pandas' SettingWithCopyWarning.
all_legs_wt = all_legs_F[
    (all_legs_F["wastedTime"] > 0) & (all_legs_F["wastedTime"] <= 5)
].copy()
all_legs_wt["wastedTime"] = pd.to_numeric(all_legs_wt["wastedTime"])
wasted_x_transp = (
    all_legs_wt.groupby("correctedModeOfTransport_str")["wastedTime"]
    .mean()
    .reset_index()
)
# Highest average rating first.
wasted_x_transp.sort_values(by="wastedTime", ascending=False, inplace=True)
# Last expression of the cell: shows the first rows as the cell output.
wasted_x_transp.head()

In [None]:
# Bar chart: mean wastedTime rating per transport mode (female users).
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

# Both settings are applied after the figure above already exists.
sns.set_style("whitegrid")
rcParams["figure.figsize"] = 12, 8

g = sns.barplot(
    data=wasted_x_transp, x="correctedModeOfTransport_str", y="wastedTime"
).set(xlabel="Transport mode", ylabel="Average assessment ")

# Called once; the original cell repeated this identical call twice.
plt.gcf().subplots_adjust(bottom=0.2)  # make space for labels
plt.title(
    "Average assessment per mode of wasted vs worthwhileness  - FEMALE users", y=1.0
)
plt.xticks(rotation=90)

# Write each bar's height (two decimals) 20 points above the top of the bar.
for p in ax.patches:
    ax.annotate(
        "%.2f" % p.get_height(),
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha="center",
        va="center",
        fontsize=14,
        color="black",
        rotation=90,
        xytext=(0, 20),
        textcoords="offset points",
    )
# Export the chart as PNG (embedded in the report later) and as PDF.
plt.savefig(img_path + "avg_ass_mode_allusers_F.png", dpi=600, bbox_inches="tight")
plt.savefig(img_path + "avg_ass_mode_allusers_F.pdf", dpi=600, bbox_inches="tight")
# Runs after the files are written, so it does not change the saved images.
plt.tight_layout()

In [None]:
# Append the per-mode chart for female users to the Word report.
# level=1 is python-docx's default heading level, spelled out here.
document.add_heading(
    text="Average assessment per mode of wasted vs worthwhileness", level=1
)

document.add_heading(text="FEMALE Users", level=2)

picture_file = img_path + "avg_ass_mode_allusers_F.png"
picture_width = Inches(7.0)
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=picture_width)

**Group all transport modes with relative frequency < 1% into "other" and all train types into "train"**

In [None]:
# Number of legs per transport mode for female users, plus each mode's share
# of all legs expressed as a percentage.
leg_counts = all_legs_F.groupby("correctedModeOfTransport_str")["legid"].count()
legs_x_mode = leg_counts.reset_index(name="nlegs").sort_values(
    "nlegs", ascending=False
)
legs_x_mode["perc_legs"] = legs_x_mode["nlegs"] / legs_x_mode["nlegs"].sum() * 100

# Merge the two long-distance train modes under the single label "train".
train_modes = ["intercityTrain", "highSpeedTrain"]
legs_x_mode["correctedModeOfTransport_str"] = [
    "train" if mode in train_modes else mode
    for mode in legs_x_mode["correctedModeOfTransport_str"]
]
mode_totals = legs_x_mode.groupby("correctedModeOfTransport_str").sum()
legs_x_mode = mode_totals.reset_index().sort_values("perc_legs", ascending=False)

# Relabel every mode whose perc_legs is below 1 as "other", then aggregate
# again so that all "other" rows are merged into one.
legs_x_mode["correctedModeOfTransport_str"] = [
    mode if share >= 1 else "other"
    for mode, share in zip(
        legs_x_mode["correctedModeOfTransport_str"], legs_x_mode["perc_legs"]
    )
]
mode_totals = legs_x_mode.groupby("correctedModeOfTransport_str").sum()
legs_x_mode = mode_totals.reset_index().sort_values("perc_legs", ascending=False)

# Mean wastedTime rating per grouped transport mode (female users).
# Work on a copy so the helper column "gr_mode" is not added to all_legs_F.
all_legs_tmp = all_legs_F.copy()
all_legs_tmp["gr_mode"] = all_legs_tmp["correctedModeOfTransport_str"].apply(
    lambda x: "train" if x in (["intercityTrain", "highSpeedTrain"]) else x
)
# Build the set of retained mode labels once instead of rebuilding a list for
# every row inside the lambda; any mode not retained is mapped to "other".
kept_modes = set(legs_x_mode["correctedModeOfTransport_str"])
all_legs_tmp["gr_mode"] = all_legs_tmp["gr_mode"].apply(
    lambda x: x if x in kept_modes else "other"
)

# Keep only legs with a rating in the range 0 < wastedTime <= 5.
# .copy() makes the filtered frame independent, so the column assignment below
# does not raise pandas' SettingWithCopyWarning.
all_legs_wt_grouped = all_legs_tmp[
    (all_legs_tmp["wastedTime"] > 0) & (all_legs_tmp["wastedTime"] <= 5)
].copy()
all_legs_wt_grouped["wastedTime"] = pd.to_numeric(all_legs_wt_grouped["wastedTime"])
wasted_x_transp_grouped = (
    all_legs_wt_grouped.groupby("gr_mode")["wastedTime"].mean().reset_index()
)
# Highest average rating first.
wasted_x_transp_grouped.sort_values(by="wastedTime", ascending=False, inplace=True)

In [None]:
# Bar chart: mean wastedTime rating per grouped transport mode (female users).
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()

# Both settings are applied after the figure above already exists.
sns.set_style("whitegrid")
rcParams["figure.figsize"] = 12, 8

bars = sns.barplot(data=wasted_x_transp_grouped, x="gr_mode", y="wastedTime")
g = bars.set(xlabel="Transport mode", ylabel="Average assessment ")

fig.subplots_adjust(bottom=0.2)  # leave room for the rotated tick labels
plt.title(
    "Average assessment per mode of wasted vs worthwhileness  - FEMALE users", y=1.0
)
plt.xticks(rotation=90)

# Write each bar's height (two decimals) 20 points above the top of the bar.
for bar in ax.patches:
    height = bar.get_height()
    ax.annotate(
        f"{height:.2f}",
        xy=(bar.get_x() + bar.get_width() / 2.0, height),
        xytext=(0, 20),
        textcoords="offset points",
        ha="center",
        va="center",
        fontsize=14,
        color="black",
        rotation=90,
    )

# Export the chart as PNG (embedded in the report later) and as PDF.
plt.savefig(
    img_path + "avg_ass_mode_allusers_grouped_F.png", dpi=600, bbox_inches="tight"
)
plt.savefig(
    img_path + "avg_ass_mode_allusers_grouped_F.pdf", dpi=600, bbox_inches="tight"
)
# Runs after the files are written, so it does not change the saved images.
fig.tight_layout()

In [None]:
# Append the grouped-mode chart for female users to the Word report:
# two level-2 headings followed by a paragraph holding the PNG.
document.add_heading(
    text='All transport modes < 1 => "other" and all train types in "Train"',
    level=2,
)
document.add_heading(text="FEMALE Users", level=2)

picture_file = img_path + "avg_ass_mode_allusers_grouped_F.png"
picture_width = Inches(7.0)
p = document.add_paragraph()
r = p.add_run()
r.add_picture(picture_file, width=picture_width)

**SAVE**

In [None]:
# Write the assembled Word report to out_path under the configured file name.
report_file = out_path + report_name
document.save(report_file)

In [None]:
# Bare expression as the last statement of the cell: the notebook echoes the
# full path of the report file as the cell output.
out_path + report_name