# WI Analyses

## Questions

- [Q1](#Q1): Number of trips pre- and post-clarification of the difference between worthwhileness at destination and during the trip.
- [Q2](#Q2): Average worthwhileness rating per transport category

In [None]:
import os
import re
import sys
import pandas as pd
import numpy as np
import importlib
import itertools
import pathlib
from pandas.io.json import json_normalize
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from matplotlib import rcParams
import json
import math
import packaging

%matplotlib inline

# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule that sets elements of class `.container` to 95% width.
# The closing tag must be a well-formed `</style>`.
display(HTML("<style>.container { width:95% !important; }</style>"))

**READ DATA**

In [None]:
# --- Input / output locations (relative paths) ---
meta_data_path = pathlib.Path("../../data-campaigns/meta-data/")
input_path = pathlib.Path("../../2019-12-16.out/")
out_path = pathlib.Path("../../2019-12-16.out/D5.1/")
img_path = pathlib.Path("../../2019-12-16.out/D5.1/images/")

# File name of the pickled legs table; it is resolved against `input_path`
# when the data is loaded.
legs = "all_legs_merged_no_outlier_0.01.pkl"

# --- Plot defaults shared by every figure in the notebook ---
rcParams.update(
    {
        "axes.titlepad": 45,
        "font.size": 16,
        "figure.figsize": (12, 8),
    }
)
sns.set_style("whitegrid")

In [None]:
# Create the output directories together with any missing parents.
# `exist_ok=True` makes the cell safe to re-run: an existing directory is
# accepted silently, while a path that exists but is NOT a directory still
# raises FileExistsError instead of being reported as "already exists".
for directory in (out_path, img_path):
    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)

In [None]:
# Load the pickled input tables from `input_path`.
# NOTE: unpickling can execute arbitrary code, so only load trusted files.
all_legs = pd.read_pickle(input_path / legs)
trips_users_df, trips_df = (
    pd.read_pickle(input_path / pickle_name)
    for pickle_name in ("trips_users_df.pkl", "trips_df.pkl")
)

# Quick size summary of the legs table.
n_legs = len(all_legs)
n_trips = len(all_legs["tripid"].unique())
n_users = len(all_legs["userid"].unique())

print("Legs:", n_legs)
print("Trips: ", n_trips)
print("Users:", n_users)
print()

<a id='Q1' ></a>
### Q1:  Number of trips pre- and post-clarification of the difference between worthwhileness at destination and during the trip.

In [None]:
def which_os(os_desc):
    """Classify an OS description string as "Android" or "iOS".

    A description containing "android" (compared case-insensitively) is
    reported as "Android"; every other description is reported as "iOS".
    """
    return "Android" if "android" in os_desc.lower() else "iOS"

In [None]:
ANDROID_APPVERSION_REGEX = re.compile("Android AppVersion (\d+)")
IOS_APPVERSION_REGEX = re.compile("(\d+).(\d+).(\d+)?")


def _which_app_version(os_desc, os_type):
    app_version = "-"
    if os_type == "Android":
        match = ANDROID_APPVERSION_REGEX.match(os_desc)
        if match:
            # just the app_version number
            app_version = match.group(1)
    elif os_type == "iOS":
        match = IOS_APPVERSION_REGEX.match(os_desc)
        if match:
            # all version string
            app_version = match.group(0)

    return app_version


which_app_version = lambda row: _which_app_version(row.oS, row.os_type)

In [None]:
# Keep only the trip id and the raw OS columns; work on a copy so later
# column additions do not touch `trips_df`.
osinfo_columns = ["tripid", "oS", "oSVersion"]
trip_osinfo = trips_df.loc[:, osinfo_columns].copy()
trip_osinfo.head(10)

In [None]:
# Build the per-trip OS table: derive the OS family and the app version from
# the raw `oS` string, then normalise the column names.
trip_osinfo = (
    trips_df[["tripid", "oS", "oSVersion"]]
    .copy()
    .assign(os_type=lambda frame: frame["oS"].map(which_os))
    .assign(
        app_version=lambda frame: frame[["oS", "os_type"]].apply(
            which_app_version, axis=1
        )
    )
    .rename(columns={"oS": "os", "oSVersion": "os_version"})
)

# Persist the table next to the other outputs.
trip_osinfo_csv_name = "trip_osinfo.csv"
trip_osinfo_csv_path = out_path / trip_osinfo_csv_name
trip_osinfo.to_csv(trip_osinfo_csv_path, index=False)

In [None]:
# Preview the first three rows of the per-trip OS / app-version table.
trip_osinfo.head(3)

In [None]:
# Count the rows of `trip_osinfo` for every (OS family, app version) pair.
osinfo_ntrips = (
    trip_osinfo.groupby(["os_type", "app_version"])
    .size()
    .reset_index(name="ntrips")
)
osinfo_ntrips.head(3)

In [None]:
# Show the per-app-version trip counts for the rows classified as iOS.
osinfo_ntrips.loc[osinfo_ntrips["os_type"] == "iOS"]

In [None]:
ANDROID_MIN_VERSION = 167
IOS_MIN_VERSION = "2.0.6"

# How do I compare version numbers in Python?
# https://stackoverflow.com/a/11887885/2377454
#
# >>> from packaging import version
# >>> version.parse("2.3.1") < version.parse("10.1.2")
#     True
def _filter_app_version(os_type, app_version):
    app_version_filter = False
    if os_type == "Android":
        version = int(app_version)
        if version >= ANDROID_MIN_VERSION:
            app_version_filter = True
    elif os_type == "iOS":
        if app_version != "-":
            v = packaging.version.parse(app_version)
            minv = packaging.version.parse(IOS_MIN_VERSION)
            app_version_filter = v >= minv

    return app_version_filter


filter_app_version = lambda row: _filter_app_version(row.os_type, row.app_version)

In [None]:
# Evaluate the version filter once and split the counts on the resulting mask.
meets_min_version = osinfo_ntrips.apply(filter_app_version, axis=1)
pos_ntrips = osinfo_ntrips.loc[meets_min_version]
neg_ntrips = osinfo_ntrips.loc[~meets_min_version]

In [None]:
# Total trips per OS family at or above the minimum app version.
# Only `ntrips` is summed: `app_version` holds strings, and summing it along
# with the counts would not produce a meaningful value.
pos_ntrips.groupby("os_type")[["ntrips"]].sum()

In [None]:
# Total trips per OS family below the minimum app version (or with no version).
# Only `ntrips` is summed: `app_version` holds strings, and summing it along
# with the counts would not produce a meaningful value.
neg_ntrips.groupby("os_type")[["ntrips"]].sum()

In [None]:
# Flag each trip according to the app-version filter; only the two columns
# the filter reads are handed to `apply`.
version_columns = trip_osinfo[["os_type", "app_version"]]
trip_osinfo["post-clarification"] = version_columns.apply(filter_app_version, axis=1)

In [None]:
# Split the trip ids on the "post-clarification" flag.
is_post = trip_osinfo["post-clarification"].astype(bool)
tripid_pos = trip_osinfo.loc[is_post, "tripid"].unique().tolist()
tripid_neg = trip_osinfo.loc[~is_post, "tripid"].unique().tolist()

print("# trips post-clarification: ", len(tripid_pos))
# The second count is the pre-clarification group; it was previously printed
# under the "post-clarification" label.
print("# trips pre-clarification: ", len(tripid_neg))

In [None]:
# Round `wastedTime` to whole numbers. Note that this overwrites the column
# on `all_legs` itself.
all_legs["wastedTime"] = all_legs["wastedTime"].map(
    lambda rating: int(round(rating, 0))
)

# Keep only the legs whose value lies in the inclusive range 1..5.
rating_in_range = all_legs["wastedTime"].between(1, 5)
all_legs_wt = all_legs.loc[rating_in_range].copy()

# Split the retained legs by the trip's clarification group.
all_legs_pos_wt = all_legs_wt.loc[all_legs_wt["tripid"].isin(tripid_pos)]
all_legs_neg_wt = all_legs_wt.loc[all_legs_wt["tripid"].isin(tripid_neg)]

In [None]:
# Preview the columns used in the aggregations below (post-clarification legs).
all_legs_pos_wt[["tripid", "transp_category", "wastedTime"]].head(3)

In [None]:
# Mean and standard deviation of `wastedTime` per transport category for the
# post-clarification legs.
# `tripid` is left out of the selection: it is an identifier, so mean/std over
# it carries no information. String aggregator names are used because pandas
# deprecates passing the NumPy callables to `agg`.
all_legs_pos_wt[["transp_category", "wastedTime"]].groupby("transp_category").agg(
    ["mean", "std"]
)

In [None]:
# Mean and standard deviation of `wastedTime` per transport category for the
# pre-clarification legs.
# `tripid` is left out of the selection: it is an identifier, so mean/std over
# it carries no information. String aggregator names are used because pandas
# deprecates passing the NumPy callables to `agg`.
all_legs_neg_wt[["transp_category", "wastedTime"]].groupby("transp_category").agg(
    ["mean", "std"]
)

In [None]:
# Count the post-clarification legs for every (transport category, rating) pair.
ntrips_pos_wt = (
    all_legs_pos_wt.groupby(["transp_category", "wastedTime"])
    .size()
    .reset_index(name="ntrips")
)
ntrips_pos_wt.head(3)

In [None]:
# Count the pre-clarification legs for every (transport category, rating) pair.
ntrips_neg_wt = (
    all_legs_neg_wt.groupby(["transp_category", "wastedTime"])
    .size()
    .reset_index(name="ntrips")
)
ntrips_neg_wt.head(3)

In [None]:
# Bar chart of the pre-clarification counts per rating, one bar colour per
# transport category. `catplot` returns a FacetGrid (kept under the existing
# name `ax`). The figure height is given with `height=`, the current name of
# the former `size=` argument.
ax = sns.catplot(
    x="wastedTime",
    y="ntrips",
    hue="transp_category",
    data=ntrips_neg_wt,
    kind="bar",
    palette="muted",
    legend_out=False,
    height=6,
    aspect=2,
)
ax.set(
    xlabel="Worthwhileness rating",
    ylabel="# trips",
    title="Worthwhileness ratings per trip pre-clarification",
)

filepath = img_path / "D5.1_worthwhileness_rating_tc_pre.png"
plt.savefig(filepath)

In [None]:
# Bar chart of the post-clarification counts per rating, one bar colour per
# transport category. The grid returned by `catplot` must be captured here:
# otherwise `ax` would still refer to the grid created by an earlier cell and
# the labels/title would be applied to that figure instead of this one.
# The figure height is given with `height=`, the current name of the former
# `size=` argument.
ax = sns.catplot(
    x="wastedTime",
    y="ntrips",
    hue="transp_category",
    data=ntrips_pos_wt,
    kind="bar",
    palette="muted",
    legend_out=False,
    height=6,
    aspect=2,
)
ax.set(
    xlabel="Worthwhileness rating",
    ylabel="# trips",
    title="Worthwhileness ratings per trip post-clarification",
)

filepath = img_path / "D5.1_worthwhileness_rating_tc_post.png"
plt.savefig(filepath)