## Generate static graphs

These are the input parameters for the notebook. They will be automatically changed when the scripts to generate monthly statistics are run. You can modify them manually to generate multiple plots locally as well.

Pass in `None` to remove the filters and plot all data. This is not recommended for production settings, but might be useful for reports based on data snapshots.

In [None]:
# Notebook input parameters; the monthly-statistics scripts overwrite these.
# year and month feed the time query and the output file suffix. Passing None
# removes the corresponding filter (not recommended for production runs).
year = 2020
month = 11
# Program whose participant trips are loaded; also part of the output file suffix.
program = "prepilot"

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

# Global plot styling for the charts produced by this notebook.
sns.set_style("whitegrid")
# NOTE(review): sns.set() applies seaborn's default theme, whose default style
# is "darkgrid" per the seaborn docs, so it likely overrides the "whitegrid"
# choice made on the previous line. Confirm which look is intended.
sns.set()
%matplotlib inline

In [None]:
import scaffolding 
from plots import *

In [None]:
# Loading mapping dictionaries from mapping_dictionaries notebook
%store -r dic_ei
%store -r dic_re
%store -r dic_pur

# Wrap the purpose and mode mappings in defaultdicts so that any label
# missing from a mapping resolves to 'Other' instead of raising KeyError.
dic_pur, dic_re = (
    defaultdict(lambda: 'Other', label_mapping)
    for label_mapping in (dic_pur, dic_re)
)

In [None]:
# Build the time query for the configured year/month; it is passed to the trip loader below.
tq = scaffolding.get_time_query(year, month)

In [None]:
# Load participant trips for `program` through the scaffolding helper, passing the time query `tq`.
participant_ct_df = scaffolding.load_all_participant_trips(program, tq)

In [None]:
# Keep only the labeled trips (going by the helper's name; confirm the exact rule in scaffolding).
labeled_ct = scaffolding.filter_labeled_trips(participant_ct_df)

In [None]:
# Expand the user inputs of the labeled trips; later cells read the purpose_confirm,
# mode_confirm and replaced_mode columns from the result.
expanded_ct = scaffolding.expand_userinputs(labeled_ct)

In [None]:
# Display the (rows, columns) shape before the data quality check runs.
expanded_ct.shape

In [None]:
# Apply scaffolding's data quality check, then display the resulting shape so it
# can be compared with the shape shown in the previous cell.
expanded_ct = scaffolding.data_quality_check(expanded_ct)
expanded_ct.shape

In [None]:
## Mapping new labels with dictionaries
# Each entry is (derived display column, raw user-input column, mapping).
# dic_pur and dic_re were wrapped in defaultdicts above, so labels missing
# from a mapping are expected to come out as 'Other'.
label_column_specs = (
    ('Trip_purpose', 'purpose_confirm', dic_pur),
    ('Mode_confirm', 'mode_confirm', dic_re),
    ('Replaced_mode', 'replaced_mode', dic_re),
)
for derived_col, raw_col, label_mapping in label_column_specs:
    expanded_ct[derived_col] = expanded_ct[raw_col].map(label_mapping)

In [None]:
# Display the purpose mapping for inspection.
dic_pur

In [None]:
# Sanity check: no trip may have 'Pilot ebike' as both its confirmed mode
# and the mode it replaced.
is_ebike_trip = expanded_ct['Mode_confirm'] == 'Pilot ebike'
replaces_ebike = expanded_ct["Replaced_mode"] == "Pilot ebike"
assert not (is_ebike_trip & replaces_ebike).any()

In [None]:
# Energy Impact Calculation
# The return value is not captured, so the helper presumably adds its converted
# columns to expanded_ct in place (distance_miles is read by later cells);
# confirm in scaffolding.
scaffolding.unit_conversions(expanded_ct)

In [None]:
# Suffix inserted into every output file name below; derived from year, month and program.
file_suffix = scaffolding.get_file_suffix(year, month, program)

# Ebike Data Only

In [None]:
# Subset of trips whose confirmed mode is the pilot e-bike
data_eb = expanded_ct[expanded_ct['Mode_confirm'] == 'Pilot ebike']

In [None]:
# Text appended to every plot title below; computed by scaffolding from the
# full labeled data set and the e-bike subset.
quality_text = scaffolding.get_quality_text_ebike(expanded_ct, data_eb)

In [None]:
# Number of e-bike trips per purpose; labels and values come from one
# value_counts() result, so they are aligned by construction.
purpose_counts = data_eb['Trip_purpose'].value_counts(dropna=True)
labels_tp = purpose_counts.index.tolist()
values_tp = purpose_counts.tolist()

file_name = 'ntrips_ebike_purpose%s.png' % file_suffix
plot_title = "Number of trips for each purpose for eBike only\n%s" % quality_text
pie_chart_purpose(plot_title, labels_tp, values_tp, file_name)

In [None]:
# Number of e-bike trips per replaced mode; labels and values come from one
# value_counts() result, so they are aligned by construction.
replaced_mode_counts = data_eb['Replaced_mode'].value_counts(dropna=True)
labels_eb = replaced_mode_counts.index.tolist()
values_eb = replaced_mode_counts.tolist()

file_name = 'ntrips_ebike_replaced_mode%s.png' % file_suffix
plot_title = "Number of trips for each replaced transport mode for eBike only\n%s" % quality_text
pie_chart_mode(plot_title, labels_eb, values_eb, file_name)

# Miles by replaced mode, e-bike data only

In [None]:
# Per replaced mode: total, number and mean of e-bike trip miles,
# ordered by total miles, largest first.
dg = data_eb.groupby('Replaced_mode').agg({'distance_miles': ['sum', 'count', 'mean']})
dg.columns = ['Total (miles)', 'Count', 'Average (miles)']
dg = dg.reset_index().sort_values(by=['Total (miles)'], ascending=False)

# Replaced mode -> total miles, in the sorted order above; the pie chart's
# labels and values are read straight from this dict.
dg_dict = dict(zip(dg['Replaced_mode'], dg['Total (miles)']))
labels_m = list(dg_dict)
values_m = list(dg_dict.values())

file_name = 'miles_ebike_replaced_mode%s.png' % file_suffix
plot_title = "Distribution of Miles Replaced by Ebike \n%s" % quality_text
pie_chart_mode(plot_title, labels_m, values_m, file_name)
print(dg)

# Average Miles per ebike trip

In [None]:
# Average miles per e-bike trip for each replaced mode.
# Only replaced modes with at least `min_trip_count` trips are plotted. The
# original filter dropped rows with Count < 3 (i.e. kept Count >= 3) while the
# title claimed "> 3 entries"; the title now states the threshold that is
# actually applied, and both are driven by the same variable so they cannot
# drift apart again. The plotted data is unchanged.
min_trip_count = 3
data = dg[dg["Count"] >= min_trip_count].sort_values(by=['Average (miles)'], ascending=False)
x='Replaced_mode'
y='Average (miles)'
y2 = "Count"  # not passed to barplot_mode; kept so the notebook namespace is unchanged

plot_title=" Average Miles for each replaced mode with >= %d entries\n'Other' represents trips with a non-standard or missing replacement\n%s" % (min_trip_count, quality_text)
file_name ='average_miles_replaced_mode%s.png' % file_suffix

barplot_mode(data,x,y,plot_title,file_name)

In [None]:
# Average miles per e-bike trip for each replaced mode, with the trip count
# passed as a second measure (y2) to barplot_mode2.
# Only replaced modes with at least `min_trip_count` trips are plotted. The
# original filter dropped rows with Count < 3 (i.e. kept Count >= 3) while the
# title claimed "> 3 entries"; the title now states the threshold that is
# actually applied, and both are driven by the same variable so they cannot
# drift apart again. The plotted data is unchanged.
min_trip_count = 3
data = dg[dg["Count"] >= min_trip_count].sort_values(by=['Average (miles)'], ascending=False)
x='Replaced_mode'
y='Average (miles)'
y2 = "Count"

plot_title=" Average Miles for each replaced mode with >= %d entries\n%s" % (min_trip_count, quality_text)
file_name ='average_miles_replaced_mode2%s.png' % file_suffix

barplot_mode2(data,x,y,y2,plot_title,file_name)

# Number of Trips by Day

In [None]:
# E-bike trips per day of the month. 'Number of Trips' is the row count of
# each group; 'Total' holds the 'sum' aggregate of the day values themselves.
trips_by_day = data_eb.groupby(['start_local_dt_day'])
fq_days = trips_by_day.agg({'start_local_dt_day': ['sum', 'count']}).reset_index()
fq_days.columns = ['Day of the Month', 'Total', 'Number of Trips']

In [None]:
# Bar chart of the number of e-bike trips for each day of the month
data, x, y = fq_days, 'Day of the Month', 'Number of Trips'

file_name = 'ntrips_ebike_per_day%s.png' % file_suffix
plot_title = "Number of ebike trips by day\n%s" % quality_text

barplot_day(data, x, y, plot_title, file_name)

# Number of Trips by day of week

In [None]:
# E-bike trips per weekday. 'Number of Trips' is the row count of each group;
# 'Total' holds the 'sum' aggregate of the weekday values themselves.
trips_by_weekday = data_eb.groupby(['start_local_dt_weekday'])
fq_weekdays = trips_by_weekday.agg({'start_local_dt_weekday': ['sum', 'count']}).reset_index()
fq_weekdays.columns = ['Weekday', 'Total', 'Number of Trips']

# Replace each numeric weekday value with its short name (index 0 is "Mon")
weekday_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]


def _weekday_name(day_index):
    return weekday_labels[day_index]


fq_weekdays["Weekday"] = fq_weekdays["Weekday"].apply(_weekday_name)
fq_weekdays

In [None]:
# Bar chart of the number of trips per weekday; fq_weekdays is derived from
# the e-bike subset, so these are e-bike trips only.
data, x, y = fq_weekdays, 'Weekday', 'Number of Trips'

file_name = 'ntrips_per_weekday%s.png' % file_suffix
plot_title = "Number of trips by weekday\n%s" % quality_text

barplot_day(data, x, y, plot_title, file_name)