In [22]:
from psycopg2 import connect
import pandas.io.sql as pandasql
import pandas as pd
import configparser
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import matplotlib as mpl
import copy
import matplotlib.dates as mdates
import time
import numpy as np

CONFIG = configparser.ConfigParser()
CONFIG.read('C:\\Users\\rrodger\\db.cfg')
dbset = CONFIG['DBSETTINGS']
con = connect(**dbset)

In [31]:
temp_sql = '''
drop table if exists dt_30min_agg;
create temporary table DT_30min_agg as (
SELECT 
	bt.analysis_id as analysis_id,
	TIMESTAMP WITHOUT TIME ZONE 'epoch' +
		INTERVAL '1 second' * (floor((extract('epoch' from bt.datetime_bin)-1) / 1800) * 1800) as datetime_bin,
	sum(bt.tt*bt.obs)/sum(bt.obs) AS travel_time,
	sum(bt.obs) AS obs
FROM bluetooth.all_analyses aa
	INNER JOIN bluetooth.aggr_15min bt USING (analysis_id)
WHERE left(aa.report_name, 2) = 'DT'
GROUP BY bt.analysis_id, (floor((extract('epoch' from bt.datetime_bin)-1) / 1800) * 1800)
ORDER BY bt.analysis_id, (floor((extract('epoch' from bt.datetime_bin)-1) / 1800) * 1800))
'''
pandasql.read_sql(temp_sql, con)

TypeError: 'NoneType' object is not iterable

In [27]:
sql = '''select * from dt_30min_agg'''
pandasql.read_sql(sql, con)

DatabaseError: Execution failed on sql 'select * from dt_30min_agg': relation "dt_30min_agg" does not exist
LINE 1: select * from dt_30min_agg
                      ^


### Compare 15 minute buckets

In [17]:
basql_15 = '''
WITH bt as (
    SELECT *
    FROM bluetooth.aggr_15min
    WHERE datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29')
    
SELECT '2017-11-12'::date + datetime_bin::time as time, 
    percentile_cont(0.5) WITHIN GROUP(ORDER BY bt.tt) as travel_time,
    CASE WHEN EXTRACT(ISODOW FROM bt.datetime_bin) < 6 THEN 'Work' ELSE 'Weekend' END as workingday,
    aa.report_name

FROM bt
    INNER JOIN bluetooth.all_analyses aa USING(analysis_id)
    LEFT OUTER JOIN ref.holiday hol ON (bt.datetime_bin::DATE = hol.dt)
    
WHERE hol.dt is NULL
    AND left(aa.report_name, 4) = 'DT-0'
    AND datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29'

GROUP BY aa.report_name, 
    datetime_bin::time, 
    CASE WHEN EXTRACT(ISODOW FROM bt.datetime_bin) < 6 THEN 'Work' ELSE 'Weekend' END
'''

travelsql_15 = '''
SELECT bt.tt as travel_time, 
	bt.datetime_bin, 
	bt.analysis_id,
	EXTRACT(ISODOW FROM datetime_bin) as weekday,
	aa.report_name
    
FROM bluetooth.aggr_15min bt
	INNER JOIN bluetooth.all_analyses aa ON (bt.analysis_id = aa.analysis_id)
    LEFT OUTER JOIN ref.holiday hol ON (bt.datetime_bin::DATE = hol.dt)
    
WHERE hol.dt is NULL
    AND left(aa.report_name, 4) = 'DT-0'
    AND datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29'
'''
baselines_15 = pandasql.read_sql(basql_15, con)
traveltime_15 = pandasql.read_sql(travelsql_15, con)


from the fifteen minute aggregated data, these queries fetch a baseline for working and nonworking days, and the bulk travel time data. 

In [18]:
colors = ['#003A72', '#d83904']

In [19]:
def plot_base(observations, r_name):
    #Divide data into Week and Weekend buckets for the given route name. 
    segments = {'Week' : observations[(observations['report_name'] == r_name) & 
                         (observations['workingday'] == 'Work')].sort_values(['time']),
                'Weekend' : observations[(observations['report_name'] == r_name) & 
                         (observations['workingday'] == 'Weekend')].sort_values(['time'])}
        
        
    fig, work = plt.subplots(1, 1, figsize = (16,14))
    
    weekend = work.twinx()
    weekend = work.twiny()
    days = [work, weekend]
    
    for i, (color, WD) in enumerate(zip(colors, ['Weekend', 'Week'])):        

        days[i].plot_date(x = segments[WD].time,
                          y = segments[WD].travel_time,
                          xdate = True,
                          fmt = '-o',
                          c = color,
                          label = WD)
        days[i].xaxis.set_major_locator(mdates.HourLocator(interval = 3))
        days[i].xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
            
    days[1].get_xaxis().set_visible(False)
    days[1].get_yaxis().set_visible(False)
    
    plt.title('Baseline for ' + r_name + ' by Working Day')
    
    days[1].xaxis.set_label_text('Time')
    days[1].yaxis.set_label_text('Travel Time')
    
    days[0].legend()
    days[1].legend(loc = 'upper left')

    plt.show()

In [40]:
basql_30 = '''
WITH bt as (
    SELECT *
    FROM king_pilot.real_tt_30min
    WHERE bt.dt NOT BETWEEN '2017-10-15' AND '2017-10-29')
SELECT '2017-11-12'::date + time_bin as time, 
    percentile_cont(0.5) WITHIN GROUP(ORDER BY bt.tt) as travel_time,
    CASE WHEN EXTRACT(ISODOW FROM bt.dt) < 6 THEN 'Work' ELSE 'Weekend' END as workingday,
    aa.report_name

FROM bt
    INNER JOIN king_pilot.bt_segments USING (bt_id)
    INNER JOIN bluetooth.all_analyses aa USING(analysis_id)
    LEFT OUTER JOIN ref.holiday hol ON (bt.dt::DATE = hol.dt)
    
WHERE hol.dt is NULL
    AND bt.dt NOT BETWEEN '2017-10-15' AND '2017-10-29'

GROUP BY aa.report_name, 
    time_bin, 
    CASE WHEN EXTRACT(ISODOW FROM bt.dt) < 6 THEN 'Work' ELSE 'Weekend' END
'''

travelsql_30 = '''
SELECT bt.tt, 
	bt.dt + bt.time_bin as datetime_bin, 
	bt.analysis_id,
    CASE WHEN EXTRACT(ISODOW FROM bt.dt) < 6 THEN 'Work' ELSE 'Weekend' END as workingday,
	aa.report_name
FROM king_pilot.real_tt_30min bt
    INNER JOIN king_pilot.bt_segments USING (bt_id)
	INNER JOIN bluetooth.all_analyses aa ON (bt.analysis_id = aa.analysis_id)
    LEFT OUTER JOIN ref.holiday hol ON (bt.dt = hol.dt)
    
WHERE hol.dt is NULL
    AND datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29'
'''
#    AND datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29'
baselines_30 = pandasql.read_sql(basql_30, con)
traveltime_30 = pandasql.read_sql(travelsql_30, con)

DatabaseError: Execution failed on sql '
WITH bt as (
    SELECT *
    FROM king_pilot.real_tt_30min
    WHERE bt.dt NOT BETWEEN '2017-10-15' AND '2017-10-29')
SELECT '2017-11-12'::date + time_bin as time, 
    percentile_cont(0.5) WITHIN GROUP(ORDER BY bt.tt) as travel_time,
    CASE WHEN EXTRACT(ISODOW FROM bt.dt) < 6 THEN 'Work' ELSE 'Weekend' END as workingday,
    aa.report_name

FROM bt
    INNER JOIN king_pilot.bt_segments USING (bt_id)
    INNER JOIN bluetooth.all_analyses aa USING(analysis_id)
    LEFT OUTER JOIN ref.holiday hol ON (bt.dt::DATE = hol.dt)
    
WHERE hol.dt is NULL
    AND bt.dt NOT BETWEEN '2017-10-15' AND '2017-10-29'

GROUP BY aa.report_name, 
    time_bin, 
    CASE WHEN EXTRACT(ISODOW FROM bt.dt) < 6 THEN 'Work' ELSE 'Weekend' END
': missing FROM-clause entry for table "bt"
LINE 5:     WHERE bt.dt NOT BETWEEN '2017-10-15' AND '2017-10-29')
                  ^


In [None]:
print('Fifteen minute buckets')
plot_base(baselines_15, traveltime_30['report_name'].unique()[58])
print('Thirty minute buckets')
plot_base(baselines_30, traveltime_30['report_name'].unique()[58])

The fifteen minute bucket baseline seems to vary rapidly, making it difficult to interpret. Changing to 30 minute aggregation makes for a much cleaner graph.

In [None]:
dicsql = '''
SELECT analysis_id as seg_id,
    translate(right(replace(aa.report_name, ' ', ''), length(replace(aa.report_name, ' ', '')) - 8), '-_', '  ') as segment_name
FROM bluetooth.all_analyses aa
WHERE report_name like 'DT-0%'
'''
diction = pandasql.read_sql(dicsql, con)


In [None]:
segs = {seg: name for i, seg, name in diction.itertuples()}

Makes dictionary to look up seg_id from pretty name

### Scatterplots for given (or all) weeks

In [None]:

plot_weeks_sql = '''
SELECT bt.travel_time, --y axis, as integer
    bt.datetime_bin, --x axis as complete timestamp
    bt.analysis_id as seg_id, --unique id
    EXTRACT(ISODOW FROM datetime_bin) as weekday,
    CASE WHEN EXTRACT(ISODOW FROM bt.datetime_bin) < 6 THEN 'Work' ELSE 'Weekend' END as day_type, --filter by daytype
    --aa.report_name as segment_name, --segment title (removed in favour of naming dictionary)
FROM dt_30min_agg bt
    INNER JOIN bluetooth.all_analyses aa ON (bt.analysis_id = aa.analysis_id)
    LEFT OUTER JOIN ref.holiday hol ON (bt.datetime_bin::DATE = hol.dt)

WHERE hol.dt is NULL
    AND datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29'
'''
plot_weeks_df = pandasql.read_sql(plot_weeks_sql, con)

In [None]:
def WOY(x, W):
    return x.weekofyear == W

def week_dict(data, seg_id): #seperate segment into individual weeks.
    weeks = {W : data[(data['seg_id'] == seg_id) & 
                      data['datetime_bin'].apply(WOY, args = (W,))]
            for W in data.datetime_bin.apply(lambda x : x.weekofyear).unique()}
    
    temp = {}
    for week, df in weeks.items():
        if df.travel_time.count() > 0:
            temp[week] = weeks[week]
        else:
            if week in temp: #discard weeks without data to avoid MAXTICKS error
                del temp[week]
    return temp

Creates a dictionary to store the bluetooth observations (30 minute) divided by week and removes empty weeks to keep the graph clean

In [None]:
def plot_weeks(data, seg_id):
    weeks = week_dict(data, seg_id) #returns dictionary with seg_name divided into weeks
        
    fig, ax = plt.subplots(len(weeks), 1, sharex = False, sharey = True, figsize = (16, 5*len(weeks)))
    plt.suptitle('Travel times by week for ' + segs[seg_id])

    for i, week in enumerate(weeks):
            ax[i].plot_date(x = weeks[week].datetime_bin,
                            y = weeks[week].travel_time)

            ax[i].xaxis.set_major_locator(mdates.WeekdayLocator(byweekday = [0, 1, 2, 3, 4, 5, 6])) #axis setup
            ax[i].xaxis.set_major_formatter(mdates.DateFormatter('\n%a %Y-%m-%d'))
            ax[i].xaxis.set_minor_locator(mdates.HourLocator(interval = 3))
            ax[i].xaxis.set_minor_formatter(mdates.DateFormatter('%H'))
            
            xpad = timedelta(minutes = 60)
            ax[i].set_xlim(min(weeks[week].datetime_bin) - xpad, max(weeks[week].datetime_bin) + xpad)
            
            ax[i].set_title(str(week))# titles & labels
            ax[i].set_xlabel('Time')
            ax[i].set_ylabel('Travel Time')
            ax[i].legend()
            ax[i].xaxis.grid(True, which="major")
            ax[i].yaxis.grid(True, which="major")

    fig.tight_layout() #subplot titles bumping into  main title
    
#     if len(args) > 0:
#         fig.subplots_adjust(top=0.88) # keep titles from getting distracted by their phones and bumping into the axis above.
#     else:
#         fig.subplots_adjust(top=0.965)
    fig.subplots_adjust(top=0.965)
    
    plt.show()

For each segment identified in the baseline lookover, the above function will be used to first plot all weeks, then plot only the weeks with the questionable data, as identified from the first plot.

### Baselines overlaid onto percentile bands query and function

In [None]:
percentile_sql = '''SELECT base.daytype as day_type,
	('2017-11-12 ' || base.time::varchar)::timestamp as time,
	base.avg_tt as base_tt,
	base.analysis_id as seg_id,

	percentile_cont(0.1) WITHIN GROUP (ORDER BY bt.travel_time) as pct_10,
    percentile_cont(0.2) WITHIN GROUP (ORDER BY bt.travel_time) as pct_20,
	percentile_cont(0.4) WITHIN GROUP (ORDER BY bt.travel_time) as pct_40,
    
	percentile_cont(0.6) WITHIN GROUP (ORDER BY bt.travel_time) as pct_60,
	percentile_cont(0.8) WITHIN GROUP (ORDER BY bt.travel_time) as pct_80,
    percentile_cont(0.9) WITHIN GROUP (ORDER BY bt.travel_time) as pct_90,
	percentile_cont(1.0) WITHIN GROUP (ORDER BY bt.travel_time) as pct_100,
	
	
FROM king_pilot_baselines base
	INNER JOIN dt_30min_agg bt ON (bt.analysis_id = base.analysis_id AND bt.datetime_bin::time = base.time AND 
		CASE WHEN EXTRACT(ISODOW FROM bt.datetime_bin) < 6 THEN 'weekday' ELSE 'weekend' END = base.daytype)
	INNER JOIN bluetooth.all_analyses aa ON (bt.analysis_id = aa.analysis_id)

WHERE bt.datetime_bin::date <= '2017-11-12'
	AND bt.datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29'

GROUP BY base.analysis_id, base.time, base.daytype, base.avg_tt
'''

percentile_band = pandasql.read_sql(percentile_sql, con)

In [None]:
percentile_band[:5]

In [None]:
def plot_base(data, seg_id):
    #Divide data into Week and Weekend buckets for the given route name. 
    segments = {'week' : data[(data['seg_id'] == seg_id) & 
                         (data['day_type'] == 'weekday')].sort_values(['time']),
                'weekend' : data[(data['seg_id'] == seg_id) & 
                         (data['day_type'] == 'weekend')].sort_values(['time'])}
        
    fig, day_type = plt.subplots(2, 1, figsize = (16,14))
    outliers = []
    
    for i, (color, WD) in enumerate(zip(colors, ['weekend', 'week'])):        
        
        day_type[i].set_title('Baseline for ' + segs[seg_id] + ' during the ' + str(WD))
        day_type[i].plot_date(x = segments[WD].time,
                              y = segments[WD].base_tt,
                              xdate = True,
                              fmt = '-o',
                              c = color,
                              label = WD)
        
        day_type[i].fill_between(segments[WD].time.values, 
                            y1=segments[WD]['pct_10'],
                            y2=segments[WD]['pct_90'],
                            alpha=0.15, facecolor=color)
        day_type[i].fill_between(segments[WD].time.values, 
                            y1=segments[WD]['pct_20'],
                            y2=segments[WD]['pct_80'],
                            alpha=0.25, facecolor=color)
        day_type[i].fill_between(x = segments[WD].time.values, 
                            y1=segments[WD]['pct_40'],
                            y2=segments[WD]['pct_60'],
                            alpha=0.35, facecolor=color)
               
        day_type[i].set_xlim(min(segments[WD].time) - timedelta(minutes = 30), max(segments[WD].time) + timedelta(minutes = 30))
        day_type[i].xaxis.set_major_locator(mdates.HourLocator(byhour = range(0,24), interval = 3))
        day_type[i].xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
        
        day_type[i].xaxis.set_label_text('Time')
        day_type[i].yaxis.set_label_text('Travel Time')
        
        day_type[i].yaxis.grid(True)
        day_type[i].xaxis.grid(True)
        
        outliers.append(daytype[i].twiny())
        
        outliers[i].plot_date(x = segments[WD]['time'],
                              y = segments[WD]['pct_100'],
                              fmt = 'x',
                              xdate = True,
                      #       alpha = 0.35,
                              c = color)
        
        outliers[i].xaxis.set_visible(False)
        outliers[i].set_xlim(min(segments[WD].time) - timedelta(minutes = 30), max(segments[WD].time) + timedelta(minutes = 30))
    
    daytype[0].legend()
    daytype[1].legend(loc = 'upper left')
    
    plt.show()

### Plot baseline with prospective outliers removed against old baseline

In [None]:
def daystring(cut_days):
    cut_day_str = '(\'' + cut_days[0]
    for day in cut_days[1:]:
        cut_day_str = cut_day_str + '\', \'' + day
    return cut_day_str + '\')'

In [None]:
new_sql = '''WITH bt as (
    SELECT bt.analysis_id,
	(TIMESTAMP WITHOUT TIME ZONE 'epoch' + INTERVAL '1 second' * (floor((extract('epoch' from bt.datetime_bin)-1) / 1800) * 1800)) as datetime_bin,
	sum(bt.tt*bt.obs)/sum(bt.obs) AS travel_time,
 	sum(bt.obs) AS obs,
    

    FROM bluetooth.aggr_5min bt
	INNER JOIN bluetooth.all_analyses aa USING (analysis_id)
        LEFT OUTER JOIN ref.holiday hol ON (bt.datetime_bin::date = hol.dt)
        
    WHERE bt.datetime_bin::date  NOT IN {0} AND
         datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29'
        AND hol.dt is NULL
        AND aa.analysis_id = '{1}'

   GROUP BY datetime_bin, analysis_id)

SELECT analysis_id as seg_id,
    '2017-11-12'::date + datetime_bin::time as time, 
    avg(bt.travel_time) as base_tt,
    CASE WHEN EXTRACT(ISODOW FROM bt.datetime_bin::date) < 6 THEN 'weekday' ELSE 'weekend' END as day_type
    

FROM  bt

GROUP BY seg_id, 
    datetime_bin::time, 
    daytype'''


old_sql = '''WITH bt as(
    SELECT bt.analysis_id,
	(TIMESTAMP WITHOUT TIME ZONE 'epoch' + INTERVAL '1 second' * (floor((extract('epoch' from bt.datetime_bin)-1) / 1800) * 1800)) as datetime_bin,
	sum(bt.tt*bt.obs)/sum(bt.obs),
 	sum(bt.obs) AS obs

    FROM bluetooth.aggr_5min bt
	INNER JOIN bluetooth.all_analyses aa USING (analysis_id)
        LEFT OUTER JOIN ref.holiday hol ON (bt.datetime_bin::DATE = hol.dt)
    WHERE datetime_bin::date NOT BETWEEN '2017-10-15' AND '2017-10-29'
        AND hol.dt is NULL
        AND aa.analysis_id = '{0}'

   GROUP BY datetime_bin, analysis_id)
   
SELECT analysis_id as seg_id,
    '2017-11-12'::date + datetime_bin::time as time, 
    avg(bt.travel_time) as base_tt,
    CASE WHEN EXTRACT(ISODOW FROM bt.datetime_bin) < 6 THEN 'weekday' ELSE 'weekend' END as day_type

FROM  bt

GROUP BY seg_id, 
    datetime_bin::time, 
    daytype'''

The baseline query from above, modified to exclude anomalistic dates.

In [None]:
def alternate_baseline(data, seg_id, cut_day_str):
    
    new_base = pandasql.read_sql(new_sql.format(daystring(cut_day_str), seg_id), con)
    old_base = pandasql.read_sql(old_sql.format(seg_id), con)
    
    fig, days = plt.subplots(2, 1, figsize = (16,16))
    old = []

    segments = {order : {'Work' : observations[(observations['seg_id'] == seg_id) & 
                                               (observations['daytype'] == 'weekday')].sort_values(['time']),
                         'Weekend' : observations[(observations['seg_id'] == seg_id) & 
                                                  (observations['daytype'] == 'weekend')].sort_values(['time'])}
                for order, observations in zip(['New', 'Old'],[new_base, old_base])}


    for i, WD in enumerate(['Work', 'Weekend']):
        old.append(days[i].twiny())
        days[i].plot_date(x = segments['New'][WD].time,
                          y = segments['New'][WD].base_tt,
                          xdate = True,
                          fmt = '-o',
                          c = colors[0],
                          alpha = 0.5,
                          label = 'New Baseline')
        plt.legend()
        old[i].plot_date(x = segments['Old'][WD].time,
                         y = segments['Old'][WD].base_tt,
                         xdate = True,
                         fmt = '-o',
                         c = colors[1],
                         alpha = 0.5,
                         label = 'Old Baseline')

        maj = mdates.HourLocator(interval = 3)
        days[i].xaxis.set_major_locator(maj)
        days[i].xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))

        #old[i].set_yticks(np.linspace(days[i].get_yticks()[0],days[i].get_yticks()[-1],len(days[i].get_yticks())))
        old[i].get_xaxis().set_visible(False)
        #old[i].get_yaxis().set_visible(True)

        #old[i].get_yaxis().set_visible(False)


        plt.title('Baseline for ' + segs[seg_id])

        days[i].xaxis.set_label_text('Time')
        days[i].yaxis.set_label_text('Travel Time')

        days[i].legend()
        old[i].legend(loc = 'upper left')

        fig.tight_layout()

    plt.show()

Dates affecting baselines:
parliament NB Queen to Dundas, September 24th
Jasrvis NB King to Queen, September 16th
Dufferin SB Queen to King, November 5th
Front EB Jarvis to Parliament, September 16th
Adelaide EB Jarvis to Parliament, October 30th, 31st, November 1st. More than single point. 
Queen WB Spadina to Bathurst September 19th.
Queen Yonge to University, Spetember 30th, October 1st, Nuit Blanche
Queen University to Yonge, September 24th, 30th, October 1st. Nuit Blanche and Single point.

##### The function plotting baselines returns both a weekend (blue) and weekday (orange) plot. Becasue of this not all baseline plots will be anomalistic.

Removing October 1st has an insignificant effect on the weekend baseline.

In [None]:
r_name = "1453138"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-10-01'])

Removing October 30th has a minor effect in the early morning during the week.

In [None]:
r_name ="1453284"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-10-30'])

Removing September 24th, 30th, and October 1st appears to have had a very major impact on this baseline around midnight and during midday on the weekend. Nuit Blanche had a huge impact on this baseline. These dates have been removed for Queen EB Universtiy to Yonge.

In [None]:
r_name = "1453627"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-24', '2017-09-30', '2017-10-01'])

Removing November 5th from this baseline had an insignificant impact on the new baseline.

In [None]:
r_name = '1453653'
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-11-05'])

Removing September 30th and October 1st had a very major impact on this baseline around midnight on the weekend, due to Nuit Blanche. These dates have been removed from Queen WB Yonge to University. 

In [None]:
r_name = "1453719"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-30', '2017-10-01'])

Excluding September 19th had a significant impact on the late evening baseline during the week. This date has been removed from Queen WB Spadina to Bathurst. 

In [None]:
r_name = "1453752"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-19'])

October 30th, 31st, and November 1st had unique slowdowns significantly larger than any others on this segment. Removing these dates led to a very major drop in the week baseline around the PM peak. These dates have been removed from Adelaide EB Jarvis to Parliament. 

In [None]:
r_name = "1454050"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-10-30', '2017-10-31', '2017-11-01'])

Removing september 24th had a noticable impact on the weekend baseline in the early morning.

In [None]:
r_name = "1454241"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-24'])

Removing September 16th from this baseline had a major impact on the peak hour during the weekend. This date has been removed from Front EB Jarvis to Parliament. 

In [None]:
r_name = "1454605"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-16'])

By removing November 5th, the weekend baseline changed in a major way around peak PM hour. This date has been removed from Dufferin SB Queen to King. 

In [None]:
r_name = "1454879"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-11-05'])

Removing November 5th had a notable impact in the early and mid morning.

In [None]:
r_name = "1454907"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-11-05'])

This outlier was minor and didn't affect the baseline.

In [None]:
r_name = "1454997"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-10-01'])

Removing October 14th had a significant impact on the early morning of the weekend baseline.

In [None]:
r_name = "1455076"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-10-14'])

Removing November 4th had a significant impact on the early morning, morning, and evening weekend baseline

In [None]:
r_name = "1455088"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-11-04'])

Removing September 19th had a noticable impact on the weekday  baseline in the evening and at midday

In [None]:
r_name = "1455231"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-19'])

Removing November 5th had a significant effect on the weekend baseline early in the morning.

In [None]:
r_name = "1455243"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-11-05'])

Removing October and November 8th resulted in insignificant changes to the weekend baseline in the early morning.

In [None]:
r_name = "1455351"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-10-08', '2017-11-08'])

Removing September 16th had a notable impact on the baseline during midday. This date has been removed from Jarvis NB Front to King. 

In [None]:
r_name = "1455538"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-16'])

Removing September 17th had a significant impact on the weekend baseline during midday. This date has been removed from Jarvis NB King to Queen. 

In [None]:
r_name = "1455555"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-17'])

Removing September 16th had a major impact on the weekend baseline early in the morning

In [None]:
r_name = "1455628"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-16'])

Removing September 16th had a major impact on the weekend baseline in the morning. This date has been removed from Parliament NB Queen to Dundas. 

In [None]:
r_name = "1455676"
plot_weeks(r_name)
plot_base(travel_times, r_name)
alternate_baseline(r_name, ['2017-09-24'])

All the segments with outliers are listed sinnister segments below

In [None]:
sinnister_segments = ["DT-0001.College-EB_Bathurst-to-University",
"DT-0007. Dundas-EB_Bathurst-to-Spadina",
"DT-0024. Queen-EB_University-to-Yonge",
"DT-0026. Queen-EB_Jarvis-to-Parliament",
"DT-0031. Queen-WB_Yonge-to-University",
"DT-0033. Queen-WB_Spadina-to-Bathurst",
"DT-0047. Adelaide-EB_Jarvis-to-Parliament",
"DT-0056. King-EB_Jarvis-to-Parliament",
"DT-0074. Front-EB_Jarvis-to-Parliament",
"DT-0086. Dufferin-SB_Queen-to-King",
"DT-0088. Dufferin-NB_Queen-to-Dundas",
"DT-0094. Bathurst-SB_King-to-Front",
"DT-0099. Bathurst-NB_Queen-to-Dundas",
"DT-0100. Bathurst-NB_Dundas-to-College",
"DT-0108. Spadina-NB_Queen-to-Dundas",
"DT-0109. University-SB_College-to-Dundas",
"DT-0115. University-NB_Front-to-King",
"DT-0128. Jarvis-NB_Front-to-King",
"DT-0129. Jarvis-NB_King-to-Queen",
"DT-0134. Parliament-SB_King-to-Front",
"DT-0137. Parliament-NB_Queen-to-Dundas"]


All dates removed are listed below

In [None]:
removed_dates = [
    ["DT-0024. Queen-EB_University-to-Yonge", '2017-09-24', '2017-09-30', '2017-10-01'],
    ["DT-0031. Queen-WB_Yonge-to-University", '2017-09-30', '2017-10-01'],
    ["DT-0033. Queen-WB_Spadina-to-Bathurst", '2017-09-19'],
    ["DT-0047. Adelaide-EB_Jarvis-to-Parliament", '2017-10-30', '2017-10-31', '2017-11-01'],
    ["DT-0074. Front-EB_Jarvis-to-Parliament", '2017-09-16'],
    ["DT-0086. Dufferin-SB_Queen-to-King", '2017-11-05'],
    ["DT-0128. Jarvis-NB_Front-to-King", '2017-09-16'],
    ["DT-0129. Jarvis-NB_King-to-Queen", '2017-09-17'],
    ["DT-0137. Parliament-NB_Queen-to-Dundas", '2017-09-24'],
]

After looking at single points with highly inflated travel times and longer periods where the baseline was affected by an anomalistic slowdown, it doesn't look like removing either of these types of anomalies has a very controllable impact on the baselines. Since the baseline data is so limited, removing days can change the part of the baseline that wasn't affected by the anomaly as much or even more than the baseline at the time of the anomaly.