# Compare output with processed data provided by TTC

In [2]:
from psycopg2 import connect
import configparser
%matplotlib inline
import numpy as np
import pandas as pd
import pandas.io.sql as pandasql
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import matplotlib.ticker as ticker
import folium

CONFIG = configparser.ConfigParser()
CONFIG.read('db.cfg')
dbset = CONFIG['DBSETTINGS']
con = connect(**dbset)

from IPython.display import HTML

def print_table(sql, con):
    return HTML(pandasql.read_sql(sql, con).to_html(index=False))

## Explore the data provided by the TTC 

### Route 504

There are 77 combinations of distinct from stops, to stops, and directions that we have time data from. 

In [44]:
sql_avg = '''
SELECT DISTINCT fromstopname, tostopname, from_stop_id, to_stop_id, directionid, COUNT(*) cnt, routenumber, 
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes
FROM section_runs
WHERE routenumber = 504
GROUP BY fromstopname, tostopname, directionid,from_stop_id, to_stop_id, routenumber
ORDER BY directionid; 
'''

pandasql.read_sql(sql_avg, con)

Unnamed: 0,fromstopname,tostopname,from_stop_id,to_stop_id,directionid,cnt,routenumber,time_diff_minutes
0,BROADVIEW STATION AT BAY 6,DUNDAS WEST STATION AT BAY 4,13050,13209,0,2,504,-940.666667
1,DUFFERIN GATE LOOP,DUNDAS WEST STATION AT BAY 4,6113,13209,0,1,504,36.000000
2,DUFFERIN GATE LOOP,KING ST WEST AT BATHURST ST,6113,2253,0,10,504,12.166667
3,DUNDAS WEST STATION AT BAY 4,BROADVIEW STATION AT BAY 6,13209,13050,0,2,504,55.666667
4,DUNDAS WEST STATION AT BAY 4,KING ST WEST AT BATHURST ST,13209,2253,0,9,504,-137.592593
5,DUNDAS WEST STATION AT BAY 4,KING ST WEST AT DUFFERIN ST,13209,4568,0,1505,504,7.017043
6,KING ST EAST AT JARVIS ST,BROADVIEW STATION AT BAY 6,1897,13050,0,4,504,-344.833333
7,KING ST EAST AT JARVIS ST,KING ST EAST AT JARVIS ST,1897,1897,0,4,504,1.083333
8,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,1897,8207,0,1696,504,-0.682390
9,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1897,7034,0,1,504,3.000000


Look at all the stops that segments start at. 

In [26]:
sql_from_stop = '''
SELECT DISTINCT fromstopname, from_stop_id
FROM section_runs
WHERE routenumber = 504
order by from_stop_id; 
'''

pandasql.read_sql(sql_from_stop, con)

Unnamed: 0,fromstopname,from_stop_id
0,KING ST WEST AT SPADINA AVE,436
1,KING ST EAST AT PARLIAMENT ST,1389
2,KING ST WEST AT UNIVERSITY AVE,1845
3,KING ST EAST AT JARVIS ST,1897
4,KING ST WEST AT BATHURST ST,2253
5,KING ST WEST AT YONGE ST,3070
6,KING ST WEST AT UNIVERSITY AVE,3357
7,KING ST WEST AT DUFFERIN ST,4341
8,KING ST WEST AT DUFFERIN ST,4568
9,KING ST WEST AT SPADINA AVE,4748


Look at all stops where the segments end. 

In [27]:
sql_to_stop = '''
SELECT DISTINCT tostopname, to_stop_id
FROM section_runs
WHERE routenumber = 504
order by to_stop_id; 
'''

pandasql.read_sql(sql_to_stop, con)

Unnamed: 0,tostopname,to_stop_id
0,KING ST WEST AT SPADINA AVE,436
1,KING ST EAST AT PARLIAMENT ST,1389
2,KING ST WEST AT UNIVERSITY AVE,1845
3,KING ST EAST AT JARVIS ST,1897
4,KING ST WEST AT BATHURST ST,2253
5,KING ST WEST AT YONGE ST,3070
6,KING ST WEST AT UNIVERSITY AVE,3357
7,KING ST WEST AT DUFFERIN ST,4341
8,KING ST WEST AT DUFFERIN ST,4568
9,KING ST WEST AT SPADINA AVE,4748


### Route 514

There are 60 combinations of distinct from stops, to stops, and directions that we have time data from.

In [42]:
sql_avg = '''
SELECT fromstopname, tostopname, from_stop_id, to_stop_id, directionid, COUNT(*) cnt, routenumber, 
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes
FROM section_runs
WHERE routenumber = 514
GROUP BY fromstopname, tostopname, directionid,from_stop_id, to_stop_id, routenumber
ORDER BY directionid, fromstopname; 
'''

pandasql.read_sql(sql_avg, con)

Unnamed: 0,fromstopname,tostopname,from_stop_id,to_stop_id,directionid,cnt,routenumber,time_diff_minutes
0,DUFFERIN GATE LOOP,KING ST WEST AT DUFFERIN ST,6113,4568,0,2,514,1.666667
1,DUFFERIN GATE LOOP,KING ST WEST AT SPADINA AVE,6113,4748,0,5,514,18.733333
2,DUFFERIN GATE LOOP,KING ST WEST AT BATHURST ST,6113,2253,0,592,514,6.285473
3,KING ST EAST AT JARVIS ST,KING ST EAST AT JARVIS ST,1897,1897,0,4,514,0.333333
4,KING ST EAST AT JARVIS ST,DISTILLERY LOOP,1897,15439,0,7,514,7.714286
5,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,1897,8207,0,601,514,0.419246
6,KING ST EAST AT PARLIAMENT ST,DISTILLERY LOOP,8207,15439,0,588,514,0.722251
7,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT PARLIAMENT ST,8207,8207,0,2,514,14.333333
8,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,8207,1897,0,1,514,4.333333
9,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT YONGE ST,8207,7034,0,1,514,5.333333


Find the stops where the segments start at. 

In [28]:
sql_from_stop = '''
SELECT DISTINCT fromstopname, from_stop_id
FROM section_runs
WHERE routenumber = 514
order by from_stop_id; 
'''

pandasql.read_sql(sql_from_stop, con)

Unnamed: 0,fromstopname,from_stop_id
0,KING ST WEST AT SPADINA AVE,436
1,KING ST EAST AT PARLIAMENT ST,1389
2,KING ST WEST AT UNIVERSITY AVE,1845
3,KING ST EAST AT JARVIS ST,1897
4,KING ST WEST AT BATHURST ST,2253
5,KING ST WEST AT YONGE ST,3070
6,KING ST WEST AT UNIVERSITY AVE,3357
7,KING ST WEST AT DUFFERIN ST,4341
8,KING ST WEST AT DUFFERIN ST,4568
9,KING ST WEST AT SPADINA AVE,4748


Find the stops where the segments start at. 

In [29]:
sql_to_stop = '''
SELECT DISTINCT tostopname, to_stop_id
FROM section_runs
WHERE routenumber = 514
order by to_stop_id; 
'''

pandasql.read_sql(sql_to_stop, con)

Unnamed: 0,tostopname,to_stop_id
0,KING ST WEST AT SPADINA AVE,436
1,KING ST EAST AT PARLIAMENT ST,1389
2,KING ST WEST AT UNIVERSITY AVE,1845
3,DISTILLERY LOOP,1845
4,KING ST EAST AT JARVIS ST,1897
5,KING ST WEST AT BATHURST ST,2253
6,KING ST WEST AT YONGE ST,3070
7,KING ST WEST AT UNIVERSITY AVE,3357
8,KING ST WEST AT DUFFERIN ST,4341
9,KING ST WEST AT DUFFERIN ST,4568


## Compare averages between TTC and our processed data

### Route 504

Now calculate the average time between this segments from our CIS data processing.

Filtering for where arrival_time at the last stop in the segment is greater than departure time of the first stop in the segment means positive and negative values will not get filtered together. However, it also means that some values with the opposite direction are filtered out. 

In [3]:
sql_cis_avg = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_504_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_504_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time -- , 
-- AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60 AS time_diff_minutes_our_cis
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
WHERE arrival_time > departure_time and from_stop <> to_stop
-- GROUP BY to_stop, from_stop, t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 504 and toarrstoptime > fromstopdepaturetime
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
)

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis , AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60 AS time_diff_minutes_our_cis
FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 
WHERE s.direction_id = ttc.directionid and ttc.routenumber = 504 -- and arrival_time > departure_time
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis; 
'''

pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis
0,504,7211,436,KING ST WEST AT SPADINA AVE,KING ST WEST AT BATHURST ST,1,1686,2.059134,1387,4.120896
1,504,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,1692,2.715721,1607,3.792989
2,504,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,1628,2.753675,1546,2.626574
3,504,2253,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT BATHURST ST,0,2,3.0,1,1075.0
4,504,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,1561,3.08328,1042,4.495218
5,504,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,1562,3.221084,1106,4.101552
6,504,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,1588,3.40827,1549,5.388401
7,504,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,1695,3.450157,1597,3.710499
8,504,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,1634,3.47144,1488,3.912399
9,504,1845,3070,KING ST WEST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,0,2,3.666667,1,1073.333333


There is not a lot of data for when the departure time is greater than the arrival time. The data seems inaccurate, so these records will be filtered out of further analysis. 

In [12]:
sql_cis_avg = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_504_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_504_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time 
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 504 and toarrstoptime < fromstopdepaturetime
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
)

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM departure_time -  arrival_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 504 and arrival_time < departure_time
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis; 

'''



pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis
0,504,13209,13050,BROADVIEW STATION AT BAY 6,DUNDAS WEST STATION AT BAY 4,0,2,940.666667,251,83.767596
1,504,8207,8207,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT PARLIAMENT ST,0,2,1420.666667,2571,0.646694
2,504,4341,4341,KING ST WEST AT DUFFERIN ST,KING ST WEST AT DUFFERIN ST,1,2,1422.833333,2735,1.618013
3,504,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,6,1432.611111,1,1397.666667
4,504,2253,4568,KING ST WEST AT DUFFERIN ST,KING ST WEST AT BATHURST ST,0,5,1433.4,1,304.0


Filter out rows where the count for either our data or the ttc data is less than 100 because the averages for these time differences could be easily skewed by outliers. Also segments with less than 100 counts from the TTC are not considered to be important. 

Overall, most segments have similar travel times. In the last three rows in the table below, there seems to be very big differences in the travel times accross the segments. However, the last two rows have a count of under 20 for the ttc processed CIS data, so there could be some outliers that are skewing the results. 

In [4]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_504_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_504_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time 
FROM from_stop_table f INNER JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 504 and toarrstoptime > fromstopdepaturetime
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 504 and arrival_time > departure_time
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference
FROM output
WHERE cnt_ttc > 100 AND cnt_our_cis > 100 
ORDER BY ttc_our_time_difference; 
'''

pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,504,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,1695,3.450157,4924,3.342134,0.108024
1,504,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,1692,2.715721,5048,2.85176,0.136039
2,504,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,1628,2.753675,5218,2.517056,0.236619
3,504,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,1561,3.08328,3780,3.322227,0.238947
4,504,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,1634,3.47144,4678,3.120418,0.351022
5,504,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,1661,8.396338,4883,7.911277,0.48506
6,504,2253,4568,KING ST WEST AT DUFFERIN ST,KING ST WEST AT BATHURST ST,0,1663,9.069363,4902,8.455056,0.614307
7,504,1845,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT UNIVERSITY AVE,0,1642,4.447818,4682,3.807661,0.640157
8,504,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,1562,3.221084,3739,2.574735,0.646349
9,504,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,1588,3.40827,4975,2.760121,0.64815


Remove weekends 

In [38]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_504_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_504_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 504 and toarrstoptime > fromstopdepaturetime and journeydate <> '2017-11-19' and journeydate <> '2017-11-25'
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 504 and arrival_time > departure_time 
and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''



pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,504,1897,4748,KING ST WEST AT SPADINA AVE,KING ST EAST AT JARVIS ST,0,30,10.877778,3942,10.825647,0.052131
1,504,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,1397,2.639466,4169,2.766451,0.126985
2,504,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,1392,3.539763,4066,3.391949,0.147813
3,504,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,1339,2.761003,4338,2.541179,0.219825
4,504,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,1291,2.87245,3122,3.101543,0.229093
5,504,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,1346,3.512382,3842,3.154429,0.357953
6,504,4341,436,KING ST WEST AT SPADINA AVE,KING ST WEST AT DUFFERIN ST,1,12,11.166667,3748,11.593312,0.426645
7,504,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,1362,8.441263,3991,7.915593,0.525669
8,504,436,7034,KING ST EAST AT YONGE ST,KING ST WEST AT SPADINA AVE,1,39,8.57265,4077,8.012182,0.560467
9,504,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,1294,3.144513,3078,2.555545,0.588968


Weekday rush hour

In [37]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_504_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_504_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 504 AND toarrstoptime > fromstopdepaturetime AND journeydate <> '2017-11-19' AND journeydate <> '2017-11-25'
AND (EXTRACT(HOUR FROM fromstopdepaturetime) between 7 AND 10 OR EXTRACT(HOUR FROM fromstopdepaturetime) between 16 AND 19)
AND (EXTRACT(HOUR FROM toarrstoptime) between 7 AND 10 OR EXTRACT(HOUR FROM toarrstoptime) between 16 AND 19)
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 504 and arrival_time > departure_time 
and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'
AND (EXTRACT(HOUR FROM departure_time) between 7 AND 10 OR EXTRACT(HOUR FROM departure_time) between 16 AND 19)
AND (EXTRACT(HOUR FROM arrival_time) between 7 AND 10 OR EXTRACT(HOUR FROM arrival_time) between 16 AND 19)
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference 
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''


pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,504,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,659,2.772888,1955,2.919352,0.146464
1,504,436,5334,KING ST EAST AT JARVIS ST,KING ST WEST AT SPADINA AVE,1,17,11.666667,1729,11.511085,0.155581
2,504,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,627,2.894737,2102,2.736759,0.157978
3,504,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,586,3.120023,1479,3.284201,0.164178
4,504,1897,4748,KING ST WEST AT SPADINA AVE,KING ST EAST AT JARVIS ST,0,15,11.444444,1782,11.655817,0.211373
5,504,13050,8207,KING ST EAST AT PARLIAMENT ST,BROADVIEW STATION AT BAY 6,0,555,16.364595,167,16.031936,0.332658
6,504,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,614,3.84962,1801,3.378308,0.471312
7,504,436,7034,KING ST EAST AT YONGE ST,KING ST WEST AT SPADINA AVE,1,19,9.105263,1944,8.632888,0.472376
8,504,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,662,3.917925,1937,3.393048,0.524878
9,504,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,588,3.42517,1460,2.803653,0.621517


Weekday not rush hour

In [36]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_504_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_504_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 504 AND toarrstoptime > fromstopdepaturetime AND journeydate <> '2017-11-19' AND journeydate <> '2017-11-25'
AND NOT (EXTRACT(HOUR FROM fromstopdepaturetime) between 7 AND 10 OR EXTRACT(HOUR FROM fromstopdepaturetime) between 16 AND 19)
AND NOT (EXTRACT(HOUR FROM toarrstoptime) between 7 AND 10 OR EXTRACT(HOUR FROM toarrstoptime) between 16 AND 19)
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 504 and arrival_time > departure_time 
and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'
AND NOT (EXTRACT(HOUR FROM departure_time) between 7 AND 10 OR EXTRACT(HOUR FROM departure_time) between 16 AND 19)
AND NOT (EXTRACT(HOUR FROM arrival_time) between 7 AND 10 OR EXTRACT(HOUR FROM arrival_time) between 16 AND 19)
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference 
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''


pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,504,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,730,2.521918,2181,2.627694,0.105776
1,504,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,713,3.207574,2004,2.953751,0.253823
2,504,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,691,2.662325,1613,2.933881,0.271556
3,504,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,701,2.637637,2200,2.3515,0.286137
4,504,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,695,7.992326,2057,7.621682,0.370644
5,504,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,716,3.193226,2084,2.791267,0.401959
6,504,2253,4568,KING ST WEST AT DUFFERIN ST,KING ST WEST AT BATHURST ST,0,689,8.471214,2080,8.024359,0.446855
7,504,436,7034,KING ST EAST AT YONGE ST,KING ST WEST AT SPADINA AVE,1,18,7.907407,2012,7.421471,0.485936
8,504,4568,13209,DUNDAS WEST STATION AT BAY 4,KING ST WEST AT DUFFERIN ST,0,618,15.444957,1520,14.950439,0.494518
9,504,1845,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT UNIVERSITY AVE,0,701,3.980956,2000,3.483367,0.497589


Weekends

In [35]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_504_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_504_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 504 and toarrstoptime > fromstopdepaturetime and (journeydate = '2017-11-19' OR journeydate = '2017-11-25')
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 504 and arrival_time > departure_time 
and (date(arrival_time) = '2017-11-19' OR date(arrival_time) = '2017-11-25')
and (date(departure_time) = '2017-11-19' OR date(departure_time) = '2017-11-25')
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''



pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,504,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,303,3.038504,858,3.106061,0.067557
1,504,4568,13209,DUNDAS WEST STATION AT BAY 4,KING ST WEST AT DUFFERIN ST,0,277,15.918171,595,15.763361,0.15481
2,504,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,295,3.076836,878,3.257802,0.180966
3,504,2253,4568,KING ST WEST AT DUFFERIN ST,KING ST WEST AT BATHURST ST,0,297,8.499439,860,8.254264,0.245175
4,504,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,270,4.091358,658,4.369301,0.277943
5,504,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,299,8.191695,884,7.887217,0.304477
6,504,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,288,3.280093,836,2.964115,0.315978
7,504,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,289,2.719723,880,2.398144,0.321579
8,504,436,7034,KING ST EAST AT YONGE ST,KING ST WEST AT SPADINA AVE,1,13,7.179487,810,7.638601,0.459114
9,504,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,276,3.237923,826,2.637934,0.599989


### Route 514 

The time difference numbers for segments where departure time from the first stop is occurs after the arrival time to the last stop in the segment is extremely large. These records should be filtered out of further analysis. 

In [24]:
sql_cis_avg = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time 
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 514 and toarrstoptime < fromstopdepaturetime
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
)

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM departure_time -  arrival_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 514 and arrival_time < departure_time
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis; 

'''



pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis
0,514,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,3,1432.111111,4,2119.0
1,514,6113,4341,KING ST WEST AT DUFFERIN ST,DUFFERIN GATE LOOP,1,1,1436.333333,4,2122.333333
2,514,436,3357,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT SPADINA AVE,1,2,1436.333333,1,2123.333333
3,514,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,1,1437.0,2,3104.666667
4,514,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,1,1438.0,1,2125.333333
5,514,7211,436,KING ST WEST AT SPADINA AVE,KING ST WEST AT BATHURST ST,1,1,1438.666667,1,2124.0


In [34]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time 
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 514 and toarrstoptime > fromstopdepaturetime
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 514 and arrival_time > departure_time
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''
pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,514,1845,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT UNIVERSITY AVE,0,606,4.29703,1187,4.320135,0.023105
1,514,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,577,3.094743,819,3.269841,0.175098
2,514,1897,1845,KING ST WEST AT UNIVERSITY AVE,KING ST EAST AT JARVIS ST,0,34,6.509804,1089,6.732798,0.222994
3,514,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,612,3.3061,1162,3.053356,0.252744
4,514,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,570,3.271988,750,3.556911,0.284923
5,514,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,600,2.816056,1052,3.619439,0.803384
6,514,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,575,3.235333,1134,4.787478,1.552145
7,514,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,613,3.55193,1025,5.309593,1.757663
8,514,436,7034,KING ST EAST AT YONGE ST,KING ST WEST AT SPADINA AVE,1,22,8.348485,965,10.254922,1.906437
9,514,436,3357,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT SPADINA AVE,1,591,4.782854,1004,6.88179,2.098936


Remove weekends

In [33]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 514 and toarrstoptime > fromstopdepaturetime and journeydate <> '2017-11-19' and journeydate <> '2017-11-25'
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 514 and arrival_time > departure_time 
and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''



pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,514,1897,1845,KING ST WEST AT UNIVERSITY AVE,KING ST EAST AT JARVIS ST,0,26,6.666667,901,6.583426,0.083241
1,514,1845,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT UNIVERSITY AVE,0,475,4.192281,970,4.294158,0.101877
2,514,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,458,3.09607,618,3.295577,0.199507
3,514,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,460,3.05942,683,3.265983,0.206563
4,514,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,480,3.38125,946,3.037703,0.343547
5,514,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,479,2.707724,852,3.467136,0.759412
6,514,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,465,3.290287,924,5.24026,1.949973
7,514,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,487,3.55989,845,5.712032,2.152141
8,514,436,7034,KING ST EAST AT YONGE ST,KING ST WEST AT SPADINA AVE,1,18,8.296296,789,10.81918,2.522884
9,514,436,3357,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT SPADINA AVE,1,467,4.85439,791,7.521681,2.667292


**Weekday rush hour** 

In [30]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 514 AND toarrstoptime > fromstopdepaturetime AND journeydate <> '2017-11-19' AND journeydate <> '2017-11-25'
AND (EXTRACT(HOUR FROM fromstopdepaturetime) between 7 AND 10 OR EXTRACT(HOUR FROM fromstopdepaturetime) between 16 AND 19)
AND (EXTRACT(HOUR FROM toarrstoptime) between 7 AND 10 OR EXTRACT(HOUR FROM toarrstoptime) between 16 AND 19)
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 514 and arrival_time > departure_time 
and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'
AND (EXTRACT(HOUR FROM departure_time) between 7 AND 10 OR EXTRACT(HOUR FROM departure_time) between 16 AND 19)
AND (EXTRACT(HOUR FROM arrival_time) between 7 AND 10 OR EXTRACT(HOUR FROM arrival_time) between 16 AND 19)
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference 
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''


pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,514,1845,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT UNIVERSITY AVE,0,241,4.478562,481,4.532918,0.054356
1,514,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,220,3.401515,317,3.492114,0.090598
2,514,436,3357,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT SPADINA AVE,1,219,5.179604,383,5.315013,0.135409
3,514,1897,1845,KING ST WEST AT UNIVERSITY AVE,KING ST EAST AT JARVIS ST,0,22,6.924242,444,7.165916,0.241673
4,514,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,225,3.317037,350,3.567619,0.250582
5,514,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,249,3.504685,521,3.193218,0.311467
6,514,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,223,2.726532,453,3.063282,0.33675
7,514,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,226,3.809735,419,3.410501,0.399233
8,514,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,221,3.557994,454,3.074156,0.483838
9,514,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,244,2.901639,448,3.803571,0.901932


Take a look at the trips with the segment KING ST WEST AT DUFFERIN ST to DUFFERIN GATE LOOP with a time of over 4 mins and 30 seconds, which is about the average from the ttc data (See chart above, this segment has the greatest time difference in comparison to the TTC processed data). 

There is only one trip that meets the criteria, and it has a inaccurate and very large travel time between the two stops, making this point an outlier. 

In [39]:
sql_werid_trips = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
)

SELECT to_stop, from_stop, direction_id, trip_id,

abs(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s

WHERE arrival_time <= departure_time AND from_stop = 6113 AND to_stop = 4341 
and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'
AND (EXTRACT(HOUR FROM departure_time) between 7 AND 10 OR EXTRACT(HOUR FROM departure_time) between 16 AND 19)
AND (EXTRACT(HOUR FROM arrival_time) between 7 AND 10 OR EXTRACT(HOUR FROM arrival_time) between 16 AND 19)
AND abs(EXTRACT(EPOCH FROM arrival_time - departure_time) /60) > 4.3
GROUP BY to_stop, from_stop, direction_id, trip_id, arrival_time, departure_time
ORDER BY time_diff_minutes_our_cis

'''


pandasql.read_sql(sql_werid_trips, con)

Unnamed: 0,to_stop,from_stop,direction_id,trip_id,time_diff_minutes_our_cis
0,4341,6113,1,583,4.333333
1,4341,6113,1,60,4.333333
2,4341,6113,1,531,4.333333
3,4341,6113,1,1479,4.333333
4,4341,6113,1,1288,4.333333
5,4341,6113,1,1096,4.333333
6,4341,6113,1,779,4.333333
7,4341,6113,1,731,4.333333
8,4341,6113,1,30,4.666667
9,4341,6113,1,135,4.666667


After removing the outliers from our processed dataset by filtering out travel times that are greater than or equal to 30 minutes, the times measured by our processed data makes more sense. 

In [48]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis_start AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(abs(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60)
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 514 AND journeydate <> '2017-11-19' AND journeydate <> '2017-11-25'
AND (EXTRACT(HOUR FROM fromstopdepaturetime) between 7 AND 10 OR EXTRACT(HOUR FROM fromstopdepaturetime) between 16 AND 19)
AND (EXTRACT(HOUR FROM toarrstoptime) between 7 AND 10 OR EXTRACT(HOUR FROM toarrstoptime) between 16 AND 19)
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

ttc_cis AS (
SELECT * FROM ttc_cis_start WHERE cnt > 100
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(abs(EXTRACT(EPOCH FROM arrival_time - departure_time))) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 514 
and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'
AND (EXTRACT(HOUR FROM departure_time) between 7 AND 10 OR EXTRACT(HOUR FROM departure_time) between 16 AND 19)
AND (EXTRACT(HOUR FROM arrival_time) between 7 AND 10 OR EXTRACT(HOUR FROM arrival_time) between 16 AND 19)
AND abs(EXTRACT(EPOCH FROM arrival_time - departure_time) /60) < 30 
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference 
FROM output
WHERE cnt_ttc > 100 AND cnt_our_cis > 100
ORDER BY directionid; 

'''


pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,514,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,220,3.401515,317,3.492114,0.090598
1,514,1845,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT UNIVERSITY AVE,0,241,4.478562,481,4.532918,0.054356
2,514,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,249,3.504685,521,3.193218,0.311467
3,514,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,244,2.901639,448,3.803571,0.901932
4,514,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,225,3.317037,350,3.567619,0.250582
5,514,2253,6113,DUFFERIN GATE LOOP,KING ST WEST AT BATHURST ST,0,234,12.47151,393,14.439355,1.967845
6,514,436,3357,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT SPADINA AVE,1,219,5.179604,383,5.315013,0.135409
7,514,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,227,9.095448,381,8.904637,0.190811
8,514,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,226,3.809735,419,3.410501,0.399233
9,514,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,223,2.726532,453,3.063282,0.33675


In [19]:
sql_hist  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT fromstopname, tostopname, directionid, to_stop_id, from_stop_id, routenumber
FROM section_runs
WHERE routenumber = 514 AND toarrstoptime > fromstopdepaturetime 
AND journeydate = '2017-11-20'
-- AND journeydate <> '2017-11-19' AND journeydate <> '2017-11-25'
AND (EXTRACT(HOUR FROM fromstopdepaturetime) between 7 AND 10 OR EXTRACT(HOUR FROM fromstopdepaturetime) between 16 AND 19)
AND (EXTRACT(HOUR FROM toarrstoptime) between 7 AND 10 OR EXTRACT(HOUR FROM toarrstoptime) between 16 AND 19)
-- GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
)

SELECT arrival_time 


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 514 
and date(arrival_time) = '2017-11-20' and date(departure_time) = '2017-11-20'

-- and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
-- and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'

AND (EXTRACT(HOUR FROM departure_time) between 7 AND 10 OR EXTRACT(HOUR FROM departure_time) between 16 AND 19)
AND (EXTRACT(HOUR FROM arrival_time) between 7 AND 10 OR EXTRACT(HOUR FROM arrival_time) between 16 AND 19)
AND to_stop_id = 4748  AND from_stop_id = 2253 AND directionid = 0
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, routenumber, arrival_time

'''

# need to fiure out bins ??? 

df = pandasql.read_sql(sql_hist, con)
#bins = np.linspace(50, 2000, 1950)
df["arrival_time"] = pd.to_datetime(df['arrival_time'],  infer_datetime_format=True)

df.set_index(['arrival_time'])


'''
df.groupby(pd.Grouper(freq='10Min')).count().plot(kind='bar')
#df.hist()
plt.title('Route 514 - Travel times TTC processing versus our processing')
plt.xlabel("Travel Time")
#ax = plt.gca()
#ax.set_xlim([50, 2000])
# ax.xaxis.set_major_locator(ticker.MultipleLocator(200))


plt.show()
'''
#pandasql.read_sql(sql_hist, con)

          arrival_time
0  2017-11-20 07:03:20
1  2017-11-20 07:16:40
2  2017-11-20 07:21:40
3  2017-11-20 07:29:20
4  2017-11-20 07:39:20
5  2017-11-20 07:46:40
6  2017-11-20 07:56:00
7  2017-11-20 08:09:20
8  2017-11-20 08:19:40
9  2017-11-20 08:26:40
10 2017-11-20 08:36:40
11 2017-11-20 08:43:00
12 2017-11-20 08:53:20
13 2017-11-20 08:59:20
14 2017-11-20 09:06:40
15 2017-11-20 09:31:00
16 2017-11-20 09:35:40
17 2017-11-20 09:41:20
18 2017-11-20 09:43:20
19 2017-11-20 10:12:40
20 2017-11-20 10:19:00
21 2017-11-20 10:34:20
22 2017-11-20 10:53:20
23 2017-11-20 16:07:00
24 2017-11-20 16:15:00
25 2017-11-20 16:22:40
26 2017-11-20 16:36:40
27 2017-11-20 16:47:20
28 2017-11-20 16:53:20
29 2017-11-20 16:58:40
30 2017-11-20 17:11:40
31 2017-11-20 17:18:40
32 2017-11-20 17:29:40
33 2017-11-20 17:40:40
34 2017-11-20 17:47:20
35 2017-11-20 18:01:00
36 2017-11-20 18:06:00
37 2017-11-20 18:13:20
38 2017-11-20 18:31:40
39 2017-11-20 18:38:00
40 2017-11-20 18:52:00
41 2017-11-20 19:10:20
42 2017-11-

ValueError: num must be 1 <= num <= 0, not 1

<matplotlib.figure.Figure at 0xc492e48>

Weekday not rush hour

In [31]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 514 AND toarrstoptime > fromstopdepaturetime AND journeydate <> '2017-11-19' AND journeydate <> '2017-11-25'
AND NOT (EXTRACT(HOUR FROM fromstopdepaturetime) between 7 AND 10 OR EXTRACT(HOUR FROM fromstopdepaturetime) between 16 AND 19)
AND NOT (EXTRACT(HOUR FROM toarrstoptime) between 7 AND 10 OR EXTRACT(HOUR FROM toarrstoptime) between 16 AND 19)
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 514 and arrival_time > departure_time 
and date(arrival_time) <> '2017-11-19' and date(arrival_time) <> '2017-11-25'
and date(departure_time) <> '2017-11-19' and date(departure_time) <> '2017-11-25'
AND NOT (EXTRACT(HOUR FROM departure_time) between 7 AND 10 OR EXTRACT(HOUR FROM departure_time) between 16 AND 19)
AND NOT (EXTRACT(HOUR FROM arrival_time) between 7 AND 10 OR EXTRACT(HOUR FROM arrival_time) between 16 AND 19)
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference 
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''


pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,514,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,224,7.959821,419,7.881464,0.078358
1,514,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,249,2.532932,487,2.635181,0.10225
2,514,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,234,2.814815,325,2.929231,0.114416
3,514,1845,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT UNIVERSITY AVE,0,222,3.884384,470,4.057447,0.173062
4,514,436,3357,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT SPADINA AVE,1,238,4.567227,393,4.378287,0.18894
5,514,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,243,3.049383,467,2.800857,0.248526
6,514,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,231,2.815296,290,3.106897,0.291601
7,514,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,257,3.341115,419,2.976134,0.364982
8,514,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,228,3.233918,418,2.836523,0.397395
9,514,8207,1897,KING ST EAST AT JARVIS ST,KING ST EAST AT PARLIAMENT ST,0,227,2.497797,393,3.089907,0.592109


Weekends

In [32]:
sql_cis_avg  = '''
WITH to_stop_table AS (
SELECT t.stop_id to_stop, trip_id, direction_id, arrival_time
FROM crosic.cis_514_11192017_11252017_tripids t
WHERE t.stop_id IN (SELECT to_stop_id FROM crosic.section_runs)
),

from_stop_table AS (
SELECT t.stop_id from_stop, trip_id, direction_id, departure_time
FROM crosic.cis_514_11192017_11252017_tripids t 
WHERE t.stop_id IN (SELECT from_stop_id FROM crosic.section_runs)
), 

stops AS (
SELECT to_stop, from_stop, t.direction_id, t.trip_id, arrival_time, departure_time
FROM from_stop_table f JOIN to_stop_table t ON f.trip_id = t.trip_id and f.direction_id = t.direction_id
),

ttc_cis AS (
SELECT DISTINCT fromstopname, tostopname, directionid, COUNT(*) cnt, to_stop_id, from_stop_id, routenumber,
AVG(EXTRACT(EPOCH FROM (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(toarrstoptime, 'HH24:MI:SS'))::timestamp
- (to_char(journeydate, 'YYYY-MM-DD') || ' ' || to_char(fromstopdepaturetime, 'HH24:MI:SS'))::timestamp)  ) /60
AS time_diff_minutes_ttc
FROM section_runs
WHERE routenumber = 514 and toarrstoptime > fromstopdepaturetime and (journeydate = '2017-11-19' OR journeydate = '2017-11-25')
GROUP BY fromstopname, tostopname, to_stop_id, from_stop_id, directionid, routenumber
ORDER BY time_diff_minutes_ttc
),

output AS (
SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt cnt_ttc, time_diff_minutes_ttc, 
COUNT(*) cnt_our_cis,

AVG(EXTRACT(EPOCH FROM arrival_time - departure_time)) /60
AS time_diff_minutes_our_cis


FROM stops s JOIN ttc_cis ttc ON s.to_stop = ttc.to_stop_id AND s.from_stop = ttc.from_stop_id 

WHERE s.direction_id = ttc.directionid and ttc.routenumber = 514 and arrival_time > departure_time 
and (date(arrival_time) = '2017-11-19' OR date(arrival_time) = '2017-11-25')
and (date(departure_time) = '2017-11-19' OR date(departure_time) = '2017-11-25')
GROUP BY to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, ttc.cnt, time_diff_minutes_ttc, routenumber
ORDER BY time_diff_minutes_ttc, time_diff_minutes_our_cis
) 

SELECT routenumber, to_stop_id, from_stop_id,  fromstopname, tostopname, directionid, cnt_ttc, time_diff_minutes_ttc, 
cnt_our_cis, time_diff_minutes_our_cis, abs(time_diff_minutes_our_cis - time_diff_minutes_ttc) ttc_our_time_difference
FROM output
WHERE cnt_ttc > 10 AND cnt_our_cis > 10
ORDER BY ttc_our_time_difference; 

'''



pandasql.read_sql(sql_cis_avg, con)

Unnamed: 0,routenumber,to_stop_id,from_stop_id,fromstopname,tostopname,directionid,cnt_ttc,time_diff_minutes_ttc,cnt_our_cis,time_diff_minutes_our_cis,ttc_our_time_difference
0,514,436,3357,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT SPADINA AVE,1,124,4.513441,211,4.513428,1.3e-05
1,514,3070,1845,KING ST WEST AT UNIVERSITY AVE,KING ST WEST AT YONGE ST,0,117,3.233618,136,3.289216,0.055597
2,514,4748,2253,KING ST WEST AT BATHURST ST,KING ST WEST AT SPADINA AVE,0,132,3.032828,216,3.121914,0.089085
3,514,5334,1389,KING ST EAST AT PARLIAMENT ST,KING ST EAST AT JARVIS ST,1,126,3.521164,180,3.42037,0.100794
4,514,4341,7211,KING ST WEST AT BATHURST ST,KING ST WEST AT DUFFERIN ST,1,128,8.46875,178,8.269663,0.199087
5,514,3357,7034,KING ST EAST AT YONGE ST,KING ST WEST AT UNIVERSITY AVE,1,110,3.00303,210,2.795238,0.207792
6,514,7034,5334,KING ST EAST AT JARVIS ST,KING ST EAST AT YONGE ST,1,113,2.705015,200,2.943333,0.238319
7,514,1845,4748,KING ST WEST AT SPADINA AVE,KING ST WEST AT UNIVERSITY AVE,0,131,4.676845,217,4.436252,0.240593
8,514,1897,3070,KING ST WEST AT YONGE ST,KING ST EAST AT JARVIS ST,0,112,3.991369,131,4.801654,0.810285
9,514,6113,4341,KING ST WEST AT DUFFERIN ST,DUFFERIN GATE LOOP,1,124,4.215054,126,3.296296,0.918757


Overall we found that our CIS processing had higher counts than the TTC processed CIS counts, part of which may be because the TTC filters out a lot of their trips. 

In [6]:
con.close()