# Algorithm Cage Match

This notebook generates sets of fake STTCs from PTCs, and determines the mean square error produced by the TEPs-I, Bagheri, and my own count-matching algorithms.

In [1]:
%matplotlib inline
import sys
sys.path.append('../')
import importlib
import matplotlib.pyplot as plt
import numpy as np
import knowyourdata as kyd

import pandas as pd
from traffic_prophet import cfg
import pathlib, os
import configparser

from traffic_prophet import connection
from traffic_prophet.countmatch import reader
from traffic_prophet.countmatch import growthfactor as gf
from traffic_prophet.countmatch import neighbour

# Matplotlib's default colour cycle, captured once so plots can reuse it.
defaultcolours = plt.rcParams['axes.prop_cycle'].by_key()['color']

# Two credential files are supported.  They are used in exactly the same way
# and differ only in the section / source / option names recorded here.
_charlesconfig_settings = {
    'pg_section': 'POSTGRES',
    'volume_source': 'czhu.btp_centreline_daily_counts',
    'lonlat_source': 'czhu.btp_centreline_lonlat',
    'mapbox_section': 'MAPBOX',
    'plotly_section': 'PLOTLY',
    'plotly_key_option': 'key',
}
_cf_txt_settings = {
    'pg_section': 'localpg',
    'volume_source': 'prj_vol.btp_centreline_daily_counts',
    'lonlat_source': 'gis.btp_centreline_lonlat',
    'mapbox_section': 'mapbox',
    'plotly_section': 'plotly',
    'plotly_key_option': 'apikey',
}

# Prefer ~/.charlesconfig when it exists; otherwise fall back to ~/cf.txt.
filepath = pathlib.Path.home().joinpath('.charlesconfig')
if os.path.isfile(filepath):
    _settings = _charlesconfig_settings
else:
    filepath = pathlib.Path.home().joinpath('cf.txt')
    _settings = _cf_txt_settings

# Connection handles for the daily-count source and the lon/lat source.
vol_conn = connection.Connection(
    filepath, _settings['pg_section'], _settings['volume_source'])
ll_conn = connection.Connection(
    filepath, _settings['pg_section'], _settings['lonlat_source'])

# The same file also carries the Mapbox and Plotly credentials.
config = configparser.RawConfigParser()
config.read(filepath.as_posix())
MAPBOX_TOKEN = config[_settings['mapbox_section']]['token']
PLOTLY_USER = config[_settings['plotly_section']]['user']
PLOTLY_KEY = config[_settings['plotly_section']][_settings['plotly_key_option']]

In [2]:
# Build a Reader on the daily-count connection and run its read step.
# Later cells access rdr.ptcs and rdr.sttcs, so read() presumably populates
# those collections (confirm in traffic_prophet.countmatch.reader).
# %time reports how long the read takes.
rdr = reader.Reader(vol_conn)
%time rdr.read()

CPU times: user 1min 34s, sys: 268 ms, total: 1min 34s
Wall time: 1min 35s


In [3]:
# The return value is not captured, so the growth factors are presumably
# stored on the objects held by rdr (verify in countmatch.growthfactor).
gf.get_growth_factors(rdr)

In [4]:
# Collapse the PTC dictionary keys to their unique absolute values before
# handing them to the neighbour finder.  IDs appear to be signed (negative IDs
# show up in the AADT estimate table further down) -- presumably the sign
# encodes direction; TODO confirm.
ptc_ids = np.unique(np.abs(list(rdr.ptcs.keys())))
# NOTE(review): the literal 20 is presumably the number of neighbours to find
# per location -- confirm against NeighbourLonLatEuclidean's signature.
nb = neighbour.NeighbourLonLatEuclidean(ll_conn, 20, ptc_ids)
# %time reports how long the neighbour search takes.
%time nb.find_neighbours()

CPU times: user 17 s, sys: 68 ms, total: 17.1 s
Wall time: 17.1 s


## STTC-MC generator

This routine randomly generates a set of fake STTCs from the PTCs. **TODO:** the generator code has not been added to this section yet.

## TEPs-like CountMatch

Testing the `countmatch_teps` module.

In [5]:
# countmatch_teps is a local module that sits next to this notebook.  Reload it
# after importing so that edits to the .py file are picked up without
# restarting the kernel.
import countmatch_teps as cmt
importlib.reload(cmt)

<module 'countmatch_teps' from '/home/cczhu/GitHub/bdit_traffic_prophet/development_notebooks/countmatch_teps.py'>

In [8]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import countmatch_common as cmc
importlib.reload(cmc)

# Run parameters consumed by the cells below.

# Passed to cmt.get_normalized_seasonal_patterns and
# cmt.get_aadt_estimate_for_sttc; presumably the year AADT is estimated for.
want_year = 2016
# Forwarded as the n_neighbours keyword of get_normalized_seasonal_patterns;
# presumably the number of nearby PTCs compared against each STTC.
n_neighbours = 5
# Forwarded as the single_direction keyword of get_normalized_seasonal_patterns.
# NOTE(review): exact semantics are defined in countmatch_teps -- confirm there.
single_direction = True
# When False, the tqdm progress bars in the loops below are disabled.
progress_bar = True

In [10]:
# Progress-wrapped view over every PTC (bar hidden when progress_bar is False).
ptc_progress = tqdm(rdr.ptcs.values(),
                    desc='Calculating PTC annual/DoW averages',
                    disable=not progress_bar)

# mse_preprocess_ptc's return value is discarded, so whatever it computes is
# presumably stored on the PTC object itself.
# The loop variable is intentionally still called `p`: it stays bound at
# notebook scope after the loop, and a later cell reads a global named `p`.
for p in ptc_progress:
    cmt.mse_preprocess_ptc(p)

HBox(children=(IntProgress(value=0, description='Calculating PTC annual/DoW averages', max=166, style=Progress…




In [11]:
# Single growth factor derived from everything in rdr; it is passed to
# cmt.get_aadt_estimate_for_sttc when the AADT estimates are computed.
citywide_growth_factor = cmc.get_citywide_growth_factor(rdr)

In [12]:
citywide_growth_factor

1.0222564979935564

In [15]:
# Progress-wrapped view over every STTC (bar hidden when progress_bar is False).
sttc_progress = tqdm(rdr.sttcs.values(),
                     desc='Calculating STTC normalized monthly patterns',
                     disable=not progress_bar)

# Attach the table returned by get_normalized_seasonal_patterns to each STTC
# as its `tc_mse` attribute; later cells read it back from there.
for tc in sttc_progress:
    seasonal_patterns = cmt.get_normalized_seasonal_patterns(
        tc, rdr.ptcs, nb, want_year,
        n_neighbours=n_neighbours,
        single_direction=single_direction)
    tc.tc_mse = seasonal_patterns

HBox(children=(IntProgress(value=0, description='Calculating STTC normalized monthly patterns', max=15553, sty…




In [17]:
# Spot-check: show the daily counts stored for one STTC (ID 8540609).
rdr.sttcs[8540609].data

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Daily Count
Year,Day of Year,Unnamed: 2_level_1,Unnamed: 3_level_1
2007,1,2007-01-01,218.0
2007,2,2007-01-02,472.0
2007,6,2007-01-06,284.0
2007,7,2007-01-07,354.0
2007,8,2007-01-08,484.0
...,...,...,...
2018,122,2018-05-02,2083.0
2018,123,2018-05-03,941.0
2018,124,2018-05-04,832.0
2018,125,2018-05-05,993.0


In [18]:
# Spot-check the tc_mse table that the seasonal-pattern loop attached to
# STTC 117.
rdr.sttcs[117].tc_mse

Unnamed: 0,Year,Day of Year,Date,Daily Count,Day of Week,PTC ID,PTC Day-to-AADT Ratio,PTC MADT Avg.,PTC DoM Factor Avg.,PTC DoMADT Avg.,PTC Closest Year,PTC Closest Year AADT
0,2009,63,2009-03-04,1236.0,2,1033,0.975932,74875.398171,0.974378,76903.711111,2016,74771.768446
1,2009,63,2009-03-04,1236.0,2,446012,0.905438,17404.933889,0.862139,20272.557692,2013,17382.176328
2,2009,63,2009-03-04,1236.0,2,444516,0.931978,1716.94273,0.902882,1911.44186,2015,1718.241066
3,2009,63,2009-03-04,1236.0,2,444782,0.99723,65950.264092,0.995287,66298.409091,2010,65963.771956
4,2009,63,2009-03-04,1236.0,2,1003,0.97581,3340.586895,0.967456,3459.525,2010,3350.164546
5,2010,160,2010-06-09,1252.0,2,1033,0.975932,74875.398171,0.974378,76903.711111,2016,74771.768446
6,2010,160,2010-06-09,1252.0,2,446012,0.905438,17404.933889,0.862139,20272.557692,2013,17382.176328
7,2010,160,2010-06-09,1252.0,2,444516,0.931978,1716.94273,0.902882,1911.44186,2015,1718.241066
8,2010,160,2010-06-09,1252.0,2,444782,0.99723,65950.264092,0.995287,66298.409091,2010,65963.771956
9,2010,160,2010-06-09,1252.0,2,1003,0.97581,3340.586895,0.967456,3459.525,2010,3350.164546


In [21]:
# Progress-wrapped view over every STTC (bar hidden when progress_bar is False).
sttc_progress = tqdm(rdr.sttcs.values(),
                     desc='Determining minimum MSE and estimating AADT',
                     disable=not progress_bar)

# Collect one estimate record per STTC, in the iteration order of rdr.sttcs.
aadt_estimates = []
for tc in sttc_progress:
    estimate = cmt.get_aadt_estimate_for_sttc(
        tc, rdr, citywide_growth_factor, want_year)
    aadt_estimates.append(estimate)

HBox(children=(IntProgress(value=0, description='Determining minimum MSE and estimating AADT', max=15553, styl…




In [24]:
# Re-inspect STTC 117 after the AADT estimation step.  Compared with the
# earlier display, the table shown here has extra columns (AADT_prelim,
# MADT_pj, MF_STTC, MF_PTC, Square Deviation), so the estimation step appears
# to extend tc_mse.
rdr.sttcs[117].tc_mse

Unnamed: 0,Year,Day of Year,Date,Daily Count,Day of Week,PTC ID,PTC Day-to-AADT Ratio,PTC MADT Avg.,PTC DoM Factor Avg.,PTC DoMADT Avg.,PTC Closest Year,PTC Closest Year AADT,AADT_prelim,MADT_pj,MF_STTC,MF_PTC,Square Deviation
0,2009,63,2009-03-04,1236.0,2,1033,0.975932,74875.398171,0.974378,76903.711111,2016,74771.768446,1416.311964,1404.963007,0.991987,1.001386,8.834086e-05
1,2009,63,2009-03-04,1236.0,2,446012,0.905438,17404.933889,0.862139,20272.557692,2013,17382.176328,1314.008766,1243.124739,0.946055,1.001309,0.003053017
2,2009,63,2009-03-04,1236.0,2,444516,0.931978,1716.94273,0.902882,1911.44186,2015,1718.241066,1352.524625,1301.873643,0.962551,0.999244,0.00134642
3,2009,63,2009-03-04,1236.0,2,444782,0.99723,65950.264092,0.995287,66298.409091,2010,65963.771956,1447.22155,1435.112007,0.991633,0.999795,6.66291e-05
4,2009,63,2009-03-04,1236.0,2,1003,0.97581,3340.586895,0.967456,3459.525,2010,3350.164546,1416.135001,1394.98316,0.985064,0.997141,0.000145865
5,2010,160,2010-06-09,1252.0,2,1033,0.975932,74875.398171,0.974378,76903.711111,2016,74771.768446,1385.476118,1392.165502,1.004828,1.001386,1.184925e-05
6,2010,160,2010-06-09,1252.0,2,446012,0.905438,17404.933889,0.862139,20272.557692,2013,17382.176328,1285.400257,1231.801384,0.958302,1.001309,0.00184964
7,2010,160,2010-06-09,1252.0,2,444516,0.931978,1716.94273,0.902882,1911.44186,2015,1718.241066,1323.077552,1290.015156,0.975011,0.999244,0.0005872571
8,2010,160,2010-06-09,1252.0,2,444782,0.99723,65950.264092,0.995287,66298.409091,2010,65963.771956,1415.712742,1422.039881,1.004469,0.999795,2.18463e-05
9,2010,160,2010-06-09,1252.0,2,1003,0.97581,3340.586895,0.967456,3459.525,2010,3350.164546,1385.303008,1382.276559,0.997815,0.997141,4.54513e-07


In [22]:
# Turn the per-STTC results collected in the estimation loop into a single
# DataFrame.
# NOTE(review): this rebinds `aadt_estimates` from a list to a DataFrame, so
# the original list is no longer reachable under this name.
aadt_estimates = pd.DataFrame(aadt_estimates)

In [23]:
aadt_estimates

Unnamed: 0,Count ID,PTC ID,D_ij,Closest Year,AADT Estimate
0,-175,-445884,0.976424,2010,2067.006122
1,175,446012,0.915809,2010,1702.492050
2,-201,-443987,0.937714,2006,318.251312
3,201,445280,0.984605,2006,250.726292
4,-202,-443975,0.980700,2006,199.622586
...,...,...,...,...,...
15548,-30078909,-9313402,0.961568,2018,16861.506096
15549,30078909,5849817,0.971144,2018,17348.187815
15550,-30078912,-8171,0.957668,2018,16644.649348
15551,-30085792,-441170,0.999262,2018,5720.452748


In [7]:
    # Scratch cell: fills the 'Day-to-AADT Ratio' column of `doyr` for one
    # year with that year's AADT divided by each of its daily counts.
    # NOTE(review): hidden state -- `doyr` is not assigned in any cell visible
    # in this notebook, and `p` is presumably whatever PTC was left bound by
    # an earlier `for p in ...` loop.  This cell will not survive
    # Restart & Run All as written; confirm whether it is still needed.
    year = 2010    
    doyr.loc[year, 'Day-to-AADT Ratio'] = (
            p.data['AADT'].at[year, 'AADT'] /
            p.data['Daily Count'].loc[year, 'Daily Count']).values

In [8]:
doyr

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Day-to-AADT Ratio
Year,Day of Year,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,1,2010-01-01,1.439357
2010,2,2010-01-02,1.098346
2010,3,2010-01-03,1.284496
2010,6,2010-01-06,0.952754
2010,7,2010-01-07,0.939115
2010,8,2010-01-08,0.896251
2010,9,2010-01-09,0.938695
2010,10,2010-01-10,1.058197
2010,11,2010-01-11,0.926145
2010,12,2010-01-12,0.896869


In [79]:
import importlib

In [80]:
# NOTE(review): duplicate of the import/reload cell under
# "TEPs-like CountMatch" earlier in this notebook; it looks like a leftover
# from interactive development and can probably be removed.
import countmatch_teps as cmt
importlib.reload(cmt)

<module 'countmatch_teps' from '/mnt/c/Users/czhu5/Documents/Ubuntu/GitHub/bdit_traffic_prophet/development_notebooks/countmatch_teps.py'>