# Countmatch Postgres Integration Test

Countmatch can now read its inputs either from file or from our Postgres database.  We need to test that the read-in pipeline produces identical results for both.  To do this, I generated test suite materialized views in Postgres.

Locations:

```sql
-- Test-suite subset of the centreline lon/lat table, restricted to the
-- 11 centreline IDs listed below.
CREATE VIEW czhu.btp_centreline_lonlat_testsuite AS (
    SELECT *
    FROM czhu.btp_centreline_lonlat
    WHERE centreline_id IN (170, 241, 252, 410, 427, 487, 680, 890, 1978, 104870, 446378)
)
```

Daily counts:

```sql
-- Test-suite subset of the daily counts table: only the listed
-- (count_year, direction, centreline_id) combinations are kept.
CREATE VIEW czhu.btp_centreline_daily_counts_testsuite AS (
    SELECT *
    FROM czhu.btp_centreline_daily_counts
    WHERE (count_year = 2010 AND direction = -1
           AND centreline_id IN (241, 252, 410, 427, 487, 890, 104870, 446378))
       OR (count_year = 2011 AND direction = -1
           AND centreline_id IN (170, 680, 890, 104870, 446378))
       OR (count_year = 2011 AND direction = 1
           AND centreline_id IN (170, 104870))
       OR (count_year = 2012 AND direction = -1
           AND centreline_id IN (241, 890, 1978, 104870, 446378))
    ORDER BY count_year, centreline_id, direction, count_date
)
```

Although this notebook is stored in `sandbox`, it was tested against commit `9e9d93586732e2b2872df58ba4580a95dff48b9c` of the `countmatch_sql` branch.

In [1]:
%matplotlib inline
import sys
sys.path.append('./bdit_traffic_prophet/')
import importlib
import numpy as np

In [2]:
import pandas as pd
from traffic_prophet import cfg
import pathlib
# Config file in the user's home directory; handed to connection.Connection.
filepath = pathlib.Path.home() / '.charlesconfig'

In [3]:
from traffic_prophet import connection
from traffic_prophet.countmatch import reader
from traffic_prophet.countmatch import neighbour

# Connection pointing at the daily counts test-suite view.
# NOTE(review): 'POSTGRES' is presumably the section name inside the config
# file at `filepath` -- confirm against connection.Connection.
vol_conn = connection.Connection(filepath, 'POSTGRES',
                                 'czhu.btp_centreline_daily_counts_testsuite')

# Connection pointing at the centreline lon/lat test-suite view.
ll_conn = connection.Connection(filepath, 'POSTGRES',
                                'czhu.btp_centreline_lonlat_testsuite')

## Read in Postgres Tables

In [4]:
# Read the counts through the Postgres connection.
rdr_p = reader.Reader(vol_conn)
rdr_p.read()

# Neighbour object built from the Postgres lon/lat connection.
# NOTE(review): the second argument (5) is presumably the number of
# neighbours to find -- confirm against NeighbourLonLatManhattan.
nb_p = neighbour.NeighbourLonLatManhattan(ll_conn, 5)

In [5]:
nb_p.data

Unnamed: 0,Centreline ID,Lon,Lat
0,170,-79.367524,43.717086
1,241,-79.376962,43.713921
2,252,-79.368784,43.713683
3,410,-79.367367,43.710128
4,427,-79.369148,43.709757
5,487,-79.302295,43.707757
6,680,-79.368196,43.705244
7,890,-79.335566,43.701201
8,1978,-79.362524,43.681949
9,104870,-79.199426,43.801128


In [6]:
rdr_p.ptcs

{-890: <traffic_prophet.countmatch.reader.Count at 0x7f49d68a5978>,
 -104870: <traffic_prophet.countmatch.reader.Count at 0x7f49d6891f60>}

In [7]:
rdr_p.sttcs

{-241: <traffic_prophet.countmatch.reader.Count at 0x7f49d68a5b38>,
 -252: <traffic_prophet.countmatch.reader.Count at 0x7f49d8b81860>,
 -410: <traffic_prophet.countmatch.reader.Count at 0x7f49d8e13b38>,
 -427: <traffic_prophet.countmatch.reader.Count at 0x7f49d6888588>,
 -487: <traffic_prophet.countmatch.reader.Count at 0x7f49d68889e8>,
 -446378: <traffic_prophet.countmatch.reader.Count at 0x7f49d8bb43c8>,
 -170: <traffic_prophet.countmatch.reader.Count at 0x7f4a24bb0f60>,
 170: <traffic_prophet.countmatch.reader.Count at 0x7f49d8bb4240>,
 -680: <traffic_prophet.countmatch.reader.Count at 0x7f49d68b4668>,
 -890: <traffic_prophet.countmatch.reader.Count at 0x7f49d68b4ba8>,
 -104870: <traffic_prophet.countmatch.reader.Count at 0x7f49d6888e10>,
 104870: <traffic_prophet.countmatch.reader.Count at 0x7f49d68b4ac8>,
 -1978: <traffic_prophet.countmatch.reader.Count at 0x7f49d8b81e48>}

## Read in Test Suite Data

In [8]:
from traffic_prophet.data import SAMPLE_ZIP
from traffic_prophet.data import SAMPLE_LONLAT

# Read the counts from the sample zip imported from traffic_prophet.data.
rdr_f = reader.Reader(SAMPLE_ZIP)
rdr_f.read()

# Neighbour object built from the sample lon/lat data, using the same second
# argument (5) as the Postgres-backed `nb_p` so the two are comparable.
nb_f = neighbour.NeighbourLonLatManhattan(SAMPLE_LONLAT, 5)

## Check Neighbour Data Frames Are the Same

In [9]:
# Centreline IDs must match exactly; coordinates only need to agree to
# within floating-point tolerance.
assert np.array_equal(nb_f.data['Centreline ID'].values, nb_p.data['Centreline ID'].values)
for coord in ('Lon', 'Lat'):
    assert np.allclose(nb_f.data[coord].values, nb_p.data[coord].values, rtol=1e-6, atol=1e-6)

## Check Count Data Frames Are the Same (or Explainably Different)

In [10]:
rdr_p.sttcs[-241].data

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Daily Count
Year,Day of Year,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,96,2010-04-06,4126.0
2010,97,2010-04-07,4313.0
2010,98,2010-04-08,4288.0
2012,157,2012-06-05,2243.0
2012,158,2012-06-06,2312.0
2012,159,2012-06-07,2341.0


Check whether the daily count data frames for every entry in `sttcs` are identical between the Postgres and file readers.

In [11]:
# Collect every key whose file-based data frame does not match the
# Postgres-based one.
bad_keys = []
for k, count_f in rdr_f.sttcs.items():
    frame_f = count_f.data
    frame_p = rdr_p.sttcs[k].data
    # `and` short-circuits, so values are only compared once the indices agree.
    frames_match = (
        frame_f.index.equals(frame_p.index)
        and np.array_equal(frame_f['Date'], frame_p['Date'])
        and np.allclose(frame_f['Daily Count'], frame_p['Daily Count'],
                        rtol=1e-6, atol=1e-4)
    )
    if not frames_match:
        bad_keys.append(k)

In [12]:
bad_keys

[]

In [13]:
def check_ptcs_equal(ptc1, ptc2):
    """Assert that two PTC data dictionaries hold equivalent tables.

    Compares the 'MADT', 'DoMADT', 'DoM Factor', 'Daily Count' and 'AADT'
    entries of `ptc1` and `ptc2`.  Indices (and columns, where checked) must
    be identical; numeric values are compared with `np.allclose` using
    rtol=1e-6 and atol=1e-4.  For 'MADT', 'DoMADT' and 'DoM Factor', NaNs are
    accepted only when they occur at the same positions in both inputs.

    Parameters
    ----------
    ptc1, ptc2 : dict-like
        Mappings from table name to `pandas.DataFrame`.

    Raises
    ------
    AssertionError
        On the first check that fails; the message names the check.
    """
    def values_close(left, right, equal_nan=False):
        # Compare the underlying arrays so index alignment plays no role.
        return np.allclose(left.values, right.values,
                           rtol=1e-6, atol=1e-4, equal_nan=equal_nan)

    # MADT
    assert ptc1['MADT'].index.equals(ptc2['MADT'].index), "MADT indices differ"
    assert values_close(ptc1['MADT']['MADT'], ptc2['MADT']['MADT'],
                        equal_nan=True), "MADT values differ"
    assert ptc1['MADT']['Days in Month'].equals(ptc2['MADT']['Days in Month']), "MADT Days in Month differ"

    # DoMADT
    assert ptc1['DoMADT'].index.equals(ptc2['DoMADT'].index), "DoMADT indices differ"
    assert ptc1['DoMADT'].columns.equals(ptc2['DoMADT'].columns), "DoMADT columns differ"
    assert values_close(ptc1['DoMADT'], ptc2['DoMADT'],
                        equal_nan=True), "DoMADT values differ"

    # DoM Factor
    assert ptc1['DoM Factor'].index.equals(ptc2['DoM Factor'].index), "DoM Factor indices differ"
    assert ptc1['DoM Factor'].columns.equals(ptc2['DoM Factor'].columns), "DoM Factor columns differ"
    assert values_close(ptc1['DoM Factor'], ptc2['DoM Factor'],
                        equal_nan=True), "DoM Factor values differ"

    # Daily Counts -- compare ptc1 against ptc2 (not against itself).
    assert ptc1['Daily Count'].index.equals(ptc2['Daily Count'].index), "Daily count indices differ"
    assert np.array_equal(ptc1['Daily Count']['Date'], ptc2['Daily Count']['Date']), "Daily count dates indices differ"
    assert values_close(ptc1['Daily Count']['Daily Count'],
                        ptc2['Daily Count']['Daily Count']), "Daily count values differ"

    # AADT -- compare ptc1 against ptc2 (not against itself).
    assert ptc1['AADT'].index.equals(ptc2['AADT'].index), "AADT indices differ"
    assert values_close(ptc1['AADT']['AADT'],
                        ptc2['AADT']['AADT']), "AADT values differ"

# Compare every file-based PTC against its Postgres counterpart.  The first
# mismatch is recorded in `bad_keys`, reported, and re-raised.
bad_keys = []
for k in rdr_f.ptcs.keys():
    try:
        check_ptcs_equal(rdr_f.ptcs[k].data, rdr_p.ptcs[k].data)
    except AssertionError:
        # Record the failing key so that `bad_keys` reflects the failure,
        # then re-raise with the original traceback intact.
        bad_keys.append(k)
        print("ERROR ENCOUNTERED IN ", k)
        raise

In [14]:
bad_keys

[]

## Check Count Metadata Are the Same

In [15]:
# Collect every STTC key whose identifying metadata differs between the
# file-based and Postgres-based readers.
bad_keys = []
for k, count_f in rdr_f.sttcs.items():
    count_p = rdr_p.sttcs[k]
    # any() stops at the first differing attribute, like the chained `or`.
    if any(getattr(count_f, attr) != getattr(count_p, attr)
           for attr in ('centreline_id', 'count_id', 'direction')):
        bad_keys.append(k)

In [16]:
# Collect every PTC key whose identifying metadata differs between the
# file-based and Postgres-based readers.
bad_keys = []
for k, count_f in rdr_f.ptcs.items():
    count_p = rdr_p.ptcs[k]
    # any() stops at the first differing attribute, like the chained `or`.
    if any(getattr(count_f, attr) != getattr(count_p, attr)
           for attr in ('centreline_id', 'count_id', 'direction')):
        bad_keys.append(k)