# Manual Data Refresh Smoke Test

This notebook checks that the 15-minute ATR aggregation table `miovision_csv.volumes2020_15min`, generated from the manual CSV dump in `miovision_csv.volumes_2020`, is largely consistent with the backed-up hourly-binned data in `covid.miovision_summary_20200922backup`. For reference, the same comparison is also plotted for the API-derived table `miovision_api.volumes_15min`.

In [1]:
import psycopg2
import datetime
import pytz
import pathlib
import configparser
import numpy as np
import pandas as pd
from plotly import graph_objs as go
from ipywidgets import interact

import intersection_tmc_notebook03test as itmc

# Load the [POSTGRES] section of the config file kept in the user's home
# directory; its entries are later passed as keyword arguments to
# psycopg2.connect.
config = configparser.ConfigParser()
config.read((pathlib.Path.home() / '.charlesconfig').as_posix())
postgres_settings = config['POSTGRES']

In [2]:
# Time zone object for US/Eastern.
# NOTE(review): not referenced by any other cell visible in this notebook.
local_tz = pytz.timezone('US/Eastern')

In [119]:
# Query template for the backed-up hourly summary table.
# Placeholders filled with str.format: {intersection_uid}, {class_type}.
# Returns datetime_bin and volume_actual (aliased to `volume`) for one
# intersection and class between 2020-03-01 and 2020-04-30 23:59:59, ordered
# by datetime_bin.
# NOTE(review): values are interpolated directly into the SQL text rather than
# passed as bound parameters, so only trusted values should be supplied.
old_miov_query = """SELECT datetime_bin, volume_actual volume FROM covid.miovision_summary_20200922backup
WHERE intersection_uid = {intersection_uid} AND class_type = '{class_type}'
      AND datetime_bin BETWEEN '2020-03-01' AND '2020-04-30 23:59:59'
ORDER BY datetime_bin"""

# Query template that rebuilds hourly volumes from a 15-minute volume table.
# Placeholders filled with str.format: {voltable}, {intersection_uid},
# {class_type}.
#   valid_bins           - bins of the intersection inside the 2020-03-01 to
#                          2020-04-30 window whose summed volume for
#                          classification_uid = 1 is greater than zero.
#   valid_classes        - the four class names Lights, Trucks, Cyclists and
#                          Pedestrians, one per row.
#   intersection_classes - hard-coded (intersection_uid, class_type) pairs that
#                          the final join is restricted to.
#   all_data             - volume summed per intersection, class and bin, with
#                          classification_uid 1 labelled 'Lights', 4 and 5
#                          labelled 'Trucks', and anything else labelled with
#                          the class_type from miovision_api.classifications.
#                          Rows are kept only when the first letter of `leg`
#                          differs from the first letter of `dir`
#                          (presumably to drop one movement direction per leg -
#                          TODO confirm).
# The final SELECT truncates bins to the hour, sums the volume (0 when a valid
# bin has no matching data row) and keeps only hours that contain exactly four
# distinct valid bins.
new_miov_query = """WITH valid_bins AS (
    SELECT intersection_uid,
           datetime_bin
    FROM {voltable}
    WHERE intersection_uid = {intersection_uid}
          AND datetime_bin BETWEEN '2020-03-01' AND '2020-04-30 23:59:59'
    GROUP BY intersection_uid, datetime_bin
    HAVING SUM(
       CASE
           WHEN classification_uid = 1 THEN volume
           ELSE NULL::numeric
       END) > 0::numeric
), valid_classes AS (
    SELECT unnest(x.classes) AS class_type
    FROM ( SELECT ARRAY['Lights'::text, 'Trucks'::text, 'Cyclists'::text, 'Pedestrians'::text] AS classes) x
), intersection_classes(intersection_uid, class_type) AS (
    VALUES (2,'Cyclists'::text), (2,'Pedestrians'::text), (2,'Lights'::text),
           (2,'Trucks'::text), (4,'Cyclists'::text),  (4,'Pedestrians'::text),
           (4,'Lights'::text), (4,'Trucks'::text), (8,'Pedestrians'::text),
           (8,'Lights'::text), (8,'Trucks'::text), (17,'Pedestrians'::text),
           (17,'Lights'::text), (17,'Trucks'::text), (18,'Cyclists'::text),
           (18,'Pedestrians'::text), (18,'Lights'::text), (18,'Trucks'::text),
           (21,'Cyclists'::text), (21,'Pedestrians'::text), (21,'Lights'::text),
           (21,'Trucks'::text), (25,'Cyclists'::text), (25,'Pedestrians'::text),
           (25,'Lights'::text), (25,'Trucks'::text), (26,'Cyclists'::text),
           (26,'Pedestrians'::text), (26,'Lights'::text), (26,'Trucks'::text),
           (28,'Cyclists'::text), (28,'Pedestrians'::text), (28,'Lights'::text),
           (28,'Trucks'::text), (31,'Cyclists'::text), (31,'Pedestrians'::text),
           (31,'Lights'::text), (31,'Trucks'::text)
), all_data AS (
    SELECT a.intersection_uid,
           CASE
               WHEN a.classification_uid = 1 THEN 'Lights'::text
               WHEN a.classification_uid = ANY (ARRAY[4, 5]) THEN 'Trucks'::text
               ELSE b.class_type
           END AS class_type,
           a.datetime_bin,
           sum(a.volume) AS total_volume
    FROM {voltable} a
    JOIN miovision_api.classifications b USING (classification_uid)
    WHERE a.intersection_uid = {intersection_uid}
          AND (b.class_type = ANY (ARRAY['Vehicles'::text, 'Pedestrians'::text, 'Cyclists'::text]))
          AND a.datetime_bin >= '2019-01-01 00:00:00'::timestamp without time zone
          AND "left"(a.leg, 1) <> "left"(a.dir, 1)
    GROUP BY a.intersection_uid, (
        CASE
            WHEN a.classification_uid = 1 THEN 'Lights'::text
            WHEN a.classification_uid = ANY (ARRAY[4, 5]) THEN 'Trucks'::text
            ELSE b.class_type
        END), a.datetime_bin
)
SELECT date_trunc('hour'::text, a.datetime_bin) AS datetime_bin,
       COALESCE(sum(d.total_volume), 0::numeric) AS volume
FROM valid_bins a
CROSS JOIN valid_classes b
JOIN intersection_classes c USING (intersection_uid, class_type)
LEFT JOIN all_data d USING (intersection_uid, datetime_bin, class_type)
WHERE b.class_type = '{class_type}'
GROUP BY a.intersection_uid, b.class_type, (date_trunc('hour'::text, a.datetime_bin))
HAVING count(DISTINCT a.datetime_bin) = 4;
"""

def merge_vol_tables(df_old, df_new):
    """Outer-join two volume tables on ``datetime_bin`` and difference them.

    Parameters
    ----------
    df_old, df_new : pandas.DataFrame
        Frames that each have a ``datetime_bin`` column and a ``volume``
        column.

    Returns
    -------
    tuple
        ``(df_merged, not_in_old, not_in_new, in_both)`` where ``df_merged``
        is indexed and sorted by ``datetime_bin`` with columns ``volume_old``,
        ``volume_new`` and ``vol_difference`` (new minus old, with missing
        volumes counted as 0), and the other three items are boolean Series
        on the same index marking bins absent from the old table, absent from
        the new table, and present in both.
    """
    joined = df_old.merge(df_new, on="datetime_bin", how="outer",
                          suffixes=("_old", "_new"))
    joined = joined.set_index('datetime_bin').sort_index()

    # The membership masks must be taken before the gaps are zero-filled.
    not_in_old = joined['volume_old'].isna()
    not_in_new = joined['volume_new'].isna()
    in_both = ~(not_in_new | not_in_old)

    joined = joined.fillna(0.)
    joined['vol_difference'] = joined['volume_new'] - joined['volume_old']
    return joined, not_in_old, not_in_new, in_both


def get_comparison_plot(intersection_uid=26, class_type='Lights'):
    """Show how rebuilt hourly volumes differ from the backed-up hourly volumes.

    Three queries are run for the given intersection and class: the backup
    table (``old_miov_query``), and ``new_miov_query`` against both
    ``miovision_api.volumes_15min`` and ``miovision_csv.volumes2020_15min``.
    Each new result is merged with the old one via ``merge_vol_tables``.

    Two Plotly figures are then displayed:

    * A time series with four traces:
      - "Valid": new-minus-old difference for hours present in both the old
        table and the CSV-derived table.
      - "Old Only": hours missing from the CSV-derived table, plotted at minus
        the old volume.
      - "New Only": hours missing from the old table, plotted at the new
        volume.
      - "Valid API": new-minus-old difference for hours present in both the
        old table and the API-derived table.
    * A probability-normalised histogram of the "Valid" differences.

    Parameters
    ----------
    intersection_uid : int
        Intersection identifier substituted into the queries.
    class_type : str
        Class name substituted into the queries (e.g. ``'Lights'``).

    Returns
    -------
    None
        The figures are rendered with ``Figure.show()``.
    """
    # NOTE(review): these values are interpolated into the SQL text with
    # str.format, so they must come from a trusted source (here, the
    # dashboard dropdowns).
    query_args = dict(intersection_uid=intersection_uid, class_type=class_type)

    # psycopg2's `with connection:` only commits or rolls back the
    # transaction; it does NOT close the connection. Without the explicit
    # close() every widget interaction would leak a database connection.
    conn = psycopg2.connect(database='bigdata', **postgres_settings)
    try:
        with conn:
            df_old = pd.read_sql(old_miov_query.format(**query_args), con=conn)
            df_new_api = pd.read_sql(
                new_miov_query.format(voltable='miovision_api.volumes_15min',
                                      **query_args),
                con=conn)
            df_new_csv = pd.read_sql(
                new_miov_query.format(voltable='miovision_csv.volumes2020_15min',
                                      **query_args),
                con=conn)
    finally:
        conn.close()

    df_merged_api, _, _, in_both_api = merge_vol_tables(df_old, df_new_api)
    df_merged_csv, not_in_old, not_in_new, in_both_csv = (
        merge_vol_tables(df_old, df_new_csv))

    # Pull each plotted series once instead of repeating the .loc lookup.
    diff_csv = df_merged_csv.loc[in_both_csv, 'vol_difference']
    diff_api = df_merged_api.loc[in_both_api, 'vol_difference']
    old_only = df_merged_csv.loc[not_in_new, 'volume_old']
    new_only = df_merged_csv.loc[not_in_old, 'volume_new']

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=diff_csv.index,
        y=diff_csv,
        mode='lines',
        line=dict(width=6),
        name='Valid'))

    # Hours that exist only in the old table are drawn at -old so that they
    # sit on the same "new - old" axis as the differences.
    fig.add_trace(go.Scatter(
        x=old_only.index,
        y=-old_only,
        mode='markers',
        marker=dict(color='#f79400'),
        name='Old Only'))

    fig.add_trace(go.Scatter(
        x=new_only.index,
        y=new_only,
        mode='markers',
        marker=dict(color='#006130'),
        name='New Only'))

    fig.add_trace(go.Scatter(
        x=diff_api.index,
        y=diff_api,
        mode='lines',
        line=dict(width=2, color='#ff5b45'),
        name='Valid API'))

    fig.update_layout(
        title={
            'text': ("Difference in Volumes for "
                     "intersection_uid = {0}; class = {1}").format(intersection_uid, class_type),
            'font_size': 14
        },
        xaxis_title="Date",
        yaxis_title="New - Old Volume",
        xaxis_rangeslider_visible=True,
        margin=dict(l=40, r=40, t=80, b=40),
    )

    fig2 = go.Figure()

    fig2.add_trace(
        go.Histogram(
            histfunc="count",
            histnorm="probability",
            x=diff_csv.values,
        )
    )

    fig2.update_layout(
        xaxis_title="New - Old Volume",
        yaxis_title="Fraction of Valid Data",
        height=200,
        margin=dict(l=40, r=40, t=40, b=40),
    )

    fig.show()
    fig2.show()

In [120]:
# Distinct intersections present in the backup table; they become the options
# of the intersection dropdown in the dashboard.
sql_query = """SELECT DISTINCT intersection_uid FROM covid.miovision_summary_20200922backup
ORDER BY 1"""

# psycopg2's `with connection:` only ends the transaction and leaves the
# connection open, so close it explicitly once the query has run.
conn = psycopg2.connect(database='bigdata', **postgres_settings)
try:
    with conn:
        df_intnames = pd.read_sql(sql_query, con=conn)
finally:
    conn.close()

# .tolist() yields plain Python ints rather than NumPy scalars.
intnames = df_intnames['intersection_uid'].tolist()

In [121]:
# Build the dashboard: one dropdown per argument of get_comparison_plot.
# The trailing semicolon suppresses the repr of interact's return value.
interact(
    get_comparison_plot,
    intersection_uid=intnames,
    class_type=['Lights', 'Pedestrians', 'Trucks'],
);

interactive(children=(Dropdown(description='intersection_uid', index=7, options=(2, 4, 8, 17, 18, 21, 25, 26, …

This dashboard was used to investigate the validity of the CSV dump data aggregation. The results are written up [in this GitHub comment](https://github.com/CityofToronto/bdit_data-sources/issues/341#issuecomment-713909748).