In [None]:
import os
import sys
sys.path.append("../")

import pandas as pd
import numpy as np
import datetime as dt
import re
import altair as alt

from src.utils.parser import parse_config
from src.utils.vault import get_secrets
from src.utils.processing import downcast
import snowflake.connector

# Locate the project settings file relative to the notebook's working directory
# and load it once; later cells read table locations from `config`.
settings_path = os.path.abspath(os.path.join(os.getcwd(), "../src/settings.yml"))
config = parse_config(settings_path)

In [None]:
# Start from whatever `get_secrets` returns for Snowflake (presumably the
# connection credentials -- confirm in src.utils.vault) and merge the
# data-table settings from the config file into that mapping in place.
data_config = get_secrets("snowflake")
snowflake_settings = config["snowflake"]
data_config.update(snowflake_settings["data"])

# Location (database / schema / table) of the box metadata table.
meta_config = snowflake_settings["meta_box"]

# One connection is opened here and reused by every query cell below.
snowflake_ctx = snowflake.connector.connect(**data_config)

In [None]:
%%time
vestiging = "Breda"

query = f"""
SELECT t1.BOXID AS boxid
FROM {meta_config["database"]}.{meta_config["schema"]}.{meta_config["table"]} t1
WHERE t1.VESTIGING LIKE '{vestiging}'
"""

df_box = pd.read_sql(sql=query, con=snowflake_ctx)

In [None]:
%%time
ids_breda = df_box.squeeze().to_list()[:3]

query = f"""
SELECT
    t0.BOXID AS boxid,
--    t0.CHANNELID AS channelid,
    YEAROFWEEKISO(t0.DATUMTIJD) AS year,
    WEEKISO(t0.DATUMTIJD) AS week,
    COUNT(DISTINCT t0.DATUMTIJD)/(4*24*7) AS COMPLETENESS
FROM {data_config["database"]}.{data_config["schema"]}.{data_config["table"]} t0
WHERE t0.CHANNELID LIKE 'register://electricity/0/activepower/sumli?avg=15'
  AND t0.DATUMTIJD < DATEADD(DAY, -DAYOFWEEKISO(CURRENT_DATE), CURRENT_DATE)
  AND t0.BOXID IN ({",".join([f"'{id}'" for id in ids_breda])})
GROUP BY t0.BOXID, t0.CHANNELID, YEAROFWEEKISO(t0.DATUMTIJD), WEEKISO(t0.DATUMTIJD)
"""

df_query = (
    pd.read_sql(sql=query, con=snowflake_ctx)
    .apply(downcast, try_numeric=True, category=True)
    .sort_values(["YEAR", "WEEK"])
)
df_query

In [None]:
%%time
data_config = config["snowflake"]["data"]

query = f"""
SELECT 
    t1.*
FROM
    (
    SELECT
        t0.BOXID AS boxid,
    --    t0.CHANNELID AS channelid,
        YEAROFWEEKISO(t0.DATUMTIJD) AS year,
        WEEKISO(t0.DATUMTIJD) AS week,
        COUNT(DISTINCT t0.DATUMTIJD)/(4*24*7) AS COMPLETENESS
    FROM {data_config["database"]}.{data_config["schema"]}.{data_config["table"]} t0
    WHERE t0.CHANNELID LIKE 'register://electricity/0/activepower/sumli?avg=15'
      AND t0.DATUMTIJD < DATEADD(DAY, -DAYOFWEEKISO(CURRENT_DATE), CURRENT_DATE)
    GROUP BY t0.BOXID, t0.CHANNELID, YEAROFWEEKISO(t0.DATUMTIJD), WEEKISO(t0.DATUMTIJD)
    ) t1
INNER JOIN
(
    SELECT t2.BOXID AS boxid
    FROM {meta_config["database"]}.{meta_config["schema"]}.{meta_config["table"]} t2
    WHERE t2.VESTIGING LIKE '{vestiging}'
) t3
ON t1.BOXID = t3.BOXID
"""

df_query = (
    pd.read_sql(sql=query, con=snowflake_ctx)
    .apply(downcast, try_numeric=True, category=True)
    .sort_values(["YEAR", "WEEK"])
)
df_query

In [None]:
# Work on a copy so that columns added to `df` do not alter the query result
# kept in `df_query`.
df = df_query.copy()
df

In [None]:
# Build a "YYYY - WW" label per row for the ordinal x-axis of the heatmap.
# The week number is zero-padded so the labels sort chronologically as text.
# Iterating the two columns directly (instead of a row-wise `apply`) also works
# on an empty frame, where `apply(axis=1)` would return a DataFrame rather than
# a column of labels.
df["DATE"] = [
    f"{year} - {week:02}"
    for year, week in zip(df["YEAR"], df["WEEK"])
]
df

In [None]:
# Heatmap of weekly completeness: one row per box, one column per ISO week,
# colour encodes the completeness value.
# (A reversed log colour scale was tried earlier and is not in use.)
completeness_colour = alt.Color(
    "COMPLETENESS:Q",
    scale=alt.Scale(scheme="greenblue"),
)

completeness_heatmap = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X("DATE:O", title="date"),
        y=alt.Y("BOXID:O", title="box-ID"),
        color=completeness_colour,
    )
    .properties(width=800, height=100)
)
completeness_heatmap