In [None]:
import os
import sys
sys.path.append("../")

import pandas as pd
import numpy as np
import datetime as dt

from src.utils.vault import get_secrets
from pyathena import connect

In [None]:
# Athena settings are fetched through the project's get_secrets helper, so no
# credential values are written in the notebook itself.
athena_config = get_secrets("athena")

# Map each pyathena `connect` keyword to the secret entry that supplies it.
# NOTE(review): the last two secret keys begin with a space; confirm they
# match the stored entry names exactly before "tidying" them.
athena_ctx = connect(
    **{
        kwarg: athena_config[secret_key]
        for kwarg, secret_key in (
            ("aws_access_key_id", "gateway_aws_iam_access_id"),
            ("aws_secret_access_key", "gateway_aws_iam_access_key"),
            ("s3_staging_dir", " S3 Output Location"),
            ("region_name", " AWS Region"),
        )
    }
)

In [None]:
# How many of the highest and of the lowest readings to keep per
# box / channel / year / week group in the query below.
n_extremes = 5

# Manual override, presumably for a first run when ../data/raw holds no
# earlier extract yet (uncomment and skip the read_parquet line):
# last_processed = "2000-1-1 15:35:04.518 UTC"

# Most recent `processed_on` stamp among the rows already stored on disk.
# The column is selected by label: the previous `.max()[0]` relied on
# positional fallback on a label-indexed Series, which pandas has deprecated.
last_processed = pd.read_parquet("../data/raw", columns=["processed_on"])["processed_on"].max()
# The stamp is parsed as text of the form "YYYY-MM-DD HH:MM:SS.fff UTC";
# only its calendar date is kept.
last_processed = dt.datetime.strptime(last_processed, "%Y-%m-%d %H:%M:%S.%f %Z").date()
# Rewind to the Monday of that week (date.weekday() is 0 for Monday), so the
# query restarts on a week boundary.
from_date = last_processed - dt.timedelta(days=last_processed.weekday())

# SQL LIKE pattern selecting the channel(s) to scan. In LIKE, `_` matches any
# single character and `%` any run of characters; `?` is a literal.
channel_like = "register://electricity/0/activepower/sumli?avg=15"
# Alternative pattern using the single-character `_` wildcard:
# channel_like = "register://electricity/0/activepower/l_?avg=15"

# Query layout:
#   t1 - readings matching `channel_like` inside the time window, labelled
#        with their ISO year (YEAR_OF_WEEK) and week number (WEEK).
#   t2 - t1 plus two ranks per (boxid, channelid, year, week): `top` counts
#        from the largest value (1, 2, ...), `bottom` from the smallest and is
#        negated (-1, -2, ...); every row is stamped with the query time.
#   outer filter - keeps the `n_extremes` largest and smallest rows per group.
#
# Time window: [from_date, Monday of the current week). DAY_OF_WEEK is ISO
# (1 = Monday ... 7 = Sunday), so `1 - DAY_OF_WEEK(CURRENT_DATE)` steps back to
# this week's Monday. Using `-DAY_OF_WEEK(...)` instead would stop at the
# previous Sunday 00:00 and silently drop every Sunday from the extract.
query = f"""
SELECT *
FROM
(
    SELECT
        t1.boxid,
        t1.channelid,
        t1.value,
        t1.year,
        t1.week,
        ROW_NUMBER() OVER (PARTITION BY (t1.boxid, t1.channelid, t1.year, t1.week) ORDER BY (t1.value) DESC) AS top,
        -ROW_NUMBER() OVER (PARTITION BY (t1.boxid, t1.channelid, t1.year, t1.week) ORDER BY (t1.value) ASC) AS bottom,
        CURRENT_TIMESTAMP AS processed_on 
     FROM (
        SELECT 
            t0.boxid AS boxid,
            t0.channelid AS channelid,
            t0.value AS value,
            YEAR_OF_WEEK(t0.timestamp) AS year,
            WEEK(t0.timestamp) AS week
        FROM "enxp401-src-dali".enxp401_src_dali_readings t0
        WHERE t0.channelid LIKE '{channel_like}'
         AND t0.timestamp >= DATE('{from_date}')
         AND t0.timestamp < DATE_ADD('DAY', 1 - DAY_OF_WEEK(CURRENT_DATE), CURRENT_DATE)
--         AND t0.boxid IN ('075.547-1', '069.509-1')
--         LIMIT 100
         ) t1
) t2
WHERE t2.top <= {n_extremes} OR t2.bottom >=-{n_extremes}
"""

In [None]:
%%time
df_query = pd.read_sql(sql=query, con=athena_ctx)

In [None]:
# Display the query result as the cell output; the trailing comment keeps an
# optional filter/sort that can be re-enabled for a closer look.
df_query #.query("year == 2020").sort_values(["year", "week"])

In [None]:
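# Persistence step, deliberately left disabled: uncommenting it writes df_query
# to ../data/raw/ as a parquet dataset partitioned by year and week.
# df_query.to_parquet("../data/raw/", partition_cols=["year", "week"])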

In [None]:
# Read back everything stored under ../data/raw and show it as the cell output.
pd.read_parquet("../data/raw")

In [None]:
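# Scratch SQL kept for reference only: every statement in this cell is
# commented out and nothing here is executed by the notebook.
# SELECT t.*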
# FROM "enxp401-src-dali".enxp401_src_dali_readings t
# WHERE t.channelid LIKE 'register://electricity/0/activepower/sumli?avg=15'
# AND YEAR_OF_WEEK(t.timestamp) < 2019


# SELECT t0.boxid AS boxid,
#     t0.value AS value,
#     YEAR_OF_WEEK(t0.timestamp) AS year,
#     WEEK(t0.timestamp) AS week
# FROM "enxp401-src-dali".enxp401_src_dali_readings t0
# WHERE t0.channelid like 'register://electricity/0/activepower/sumli?avg=15'
#  AND t0.timestamp >= DATE('2021-05-05')
# LIMIT 5


# SELECT DATEPART('iso_week', CURRENT_DATE)

# SELECT DATE_ADD('DAY', -DAY_OF_WEEK(CURRENT_DATE), CURRENT_DATE)



# SELECT *
# FROM
# (
#     SELECT t1.boxid,
#         t1.value,
#         t1.year,
#         t1.week,
#         ROW_NUMBER()  OVER (
#         PARTITION BY (t1.year, t1.week)
#         ORDER BY (t1.value) DESC
#         ) as top,
#         -ROW_NUMBER() OVER (
#         PARTITION BY (t1.year, t1.week)
#         ORDER BY (t1.value) ASC
#         ) as bottom
#      FROM (
#         SELECT t0.boxid AS boxid,
#             t0.value AS value,
#             YEAR(t0.timestamp) AS year,
#             WEEK(t0.timestamp) AS week
#         FROM
#             "enxp401-src-dali".enxp401_src_dali_readings t0
#         WHERE TRUE
#              AND t0.channelid like 'register://electricity/0/activepower/sumli?avg=15'
# --              AND t0.boxid IN ('075.547-1', '069.509-1')
# -- --         LIMIT 100
#          ) t1
# ) t2
# WHERE t2.top <= 5 OR t2.bottom >=-5



#     SELECT t1.boxid AS boxid,
#         YEAR(t1.timestamp) AS year,
#         WEEK(t1.timestamp) AS week,
# --         MAX (ABS(t1.value)) AS S_max
#         ROW_NUMBER() OVER (ORDER BY (YEAR(t1.timestamp), WEEK(t1.timestamp))) as Top
#     FROM "enxp401-src-dali".enxp401_src_dali_readings t1
#     WHERE TRUE
#       AND t1.channelid like 'register://electricity/0/activepower/sumli?avg=15'
#       AND t1.boxid IN ('075.547-1', '069.509-1')
#     GROUP BY t1.boxid, YEAR(t1.timestamp), WEEK(t1.timestamp)




# SELECT
#  t1.boxid AS boxid,
#  YEAR(t1.timestamp) AS year,
#  WEEK(t1.timestamp) AS week,
#  MAX(ABS(t1.value)) AS S_max
# FROM "enxp401-src-dali".enxp401_src_dali_readings t1
# WHERE TRUE
#  AND t1.channelid like 'register://electricity/0/activepower/sumli?avg=15'
#  AND t1.boxid IN ('075.547-1', '069.509-1')
# GROUP BY t1.boxid, YEAR(t1.timestamp), WEEK(t1.timestamp)

