# Spark, FiloDB and Flint POC.
## Separate Metadata and history approach

In [16]:
from au.com.gegroup.ts.graphframes_reader import GraphFramesReader
reader = GraphFramesReader(sqlContext, "iot_history_metaless", "iotDF", "siteRef = 'Site'")
reader.get_df().show()

+-------------+--------------+-------------------+----------+-------+---------+-----+
|    pointName|      equipRef|           datetime|  levelRef|siteRef|yearMonth|value|
+-------------+--------------+-------------------+----------+-------+---------+-----+
|Chiller-2_STS|Site Chiller 2|1507046744000000000|Site Plant|   Site|  2017-10|  0.0|
|Chiller-2_ENB|Site Chiller 2|1507050417000000000|Site Plant|   Site|  2017-10|  0.0|
|   CDWP-2_ENB|   Site CDWP 2|1507050476000000000|Site Plant|   Site|  2017-10|  0.0|
|   CDWP-2_STS|   Site CDWP 2|1507050489000000000|Site Plant|   Site|  2017-10|  0.0|
|   CHWP-2_ENB|   Site CHWP 2|1507050535000000000|Site Plant|   Site|  2017-10|  0.0|
|   CHWP-2_STS|   Site CHWP 2|1507050551000000000|Site Plant|   Site|  2017-10|  0.0|
| Boiler-1_STS| Site Boiler 1|1507544565000000000|Site Plant|   Site|  2017-10|  0.0|
|    HWP-8_STS|    Site HWP 8|1507544634000000000|Site Plant|   Site|  2017-10|  0.0|
| Boiler-2_STS| Site Boiler 2|1507545158000000000|Site

In [17]:
from au.com.gegroup.ts.datetime.utils import *
import pandas as pd
# todo different options for reading history like yesterday(), currentMonth(), last("24h"), etc
swt_ts = reader.metadata("supply and water and temp and sensor and equipRef != 'Site Chiller Plant'").history(last(pd.Timedelta("90 days"))).read()
lwt_ts = reader.metadata("return and water and temp and sensor and equipRef != 'Site Chiller Plant'").history(last(pd.Timedelta("90 days"))).read()

In [18]:
swt_ts.show()

+-------------------+------------------+---------------+------------------+----------+-------+
|               time|             value|      pointName|          equipRef|  levelRef|siteRef|
+-------------------+------------------+---------------+------------------+----------+-------+
|1507679100000000000| 24.50189971923828|   Boiler-1_LWT|     Site Boiler 1|Site Plant|   Site|
|1507679100000000000| 24.92799949645996|   Boiler-2_LWT|     Site Boiler 2|Site Plant|   Site|
|1507679700000000000|20.811500549316406|Tenant-CT-2_LWT|  Site Tenant-CT-2|Site Plant|   Site|
|1507679880000000000|22.670900344848633|        CT1_LWT|Site Chiller Plant|Site Plant|   Site|
|1507679880000000000|22.670900344848633|        CT2_LWT|Site Chiller Plant|Site Plant|   Site|
|1507679880000000000|22.670900344848633| Chiller-1_ECWT|Site Chiller Plant|Site Plant|   Site|
|1507679880000000000|15.946599960327148|Chiller-1_LCHWT|    Site Chiller 1|Site Plant|   Site|
|1507679880000000000|22.670900344848633| Chiller-2

In [19]:
lwt_ts.show()

+-------------------+------------------+---------------+------------------+----------+-------+
|               time|             value|      pointName|          equipRef|  levelRef|siteRef|
+-------------------+------------------+---------------+------------------+----------+-------+
|1507679100000000000|27.546600341796875|Tenant-CT-1_EWT|  Site Tenant-CT-1|Site Plant|   Site|
|1507679100000000000|20.219100952148438|Tenant-CT-2_EWT|  Site Tenant-CT-2|Site Plant|   Site|
|1507679880000000000| 25.53059959411621|        CT2_EWT|Site Chiller Plant|Site Plant|   Site|
|1507679880000000000|14.758399963378906|Chiller-1_ECHWT|    Site Chiller 1|Site Plant|   Site|
|1507679880000000000| 25.53059959411621| Chiller-1_LCWT|Site Chiller Plant|Site Plant|   Site|
|1507679880000000000|22.425500869750977|Chiller-2_ECHWT|    Site Chiller 2|Site Plant|   Site|
|1507679880000000000| 25.53059959411621| Chiller-2_LCWT|Site Chiller Plant|Site Plant|   Site|
|1507679880000000000|18.954700469970703|Chiller-3_

In [20]:
from au.com.gegroup.ts.spark.utils import join_keys
# keys = ["equipRef", "levelRef"]
joined_ts = lwt_ts.leftJoin(swt_ts, tolerance='1M', key=join_keys(), left_alias="lwt", right_alias="swt")
joined_ts.show()

+-------------------+------------------+---------------+------------------+----------+-------+------------------+---------------+
|               time|         lwt_value|  lwt_pointName|          equipRef|  levelRef|siteRef|         swt_value|  swt_pointName|
+-------------------+------------------+---------------+------------------+----------+-------+------------------+---------------+
|1507679100000000000|27.546600341796875|Tenant-CT-1_EWT|  Site Tenant-CT-1|Site Plant|   Site|              null|           null|
|1507679100000000000|20.219100952148438|Tenant-CT-2_EWT|  Site Tenant-CT-2|Site Plant|   Site|              null|           null|
|1507679880000000000| 25.53059959411621|        CT2_EWT|Site Chiller Plant|Site Plant|   Site|22.670900344848633| Chiller-3_ECWT|
|1507679880000000000|14.758399963378906|Chiller-1_ECHWT|    Site Chiller 1|Site Plant|   Site|15.946599960327148|Chiller-1_LCHWT|
|1507679880000000000| 25.53059959411621| Chiller-1_LCWT|Site Chiller Plant|Site Plant|   S

In [21]:
from au.com.gegroup.ts.spark.utils import *
filtered_df = joined_ts.filter(filter_null("lwt", "swt"))

In [10]:
from pyspark.sql.functions import udf, col
# user's rule to calculate deviation

def delta(firstValue, secondValue):
    return firstValue - secondValue
    
# spark's udf calling user's rule
delta_udf = udf(lambda lwt_val, swt_val: delta(lwt_val, swt_val))

rule_df = filtered_df.withColumn("delta", delta_udf(get_value_col("lwt"), get_value_col("swt")))
rule_df.show()

+-------------------+------------------+---------------+------------------+----------+-------+------------------+---------------+-------------------+
|               time|         lwt_value|  lwt_pointName|          equipRef|  levelRef|siteRef|         swt_value|  swt_pointName|              delta|
+-------------------+------------------+---------------+------------------+----------+-------+------------------+---------------+-------------------+
|1507679880000000000| 25.53059959411621|        CT2_EWT|Site Chiller Plant|Site Plant|   Site|22.670900344848633| Chiller-3_ECWT|  2.859699249267578|
|1507679880000000000|14.758399963378906|Chiller-1_ECHWT|    Site Chiller 1|Site Plant|   Site|15.946599960327148|Chiller-1_LCHWT|-1.1881999969482422|
|1507679880000000000| 25.53059959411621| Chiller-1_LCWT|Site Chiller Plant|Site Plant|   Site|22.670900344848633| Chiller-3_ECWT|  2.859699249267578|
|1507679880000000000|22.425500869750977|Chiller-2_ECHWT|    Site Chiller 2|Site Plant|   Site|  22.9

In [11]:
rule_df.createOrReplaceTempView("ruleDf")

In [12]:
sqlContext.sql("select equipRef, min(delta), max(delta) from ruleDf group by equipRef order by equipRef ASC").show()

+------------------+--------------------+-------------------+
|          equipRef|          min(delta)|         max(delta)|
+------------------+--------------------+-------------------+
|    Site Chiller 1|-0.00629997253417...|  7.227100372314453|
|    Site Chiller 2| -0.4890003204345703|-0.8651008605957031|
|    Site Chiller 3|  0.4233999252319336|  9.285099029541016|
|Site Chiller Plant| 0.04319953918457031|  7.701999664306641|
|  Site Tenant-CT-2|-0.01070022583007...|0.09740066528320312|
+------------------+--------------------+-------------------+



In [13]:
from au.com.gegroup.ts.writer import Writer
# Writer(dataframe, dataset, row_keys)
writer = Writer(rule_df, "metaless_chiller_delta_oct2017", "time,swt_pointName,lwt_pointName,equipRef")
cols = ["time", "equipRef", "swt_pointName", "lwt_pointName", "swt_value", "lwt_value", "delta"]
writer.cols(cols).mode("overwrite").write()

In [15]:
delta_reader = GraphFramesReader(sqlContext, "metaless_chiller_delta_oct2017", "deltaDF")
delta_reader.get_df().show()

+------------------+------------------+------------------+-------------------+-------------------+---------------+---------------+
|          equipRef|         swt_value|         lwt_value|              delta|               time|  swt_pointName|  lwt_pointName|
+------------------+------------------+------------------+-------------------+-------------------+---------------+---------------+
|    Site Chiller 1|15.946599960327148|14.758399963378906|-1.1881999969482422|1507679880000000000|Chiller-1_LCHWT|Chiller-1_ECHWT|
|    Site Chiller 2|  22.9867000579834|22.425500869750977|-0.5611991882324219|1507679880000000000|Chiller-2_LCHWT|Chiller-2_ECHWT|
|Site Chiller Plant|22.670900344848633| 25.53059959411621|  2.859699249267578|1507679880000000000| Chiller-3_ECWT|        CT2_EWT|
|Site Chiller Plant|22.670900344848633| 25.53059959411621|  2.859699249267578|1507679880000000000| Chiller-3_ECWT| Chiller-1_LCWT|
|Site Chiller Plant|22.670900344848633| 25.53059959411621|  2.859699249267578|15076