In [1]:
import requests
import pandas as pd
import numpy as np
import nbconvert
import datetime
import warnings; warnings.simplefilter('ignore')
import plotly.graph_objects as go
from IPython.display import display, Markdown, Image, SVG
import re
import bmondata
from bmondata import Server
from dateutil.relativedelta import relativedelta


In [6]:
# Building title -> sensor ID used for the heat-flow readings of that building.
heat_flow_sensor_dict = {
    "Quinhagak": "quinhagak_hr_heat_rate",
}

# Building title -> sensor ID matched against the building's sensor list as
# its total-heat sensor.
total_heat_sensor_dict = {
    "Quinhagak": "quinhagak_hr_total_heat",
}

In [7]:
# Papermill parameters: override these to run the report for a different
# building / organization.
# building_id is the ID passed to server.buildings() to select one building.
building_id = 32
# server_web_address is the base URL handed to bmondata's Server().
server_web_address = 'http://rm.anthc.webfactional.com'

In [8]:
# Connect to the server and fetch the record for the selected building.
# Despite its name, `building_df` is indexed below as a sequence of building
# records (dicts), not a DataFrame.
server = Server(server_web_address)
building_df = server.buildings(building_id)
current_building_name = building_df[0]['title']
sensors = building_df[0]['sensors']

# Sensor IDs configured for *this* building. They are looked up by the
# building's own title rather than a hard-coded 'Quinhagak' key, so the cell
# follows the `building_id` parameter and agrees with the later
# `heat_flow_sensor_dict[current_building_name]` lookup. `.get` yields None for
# a building with no configured sensor, in which case nothing is matched.
heat_flow_sensor_id = heat_flow_sensor_dict.get(current_building_name)
total_heat_sensor_id = total_heat_sensor_dict.get(current_building_name)

# Each name stays an empty list when no matching sensor record is found,
# otherwise it holds the matching sensor record from `sensors`.
heat_flow_sensor = []
total_heat_sensor = []
for sensor in sensors:
    sensor_id = sensor['sensor_id']
    if heat_flow_sensor_id is not None and sensor_id == heat_flow_sensor_id:
        heat_flow_sensor = sensor
    if total_heat_sensor_id is not None and sensor_id == total_heat_sensor_id:
        total_heat_sensor = sensor


In [9]:
# Fetch the records of every building on the server (no ID filter) and
# tabulate them.
all_buildings = server.buildings()
org_df = pd.DataFrame(data=all_buildings)

In [10]:
# Level-1 markdown heading naming the building this report covers.
title_md = '''# Heat Flow for {} building'''.format(current_building_name)

In [11]:
# Render the report title as markdown; as the cell's last expression it
# becomes the cell output.
Markdown(title_md)

# Heat Flow for Quinhagak building

In [12]:
# Request the last year of readings for this building's heat-flow sensor,
# averaged over 15-minute intervals. The second tuple element is presumably
# the label given to the readings column (later cells read `heat_flow`).
#
# `now()` is captured once so both ends of the window are derived from the
# same instant; calling it twice makes the window slightly longer than a year
# by however long the first call takes to evaluate.
query_end_ts = datetime.datetime.now()
fifteen_min_averages = server.sensor_readings(
    (heat_flow_sensor_dict[current_building_name], 'heat_flow'),
    start_ts=query_end_ts - relativedelta(years=1),
    end_ts=query_end_ts,
    averaging='15min',
)

In [13]:
# Get rid of any potentially erroneous data that is listed as a negative heat flow.
# Only rows with heat_flow >= 0 are kept; rows where heat_flow is NaN fail the
# comparison as well and are therefore dropped too.
fifteen_min_averages = fifteen_min_averages.query("heat_flow >= 0")

In [14]:
# Turn the index into an ordinary column (kept, not dropped) so later cells
# can rename it and filter on it.
fifteen_min_averages = fifteen_min_averages.reset_index(drop=False)

In [15]:
# Give the former index column a descriptive name.
# NOTE(review): this relies on the column being called 'index', i.e. on the
# original index having been unnamed - confirm against the server's output.
fifteen_min_averages = fifteen_min_averages.rename({'index': 'datetime_col'}, axis='columns')

In [16]:
# Day of the week for every reading (pandas convention: Monday=0 ... Sunday=6),
# computed with the vectorised .dt accessor instead of a per-row lambda.
# Assumes datetime_col has a datetime64 dtype, as the later timestamp
# comparisons on this column also require.
fifteen_min_averages['dayofweek'] = fifteen_min_averages['datetime_col'].dt.dayofweek

In [17]:
# Calendar date (datetime.date objects) of every reading, computed with the
# vectorised .dt accessor instead of a per-row lambda.
# Assumes datetime_col has a datetime64 dtype.
fifteen_min_averages['date'] = fifteen_min_averages['datetime_col'].dt.date

In [18]:
# Time of day (datetime.time objects) of every reading, computed with the
# vectorised .dt accessor instead of a per-row lambda. Together with
# 'dayofweek' this is the key used for the historical averages.
# Assumes datetime_col has a datetime64 dtype.
fifteen_min_averages['time'] = fifteen_min_averages['datetime_col'].dt.time

In [19]:
# The "current week" begins seven days before the date of the last row.
latest_reading_date = fifteen_min_averages['date'].iloc[-1]
current_week_start = latest_reading_date - pd.offsets.Day(7)

In [20]:
# Create dataframes for each of the last three weeks for graphing comparison purposes.
def _week_slice(readings, week_start, week_end):
    """Return the rows of `readings` with week_start <= datetime_col < week_end.

    An explicit copy is returned because a later cell adds an
    'adjusted_datetime_col' column to each weekly frame; writing a column into
    a filtered slice of another frame is what raises SettingWithCopyWarning.
    """
    in_week = (readings['datetime_col'] >= week_start) & (readings['datetime_col'] < week_end)
    return readings.loc[in_week].copy()


# Each week starts seven days before the following one and ends (exclusive)
# where the following one starts.
last_week_start = current_week_start - pd.offsets.Day(7)
last_week_df = _week_slice(fifteen_min_averages, last_week_start, current_week_start)

two_weeks_ago_start = last_week_start - pd.offsets.Day(7)
two_weeks_ago_df = _week_slice(fifteen_min_averages, two_weeks_ago_start, last_week_start)

three_weeks_ago_start = two_weeks_ago_start - pd.offsets.Day(7)
three_weeks_ago_df = _week_slice(fifteen_min_averages, three_weeks_ago_start, two_weeks_ago_start)

In [21]:
# Adjust the datetime column so the visualization will have the heat flow data overlaying the current week.
# Each past week is shifted forward by its age (7, 14 or 21 days). The offset
# is added to the whole column at once, and `assign` builds a new frame
# instead of writing a column into what may be a filtered slice of
# fifteen_min_averages.
last_week_df = last_week_df.assign(
    adjusted_datetime_col=last_week_df['datetime_col'] + pd.offsets.Day(7))
two_weeks_ago_df = two_weeks_ago_df.assign(
    adjusted_datetime_col=two_weeks_ago_df['datetime_col'] + pd.offsets.Day(14))
three_weeks_ago_df = three_weeks_ago_df.assign(
    adjusted_datetime_col=three_weeks_ago_df['datetime_col'] + pd.offsets.Day(21))

In [22]:
# Readings at or after the start of the current week.
is_current_week = fifteen_min_averages['datetime_col'] >= current_week_start
current_week = fifteen_min_averages[is_current_week]

In [23]:
# Everything strictly before the current week: the baseline for the
# historical averages.
is_before_current_week = fifteen_min_averages['datetime_col'] < current_week_start
historical_complete = fifteen_min_averages[is_before_current_week]

In [24]:
# Historical average for every (day of week, time of day) slot.
# numeric_only=True restricts the mean to numeric columns: the frame also
# carries a 'date' column of datetime.date objects, which cannot be averaged
# and makes a plain .mean() raise TypeError on pandas 2.x (older versions
# silently dropped such columns).
day_avgs = (
    historical_complete
    .groupby(['dayofweek', 'time'])
    .mean(numeric_only=True)
    .reset_index()
)

In [25]:
# Attach the historical slot average to every current-week reading. Columns
# present on both sides keep their name for the current week and get the
# '_historical_avg' suffix for the averages.
current_week_w_day_avgs = current_week.merge(
    day_avgs,
    how='left',
    on=['dayofweek', 'time'],
    suffixes=('', '_historical_avg'),
)

In [28]:
# Positive values mean the current week ran above the historical average for
# that slot.
heat_flow_gap = (
    current_week_w_day_avgs['heat_flow']
    - current_week_w_day_avgs['heat_flow_historical_avg']
)
current_week_w_day_avgs['current_use_vs_historical_difference'] = heat_flow_gap

In [29]:
# Anomaly threshold: the mean of this week's differences from the historical
# average plus three of their standard deviations.
weekly_gap = current_week_w_day_avgs['current_use_vs_historical_difference']
three_std = weekly_gap.mean() + weekly_gap.std() * 3

In [30]:
# Keep the heat flow value where the difference exceeds the threshold and NaN
# everywhere else.
# Known limitation (from the original author): the graph looks odd unless one
# data point on either side is included to connect the anomalous readings.
exceeds_threshold = current_week_w_day_avgs['current_use_vs_historical_difference'] > three_std
current_week_w_day_avgs['heat_flow_anomalies'] = (
    current_week_w_day_avgs['heat_flow'].where(exceeds_threshold)
)

In [31]:
# Rows flagged as anomalous, i.e. where heat_flow_anomalies is not NaN.
# notna() states that directly (the previous "x == x" query relied on NaN not
# comparing equal to itself), and the explicit copy makes anomaly_df an
# independent frame, since later cells add columns to it.
anomaly_df = current_week_w_day_avgs.loc[
    current_week_w_day_avgs['heat_flow_anomalies'].notna()
].copy()

In [32]:
# First-order differences between consecutive index labels of the anomalous
# rows (one element fewer than there are rows).
anomaly_index_values = anomaly_df.index.to_numpy()
diff_series = np.diff(anomaly_index_values)

In [33]:
# Gap between each anomalous row's index label and the previous one; the
# first anomaly has no predecessor, so its gap is 0.
#
# The gaps are recomputed from the index, prepending the first label so the
# leading difference is 0, instead of inserting a 0 into the existing array:
#   * with no anomalies the result is empty and matches the empty frame
#     (inserting a 0 produced a length-1 array and a length-mismatch error);
#   * re-running the cell no longer prepends an additional 0 each time.
anomaly_positions = anomaly_df.index.to_numpy()
diff_series = np.diff(anomaly_positions, prepend=anomaly_positions[:1])
anomaly_df['diff_series'] = diff_series

In [34]:
# This padding is a cut-off used to group together time series indices that are close
# enough to still be considered a group (e.g. there might be an anomaly at one time,
# followed by another anomalous reading 45 minutes later, which should probably all just
# be considered the same group)
index_padding = 4

# A row opens a new group when its gap to the previous anomalous row exceeds
# the padding; the running count of such rows is the group number, so the
# first group is 0. Unlike the row-by-row loop this always creates the
# 'group_number' column, even when there are no anomalies at all, so the
# following groupby no longer fails with a KeyError in that case.
# The column holds integers here (the loop produced the same numbers).
starts_new_group = anomaly_df['diff_series'] > index_padding
anomaly_df['group_number'] = starts_new_group.cumsum()

# This keeps track of the number of groups of distinct anomalous periods in the data
# (it ends up as the highest group number, or 0 when there are no anomalies).
group_counter = int(anomaly_df['group_number'].max()) if len(anomaly_df) else 0

In [36]:
# Per anomaly group: the latest/earliest timestamp and the highest/lowest heat flow.
anomalies_by_group = anomaly_df.groupby(['group_number'])
summary_columns = ['datetime_col', 'heat_flow']
anomaly_max = anomalies_by_group.max()[summary_columns]
anomaly_min = anomalies_by_group.min()[summary_columns]

In [37]:
# One row per anomaly group with the '_max' and '_min' columns side by side,
# aligned on the group number index.
anomaly_groups = anomaly_max.merge(
    anomaly_min,
    how='outer',
    left_index=True,
    right_index=True,
    suffixes=('_max', '_min'),
)

In [38]:
# Layout decorations for the "vs. historical average" figure:
#   anomaly_rectangles - one translucent red rectangle per anomaly group
#   image_list         - an icon shown when at least one anomaly exists
# Both stay empty when there is no anomaly group with a start time.
anomaly_rectangles = []
image_list = []

if not anomaly_groups.datetime_col_min.isna().all():
    for idx, row in anomaly_groups.iterrows():

        anomaly_start = row['datetime_col_min']
        anomaly_end = row['datetime_col_max']
        # Not used by the layout below (the icon is pinned in paper
        # coordinates); kept so the name remains available.
        anomaly_midpoint = ((anomaly_end - anomaly_start) / 2) + anomaly_start

        # x is in data coordinates (the anomaly's time span), y spans the
        # full height of the plotting area ('paper' coordinates 0..1).
        anomaly_rectangles.append({'type': 'rect',
                                   'xref': 'x',
                                   'yref': 'paper',
                                   'x0': anomaly_start,
                                   'y0': 0,
                                   'x1': anomaly_end,
                                   'y1': 1,
                                   'fillcolor': ('rgb(205, 12, 24)'),
                                   'opacity': 0.5,
                                   'line': {
                                       'width': 1,
                                   }
                                   })

    # The icon sits at a fixed paper position, so it is added once rather
    # than once per group (which stacked identical images on top of each
    # other). The source points at the raw file: the github.com/.../blob/...
    # address serves an HTML page, not the PNG itself.
    image_list.append({'source': 'https://raw.githubusercontent.com/dustin-cchrc/cchrc_python_for_non_programmers/master/energy_savings_icon.png',
                       'xref': "paper",
                       'yref': "paper",
                       'x': 1.0,
                       'y': 0.9,
                       'sizex': 0.2,
                       'sizey': 0.2,
                       'xanchor': "right",
                       'yanchor': "bottom"})

In [40]:
# Figure 1: the current week against each of the three preceding weeks.
# The earlier weeks are plotted on their shifted timestamps so that all four
# traces share the current week's time axis.

# Solid line: the current week.
this_week = go.Scatter(
    x=current_week_w_day_avgs['datetime_col'],
    y=current_week_w_day_avgs['heat_flow'],
    line={'color': '#ca0020'},
    name='Current week Heat Flow',
)

# Dash-dotted lines in progressively lighter blues: one, two and three weeks back.
last_week = go.Scatter(
    x=last_week_df['adjusted_datetime_col'],
    y=last_week_df['heat_flow'],
    line={'dash': 'dashdot', 'color': '#3182bd'},
    name="Heat Flow last week",
)

two_weeks_ago = go.Scatter(
    x=two_weeks_ago_df['adjusted_datetime_col'],
    y=two_weeks_ago_df['heat_flow'],
    line={'dash': 'dashdot', 'color': '#6baed6'},
    name="Heat Flow 2 weeks ago",
)

three_weeks_ago = go.Scatter(
    x=three_weeks_ago_df['adjusted_datetime_col'],
    y=three_weeks_ago_df['heat_flow'],
    line={'dash': 'dashdot', 'color': '#bdd7e7'},
    name="Heat Flow 3 weeks ago",
)

layout = {
    'title': 'Heat Flow: Current vs. Recent Weeks',
    'xaxis': {'title': 'Date and Time'},
    'yaxis': {'title': 'Heat Flow (BTU, 15 minute average)'},
}

data = [this_week, last_week, two_weeks_ago, three_weeks_ago]

fig = go.Figure(data=data, layout=layout)

fig.show()

In [41]:
# Figure 2: the current week against the historical average for the same
# day-of-week / time-of-day slots, with the anomaly rectangles and icon
# overlaid through the layout.

# Solid line: the current week.
this_week = go.Scatter(
    x=current_week_w_day_avgs['datetime_col'],
    y=current_week_w_day_avgs['heat_flow'],
    line={'color': ('rgb(22, 96, 167)')},
    name='Current Week Heat Flow',
)

# Same colour, dash-dotted and slightly transparent: the historical average.
historical_avg = go.Scatter(
    x=current_week_w_day_avgs['datetime_col'],
    y=current_week_w_day_avgs['heat_flow_historical_avg'],
    line={'dash': 'dashdot', 'color': ('rgb(22, 96, 167)')},
    opacity=0.6,
    name='Historical Average Heat Flow',
)

layout = {
    'title': 'Current Heat Flow vs. Historical Average',
    'xaxis': {'title': 'Date and Time'},
    'yaxis': {'title': 'Heat Flow (BTU, 15 minute average)'},
    'images': image_list,
    'shapes': anomaly_rectangles,
}

data = [this_week, historical_avg]

fig = go.Figure(data=data, layout=layout)

fig.show()

In [42]:
# Closing message: green when no anomaly group has a start time, red otherwise.
no_anomalies_found = anomaly_groups['datetime_col_min'].isna().all()
md_results = (
    '''#### <font color='green'>There were no periods of extreme heat flow this week-- keep up the good work!</font>'''
    if no_anomalies_found
    else '''#### <font color='red'>The periods highlighted in red signify much higher heat flow than normal; try to identify what happened and how to prevent it in the future.</font>'''
)

In [43]:
# Render the closing message as markdown; as the cell's last expression it
# becomes the cell output.
Markdown(md_results)

#### <font color='red'>The periods highlighted in red signify much higher heat flow than normal; try to identify what happened and how to prevent it in the future.</font>