# eLog Visualisations
In this notebook the first functions to manipulate Bokeh


In [49]:
import numpy as np
import pandas as pd

from bokeh.plotting import figure, show, output_file
from bokeh.io import show
from bokeh.layouts import column, row, gridplot
from collections import OrderedDict
from bokeh.models.markers import Triangle

from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    BasicTicker,
    PrintfTickFormatter,
    ColorBar,
    BoxAnnotation,
    Band,
    LogColorMapper,
    FuncTickFormatter,
    PrintfTickFormatter,
    NumeralTickFormatter
)


from math import pi
import sys
import warnings
warnings.filterwarnings('ignore')

# Grid Plot Line + Heat map

Examples:

### Iteractive heat-map
from: http://bokeh.pydata.org/en/latest/docs/gallery/unemployment.html

### Line Chart
from: https://bokeh.pydata.org/en/latest/docs/user_guide/annotations.html


In [2]:
data_aggregated = pd.read_csv('data/Data_heat_maps/aggregated_day/aggregated_day_total.csv')
data_cc = pd.read_csv('data/Data_heat_maps/Customer Contacts/limited_occ_with_gps_time.csv', sep = ';')

In [53]:
def multiple_plot(location, data_aggregated, window_size = 30):
    retrieve = str(location) + '.csv'
    data = pd.read_csv('data/Data_heat_maps/hour_consuption/' + retrieve)
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    #heat_map
    output_file("heat_map.html", title="heat_map.py")
    data.columns.name = 'date'
    data.index.name = 'hour'
    data.index = data.index.astype(str)
    hours = list(data.index)
    date = list(data.columns)
    date = list(pd.date_range(start = date[0], end = date[-1]).strftime('%Y-%m-%d'))
    df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    output_file("box_annotation.html", title="box_annotation.py example")
    colors = ['#fff7fb', '#ece7f2', '#d0d1e6', '#a6bddb', '#74a9cf', '#3690c0', '#0570b0', '#045a8d', '#023858']
#     mapper = LinearColorMapper(palette=colors, low=df.rate.min(), high=df.rate.max())
    mapper = LogColorMapper(palette=colors, low= 0, high=df.rate.max())
    source = ColumnDataSource(df)
    TOOLS = "save,pan ,reset, wheel_zoom"
    p1 = figure(title="Water consumption in Log(Liters) from {0} to {1} - Location: {2}".format(date[0], date[-1:], str(location)), 
                x_axis_type="datetime", x_range = date, y_range = list(reversed(hours)), tools=TOOLS)

    p1.grid.grid_line_color = None
    p1.axis.axis_line_color = None
    p1.axis.major_tick_line_color = None
    p1.xaxis.major_label_text_font_size = '0pt'  # turn off x-axis tick labels
    p1.yaxis.axis_label = 'Hour'
    p1.axis.major_label_standoff = 0
    
    heat_map = p1.rect(x="date", y="hour", width=1, height=1, source = source, fill_color={'field': 'rate', 'transform': mapper},
            line_color=None)
    
    color_bar = ColorBar(color_mapper=mapper, border_line_color=None,label_standoff=12, location=(0, 0))
    p1.add_layout(color_bar, 'right')
    
    heat_map_hover = HoverTool(renderers=[heat_map],
                        tooltips=OrderedDict([('Water Consumption (Liters)', '@rate'),
                                            ('date hour', '@date'), 
                                             ('hour', '@hour'), 
                                            ]))
    
    p1.add_tools(heat_map_hover)
    
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # Aggregated water consumption
    data = data_aggregated
    data = data[data['location'] == location]
    data['date'] = pd.to_datetime(data_aggregated['norm_date']).apply(lambda x: x.strftime('%Y-%m-%d'))
    data['delta_total'] = data['delta_total']/1000000
    sem = lambda x: x.std() / np.sqrt(x.size)
    rolling = data['delta_total'].rolling(window = window_size).agg({"y_mean": np.mean, "y_std": np.std, "y_sem": sem})
    rolling = rolling.fillna(method='bfill')
    rolling['ub'] = rolling.y_mean + 2 * rolling.y_std
    rolling['date'] = data['norm_date']
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Outlier detection %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%        
    data['c'] = '#377eb8'
    data['c'][data['delta_total']>rolling['ub']] = '#d53e4f'
    
    data['s'] = 6
    data['s'][data['delta_total']>rolling['ub']] = 8
    
    data['a'] = 0.4
    data['a'][data['delta_total']>rolling['ub']] = 1
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    
    mean_plot = round(np.mean(data['delta_total']),3)
    std_plot = round(np.std(data['delta_total']),3)
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    p2 = figure(title="Daily water consumptions in million of Liters", x_axis_type="datetime", tools=TOOLS, x_range = date)
    
    source2 = ColumnDataSource(data)
    
    p_circle = p2.circle(x = 'date', y = 'delta_total', size='s', color= 'c', alpha='a', 
              legend= "Consumption in ML (mean = {0}, std = {1})".format(mean_plot,std_plot), source = source2)
    
    source3 = ColumnDataSource(rolling)
    p_line_1 = p2.line(x='date', y='ub', legend='upper_bound (2 sigma)', line_dash = 'dashed', line_width = 4, 
            color = '#984ea3',source = source3)
    
    p_line_2 = p2.line(x='date', y='y_mean', source = source3, line_dash = 'dashed', line_width = 3, 
            legend='moving_average (window = {0} days)'.format(window_size), color = '#4daf4a')
    
    p2.legend.location = "top_left"
    p2.legend.click_policy= "hide"
    p2.ygrid.band_fill_color = "olive"
    p2.ygrid.band_fill_alpha = 0.1
    p2.xaxis.axis_label = 'Date'
    p2.yaxis.axis_label = 'Million of Liters'
    p2.xaxis.major_label_orientation = pi / 3
    p2.x_range = p1.x_range# Same axes as the heatMap
    p2.xaxis.formatter = FuncTickFormatter(code=""" var labels = %s; return labels[tick];""" % date)
    
    circle_hover = HoverTool(renderers=[p_circle],
                        tooltips=OrderedDict([('date', '@date'), 
                                              ('Water Consumption (ML)', '@delta_total'),
                                             ]))
    
    p_line_1_hover = HoverTool(renderers=[p_line_1],
                        tooltips=OrderedDict([('date', '@date'), 
                                              ('UpperBound water consumption (ML)', '@ub'),
                                             ]))
    
    p_line_2_hover = HoverTool(renderers=[p_line_2],
                        tooltips=OrderedDict([('date', '@date'), 
                                              ('Mean water consumption (ML)', '@y_mean'),
                                             ]))
        
    p2.add_tools(circle_hover)
    p2.add_tools(p_line_1_hover)
    p2.add_tools(p_line_2_hover)

    show(gridplot([[p1,None],[p2,None]] , plot_width=1200, plot_height=400, toolbar_location = 'below'))

multiple_plot(1163208, data_aggregated)     
multiple_plot(1255365, data_aggregated)
multiple_plot(1813229, data_aggregated)

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: date [renderer: GlyphRenderer(id='579e5c01-1abc-4d12-acd1-99719f596f7a', ...)]
E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: norm_date [renderer: GlyphRenderer(id='dad8759a-3fa3-47c5-aae7-6ce09e215073', ...)]
E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: date [renderer: GlyphRenderer(id='579e5c01-1abc-4d12-acd1-99719f596f7a', ...)]
E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: norm_date [renderer: GlyphRenderer(id='dad8759a-3fa3-47c5-aae7-6ce09e215073', ...)]
E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: date [renderer: GlyphRenderer(id='579e5c01-1abc-4d12-acd1-99719f596f7a', ...)]
E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: norm_date [renderer: GlyphRenderer(id='dad8759a-3fa3-47c5-aae7-6ce09e215073', ...)]


In [52]:
data_aggregated['location'].unique()

array([1163208, 1164250, 1164530, 1164726, 1164778, 1164811, 1165809,
       1165886, 1166041, 1168469, 1197792, 1221495, 1221526, 1221545,
       1226321, 1227180, 1232676, 1233534, 1235056, 1235908, 1237870,
       1239115, 1250893, 1252288, 1252433, 1253682, 1253857, 1254883,
       1255336, 1255365, 1256656, 1256748, 1256801, 1257299, 1257684,
       1257736, 1259358, 1268025, 1268089, 1268128, 1268214, 1271383,
       1301387, 1304122, 1304871, 1305604, 1317199, 1317440, 1317820,
       1321005, 1327194, 1333901, 1333934, 1334033, 1335520, 1338495,
       1352181, 1404804, 1581850, 1590265, 1591055, 1591356, 1777541,
       1778765, 1784273, 1784470, 1788300, 1788548, 1788785, 1788906,
       1803450, 1805455, 1805822, 1806880, 1813229, 1814060, 1816705,
       1862885, 1876147, 1880014, 2039867, 2040397, 2074490, 2080187,
       2082468, 2090199, 2094965, 2100919, 2102701, 2116409, 2127150,
       2133519, 2143698, 2147185, 2156898, 2187602, 2246404], dtype=int64)