# eLog Visualisations
In this notebook the first functions to manipulate Bokeh


In [93]:
import numpy as np
import pandas as pd

from bokeh.plotting import figure, show, output_file
from bokeh.io import show
from bokeh.layouts import column, row, gridplot

from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    BasicTicker,
    PrintfTickFormatter,
    ColorBar,
    BoxAnnotation,
    Band,
    LogColorMapper,
    FuncTickFormatter
)


from math import pi
import sys

## Heat-Map for Water Consuption


### Iteractive heat-map
from: http://bokeh.pydata.org/en/latest/docs/gallery/unemployment.html

In [2]:
def plot_bokeh(data):
    
    output_file("heat_map.html", title="heat_map.py")
    data.columns.name = 'date'
    data.index.name = 'hour'
    data.index = data.index.astype(str)

    hours = list(data.index)
    months = list(data.columns)
    
    # reshape to 1D array or rates with a month and year for each row.
    df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()

    # this is the colormap from the original NYTimes plot
    mapper = LinearColorMapper(palette="Greys256", low=df.rate.min(), high=df.rate.max())
    source = ColumnDataSource(df)

    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

    p1 = figure(title="Water Consumption ({0} - {1} 2017)".format(months[0], months[-1]), x_axis_type="datetime",
               x_range = months, y_range = list(reversed(hours)),
               x_axis_location="above", plot_width=900, plot_height=400,
               tools=TOOLS, toolbar_location='below')

    p1.grid.grid_line_color = None
    p1.axis.axis_line_color = None
    p1.axis.major_tick_line_color = None
    p1.axis.major_label_text_font_size = "5pt"
    p1.axis.major_label_standoff = 0
    p1.xaxis.major_label_orientation = pi / 3

    p1.rect(x="date", y="hour", width=1, height=1,
           source=source,
           fill_color={'field': 'rate', 'transform': mapper},
           line_color=None)

    color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
                         label_standoff=6, border_line_color=None, location=(0, 0))

    p1.add_layout(color_bar, 'right')

    p1.select_one(HoverTool).tooltips = [
         ('date', '@date @hour'),
         ('Water Consumption (Liters)', '@rate'),
    ]

    show(p1)      # show the plot
        

    

In [144]:
# Example
data = pd.read_csv('data/Data_heat_maps/hour_consuption/2100919.csv')
plot_bokeh(data)


# Line Chart
based on:
https://bokeh.pydata.org/en/latest/docs/user_guide/annotations.html

In [148]:
def aggregated_plot(data, location, plot_ = False):
    data = data[data['location'] == location]
    data['norm_date'] = pd.to_datetime(data['norm_date'])
    data = data.set_index('norm_date')
    
    
#     window_size = 30
#     window = np.ones(window_size)/float(window_size)
#     mean = np.convolve(data['delta_total'], window, 'same')
    
    
    output_file("box_annotation.html", title="box_annotation.py example")
    TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

    #reduce data size
    p2 = figure(x_axis_type="datetime", tools=TOOLS)

    p2.circle(data.index.to_series(), data['delta_total'], size=4, color='darkgrey', alpha=0.6, legend= 
             "Consumption (mean = {0}, std = {1})".format(0, 0))
#     p2.line(data.index.to_series(), mean, legend='avg', color='navy') 
    p2.legend.location = "top_left"
    

    p2.title.text = "Water Consumption in liters (location: {})".format(str(location))
#     p.xgrid[0].grid_line_color=None
#     p.ygrid[0].grid_line_alpha=0.5
    p2.ygrid.band_fill_color = "olive"
    p2.ygrid.band_fill_alpha = 0.1
    p2.xaxis.axis_label = 'Time'
    p2.yaxis.axis_label = 'Liters'
    
    show(p2)
    

In [152]:
# Example
#data_aggregated = pd.read_csv('data/Data_heat_maps/aggregated_day/aggregated_day_total.csv')
aggregated_plot(data_aggregated, 1163208)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [156]:
def multiple_plot(location, data_aggregated, window_size = 30):
    retrieve = str(location) + '.csv'
    data = pd.read_csv('data/Data_heat_maps/hour_consuption/' + retrieve)
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    #heat_map
    output_file("heat_map.html", title="heat_map.py")
    data.columns.name = 'date'
    data.index.name = 'hour'
    data.index = data.index.astype(str)
    hours = list(data.index)
    date = list(data.columns)
    df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    output_file("box_annotation.html", title="box_annotation.py example")
    colors = ['#fff7fb', '#ece7f2', '#d0d1e6', '#a6bddb', '#74a9cf', '#3690c0', '#0570b0', '#045a8d', '#023858']
#     mapper = LinearColorMapper(palette=colors, low=df.rate.min(), high=df.rate.max())
    mapper = LogColorMapper(palette=colors, low= 0, high=df.rate.max())
    source = ColumnDataSource(df)
    TOOLS = "hover,save,pan, box_zoom,reset, wheel_zoom"
    p1 = figure(title="Water consumption in Log(Liters) from {0} to {1} - Location: {2}".format(date[0], date[-1], str(location)), 
                x_axis_type="datetime", x_range = date, y_range = list(reversed(hours)), x_axis_location="above", 
                tools=TOOLS, toolbar_location='left')

    p1.grid.grid_line_color = None
    p1.axis.axis_line_color = None
    p1.axis.major_tick_line_color = None
    p1.xaxis.major_label_text_font_size = '0pt'  # turn off x-axis tick labels
    p1.yaxis.axis_label = 'Hour'
    p1.axis.major_label_standoff = 0
    p1.rect(x="date", y="hour", width=1, height=1, source=source, fill_color={'field': 'rate', 'transform': mapper},
            line_color=None)
    
    color_bar = ColorBar(color_mapper=mapper, border_line_color=None,label_standoff=12, location=(0, 0))
    p1.add_layout(color_bar, 'right')
    p1.select_one(HoverTool).tooltips = [('date hour', '@date @hour'),('Water Consumption (Liters)', '@rate'),]
    
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    data = data_aggregated
    data = data[data['location'] == location]
    data['norm_date'] = pd.to_datetime(data['norm_date'])
    data = data.set_index('norm_date')
    data['delta_total'] = data['delta_total']/1000000
    data['date'] = date
    sem = lambda x: x.std() / np.sqrt(x.size)
    rolling = data['delta_total'].rolling(window = window_size).agg({"y_mean": np.mean, "y_std": np.std, "y_sem": sem})
    rolling = rolling.fillna(method='bfill')
    rolling['ub'] = rolling.y_mean + 2 * rolling.y_std
    rolling['date'] = date
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Outlier detection %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%        
    data['c'] = '#377eb8'
    data['c'][data['delta_total']>rolling['ub']] = '#d53e4f'
    
    data['s'] = 6
    data['s'][data['delta_total']>rolling['ub']] = 8
    
    data['a'] = 0.4
    data['a'][data['delta_total']>rolling['ub']] = 1
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    
    mean_plot = round(np.mean(data['delta_total']),3)
    std_plot = round(np.std(data['delta_total']),3)
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    p2 = figure(title="Daily water consumptions in million of Liters", x_axis_type="datetime", tools=TOOLS, x_range = date,
               toolbar_location='left')
    
    source2 = ColumnDataSource(data)
    
    p2.circle(x = 'date', y = 'delta_total', size='s', color= 'c', alpha='a', 
              legend= "Consumption in ML (mean = {0}, std = {1})".format(mean_plot,std_plot), source = source2)
    
    source3 = ColumnDataSource(rolling)
    p2.line(x='date', y='ub', legend='upper_bound (2 sigma)', line_dash = 'dashed', line_width = 4, 
            color = '#984ea3',source = source3)
    
    p2.line(x='date', y='y_mean', source = source3, line_dash = 'dashed', line_width = 3, 
            legend='moving_average (window = {0} days)'.format(window_size), color = '#4daf4a')
    
    p2.legend.location = "top_left"
    p2.ygrid.band_fill_color = "olive"
    p2.ygrid.band_fill_alpha = 0.1
    p2.xaxis.axis_label = 'Date'
    p2.yaxis.axis_label = 'Million of Liters'
    p2.xaxis.major_label_orientation = pi / 3
    p2.x_range = p1.x_range# Same axes as the heatMap
    p2.xaxis.formatter = FuncTickFormatter(code=""" var labels = %s; return labels[tick];""" % date)
    
    p2.select_one(HoverTool).tooltips = [('date', '@date'), ('Water Consumption (ML)', '@delta_total'),]
    show(gridplot([[p1,None],[p2,None]] , plot_width=1200, plot_height=400))
    
multiple_plot(1163208, data_aggregated)   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
