<div class="alert alert-info"><b>Import Libraries</b></div>

In [1]:
from matplotlib import pyplot as plt
import json
import numpy as np
import random
import datetime
import folium
from math import *
import folium
import altair as alt
import pandas as pd
import pickle
import vincent
from altair_saver import save
from tqdm.notebook import tqdm_notebook
import warnings
warnings.filterwarnings('ignore')
%run utils_dana.ipynb

<div class="alert alert-info"><b>Global Variables</b></div>

In [2]:
# Map tile style name; presumably handed to folium as its `tiles` argument --
# TODO confirm (it is not referenced in the cells visible here).
TILES = 'OpenStreetMap'
# CSV file name; not referenced in the cells visible here -- presumably read elsewhere, verify.
FILE_NAME = 'gichon.csv'
# Resampling interval string. Note: create_anomaly_df chooses its own interval
# from the data length and does not read this constant.
RESAMPLE = '10min'

In [3]:
# Measurement columns and the detector suffixes of their anomaly-flag columns.
_FEATURES = ('EC', 'PH', 'ORP', 'TEMPERATURE')
_MODELS = ('mp', 'if', 'eif')
# Flag columns in the original order: all '_mp' columns, then '_if', then '_eif'.
_FLAG_COLUMNS = [f'{feature}_{model}' for model in _MODELS for feature in _FEATURES]


def _widen_flags(point_df, flag_columns, window):
    """Set every flag within `window` rows of a flagged row (value 1) to 1, in place."""
    n_rows = len(point_df)
    for col in flag_columns:
        hits = np.flatnonzero(point_df[col].to_numpy() == 1)
        widened = np.zeros(n_rows, dtype=bool)
        for pos in hits:
            # Clamp the start at 0: a negative slice start would wrap around to
            # the end of the frame and mark the wrong rows (or none at all).
            widened[max(pos - window, 0):pos + window + 1] = True
        # Single .loc assignment instead of chained `df[col].iloc[...] = 1`,
        # which does not write back to the frame under pandas copy-on-write.
        point_df.loc[widened, col] = 1


def _resample_rule(n_rows):
    """Pick a coarser resampling interval for longer series."""
    if n_rows >= 20000:
        return '15min'
    if n_rows >= 10000:
        return '10min'
    return '5min'


def _build_legend(point_df, feature, models=_MODELS):
    """Return the legend label per row: flagged models joined by '+', else the feature name."""
    legend = pd.Series('', index=point_df.index, dtype=object)
    for model in models:
        flagged = point_df[f'{feature}_{model}'].eq(1.0)
        # Append '+model' to a non-empty label, or start the label with 'model'.
        extended = legend.where(legend.eq(''), legend + '+') + model
        legend = legend.where(~flagged, extended)
    return legend.where(legend.ne(''), feature)


def create_anomaly_df(point_id, window=12):
    """Load one point's data and prepare it for the anomaly charts.

    Steps:
      1. Load ``./Data_and_anomalies/{point_id}_data_with_anomalies`` through
         ``load_point_data`` and keep the four measurement columns plus their
         ``_mp`` / ``_if`` / ``_eif`` flag columns.
      2. Widen every flag equal to 1 to the ``window`` rows before and after it.
      3. Resample on the ``DateTime`` index (mean) with an interval chosen from
         the row count: '15min' (>= 20000 rows), '10min' (>= 10000), else '5min'.
      4. Round the averaged flags up, so a bin containing any flagged row is 1.
      5. Replace every 0.0 in the frame with NaN.
      6. Add ``<feature>_legend`` columns holding the feature name when no model
         flagged the bin, otherwise the flagging models joined with '+' in the
         order mp, if, eif (e.g. 'mp+eif').

    Parameters
    ----------
    point_id : value interpolated into the input path.
    window : int, default 12
        Number of rows on each side of a flagged row that are also flagged.

    Returns
    -------
    pandas.DataFrame with a ``date`` column, the measurement and flag columns,
    and the four legend columns.
    """
    point_df = load_point_data(f'./Data_and_anomalies/{point_id}_data_with_anomalies')
    # assumes the loaded frame is indexed by a datetime index named 'DateTime' -- TODO confirm
    point_df = point_df[list(_FEATURES) + _FLAG_COLUMNS].reset_index()

    _widen_flags(point_df, _FLAG_COLUMNS, window)

    # Local rule name: the original assigned to a local `RESAMPLE`, which
    # shadowed the notebook-level constant of the same name.
    resample_rule = _resample_rule(len(point_df))
    point_df = (
        point_df
        .set_index('DateTime')
        .resample(resample_rule)
        .mean()
        .reset_index()
        .rename(columns={'DateTime': 'date'})
    )

    # A bin's mean flag is > 0 as soon as one row in it was flagged; ceil turns that into 1.
    point_df[_FLAG_COLUMNS] = point_df[_FLAG_COLUMNS].apply(np.ceil)

    # NOTE(review): this blanks 0.0 in *every* column, so genuine 0.0 sensor
    # readings become NaN too -- behaviour kept from the original, confirm intended.
    point_df = point_df.replace(0.0, np.nan)

    for feature in _FEATURES:
        point_df[f'{feature}_legend'] = _build_legend(point_df, feature)
    return point_df

In [8]:
def anomaly_graph(df, col):
    """Build two linked Altair panels for one measurement column.

    The left panel is a bar chart of `col` over `date` carrying an x-axis
    interval selection (brush); the right panel is a line chart restricted to
    the brushed interval. The right panel carries a colour-encoded
    multi-selection (click) which in turn filters the left panel. Both panels
    are coloured by the `<col>_legend` field and fall back to light gray for
    marks outside the respective selection.

    Parameters
    ----------
    df : data passed to the concatenated chart; the encodings reference the
        fields `date`, `col` and `<col>_legend`.
    col : name of the measurement field to plot.

    Returns
    -------
    The horizontally concatenated Altair chart titled '<col> Anomaly Detection'.
    """
    # Global Altair setting: remove the row cap of the default data transformer.
    alt.data_transformers.enable('default', max_rows=None)

    legend_color = alt.Color(f'{col}_legend:N')
    # Created in this order (brush, then click), as in the original.
    brush = alt.selection_interval(encodings=['x'])
    click = alt.selection_multi(encodings=['color'])

    # Left panel: bars, brushable along x, filtered by the colour click.
    overview = alt.Chart().mark_bar()
    overview = overview.transform_impute(
        col,
        key='date',
        value=None,
        groupby=[f'{col}_legend'],
    )
    overview = overview.encode(
        alt.X('date:T', title='Date'),
        alt.Y(f'{col}:Q', title=f'{col}'),
        color=alt.condition(brush, legend_color, alt.value('lightgray')),
    )
    overview = overview.properties(width=350, height=100)
    overview = overview.add_selection(brush)
    overview = overview.transform_filter(click)

    # Right panel: lines, limited to the brushed interval, clickable by colour.
    detail = alt.Chart().mark_line()
    detail = detail.transform_impute(
        col,
        key='date',
        value=None,
        groupby=[f'{col}_legend'],
    )
    detail = detail.encode(
        alt.X('date:T', title='Date'),
        alt.Y(f'{col}:Q', title=f'{col}'),
        color=alt.condition(click, f'{col}_legend:N', alt.value('lightgray')),
    )
    detail = detail.transform_filter(brush)
    detail = detail.properties(width=350, height=100)
    detail = detail.add_selection(click)

    return alt.hconcat(
        overview,
        detail,
        data=df,
        title=f'{col} Anomaly Detection',
    )

In [9]:
def create_total_graph(df):
    """Stack the anomaly panels for PH, EC, ORP and TEMPERATURE vertically.

    One `anomaly_graph` is built per measurement (in that order) from `df`,
    and the four are concatenated top to bottom with an independent colour
    scale per row.

    Returns the vertically concatenated Altair chart.
    """
    panels = [anomaly_graph(df, feature)
              for feature in ('PH', 'EC', 'ORP', 'TEMPERATURE')]
    stacked = alt.vconcat(*panels)
    return stacked.resolve_scale(color='independent')

In [10]:
# Point identifier; create_anomaly_df interpolates it into the input file path.
node = 1012
# Load this point's data and derive the flag and legend columns used by the charts.
point_df = create_anomaly_df(node)
# Build the stacked PH / EC / ORP / TEMPERATURE chart from the prepared frame.
anomaly = create_total_graph(point_df)

In [11]:
# Bare expression as the last line of the cell so the notebook renders the chart.
anomaly