In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import eif
from kando import kando_client
import pickle
%run utils.ipynb
# import libraries
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest

%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d   

In [36]:
WINDOW = 24
ponit_id = 911

In [3]:
def load_point_mps(point_id):
    infile = open(f"{point_id}_mps.pkl",'rb')
    mps = pickle.load(infile)
    infile.close()
    return(mps)

In [4]:
mps = load_point_mps(911)

In [5]:
def load_point_data(point_id):
    infile = open(f"{point_id}_data.pkl",'rb')
    df = pickle.load(infile)
    infile.close()
    return(df)

In [6]:
df = load_point_data(911)

In [8]:
df.columns[:-1]

Index(['EC', 'PH', 'ORP', 'TEMPERATURE'], dtype='object')

In [20]:
def add_matrix_profile_anomalies_labels(df, mps):
    for col_ in ['EC', 'PH', 'ORP', 'TEMPERATURE']:
        df[f'{col_}_legend'] = 0
        matrix_profile = mps[f'mp_{col_}'][:, 0]
        max_index = np.argwhere(
            matrix_profile == matrix_profile.max()).flatten()[0]
        df[f'{col_}_legend'].iloc[max_index:max_index + WINDOW] = 1
    df['ALL_DIMS'] = 0
    matrix_profile = mps[f'All_dimensions'][0][:, 0]
    max_index = np.argwhere(
        matrix_profile == matrix_profile.max()).flatten()[0]
    df['ALL_DIMS'].iloc[max_index:max_index + WINDOW] = 1
    return (df)

In [21]:
df = add_matrix_profile_anomalies_labels(df, mps)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [31]:
def add_isolation_forest_anomalies_labels(df):

    # normalize the data
    x = df[['EC', 'PH', 'ORP', 'TEMPERATURE']].values  #returns a numpy array
    min_max_scaler = MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)

    clf = IsolationForest(random_state=0)
    clf.fit(x_scaled)

    isolation_forest_anomaly_score = clf.decision_function(x_scaled)
    min_multivariant_index = np.argwhere(
        isolation_forest_anomaly_score ==
        isolation_forest_anomaly_score.min())[0][0]
    half = int(WINDOW / 2)
    df['ALL_DIMS'].iloc[min_multivariant_index - half:min_multivariant_index +
                        half] += 2

    indx = np.arange(df.shape[0])
    for col in ['EC', 'PH', 'ORP', 'TEMPERATURE']:
        values = df[col].values
        tempdf = pd.DataFrame({
            'time': indx,
            'ph': values,
        })
        clf.fit(tempdf)
        isolation_forest_anomaly_score = clf.decision_function(tempdf)
        temp_min_index = np.argwhere(
            isolation_forest_anomaly_score ==
            isolation_forest_anomaly_score.min())[0][0]
        df[f'{col}_legend'].iloc[temp_min_index - half:temp_min_index +
                                 half] += 2
    return (df)

In [32]:
df = add_isolation_forest_anomalies_labels(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, val

In [38]:
def save_df_for_anomaly_map(df, point_id):
    b_file = open(f"{point_id}_df_for_anomaly_map.pkl", "wb")
    pickle.dump(df, b_file)
    b_file.close()

In [40]:
save_df_for_anomaly_map(df, 911)