In [93]:
#!/usr/bin/env python
# coding: utf-8

import sys
import os
import matplotlib.pyplot as plt
import glob

from datetime import datetime
from datetime import timedelta
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from ipywidgets import interactive, HBox, VBox
# import keras
import pandas as pd
import numpy as np
import random
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
from functools import partial
from tensorflow import keras


def load_model():  
    """
    Load the most recent Keras model file from the './models' directory.

    Returns:
    -------
    keras.engine.training.Model
        The loaded Keras model.
    """
    model_dir = "./models"
    model_files = glob.glob(os.path.join(model_dir, "*.h5"))
    model_files.sort(key=os.path.getctime, reverse=True)
    latest_model_file = model_files[0]
    loaded_model = keras.saving.load_model(latest_model_file)
    return loaded_model


def create_dataset(X, y, time_steps=1):
    """
    Create a time series dataset from the input and target data.

    Parameters:
    ----------
    X : pandas.DataFrame
        The input data.
    y : pandas.Series
        The target data.
    time_steps : int, optional (default=1)
        The number of time steps to include in each sample.

    Returns:
    -------
    tuple of numpy.ndarray
        The input and target data formatted as a time series dataset.

    Raises:
    ------
    ValueError
        If the length of X and y are different.
    """
    if len(X) != len(y):  
        raise ValueError("X and y must have the same length.")    
    
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps])

    return np.array(Xs), np.array(ys)


def create_test_score_df(test, time_steps, mae_loss, threshold, column):
    """
    Create a pandas DataFrame with test score information for anomaly detection.

    Parameters:
    ----------
    test : pandas.DataFrame
        The test data used for anomaly detection.
    time_steps : int
        The number of time steps used to detect anomalies.
    mae_loss : float
        The mean absolute error loss threshold used for anomaly detection.
    threshold : float
        The anomaly detection threshold value.
    column : str
        The column name for the data being tested.

    Returns:
    -------
    pandas.DataFrame
        A DataFrame containing test score information for anomaly detection.
    """
    test_score_df = pd.DataFrame(index=test[time_steps:].index)
    test_score_df["Date/Time"] = test["Date/Time"]
    test_score_df["Loss"] = mae_loss
    test_score_df["Threshold"] = threshold
    test_score_df["Anomaly"] = test_score_df.Loss > test_score_df.Threshold
    test_score_df["Water_level"] = test[time_steps:][column]
    return test_score_df


def mae_loss(X_pred, X):
    """
    Calculates the mean absolute error (MAE) loss between predicted values and true values.

    Parameters:
        X_pred (array-like): Predicted values.
        X (array-like): True values.

    Returns:
        float: Mean absolute error (MAE) loss.
    """
    return np.mean(np.abs(X_pred, X), axis=1)


def get_anomalies_df(model, df, column, time_steps):
    """
    Obtains a DataFrame of anomalies detected by a given model on a specified column of a DataFrame.

    Parameters:
        model (object): An anomaly detection model capable of predicting anomalies.
        df (pandas.DataFrame): The DataFrame containing the data to be analyzed.
        column (str): The name of the column in the DataFrame to analyze for anomalies.
        time_steps (int): The number of time steps used when analyzing if a data point is an anomaly or not.

    Returns:
        pandas.DataFrame: A DataFrame containing the detected anomalies.
    """
    X, _ = create_dataset(df[["Date/Time-unix"]], df[[column]], time_steps)
    pred = model.predict(X)
    X = X.astype("float64")
    data_mae_loss = mae_loss(pred, X)
    THRESHOLD = 0.8
    score_df = create_test_score_df(df, time_steps, data_mae_loss, THRESHOLD, column)
    anomalies = score_df[score_df.Anomaly == True]
    return anomalies


def parse_to_datetime(x):
    """
    Parses a given input into a datetime object.

    Parameters:
        x (str or datetime): The input to be parsed into a datetime object.

    Returns:
        datetime: A datetime object representing the parsed value.
    """
    if isinstance(x, datetime):
        return x
    try:
        return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        try:
            return datetime.strptime(x, '%Y-%m-%d %H:%M')
        except ValueError:
            try:
                return datetime.strptime(x, '%Y-%m-%d %H')
            except ValueError:
                return datetime.strptime(x, '%Y-%m-%d')  # Get rid of data points without hours and/or minutes?

class interactive_data_chooser:
    """
    Class for selecting data graphically and displaying it
    """
    def __init__(self, df, columns):
        self.df = df.copy()
        self.columns = columns
        self.df["Manual_Outlier"] = -1
        self.df["Model_Outlier"] = 0
        self.axis_dropdowns = None
        self.chosen_color_column = self.df["Manual_Outlier"]
        self.trace1_color = None
        self.trace2_color = None
    
    def activate_plot(self):
        """
        Activates the plot by initializing the necessary traces, adding dummy traces for the legend, and configuring the layout.

        Returns:
            ipywidgets.VBox: A VBox containing the plot and necessary controls.
        """
        self.df.reset_index(inplace=True,drop=True)
        numeric_df = self.df.select_dtypes(include=np.number)
        numeric_columns = numeric_df.columns
        model = load_model()

        # Get anomalies calculated by the model and set the value in "Model_Outlier" to 1
        anomalies = get_anomalies_df(model, self.df, 'Water level, Nap (cm)', 30)
        self.df.loc[self.df["Date/Time"].isin(anomalies["Date/Time"]), "Model_Outlier"] = 1
        
        # Trace1 is the non-outlier trace, where manually confirmed non-outlier data points are marked 
        # as triangles and the other data points as circles
        trace1 = go.Scatter(x=self.df[(self.df["Model_Outlier"] != 1) | \
                                        (self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] == 0)]["Date/Time"], 
                            y=self.df[(self.df["Model_Outlier"] != 1) | \
                                        (self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] == 0)]["Water level, Nap (cm)"],
                            mode="markers+lines", 
                            selected_marker_color = "orange",
                            visible=True,
                            opacity=1.0,
                            marker=dict(size=10,
                                        colorscale=["blue", "green"], 
                                        color=self.trace1_color), 
                                        marker_symbol=["circle", "triangle-up"],
                                        showlegend=True,
                                        name="non-outlier")

        # Trace 2 is the outlier trace, data points are marked with a chross
        trace2 = go.Scatter(x=self.df[(self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] != 0)]["Date/Time"],  
                            y=self.df[(self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] != 0)]["Water level, Nap (cm)"],
                            mode="markers", 
                            selected_marker_color = "orange",
                            visible=True,
                            opacity=1.0,
                            marker=dict(size=10, 
                                        colorscale=["blue", "red"], 
                                        color=self.trace2_color), 
                                        marker_symbol="x", 
                                        showlegend=True,
                                        name="outlier")
        
        # Create scatter plot with two different colors based on the "color" column
        self.f = go.FigureWidget(data=[trace1, trace2])

        # Add dummy traces for a legend that explains the marker symbols
        self.f.add_trace(go.Scatter(x=[None], y=[None], mode="markers",
                         marker=dict(symbol="circle", color="blue", size=10),
                         name="Not manually chosen", legendgroup="Dummy Traces"
                         ))
        self.f.add_trace(go.Scatter(x=[None], y=[None], mode="markers",
                         marker=dict(symbol="triangle-up", color="green", size=10),
                         name="Not outlier", legendgroup="Dummy Traces"
                         ))
        self.f.add_trace(go.Scatter(x=[None], y=[None],mode="markers",
                         marker=dict(symbol="x", color="red", size=10),
                         name="Outlier", legendgroup="Dummy Traces"
                         ))
        
        # Add title annotation above the dummy traces legend group
        self.f.add_annotation(
            x=1.125, y=0.78,
            xref="paper", yref="paper",
            text="<b>Marker symbols</b>",
            showarrow=False,
            font=dict(size=12)
        )

        # Drop down menus for axes and plot color, not used for the moment
        # self.axis_dropdowns = interactive(self.update_axes, yaxis = self.columns, xaxis = self.columns, color = numeric_columns)  # , trace_idx=0

        # Add x-axis and y-axis labels and add space between the traces in the legend
        self.f.update_layout(xaxis_title="Date/Time", yaxis_title="Water level, Nap (cm)", legend=dict(tracegroupgap=15))  

        # For both traces, activate selection_fn if user marks datapoints in them
        self.f.data[0].on_selection(self.selection_fn)
        self.f.data[1].on_selection(self.selection_fn)
        
        return VBox((HBox(), self.f)) 
        # return VBox((HBox(self.axis_dropdowns.children), self.f))  # Use instead of the line above if drop downs are wanted
    
    def update_axes(self, xaxis, yaxis, color):  # Not used for the moment, self.f.data now consists of two traces and this method has not been implemented
        """
        Update the axes and marker color of the scatter plot.

        Parameters:
            xaxis (str): The column name for the x-axis.
            yaxis (str): The column name for the y-axis.
            color (str): The column name for the marker color.
        """
        scatter = self.f.data[0]  
        scatter.x = self.df[xaxis]
        scatter.y = self.df[yaxis]
        scatter.marker.color = self.df[color]
        with self.f.batch_update():
            self.f.layout.xaxis.title = xaxis
            self.f.layout.yaxis.title = yaxis     

    def remove_selected_data_points(self, current_list_x, current_list_y, points):
        """
        Remove the selected data points from the current lists of x and y coordinates.

        Parameters:
            current_list_x (numpy.ndarray): The current list of x coordinates.
            current_list_y (numpy.ndarray): The current list of y coordinates.
            points (plotly.graph_objects.FigureWidget.PointSelection): The selected data points.

        Returns:
            tuple: A tuple containing the updated lists of x and y coordinates.
        """
        current_list_x = np.delete(current_list_x, points.point_inds) 
        current_list_y = np.delete(current_list_y, points.point_inds)
        return current_list_x, current_list_y

    def get_x_and_y_values_current_trace(self, trace):  
        """
        Get the x and y values for the current trace.

        Parameters:
            trace (plotly.graph_objects.Scatter): The trace.

        Returns:
            tuple: A tuple containing the x and y values of the current trace.
        """
        trace_value = 0 if trace.name == "non-outlier" else 1
        x_values, y_values = self.get_x_and_y_values(trace_value)
        return x_values, y_values
    
    def get_x_and_y_values_other_trace(self, trace):
        """
        Get the x and y values for the other trace.

        Parameters:
            trace (plotly.graph_objects.Scatter): The trace.

        Returns:
            tuple: A tuple containing the x and y values of the other trace.
        """
        trace_value = 0 if trace.name == "outlier" else 1
        x_values, y_values = self.get_x_and_y_values(trace_value)
        return x_values, y_values  
    
    def get_x_and_y_values(self, trace_value):
        """
        Get the x and y values for a specific trace.

        Parameters:
            trace_value (int): The index of the trace.

        Returns:
            tuple: A tuple containing the x and y values of the trace.
        """
        x_values = np.array(self.f.data[trace_value].x)
        y_values = np.array(self.f.data[trace_value].y)
        return x_values, y_values

    def append_selected_data_points(self, x_values, y_values, points):
        """
        Append the selected data points in points to the lists of x and y values.

        Parameters:
            x_values (np.ndarray): The current list of x values.
            y_values (np.ndarray): The current list of y values.
            points (plotly.graph_objs.Points): The selected data points.

        Returns:
            tuple: A tuple containing the x and y values with the new values appended using the correct data type.
        """
        xs_to_datetime = []
        for x in points.xs:  
            x = parse_to_datetime(x)
            xs_to_datetime.append(x)

        appended_list_x = np.append(x_values, xs_to_datetime)
        appended_list_y = np.append(y_values, points.ys)
        return appended_list_x, appended_list_y

    def selection_fn(self,trace,points,selector):
        """
        Updates the plot and data based on the selected data points. Selected data points switch trace.

        Parameters:
            trace (go.Scatter): The current trace. The method is called once for each of the two traces (non-outlier and outlier).
            points (dict): The selected data points containing the x-values in 'xs' and the y-values in 'ys'.
            selector (dict): The selector used to identify the trace.

        Returns:
            None
        """     
        # Set "Manual_Outlier" values for the data points selected
        mask = self.df["Date/Time"].isin(points.xs)     
        self.df.loc[mask, 'Manual_Outlier'] = self.df.loc[mask, 'Manual_Outlier'].apply(lambda x: 0 if x == 1 else 1)
        
        # Add selected data points to the other trace
        other_trace_x, other_trace_y = self.get_x_and_y_values_other_trace(trace)
        other_trace_x, other_trace_y = self.append_selected_data_points(other_trace_x, other_trace_y, points)
        
        # If data points in "outlier" have been added to "non-outlier"-trace, then sort x axis on datetime 
        other_trace_name = "outlier" if trace.name == "non-outlier" else "non-outlier"
        if trace.name == "outlier":
            sort_indices = np.argsort(other_trace_x)
            other_trace_x = other_trace_x[sort_indices]
            other_trace_y = other_trace_y[sort_indices]
            self.df.loc[mask, "Manual_Outlier"] = 0  
        
        # Update the other trace 
        self.f.update_traces(x=other_trace_x, y=other_trace_y, selector=dict(name=other_trace_name))

        # Remove selected data points from current trace and update it
        trace_x, trace_y = self.get_x_and_y_values_current_trace(trace)
        trace_x, trace_y = self.remove_selected_data_points(trace_x, trace_y, points)
        self.f.update_traces(x=trace_x, y=trace_y, selector=dict(name=trace.name))
        
        # Update marker symbol in trace1, non-outlier
        symbols = {-1: "circle", 0: "triangle-up"}
        trace1_x_values = self.f.data[0].x
        self.trace1_color = [x["Manual_Outlier"] for _, x in self.df.iterrows() if x["Date/Time"] in trace1_x_values and x["Manual_Outlier"] != 1]
        marker_symbols = [symbols[i] for i in self.trace1_color]  
        self.f.update_traces(marker_color=self.trace1_color, marker_symbol=marker_symbols, selector=dict(name="non-outlier")) 





In [94]:
#!/usr/bin/env python
# coding: utf-8

from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import time

df = pd.read_csv("data/asset-data-export_vEm3Jd5916-64098d190a5ca-1678347545.csv", delimiter=";", header=4) 

df["Date/Time"] = pd.to_datetime(df["Date/Time"]) 
df["Date/Time-unix"] = df["Date/Time"].apply(lambda x: int(time.mktime(x.timetuple())))

chooser = interactive_data_chooser(df[:100], df.columns)  
chooser.activate_plot()


70
             Date/Time  Loss  Threshold  Anomaly  Water_level
30 2022-04-22 22:40:00   1.0        0.8     True           10
31 2022-04-22 23:00:00   1.0        0.8     True            7
32 2022-04-22 23:20:00   1.0        0.8     True            6
33 2022-04-22 23:40:00   1.0        0.8     True            5
34 2022-04-23 00:00:00   1.0        0.8     True            5
..                 ...   ...        ...      ...          ...
95 2022-04-23 20:20:00   1.0        0.8     True            0
96 2022-04-23 20:40:00   1.0        0.8     True            2
97 2022-04-23 21:00:00   1.0        0.8     True            2
98 2022-04-23 21:20:00   1.0        0.8     True            3
99 2022-04-23 21:40:00   1.0        0.8     True            9

[70 rows x 5 columns]


VBox(children=(HBox(), FigureWidget({
    'data': [{'marker': {'colorscale': [[0.0, 'blue'], [1.0, 'green']], …

In [63]:

df = pd.read_csv("data/manipulated_data.csv", delimiter=";", header=3)  #, index_col="Date/Time") 
df["Date/Time"] = pd.to_datetime(df["Date/Time"]) 
df["Date/Time-unix"] = df["Date/Time"].apply(lambda x: int(time.mktime(x.timetuple())))
df["Model_Outlier"] = 0
df["Manual_Outlier"] = -1
df = df[:100]
# df.loc[30:]["Model_Outlier"] = 1 Den här funkar inte, då finns alla 100 datapunkter med i trace och alla 100 plottas
df.loc[30:, "Model_Outlier"] = 1  # Den här funkar, då finns enbart de första 30 datapunkterna med i trace och dessa 30 plottas
trace = go.Scatter(x=df[(df["Model_Outlier"] != 1) | \
                                           (df["Model_Outlier"] == 1) & (df["Manual_Outlier"] == 0)]["Date/Time"], 
                                y=df[(df["Model_Outlier"] != 1) | \
                                           (df["Model_Outlier"] == 1) & (df["Manual_Outlier"] == 0)]["Water level, Nap (cm)"],
                                mode="markers+lines", 
                                selected_marker_color = "orange",
                                visible=True,
                                opacity=1.0,
                                marker=dict(size=10, 
                                            colorscale=["blue", "green"]), # color=numeric_df[numeric_columns[0]]),
                                            showlegend=True,
                                            name="non-outlier") 
layout = go.Layout(title="Simple Scatter Plot",
                   xaxis=dict(title="X-axis"),
                   yaxis=dict(title="Y-axis"))

fig = go.FigureWidget(data=trace, layout=layout)
fig.show()