In [36]:
#!/usr/bin/env python
# coding: utf-8

import sys
import os
import matplotlib.pyplot as plt
import glob

from datetime import datetime
from datetime import timedelta
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from ipywidgets import interactive, HBox, VBox
# import keras
import pandas as pd
import numpy as np
import random
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
from functools import partial
from tensorflow import keras


def load_model():  
    """
    Load the most recent Keras model file from the './models' directory.

    Returns:
    -------
    keras.engine.training.Model
        The loaded Keras model.

    Raises:  # Really?
    ------
    OSError
        If no model files are found in the directory.
    """
    model_dir = "./models"
    model_files = glob.glob(os.path.join(model_dir, "*.h5"))
    model_files.sort(key=os.path.getctime, reverse=True)
    latest_model_file = model_files[0]
    loaded_model = keras.saving.load_model(latest_model_file)
    return loaded_model


def create_dataset(X, y, time_steps=1):
    """
    Create a time series dataset from the input and target data.

    Parameters:
    ----------
    X : pandas.DataFrame
        The input data.
    y : pandas.Series
        The target data.
    time_steps : int, optional (default=1)
        The number of time steps to include in each sample.

    Returns:
    -------
    tuple of numpy.ndarray
        The input and target data formatted as a time series dataset.

    Raises:
    ------
    ValueError
        If the length of X and y are different.
    """
    if len(X) != len(y):  # ChatGPTs egna initiativ
        raise ValueError("X and y must have the same length.")    
    
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps])

    return np.array(Xs), np.array(ys)


def predict(model, X):  # Ta bort :)
    return model.predict(X)


def create_test_score_df(test, time_steps, mae_loss, threshold, column):
    """
    Create a pandas DataFrame with test score information for anomaly detection.

    Parameters:
    ----------
    test : pandas.DataFrame
        The test data used for anomaly detection.
    time_steps : int
        The number of time steps used to create the input data.
    mae_loss : float
        The mean absolute error loss threshold used for anomaly detection.
    threshold : float
        The anomaly detection threshold value.
    column : str
        The column name for the data being tested.

    Returns:
    -------
    pandas.DataFrame
        A DataFrame containing test score information for anomaly detection.
    """
    test_score_df = pd.DataFrame(index=test[time_steps:].index)
    test_score_df["Date/Time"] = test["Date/Time"]
    test_score_df["Loss"] = mae_loss
    test_score_df["Threshold"] = threshold
    test_score_df["Anomaly"] = test_score_df.Loss > test_score_df.Threshold
    test_score_df["Water_level"] = test[time_steps:][column]
    return test_score_df


def create_anomaly_df(df):
    return df[df.Anomaly == True]


def mae_loss(X_pred, X):
    return np.mean(np.abs(X_pred, X), axis=1)


def get_anomalies_df(model, df, column, time_steps):
    X, _ = create_dataset(df[["Date/Time-unix"]], df[[column]], time_steps)
    pred = predict(model, X)  # Ändra :)
    X = X.astype("float64")
    # X = float(X)  # Change float on all values in the array!!!
    data_mae_loss = mae_loss(pred, X)
    THRESHOLD = 0.8
    score_df = create_test_score_df(df, time_steps, data_mae_loss, THRESHOLD, column)
    anomalies = create_anomaly_df(score_df)
    return anomalies

def parse_to_datetime(x):
        if isinstance(x, datetime):
            return x
        try:
            return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
        except ValueError:
            try:
                return datetime.strptime(x, '%Y-%m-%d %H:%M')
            except ValueError:
                try:
                    return datetime.strptime(x, '%Y-%m-%d %H')
                except ValueError:
                    return datetime.strptime(x, '%Y-%m-%d')  # Ta bort dessa värden?  

class interactive_data_chooser:
    """
    Class for selecting data graphically and displaying it
    """
    def __init__(self, df, columns):
        # self.df = df
        self.df = df.copy()
        self.columns = columns
        self.df["Manual_Outlier"] = -1
        self.df["Model_Outlier"] = 0
        # Funktion som returnerar anomalies_df
        # self.df[self.df["Date/Time"] == anomalies["Date/Time"]]["model_outlier"] = 1
        # Testa i debug console
        self.axis_dropdowns = None
        self.chosen_color_column = self.df["Manual_Outlier"]
        self.trace1_color = None
        self.trace2_color = None
    
    def activate_plot(self):
        self.df.reset_index(inplace=True,drop=True)
        numeric_df = self.df.select_dtypes(include=np.number)
        numeric_columns = numeric_df.columns
        model = load_model()
        anomalies = get_anomalies_df(model, self.df, 'Water level, Nap (cm)', 30)
        print(len(anomalies))
        print(anomalies)
        self.df.loc[self.df["Date/Time"].isin(anomalies["Date/Time"]), "Model_Outlier"] = 1
        
        
        trace1 = go.Scatter(x=self.df[(self.df["Model_Outlier"] != 1) | \
                                        (self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] == 0)]["Date/Time"], 
                            y=self.df[(self.df["Model_Outlier"] != 1) | \
                                        (self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] == 0)]["Water level, Nap (cm)"],
                            mode="markers+lines", 
                            selected_marker_color = "orange",
                            visible=True,
                            opacity=1.0,
                            marker=dict(size=10, 
                                        colorscale=["blue", "green"], 
                                        color=self.trace1_color), # color=numeric_df[numeric_columns[0]]),
                                        marker_symbol=["circle", "triangle-up"],
                                        showlegend=True,
                                        name="non-outlier")

        trace2 = go.Scatter(x=self.df[(self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] != 0)]["Date/Time"],  
                            y=self.df[(self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] != 0)]["Water level, Nap (cm)"],
                            mode="markers", 
                            selected_marker_color = "orange",
                            visible=True,
                            opacity=1.0,
                            marker=dict(size=10, 
                                        colorscale=["blue", "red"], 
                                        color=self.trace2_color), #  numeric_df[numeric_columns[0]]),
                                        marker_symbol="x", 
                                        showlegend=True,
                                        name="outlier")
        
        # Create scatter plot with two different colors based on the "color" column
        self.f = go.FigureWidget(data=[trace1, trace2])

        """
        go.Scatter(x=local_df["Date/Time"], y=local_df["Water level, Nap (cm)"], 
                                        mode="markers", marker_color=local_df["color"])
        """
        trace1.hovertemplate = "<b>Trace 1</b><br>X: %{x}<br>Y: %{y}"  # De här funkar inte, men kanske inte spelar någon roll
        trace2.hovertemplate = "<b>Trace 2</b><br>X: %{x}<br>Y: %{y}"
        
        # self.axis_dropdowns = interactive(self.update_axes, yaxis = self.columns, xaxis = self.columns, color = numeric_columns, trace_idx=0)  # Den här blev det något knas med
        
        # Add x-axis and y-axis labels
        self.f.update_layout(xaxis_title="Date/Time", yaxis_title="Water level, Nap (cm)")

        # For both traces, activate selection_fn if user marks datapoints in them
        self.f.data[0].on_selection(self.selection_fn)
        self.f.data[1].on_selection(self.selection_fn)
        # Show the plot
        # self.f.show()
            
    
        return VBox((HBox(), self.f)) # self.axis_dropdowns.children in HBox
        return


        # Create the scatter trace with markers and lines for datapoints categorized as non-outliers by the model or the user (user categorization overrides the model) 
        trace1 = go.Scatter(x=self.df[(self.df["Model_Outlier"] != 1) | \
                                           (self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] == 0)]["Date/Time"], 
                                y=self.df[(self.df["Model_Outlier"] != 1) | \
                                           (self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] == 0)]["Water level, Nap (cm)"],
                                mode="markers+lines", 
                                selected_marker_color = "orange",
                                visible=True,
                                opacity=1.0,
                                marker=dict(size=10, 
                                            colorscale=["blue", "green"], 
                                            color=self.trace1_color), # color=numeric_df[numeric_columns[0]]),
                                            showlegend=True,
                                            name="non-outlier")   

        # Add a second scatter trace with markers only for datapoints categorized as outliers by the model or the user (user categorization overrides the model) 
        trace2 = go.Scatter(x=self.df[(self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] != 0)]["Date/Time"],  
                                y=self.df[(self.df["Model_Outlier"] == 1) & (self.df["Manual_Outlier"] != 0)]["Water level, Nap (cm)"],
                                mode="markers", 
                                selected_marker_color = "orange",
                                visible=True,
                                opacity=1.0,
                                marker=dict(size=10, 
                                            colorscale=["blue", "red"], 
                                            color=self.trace2_color), #  numeric_df[numeric_columns[0]]),
                                            marker_symbol="x", 
                                            showlegend=True,
                                            name="outlier")
        
        trace1.hovertemplate = "<b>Trace 1</b><br>X: %{x}<br>Y: %{y}"
        trace2.hovertemplate = "<b>Trace 2</b><br>X: %{x}<br>Y: %{y}"
        
        self.f = go.FigureWidget(data=[trace1, trace2])
        self.f.update_layout(xaxis_type="date")

        # Customized legend
        self.f.add_trace(go.Scatter(y=[None], mode="markers",
                         marker=dict(symbol="circle", color="blue", size=10),
                         name="Not manually chosen"
                         ))
        self.f.add_trace(go.Scatter(y=[None], mode="markers",
                         marker=dict(symbol="triangle-up", color="green", size=10),
                         name="Not outlier",
                         ))
        self.f.add_trace(go.Scatter(y=[None], mode="markers",
                         marker=dict(symbol="x", color="red", size=10),
                         name="Outlier",
                         ))
        self.f.data[0].showlegend = True
        self.f.data[1].showlegend = True
        
        
        self.axis_dropdowns = interactive(self.update_axes, yaxis = self.columns, xaxis = self.columns, color = numeric_columns)
        
        self.f.data[0].on_selection(self.selection_fn)
        self.f.data[1].on_selection(self.selection_fn)
        print(f"{trace1=}")
        print(f"{trace2=}")
        return VBox((HBox(self.axis_dropdowns.children), self.f))
    
    def update_axes(self, xaxis, yaxis, color, trace_idx):
        scatter = self.f.data[trace_idx]  # Borde jag ändra den här med två trace?
        scatter.x = self.df[xaxis]
        scatter.y = self.df[yaxis]
        scatter.marker.color = self.df[color]
        with self.f.batch_update():
            self.f.layout.xaxis.title = xaxis
            self.f.layout.yaxis.title = yaxis
   
    def update_manual_outlier(self, row):
        row["Manual_Outlier"] = 1 if self.df[row[0]]["Manual_Outlier"] != 1 else 0
        return row
    
    # def multiply_rows(row): Use this solution instead of iterrows
        # return row['column1'] * row['column2']

        # my_df['multiplied'] = my_df.apply(multiply_rows,axis=1)

    def update_temp_df_last_sel(self, row, last_selected):
        row["Last_selected"] = last_selected
        return row        

    def remove_selected_data_points(self, current_list_x, current_list_y, points):
        current_list_x = np.delete(current_list_x, points.point_inds)  # Ska det vara så här?
        current_list_y = np.delete(current_list_y, points.point_inds)
        return current_list_x, current_list_y

    def get_x_and_y_values_current_trace(self, trace):  # En bra idé att slå ihop de två get x_and_y-metoderna alternativt bryta ut raderna som är lika?
        trace_value = 0 if trace.name == "non-outlier" else 1
        x_values = np.array(self.f.data[trace_value].x)
        y_values = np.array(self.f.data[trace_value].y)
        return x_values, y_values
    
    def get_x_and_y_values_other_trace(self, trace):
        trace_value = 0 if trace.name == "outlier" else 1
        x_values = np.array(self.f.data[trace_value].x)
        y_values = np.array(self.f.data[trace_value].y)
        return x_values, y_values  

    def append_selected_data_points(self, current_list_x, current_list_y, points):
        xs_to_datetime = []
        for x in points.xs:  
            x = parse_to_datetime(x)
            xs_to_datetime.append(x)
        """ xs_series = pd.Series(points.xs)
        xs_to_datetime = xs_series.apply(parse_to_datetime) """

        appended_list_x = np.append(current_list_x, xs_to_datetime)
        appended_list_y = np.append(current_list_y, points.ys)
        # appended_list_x = current_list_x.append(pd.Series(points.xs))
        # appended_list_y = current_list_y.append(pd.Series(points.ys))
        return appended_list_x, appended_list_y

    def selection_fn(self,trace,points,selector):
        # self.chosen_color_column = self.axis_dropdowns.children[2].value 
        
        """ temp_df["Last_selected"] = temp_df.apply(lambda row: self.update_temp_df_last_sel(row, last_selected), axis=1) """
        # Get the selected points based on x values
        # TODO: Should I change to index instead?
        # TODO: Skip temp_df and change df to df
        # TODO: Ändra namn på trace1 och trace2 samt trace1_color
     
        mask = self.df["Date/Time"].isin(points.xs)
        
        # 1 => 0
        # 0 => 1
        # -1 => 1
        """ self.df[mask]['Manual_Outlier'] = abs(self.df[mask]['Manual_Outlier'] - 1)
        self.df[mask]['Manual_Outlier'][self.df[mask]['Manual_Outlier'] == 2] = 1 """
        
        self.df.loc[mask, 'Manual_Outlier'] = self.df.loc[mask, 'Manual_Outlier'].apply(lambda x: 0 if x == 1 else 1)
        
        

        """ # funktion som kollar om vilka värden som finns i båda listorna. Borde finnas i NumPy.
        # set(a,b).union()
        # mo = self.df[gemensamma värden]['Manual_Outliers]
        # 

        np.where(self.df.index in points.xs,[self.df, points.xs])
        self.df.index where index in points.xs """
        # Add selected data points to the other trace and update it
        # Fixa så att när datapunkten hoppar tillbaka non-outlier ska värdet vara 0
        other_trace_x, other_trace_y = self.get_x_and_y_values_other_trace(trace)
        other_trace_x, other_trace_y = self.append_selected_data_points(other_trace_x, other_trace_y, points)
        
        other_trace_name = "outlier" if trace.name == "non-outlier" else "non-outlier"
        print(f"{mask=}")
        print(f"{self.df['Manual_Outlier']=}")
        # If data points in "outlier" have been added to "non-outlier"-trace, then sort x axis on datetime 
        if trace.name == "outlier":
            sort_indices = np.argsort(other_trace_x)
            other_trace_x = other_trace_x[sort_indices]
            other_trace_y = other_trace_y[sort_indices]
            self.df.loc[mask, "Manual_Outlier"] = 0  # Varför blir det bara trianglar på de datapunkter som låg i trace 1 från början?
        print(f"{self.df['Manual_Outlier']=}")
        self.f.update_traces(x=other_trace_x, y=other_trace_y, selector=dict(name=other_trace_name))

        # Remove selected data points from current trace and update it
        trace_x, trace_y = self.get_x_and_y_values_current_trace(trace)
        trace_x, trace_y = self.remove_selected_data_points(trace_x, trace_y, points)
        self.f.update_traces(x=trace_x, y=trace_y, selector=dict(name=trace.name))
        
        # Update marker symbol in trace1
        symbols = {-1: "circle", 0: "triangle-up"}
        trace1_x_values = self.f.data[0].x
        self.trace1_color = [x["Manual_Outlier"] for _, x in self.df.iterrows() if x["Date/Time"] in trace1_x_values and x["Manual_Outlier"] != 1]
        marker_symbols = [symbols[i] for i in self.trace1_color]  
        self.f.update_traces(marker_color=self.trace1_color, marker_symbol=marker_symbols, selector=dict(name="non-outlier")) 
        print(f"{self.f}")





In [37]:
#!/usr/bin/env python
# coding: utf-8

from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import time

df = pd.read_csv("data/asset-data-export_vEm3Jd5916-64098d190a5ca-1678347545.csv", delimiter=";", header=4)  #, index_col="Date/Time") 
# df = df.rename(columns={"Unnamed: 0": "Date/Time"})

df["Date/Time"] = pd.to_datetime(df["Date/Time"]) 
df["Date/Time-unix"] = df["Date/Time"].apply(lambda x: int(time.mktime(x.timetuple())))

chooser = interactive_data_chooser(df[:100], df.columns)  # Även om jag tar med hela datasetet är det endast de första 30 datapunkterna, fram till 8/3 14.30 som inte är outliers
chooser.activate_plot()


70
             Date/Time  Loss  Threshold  Anomaly  Water_level
30 2022-04-22 22:40:00   1.0        0.8     True           10
31 2022-04-22 23:00:00   1.0        0.8     True            7
32 2022-04-22 23:20:00   1.0        0.8     True            6
33 2022-04-22 23:40:00   1.0        0.8     True            5
34 2022-04-23 00:00:00   1.0        0.8     True            5
..                 ...   ...        ...      ...          ...
95 2022-04-23 20:20:00   1.0        0.8     True            0
96 2022-04-23 20:40:00   1.0        0.8     True            2
97 2022-04-23 21:00:00   1.0        0.8     True            2
98 2022-04-23 21:20:00   1.0        0.8     True            3
99 2022-04-23 21:40:00   1.0        0.8     True            9

[70 rows x 5 columns]


VBox(children=(HBox(), FigureWidget({
    'data': [{'marker': {'colorscale': [[0.0, 'blue'], [1.0, 'green']], …

mask=0     False
1     False
2     False
3     False
4     False
      ...  
95    False
96    False
97    False
98    False
99    False
Name: Date/Time, Length: 100, dtype: bool
self.df['Manual_Outlier']=0    -1
1    -1
2    -1
3    -1
4    -1
     ..
95   -1
96   -1
97   -1
98   -1
99   -1
Name: Manual_Outlier, Length: 100, dtype: int64
self.df['Manual_Outlier']=0    -1
1    -1
2    -1
3    -1
4    -1
     ..
95   -1
96   -1
97   -1
98   -1
99   -1
Name: Manual_Outlier, Length: 100, dtype: int64
self.trace1_color=[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
marker_symbols=['circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle', 'circle']
FigureWidget({
    'data': [{'marker': {'c

In [84]:

df = pd.read_csv("data/manipulated_data.csv", delimiter=";", header=3)  #, index_col="Date/Time") 
df["Date/Time"] = pd.to_datetime(df["Date/Time"]) 
df["Date/Time-unix"] = df["Date/Time"].apply(lambda x: int(time.mktime(x.timetuple())))
df["Model_Outlier"] = 0
df["Manual_Outlier"] = -1
df = df[:100]
# df.loc[30:]["Model_Outlier"] = 1 Den här funkar inte, då finns alla 100 datapunkter med i trace och alla 100 plottas
df.loc[30:, "Model_Outlier"] = 1  # Den här funkar, då finns enbart de första 30 datapunkterna med i trace och dessa 30 plottas
trace = go.Scatter(x=df[(df["Model_Outlier"] != 1) | \
                                           (df["Model_Outlier"] == 1) & (df["Manual_Outlier"] == 0)]["Date/Time"], 
                                y=df[(df["Model_Outlier"] != 1) | \
                                           (df["Model_Outlier"] == 1) & (df["Manual_Outlier"] == 0)]["Water level, Nap (cm)"],
                                mode="markers+lines", 
                                selected_marker_color = "orange",
                                visible=True,
                                opacity=1.0,
                                marker=dict(size=10, 
                                            colorscale=["blue", "green"]), # color=numeric_df[numeric_columns[0]]),
                                            showlegend=True,
                                            name="non-outlier") 
layout = go.Layout(title="Simple Scatter Plot",
                   xaxis=dict(title="X-axis"),
                   yaxis=dict(title="Y-axis"))

fig = go.FigureWidget(data=trace, layout=layout)
fig.show()

trace=Scatter({
    'marker': {'colorscale': [[0.0, 'blue'], [1.0, 'green']], 'size': 10},
    'mode': 'markers+lines',
    'name': 'non-outlier',
    'opacity': 1.0,
    'selected': {'marker': {'color': 'orange'}},
    'showlegend': True,
    'visible': True,
    'x': array([datetime.datetime(2022, 3, 8, 0, 0, 4),
                datetime.datetime(2022, 3, 8, 0, 30),
                datetime.datetime(2022, 3, 8, 1, 0),
                datetime.datetime(2022, 3, 8, 1, 30),
                datetime.datetime(2022, 3, 8, 2, 0),
                datetime.datetime(2022, 3, 8, 2, 30),
                datetime.datetime(2022, 3, 8, 3, 0),
                datetime.datetime(2022, 3, 8, 3, 30, 4),
                datetime.datetime(2022, 3, 8, 4, 0),
                datetime.datetime(2022, 3, 8, 4, 30),
                datetime.datetime(2022, 3, 8, 5, 0),
                datetime.datetime(2022, 3, 8, 5, 30, 4),
                datetime.datetime(2022, 3, 8, 6, 0),
                datetime.datetime(2