In [68]:
#!/usr/bin/env python
# coding: utf-8

import sys
import os
import matplotlib.pyplot as plt
import glob

from datetime import datetime
from datetime import timedelta
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from ipywidgets import interactive, HBox, VBox
# import keras
import pandas as pd
import numpy as np
import random
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
from functools import partial
from tensorflow import keras


def load_model():  
    """
    Load the most recent Keras model file from the './models' directory.

    Returns:
    -------
    keras.engine.training.Model
        The loaded Keras model.

    Raises:  # Really?
    ------
    OSError
        If no model files are found in the directory.
    """
    model_dir = "./models"
    model_files = glob.glob(os.path.join(model_dir, "*.h5"))
    model_files.sort(key=os.path.getctime, reverse=True)
    latest_model_file = model_files[0]
    loaded_model = keras.saving.load_model(latest_model_file)
    return loaded_model


def create_dataset(X, y, time_steps=1):
    """
    Create a time series dataset from the input and target data.

    Parameters:
    ----------
    X : pandas.DataFrame
        The input data.
    y : pandas.Series
        The target data.
    time_steps : int, optional (default=1)
        The number of time steps to include in each sample.

    Returns:
    -------
    tuple of numpy.ndarray
        The input and target data formatted as a time series dataset.

    Raises:
    ------
    ValueError
        If the length of X and y are different.
    """
    if len(X) != len(y):  # ChatGPTs egna initiativ
        raise ValueError("X and y must have the same length.")    
    
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps])

    return np.array(Xs), np.array(ys)


def predict(model, X):  # Ta bort :)
    return model.predict(X)


def create_test_score_df(test, time_steps, mae_loss, threshold, column):
    """
    Create a pandas DataFrame with test score information for anomaly detection.

    Parameters:
    ----------
    test : pandas.DataFrame
        The test data used for anomaly detection.
    time_steps : int
        The number of time steps used to create the input data.
    mae_loss : float
        The mean absolute error loss threshold used for anomaly detection.
    threshold : float
        The anomaly detection threshold value.
    column : str
        The column name for the data being tested.

    Returns:
    -------
    pandas.DataFrame
        A DataFrame containing test score information for anomaly detection.
    """
    test_score_df = pd.DataFrame(index=test[time_steps:].index)
    test_score_df["Date/Time"] = test["Date/Time"]
    test_score_df["Loss"] = mae_loss
    test_score_df["Threshold"] = threshold
    test_score_df["Anomaly"] = test_score_df.Loss > test_score_df.Threshold
    test_score_df["Water_level"] = test[time_steps:][column]
    return test_score_df


def create_anomaly_df(df):
    return df[df.Anomaly == True]


def mae_loss(X_pred, X):
    return np.mean(np.abs(X_pred, X), axis=1)


def get_anomalies_df(model, df, column, time_steps):
    X, _ = create_dataset(df[[column]], df[[column]], time_steps)
    pred = predict(model, X)
    X = X.astype("float64")
    # X = float(X)  # Change float on all values in the array!!!
    data_mae_loss = mae_loss(pred, X)
    THRESHOLD = 0.8
    score_df = create_test_score_df(df, time_steps, data_mae_loss, THRESHOLD, column)
    anomalies = create_anomaly_df(score_df)
    return anomalies
    

class interactive_data_chooser:
    """
    Class for selecting data graphically and displaying it
    """
    def __init__(self, df, columns):
        # we don't need this dataframe, make a df_copy instead?
        self.outlier_df = pd.DataFrame()

        # self.df = df
        self.df_copy = df.copy()
        self.columns = columns
        self.df_copy["Manual_Outlier"] = -1
        self.df_copy["Model_Outlier"] = 0
        # Funktion som returnerar anomalies_df
        # self.df_copy["model_outlier"] 
        # self.df_copy[self.df_copy["Date/Time"] == anomalies["Date/Time"]]["model_outlier"] = 1
        # Testa i debug console
        self.axis_dropdowns = None
        self.chosen_color_column = self.df_copy["Manual_Outlier"]
        self.trace1_color = None
        self.trace2_color = None
    
    def activate_plot(self):
        self.df_copy.reset_index(inplace=True,drop=True)
        numeric_df = self.df_copy.select_dtypes(include=np.number)
        numeric_columns = numeric_df.columns
        model = load_model()
        anomalies = get_anomalies_df(model, self.df_copy, 'Water level, Nap (cm)', 30)
        # self.df_copy[self.df_copy["Date/Time"] == anomalies["Date/Time"]]["model_outlier"] = 1
        self.df_copy.loc[self.df_copy.iloc[:, 0].isin(anomalies["Date/Time"]), "Model_Outlier"] = 1  # self.df_copy.loc[self.df_copy["Date/Time"].isin(anomalies["Date/Time"]), "model_outlier"] = 1
        """ self.df_copy["print_outlier"] = 1
        self.df_copy["print_outlier"] = 0 if self.df_copy["manual_outlier"] == 0 else 1 """
        # Create the scatter trace with markers and lines for z < 1
        trace1 = go.Scatter(x=self.df_copy[self.chosen_color_column < 1]["Date/Time"],  # ["Unnamed: 0"],   
                                y=self.df_copy[self.chosen_color_column < 1]["Water level, Nap (cm)"],
                                mode="markers+lines", 
                                selected_marker_color = "orange",
                                visible=True,
                                opacity=1.0,
                                marker=dict(size=10, 
                                            colorscale=["blue", "green"], 
                                            color=self.trace1_color), # color=numeric_df[numeric_columns[0]]),
                                            showlegend=True,
                                            name="non-outlier")

        # Add a second scatter trace with markers only for z = 1
        # self.df_copy[self.df_copy["model_outlier"] == 1] if self.df_copy["manual_outlier"] == self.df_copy["model_outlier"]
        trace2 = go.Scatter(x=self.df_copy[(self.df_copy["Model_Outlier"] == 1) & (self.df_copy["Manual_Outlier"] != 0)],# x=self.df_copy[self.chosen_color_column == 1]['Date/Time'], 
                                y=self.df_copy[self.chosen_color_column == 1]["Water level, Nap (cm)"],
                                mode="markers", 
                                selected_marker_color = "orange",
                                visible=True,
                                opacity=1.0,
                                marker=dict(size=10, 
                                            colorscale=["blue", "green", "red"], 
                                            color=self.trace2_color), #  numeric_df[numeric_columns[0]]),
                                            marker_symbol="x", 
                                            showlegend=True,
                                            name="outlier")
        
        trace1.hovertemplate = "<b>Trace 1</b><br>X: %{x}<br>Y: %{y}"
        trace2.hovertemplate = "<b>Trace 2</b><br>X: %{x}<br>Y: %{y}"
        
        self.f = go.FigureWidget(data=[trace1, trace2])

        # Customized legend
        self.f.add_trace(go.Scatter(y=[None], mode="markers",
                         marker=dict(symbol="circle", color="blue", size=10),
                         name="Not manually chosen"
                         ))
        self.f.add_trace(go.Scatter(y=[None], mode="markers",
                         marker=dict(symbol="triangle-up", color="green", size=10),
                         name="Not outlier",
                         ))
        self.f.add_trace(go.Scatter(y=[None], mode="markers",
                         marker=dict(symbol="x", color="red", size=10),
                         name="Outlier",
                         ))
        self.f.data[0].showlegend = False
        self.f.data[1].showlegend = False
        
        self.axis_dropdowns = interactive(self.update_axes, yaxis = self.columns, xaxis = self.columns, color = numeric_columns)
        
        self.f.data[0].on_selection(self.selection_fn)
        self.f.data[1].on_selection(self.selection_fn)
        
        return VBox((HBox(self.axis_dropdowns.children), self.f))
    
    def update_axes(self, xaxis, yaxis,color):
        scatter = self.f.data[0]
        scatter.x = self.df_copy[xaxis]
        scatter.y = self.df_copy[yaxis]
        scatter.marker.color = self.df_copy[color]
        with self.f.batch_update():
            self.f.layout.xaxis.title = xaxis
            self.f.layout.yaxis.title = yaxis
   
    def update_manual_outlier(self, row):
        row["Manual_Outlier"] = 1 if self.df_copy[row[0]]["Manual_Outlier"] != 1 else 0
        return row
    
    # def multiply_rows(row): Use this solution instead of iterrows
        # return row['column1'] * row['column2']

        # my_df['multiplied'] = my_df.apply(multiply_rows,axis=1)

    def update_temp_df_last_sel(self, row, last_selected):
        row["Last_selected"] = last_selected
        return row        

    def remove_selected_data_points(self, current_list_x, current_list_y, points):
        current_list_x = np.delete(current_list_x, points.point_inds)
        current_list_y = np.delete(current_list_y, points.point_inds)
        return current_list_x, current_list_y

    def get_x_and_y_values_current_trace(self, trace):  # Funkar det att slå ihop de två get x_and_y-metoderna?
        trace_value = 0 if trace.name == "non-outlier" else 1
        x_values = np.array(self.f.data[trace_value].x)
        y_values = np.array(self.f.data[trace_value].y)
        return x_values, y_values
    
    def get_x_and_y_values_other_trace(self, trace):
        trace_value = 0 if trace.name == "outlier" else 1
        print(f"{trace_value=}")
        x_values = np.array(self.f.data[trace_value].x)
        y_values = np.array(self.f.data[trace_value].y)
        return x_values, y_values
    
    def append_selected_data_points(self, current_list_x, current_list_y, points):
        appended_list_x = np.append(current_list_x, points.xs)
        appended_list_y = np.append(current_list_y, points.ys)
        return appended_list_x, appended_list_y

    def selection_fn(self,trace,points,selector):
        # Store the selected data points in temp_df
        # temp_df = self.df_copy[self.df_copy.index.isin(points.point_inds)]
        self.chosen_color_column = self.axis_dropdowns.children[2].value 
        
        """ temp_df["Last_selected"] = temp_df.apply(lambda row: self.update_temp_df_last_sel(row, last_selected), axis=1) """
        # Get the selected points based on x values
        # TODO: Should I change to index instead?
        # TODO: Skip temp_df and change df_copy to df
        # TODO: Ändra namn på trace1 och trace2 samt trace1_color
     
        print(f"{points.xs=}")
        mask = self.df_copy["Date/Time"].isin(points.xs)
        row_idx = self.df_copy[mask].index
        # For all rows in row_idx: set the value in Manual_Outlier to 1 if !=1, else 0
        # For the rest of the rows: keep the original values  
        condition = self.df_copy.index.isin(row_idx)
        x_param = np.where(self.df_copy["Manual_Outlier"] != 1, 1, 0)
        y_param = self.df_copy["Manual_Outlier"]
        self.df_copy["Manual_Outlier"] = np.where(condition, x_param, y_param)
        

        """ # funktion som kollar om vilka värden som finns i båda listorna. Borde finnas i NumPy.
        # set(a,b).union()
        # mo = self.df_copy[gemensamma värden]['Manual_Outliers]
        # 

        np.where(self.df_copy.index in points.xs,[self.df_copy, points.xs])
        self.df_copy.index where index in points.xs """
        # List only values in manual outlier for trace1 to get a correct plot

        self.trace1_color = [x for x in self.df_copy["Manual_Outlier"] if x != 1]
        #self.trace1_color = self.df_copy[self.df_copy['Manual_Outlier] != 1].values.to_list() 
        # FÖRSLAG: self.trace1_color = self.df_copy.loc[self.df_copy["Manual_Outlier"] != 1, "Manual_Outlier"]

        # Add selected data points to the other trace and update it
        # Timestamp på ena tracet och datetime på andra ... 
        other_trace_x, other_trace_y = self.get_x_and_y_values_other_trace(trace)
        print(f"{trace}")
        print(f"{other_trace_x=}")
        other_trace_x, other_trace_y = self.append_selected_data_points(other_trace_x, other_trace_y, points)
        other_trace_name = "outlier" if trace.name == "non-outlier" else "non-outlier"

        # If data points in "outlier" have been added to "non-outlier"-trace, then sort x axis on datetime 
        if trace.name == "outlier":
            sort_indices = np.argsort(other_trace_x)
            other_trace_x = other_trace_x[sort_indices]
            other_trace_y = other_trace_y[sort_indices]
        self.f.update_traces(x=other_trace_x, y=other_trace_y, selector=dict(name=other_trace_name))

        # Remove selected data points from current trace and update it
        trace_x, trace_y = self.get_x_and_y_values_current_trace(trace)
        trace_x, trace_y = self.remove_selected_data_points(trace_x, trace_y, points)
        self.f.update_traces(x=trace_x, y=trace_y, selector=dict(name=trace.name))

        # Update marker symbol in trace1
        symbols = {-1: "circle", 0: "triangle-up"}
        marker_symbols = [symbols[i] for i in self.trace1_color]
        self.f.update_traces(marker_color=self.trace1_color, marker_symbol=marker_symbols, selector=dict(name="non-outlier")) 




In [69]:
#!/usr/bin/env python
# coding: utf-8

from dash import Dash, dcc, html, Input, Output
import plotly.express as px

def create_fake_df(n):
    """
    Creates a dataframe with n rows and columns "x", "y1" and "y2". 
    The data are integers, 0-100.
    """
    x = []
    y1 = []
    y2 = []

    for i in range(n):    
        x_int = i
        x.append(x_int)
        y1_int = random.randint(0, 100)
        y1.append(y1_int)
        y2_int = random.randint(0, 100)
        y2.append(y2_int)

    int_dict = {"x": np.sort(x), "y1": y1, "y2": y2}
    df = pd.DataFrame(int_dict)
    return df

df = pd.read_csv("data/manipulated_data.csv", delimiter=";", header=3)  #, index_col="Date/Time") 
df["Date/Time"] = pd.to_datetime(df["Date/Time"]) 
# df["Unnamed: 0"] = pd.to_datetime(df["Unnamed: 0"])
# df["Date/Time"]
chooser = interactive_data_chooser(df[:100], df.columns)
chooser.activate_plot()




VBox(children=(HBox(children=(Dropdown(description='xaxis', options=('Date/Time', 'Water level, Nap (cm)'), va…

points.xs=['2022-03-08 00:30', '2022-03-08 01:00']
trace_value=1
Scatter({
    'hovertemplate': '<b>Trace 1</b><br>X: %{x}<br>Y: %{y}',
    'marker': {'color': array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], dtype=int64),
               'colorscale': [[0.0, 'blue'], [1.0, 'green']],
               'size': 10},
    'mode': 'markers+lines',
    'name': 'non-outlier',
    'opacity': 1.0,
    'selected': {'marker': {'color': 'orange'}},
    'selectedpoints': [1, 2],
    'showlegend': 

In [15]:
df[:100].sort_values("Date/Time")

Unnamed: 0,Date/Time,"Water level, Nap (cm)"
0,2022-03-08 00:00:04,24
1,2022-03-08 00:30:00,24
2,2022-03-08 01:00:00,24
3,2022-03-08 01:30:00,20
4,2022-03-08 02:00:00,20
...,...,...
95,2022-03-09 23:30:04,24
96,2022-03-10 00:00:00,24
97,2022-03-10 00:30:00,24
98,2022-03-10 01:00:00,24


In [None]:
# Kolla datatyp på date/time (pd.to_datetime, ibland behövs info om format), ta bort nullvärden
glen_1 = pd.read_csv("data/asset-data-export_Zga3AM63oO-64098cbc36e23-1678347452.csv", delimiter=";", header=3)
glen_1 = glen_1.dropna()
# glen_1["Date/Time"] = pd.to_datetime(glen_1["Date/Time"])
glen_1["Unnamed: 0"] = pd.to_datetime(glen_1["Unnamed: 0"])
glen_1

In [None]:
import plotly.graph_objs as go
from ipywidgets import Button, Dropdown
from IPython.display import display

# Create example data
x = [1, 2, 3, 4, 5]
y = [10, 20, 30, 40, 50]

# Create plotly plot with box select enabled
trace = go.Scatter(x=x, y=y, mode='markers')
layout = go.Layout(title='Selected Points', xaxis=dict(range=[0, 6]), yaxis=dict(range=[0, 60]), dragmode='select')
fig = go.Figure(data=[trace], layout=layout)

# Define event handler function to display dropdown menu
def handle_selection(event):
    if event is None or len(event) == 0:
        return
    
    # Create the dropdown menu with the selected values
    dropdown_options = [
        {'label': 'Mean', 'value': 'mean'},
        {'label': 'Max', 'value': 'max'},
        {'label': 'Min', 'value': 'min'},
    ]
    dropdown = Dropdown(options=dropdown_options, description='Select an operation')
    display(dropdown)
    
# Add event handler function to the plotly plot
fig.layout.on_change(handle_selection, 'selectedpoints')

# Create a button to clear the selection and hide the dropdown menu
button = Button(description='Clear Selection')
button.on_click(lambda _: dropdown.close() if 'dropdown' in globals() else None)
display(fig, button)




In [None]:
glen_2 = pd.read_csv("data/asset-data-export_Zga3AM63oO-64098cbc36e23-1678347452.csv")
glen_2

In [None]:
glen_3 = pd.read_csv("data/asset-data-export_vEm3Jd5916-64098d190a5ca-1678347545.csv")
glen_3.head()