In [21]:
#!/usr/bin/env python
# coding: utf-8

import sys
import os
import matplotlib.pyplot as plt
import glob

from datetime import datetime
from datetime import timedelta
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from ipywidgets import interactive, HBox, VBox
# import keras
import pandas as pd
import numpy as np
import random
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
from functools import partial

class interactive_data_chooser:
    """
    Class for selecting data graphically and displaying it
    """
    def __init__(self, df, columns):
        # we don't need this dataframe, make a df_copy instead?
        self.outlier_df = pd.DataFrame()

        # self.df = df
        self.df_copy = df.copy()
        self.columns = columns
        self.df_copy["manual_outlier"] = -1
        self.df_copy["model_outlier"] = 0

        self.axis_dropdowns = None
        self.chosen_color_column = self.df_copy["manual_outlier"]
        self.trace1_color = None
        self.trace2_color = None
    
    def activate_plot(self):
        """
        Display interactive plot where images (data points in the plot)
        can be selected using box select or lasso select. 
        """
        # TODO: cmin and cmax depending on chosen_color_column (manual_outlier will always be -1 to 1) 
        # color_continuous_scale=[(0, "red"), (0.5, "green"), (1, "blue")]) maybe work to not have dynamic values?
        self.df_copy.reset_index(inplace=True,drop=True)
        numeric_df = self.df_copy.select_dtypes(include=np.number)
        numeric_columns = numeric_df.columns
        
        
        
        """ self.f = go.FigureWidget([go.Scatter(y = self.df_copy[self.columns[0]], 
                                             x = self.df_copy[self.columns[1]], 
                                             mode = 'markers',
                                             selected_marker_color = "red", 
                                             name="default",
                                             marker=dict(color=numeric_df[numeric_columns[0]],
                                                        colorbar=dict(thickness=10), 
                                                        colorscale=["blue", "green", "orange"]))]) """
        
        

        # Create the scatter plot with markers and lines for z < 1
        trace1 = go.Scatter(x=self.df_copy.loc[self.chosen_color_column < 1, 'x'], 
                                    y=self.df_copy.loc[self.chosen_color_column < 1, 'y1'],
                                    mode='markers+lines', 
                                    selected_marker_color = "orange",
                                    visible=True,
                                    opacity=1.0,
                                    marker=dict(size=10, 
                                                colorscale=["blue", "green"], 
                                                color=self.trace1_color), # color=numeric_df[numeric_columns[0]]),
                                                showlegend=True,
                                                name="non-outlier")
        """ self.f.add_trace(go.Scatter(x=self.df_copy.loc[self.chosen_color_column < 1, 'x'], 
                                    y=self.df_copy.loc[self.chosen_color_column < 1, 'y1'],
                                    mode='markers+lines', 
                                    selected_marker_color = "red", 
                                    marker=dict(size=10, 
                                                colorscale=["blue", "green", "orange"], 
                                                color=numeric_df[numeric_columns[0]]),
                                                legendgroup="Trace",
                                                showlegend=True,
                                                name="non-outlier")) """

        # Add a second scatter trace with markers only for z = 1
        trace2 = go.Scatter(x=self.df_copy.loc[self.chosen_color_column == 1, 'x'], 
                                    y=self.df_copy.loc[self.chosen_color_column == 1, 'y1'],
                                    mode='markers', 
                                    selected_marker_color = "orange",
                                    visible=True,
                                    opacity=1.0,
                                    marker=dict(size=10, 
                                                colorscale=["blue", "green", "red"], 
                                                color=self.trace2_color), #  numeric_df[numeric_columns[0]]),
                                                marker_symbol="x", 
                                                showlegend=True,
                                                name="outlier")
        """ self.f.add_trace(go.Scatter(x=self.df_copy.loc[self.chosen_color_column == 1]['x'], 
                                    y=self.df_copy.loc[self.chosen_color_column == 1]['y1'],
                                    mode='markers', 
                                    selected_marker_color = "red", 
                                    marker=dict(size=10, 
                                                colorscale=["blue", "green", "orange"], 
                                                color=numeric_df[numeric_columns[0]]),
                                                legendgroup="Trace", 
                                                showlegend=True,
                                                name="outlier")) """
        
        
        
        trace1.hovertemplate = '<b>Trace 1</b><br>X: %{x}<br>Y: %{y}'
        trace2.hovertemplate = '<b>Trace 2</b><br>X: %{x}<br>Y: %{y}'
        
        self.f = go.FigureWidget(data=[trace1, trace2])
        # Adding trace1 and trace2 to trace when selecting data points
        """ for trace in self.f.data:
            trace.selectedpoints = []
 """
        # Customized legend
        self.f.add_trace(go.Scatter(y=[None], mode='markers',
                         marker=dict(symbol='circle', color='blue'),
                         name='Not manually chosen'
                         ))
        self.f.add_trace(go.Scatter(y=[None], mode='markers',
                         marker=dict(symbol='triangle-up', color='green'),
                         name='Not outlier',
                         ))
        self.f.add_trace(go.Scatter(y=[None], mode='markers',
                         marker=dict(symbol='x', color='red'),
                         name='Outlier',
                         ))
        self.f.data[0].showlegend = True
        
        scatter = self.f.data[0]
        scatter.marker.opacity = 0.5
        
        self.axis_dropdowns = interactive(self.update_axes, yaxis = self.columns, xaxis = self.columns, color = numeric_columns)
        # scatter.on_selection(self.selection_fn)
        """ for trace in self.f.data[0]:
            trace.on_selection(self.selection_fn) """
        self.f.data[0].on_selection(self.selection_fn)
        self.f.data[1].on_selection(self.selection_fn)
        
        # Put everything together
        return VBox((HBox(self.axis_dropdowns.children),self.f))
    
    def update_axes(self, xaxis, yaxis,color):
        scatter = self.f.data[0]
        scatter.x = self.df_copy[xaxis]
        scatter.y = self.df_copy[yaxis]
        scatter.marker.color = self.df_copy[color]
        with self.f.batch_update():
            self.f.layout.xaxis.title = xaxis
            self.f.layout.yaxis.title = yaxis
   
    def update_manual_outlier(self, row):
        row["manual_outlier"] = 1 if self.df_copy[row[0]]["manual_outlier"] != 1 else 0
        return row
    
    # def multiply_rows(row): Use this solution instead of iterrows
        # return row['column1'] * row['column2']

        # my_df['multiplied'] = my_df.apply(multiply_rows,axis=1)

    def update_temp_df_last_sel(self, row, last_selected):
        row["last_selected"] = last_selected
        return row        

    def selection_fn(self,trace,points,selector):
        """
        Keeping track of points manually selected and change values in column ["manual_outlier"].
        Value for points not manually selected is -1. If selected to be an outlier, value is set to 1 (and displayed as an x).
        If selected again not to be an outlier, value is set to 0 (and displayed as a triangle). Previous value is stored for future 
        possibility to undo selection. TODO: Skip previous value since undo button is not required?

        Each selection is stored in a temp_df and all temp_df's are stored in self.outlier_df.
        The dataframe drop_duplicates_df is the df which will be used to train the model, where only 
        the last manually made change to a data point is included. 
        """
        # Store the selected data points in temp_df
        temp_df = self.df_copy[self.df_copy["x"].isin(points.point_inds)]
        self.chosen_color_column = self.axis_dropdowns.children[2].value  
        
        last_selected = len(temp_df)

        
        """ temp_df["last_selected"] = temp_df.apply(lambda row: self.update_temp_df_last_sel(row, last_selected), axis=1) """
        # Get the selected points based on x values
        for x_value in points.xs:  
            """ temp_df.at[idx, "last_selected"] = last_selected """
            # This is needed for keeping track of the changes
            temp_df.at[x_value, "manual_outlier"] = 1 if self.df_copy.at[x_value, "manual_outlier"] != 1 else 0
            # This is needed for displaying values in the plot
            self.df_copy.at[x_value, "manual_outlier"] = 1 if (self.df_copy.at[x_value, "manual_outlier"] != 1) else 0
        
        selected_x = np.array([i for i in points.xs])
        selected_y = np.array([i for i in points.ys])

        # TODO: Ändra namn på trace1 och trace2 
        # List only values in manual outlier for trace1 to get a correct plot
        self.trace1_color = [x for x in self.df_copy["manual_outlier"] if x != 1]

        # Swap traces for selected data points and update the traces
        if trace.name == "non-outlier":
            # Add selected data points in non-outlier trace to outlier trace
            outlier_x = np.array(self.f.data[1].x)
            outlier_y = np.array(self.f.data[1].y)
            
            outlier_x = np.append(outlier_x, selected_x)
            outlier_y = np.append(outlier_y, selected_y)
            self.f.update_traces(y=outlier_y, x=outlier_x, selector=dict(name="outlier"))

            # Remove selected data points from non-outlier trace
            not_outlier_x = np.array(self.f.data[0].x)
            not_outlier_y = np.array(self.f.data[0].y)
            
            # TODO: Gör argsort här
            not_outlier_x = np.array([x for x in not_outlier_x if x not in selected_x])
            not_outlier_y = np.array([y for y in not_outlier_y if y not in selected_y])
            self.f.update_traces(y=not_outlier_y, x=not_outlier_x, selector=dict(name="non-outlier"))

        if trace.name == "outlier":
            # Add selected data points in outlier trace to non-outlier trace
            not_outlier_x = np.array(self.f.data[0].x)
            not_outlier_y = np.array(self.f.data[0].y)
           
            not_outlier_x = np.append(not_outlier_x, selected_x)
            not_outlier_y = np.append(not_outlier_y, selected_y)
            
            # Make an ascending sorting on x values and store the order of the changed indices in sort_indices
            # Then apply sort_indices on not_outlier x and y values, so that the data points values are kept as a pair
            sort_indices = np.argsort(not_outlier_x)
            not_outlier_x = not_outlier_x[sort_indices]
            not_outlier_y = not_outlier_y[sort_indices]

            self.f.update_traces(y=not_outlier_y, x=not_outlier_x, selector=dict(name="non-outlier"))

            # Remove selected data points from outlier trace
            outlier_x = np.array(self.f.data[1].x)
            outlier_y = np.array(self.f.data[1].y)
            outlier_x = np.array([x for x in outlier_x if x not in selected_x])
            outlier_y = np.array([y for y in outlier_y if y not in selected_y])
            self.f.update_traces(y=outlier_y, x=outlier_x, selector=dict(name="outlier"))
        
        # Update marker symbol in trace1
        symbols = {-1: "circle", 0: "triangle-up"}
        marker_symbols = [symbols[i] for i in self.trace1_color]
        self.f.update_traces(marker_color=self.trace1_color, marker_symbol=marker_symbols,selector=dict(name="non-outlier")) 

    def clear_selection(self):
        self.outlier_df = self.outlier_df.iloc[0:0]
    
    def show_selected(self):
        for index, row in self.outlier_df.iterrows():
            plt.figure()
            plt.imshow(plt.imread(row['file']))
            plt.title(f"{row['time']}, wl: {row['wl']}, turb_s: {row['turb_sensor']}, turb_p: {row['turb_post']}")



In [22]:
#!/usr/bin/env python
# coding: utf-8

from dash import Dash, dcc, html, Input, Output
import plotly.express as px

def create_fake_df(n):
    """
    Creates a dataframe with n rows and columns "x", "y1" and "y2". 
    The data are integers, 0-100.
    """
    x = []
    y1 = []
    y2 = []

    for i in range(n):    
        x_int = i
        x.append(x_int)
        y1_int = random.randint(0, 100)
        y1.append(y1_int)
        y2_int = random.randint(0, 100)
        y2.append(y2_int)

    int_dict = {"x": np.sort(x), "y1": y1, "y2": y2}
    df = pd.DataFrame(int_dict)
    return df

df = create_fake_df(100)
chooser = interactive_data_chooser(df, df.columns)
chooser.activate_plot()


In activate_plot


VBox(children=(HBox(children=(Dropdown(description='xaxis', options=('x', 'y1', 'y2'), value='x'), Dropdown(de…

IndexError: index 98 is out of bounds for axis 0 with size 98

IndexError: index 97 is out of bounds for axis 0 with size 97



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



IndexError: index 98 is out of bounds for axis 0 with size 98

IndexError: index 98 is out of bounds for axis 0 with size 98

In [2]:
# Kolla datatyp på date/time (pd.to_datetime, ibland behövs info om format), ta bort nullvärden
glen_1 = pd.read_csv("data/asset-data-export_O12QnL6kAl-640876dfe5066-1678276319.csv", delimiter=";", header=3)
glen_1 = glen_1.dropna()
glen_1["Date/Time"] = pd.to_datetime(glen_1["Date/Time"])
glen_1

Unnamed: 0,Date/Time,"Water level, Nap (cm)"
0,2022-03-08 00:00:04,24
1,2022-03-08 00:30:00,24
2,2022-03-08 01:00:00,24
3,2022-03-08 01:30:00,20
4,2022-03-08 02:00:00,20
...,...,...
17518,2023-03-07 23:00:00,26
17519,2023-03-07 23:30:00,25
17520,2023-03-08 00:00:04,25
17521,2023-03-08 00:30:00,25


In [2]:
import plotly.graph_objs as go
from ipywidgets import Button, Dropdown
from IPython.display import display

# Create example data
x = [1, 2, 3, 4, 5]
y = [10, 20, 30, 40, 50]

# Create plotly plot with box select enabled
trace = go.Scatter(x=x, y=y, mode='markers')
layout = go.Layout(title='Selected Points', xaxis=dict(range=[0, 6]), yaxis=dict(range=[0, 60]), dragmode='select')
fig = go.Figure(data=[trace], layout=layout)

# Define event handler function to display dropdown menu
def handle_selection(event):
    if event is None or len(event) == 0:
        return
    
    # Create the dropdown menu with the selected values
    dropdown_options = [
        {'label': 'Mean', 'value': 'mean'},
        {'label': 'Max', 'value': 'max'},
        {'label': 'Min', 'value': 'min'},
    ]
    dropdown = Dropdown(options=dropdown_options, description='Select an operation')
    display(dropdown)
    
# Add event handler function to the plotly plot
fig.layout.on_change(handle_selection, 'selectedpoints')

# Create a button to clear the selection and hide the dropdown menu
button = Button(description='Clear Selection')
button.on_click(lambda _: dropdown.close() if 'dropdown' in globals() else None)
display(fig, button)




ValueError: Invalid property specification(s): ['selectedpoints']

In [6]:
glen_2 = pd.read_csv("data/asset-data-export_Zga3AM63oO-64098cbc36e23-1678347452.csv")
glen_2

Unnamed: 0,Asset name;NU4201 Spektrumgatan
Exported on;9 March 2023,08:37:32
Timezone;UTC +1,
;,
Date/Time;Water level,Nap (cm)
2022-03-09 00:00;5,
...,...
2023-03-08 23:00;1,
2023-03-08 23:30;1,
2023-03-09 00:00;1,
2023-03-09 00:30;1,


In [12]:
glen_3 = pd.read_csv("data/asset-data-export_vEm3Jd5916-64098d190a5ca-1678347545.csv")
glen_3.head()

Unnamed: 0,Asset name;NU3328 Strandvägen
Exported on;9 March 2023,08:39:05
Timezone;UTC +1,
;,
Date/Time;Water level,Nap (cm)
2022-04-22 12:47;11,
