In [51]:
#!/usr/bin/env python
# coding: utf-8

import sys
import os
import matplotlib.pyplot as plt
import glob

from datetime import datetime
from datetime import timedelta
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from ipywidgets import interactive, HBox, VBox
# import keras
import pandas as pd
import numpy as np
import random
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
from functools import partial

class interactive_data_chooser:
    """
    Class for selecting data graphically and displaying it
    """
    def __init__(self, df, columns):
        # we don't need this dataframe, make a df_copy instead?
        self.outlier_df = pd.DataFrame()

        # self.df = df
        self.df_copy = df.copy()
        self.columns = columns
        self.df_copy["manual_outlier"] = -1
        self.df_copy["model_outlier"] = 0

        self.axis_dropdowns = None
        self.chosen_color_column = self.df_copy["manual_outlier"]
    
    def activate_plot(self):
        """
        Display interactive plot where images (data points in the plot)
        can be selected using box select or lasso select. 
        """
        # TODO: cmin and cmax depending on chosen_color_column (manual_outlier will always be -1 to 1) 
        self.df_copy.reset_index(inplace=True,drop=True)
        numeric_df = self.df_copy.select_dtypes(include=np.number)
        numeric_columns = numeric_df.columns
        
        self.f = go.FigureWidget([go.Scatter(y = self.df_copy[self.columns[0]], x = self.df_copy[self.columns[1]], mode = 'markers+lines',
                                       selected_marker_color = "red",
                                             marker=dict(color=numeric_df[numeric_columns[0]], 
                                                        colorscale=["blue", "green", "orange"],
                                                        size=10))])
        
        self.f.add_trace(go.Scatter(y=[None], mode='markers',
                         marker=dict(symbol='circle', color='blue'),
                         name='Not manually chosen'
                         ))
        
        self.f.add_trace(go.Scatter(y=[None], mode='markers',
                         marker=dict(symbol='triangle-up', color='green'),
                         name='Not outlier',
                         ))
        
        self.f.add_trace(go.Scatter(y=[None], mode='markers',
                         marker=dict(symbol='x', color='orange'),
                         name='Outlier',
                         ))
        
        
        
        scatter = self.f.data[0]
        scatter.marker.opacity = 0.5
        
        self.axis_dropdowns = interactive(self.update_axes, yaxis = self.columns, xaxis = self.columns, color = numeric_columns)
        # fig.for_each_trace(lambda trace: trace.on_selection(on_selection))
        scatter.on_selection(self.selection_fn)
        
        # Put everything together
        return VBox((HBox(self.axis_dropdowns.children),self.f))
    
    def update_axes(self, xaxis, yaxis,color):
        scatter = self.f.data[0]
        scatter.x = self.df_copy[xaxis]
        scatter.y = self.df_copy[yaxis]
        scatter.marker.color = self.df_copy[color]
        with self.f.batch_update():
            self.f.layout.xaxis.title = xaxis
            self.f.layout.yaxis.title = yaxis
   
    def update_manual_outlier(self, row):
        row["manual_outlier"] = 1 if self.df_copy[row[0]]["manual_outlier"] != 1 else 0
        return row
    
    # def multiply_rows(row): Use this solution instead of iterrows
        # return row['column1'] * row['column2']

        # my_df['multiplied'] = my_df.apply(multiply_rows,axis=1)

    def update_temp_df_last_sel(self, row, last_selected):
        row["last_selected"] = last_selected
        return row

    def selection_fn(self,trace,points,selector):
        """
        Keeping track of points manually selected and change values in column ["manual_outlier"].
        Value for points not manually selected is -1. If selected to be an outlier, value is set to 1 (and displayed as an x).
        If selected again not to be an outlier, value is set to 0 (and displayed as a triangle). Previous value is stored for future 
        possibility to undo selection. TODO: Skip previous value since undo button is not required?

        Each selection is stored in a temp_df and all temp_df's are stored in self.outlier_df.
        The dataframe drop_duplicates_df is the df which will be used to train the model, where only 
        the last manually made change to a data point is included. 
        """
        temp_df = self.df_copy.loc[points.point_inds]
        self.chosen_color_column = self.axis_dropdowns.children[2].value  
        
        last_selected = len(temp_df)
        
        # temp_df["last_selected"] = temp_df.apply(lambda row: self.update_temp_df_last_sel(row, last_selected), axis=1)
        for i in temp_df.iterrows():
            idx = i[0]
            temp_df.at[idx, "last_selected"] = last_selected
            # This is needed for keeping track of the changes
            temp_df.at[idx, "manual_outlier"] = 1 if self.df_copy.at[idx, "manual_outlier"] != 1 else 0
            # This is needed for displaying values in the plot
            self.df_copy.at[idx, "manual_outlier"] = 1 if self.df_copy.at[idx, "manual_outlier"] != 1 else 0

        self.outlier_df = pd.concat([self.outlier_df, temp_df], ignore_index=False, axis=0)

        # Needed? Just for printing in the notebook
        no_points = "point" if last_selected == 1 else "points"
        print(f"Selected {last_selected} new {no_points}. Total: {len(self.outlier_df)}")

        # Drop duplicates (needed?) 
        drop_duplicates_df = self.outlier_df.drop_duplicates(subset=["x", "y1"], keep="last")
        drop_duplicates_df.sort_values(by=["x"], inplace=True)
        print(f"Unique points selected ({len(drop_duplicates_df)}):")
        for i in drop_duplicates_df.iterrows():
            outlier = "yes" if i[1][3] == 1 else "no"
            print(f"x: {int(i[1][0])}, y1: {int(i[1][1])}, outlier: {outlier}")

        # Update color and symbols based on value in manual_outlier
        symbols = self.df_copy["manual_outlier"].map({-1: "circle", 0: "triangle-up", 1: "x"})
        trace.update(marker_color=self.df_copy[self.chosen_color_column], marker_symbol=symbols)

    def clear_selection(self):
        self.outlier_df = self.outlier_df.iloc[0:0]
    
    def show_selected(self):
        for index, row in self.outlier_df.iterrows():
            plt.figure()
            plt.imshow(plt.imread(row['file']))
            plt.title(f"{row['time']}, wl: {row['wl']}, turb_s: {row['turb_sensor']}, turb_p: {row['turb_post']}")

    # create train model function based on outlier status in self.df

    # visualize result in graph

    # function to mark point as non-outlier DONE

    # button to undo choice

    # button to confirm (then train model), disable if not choosen areas == 1



In [52]:
#!/usr/bin/env python
# coding: utf-8

from dash import Dash, dcc, html, Input, Output
import plotly.express as px

def create_fake_df(n):
    """
    Creates a dataframe with n rows and columns "x", "y1" and "y2". 
    The data are integers, 0-100.
    """
    x = []
    y1 = []
    y2 = []

    for i in range(n):    
        x_int = i
        x.append(x_int)
        y1_int = random.randint(0, 100)
        y1.append(y1_int)
        y2_int = random.randint(0, 100)
        y2.append(y2_int)

    int_dict = {"x": np.sort(x), "y1": y1, "y2": y2}
    df = pd.DataFrame(int_dict)
    return df

df = create_fake_df(1000)
chooser = interactive_data_chooser(df, df.columns)
chooser.activate_plot()


VBox(children=(HBox(children=(Dropdown(description='xaxis', options=('x', 'y1', 'y2'), value='x'), Dropdown(de…

Selected 7 new points. Total: 7
Unique points selected (7):
x: 489, y1: 76, outlier: yes
x: 492, y1: 94, outlier: yes
x: 496, y1: 90, outlier: yes
x: 500, y1: 78, outlier: yes
x: 501, y1: 77, outlier: yes
x: 505, y1: 82, outlier: yes
x: 506, y1: 88, outlier: yes
Selected 5 new points. Total: 12
Unique points selected (8):
x: 489, y1: 76, outlier: no
x: 492, y1: 94, outlier: yes
x: 495, y1: 57, outlier: yes
x: 496, y1: 90, outlier: no
x: 500, y1: 78, outlier: no
x: 501, y1: 77, outlier: no
x: 505, y1: 82, outlier: yes
x: 506, y1: 88, outlier: yes




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Selected 4 new points. Total: 16
Unique points selected (12):
x: 489, y1: 76, outlier: no
x: 492, y1: 94, outlier: yes
x: 495, y1: 57, outlier: yes
x: 496, y1: 90, outlier: no
x: 500, y1: 78, outlier: no
x: 501, y1: 77, outlier: no
x: 505, y1: 82, outlier: yes
x: 506, y1: 88, outlier: yes
x: 528, y1: 96, outlier: yes
x: 530, y1: 73, outlier: yes
x: 531, y1: 100, outlier: yes
x: 532, y1: 90, outlier: yes




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [27]:
import plotly.graph_objects as go

# Create your plot data
x = [1, 2, 3, 4]
y = [10, 11, 12, 13]
symbols = ['circle', 'square', 'diamond', 'cross']

# Create a trace for each symbol
traces = []
for i in range(len(symbols)):
    traces.append(go.Scatter(
        x=[x[i]],
        y=[y[i]],
        mode='markers',
        marker=dict(symbol=symbols[i])
    ))

# Create the layout
layout = go.Layout(
    title='My Plot',
    xaxis=dict(title='X-axis'),
    yaxis=dict(title='Y-axis'),
    legend=dict(
        title='Symbol Legend',
        orientation='h',
        y=-0.2
    )
)

# Create the figure and add the traces and layout
fig = go.Figure(data=traces, layout=layout)

# Show the figure
fig.show()



In [3]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Create example data
import numpy as np
x = np.linspace(0, 2*np.pi, 100)
y = np.sin(x)

# Create scatter plot
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y, mode='markers'))

# Add dropdown menu
updatemenus = [{'buttons': [{'args': [{'selectedpoints': [0]}],
                              'label': 'Mean',
                              'method': 'restyle'},
                             {'args': [{'selectedpoints': [0]}],
                              'label': 'Median',
                              'method': 'restyle'}],
                'direction': 'down',
                'showactive': True}]
fig.update_layout(updatemenus=updatemenus)

# Add box select event handler
fig.add_layout_image(dict(
    source="https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/gitbook/images/lasso.png",
    xref="paper", yref="paper",
    x=1.05, y=1.2,
    sizex=0.2, sizey=0.2,
    xanchor="right", yanchor="bottom"
))

fig.update_layout(
    dragmode="select",
    annotations=[
        dict(
            text="Draw a box around the points to select",
            showarrow=False,
            xref="paper", yref="paper",
            x=0.5, y=1.1, font_size=16
        )
    ]
)

fig.update_layout(
    xaxis_title="X Axis Title",
    yaxis_title="Y Axis Title",
    title="Box Select and Dropdown Example"
)

# Define callback function for dropdown menu
def update_fig(selection):
    if selection == 'Mean':
        fig.update_traces(marker=dict(color='red', size=10))
        selected_points = fig.data[0].selectedpoints
        if selected_points:
            mean_value = np.mean(y[selected_points])
            fig.add_annotation(text=f"Mean: {mean_value:.2f}", xref="paper", yref="paper",
            x=0.8, y=0.9, showarrow=False, font_size=16)
    elif selection == 'Median':
        fig.update_traces(marker=dict(color='green', size=10))
        selected_points = fig.data[0].selectedpoints
        if selected_points:
            median_value = np.median(y[selected_points])
            fig.add_annotation(text=f"Median: {median_value:.2f}", xref="paper", yref="paper",
            x=0.8, y=0.8, showarrow=False, font_size=16)

# Set up callback for dropdown menu
fig.for_each_trace(lambda trace: trace.on_selection(lambda x,y,trace=trace: update_fig('Mean')))

fig.show()


In [2]:
import plotly.graph_objs as go
from ipywidgets import Button, Dropdown
from IPython.display import display

# Create example data
x = [1, 2, 3, 4, 5]
y = [10, 20, 30, 40, 50]

# Create plotly plot with box select enabled
trace = go.Scatter(x=x, y=y, mode='markers')
layout = go.Layout(title='Selected Points', xaxis=dict(range=[0, 6]), yaxis=dict(range=[0, 60]), dragmode='select')
fig = go.Figure(data=[trace], layout=layout)

# Define event handler function to display dropdown menu
def handle_selection(event):
    if event is None or len(event) == 0:
        return
    
    # Create the dropdown menu with the selected values
    dropdown_options = [
        {'label': 'Mean', 'value': 'mean'},
        {'label': 'Max', 'value': 'max'},
        {'label': 'Min', 'value': 'min'},
    ]
    dropdown = Dropdown(options=dropdown_options, description='Select an operation')
    display(dropdown)
    
# Add event handler function to the plotly plot
fig.layout.on_change(handle_selection, 'selectedpoints')

# Create a button to clear the selection and hide the dropdown menu
button = Button(description='Clear Selection')
button.on_click(lambda _: dropdown.close() if 'dropdown' in globals() else None)
display(fig, button)




ValueError: Invalid property specification(s): ['selectedpoints']

In [5]:
glen_1 = pd.read_csv("data/asset-data-export_O12QnL6kAl-640876dfe5066-1678276319.csv")
glen_1

Unnamed: 0,"Asset name;""NU4202 Naset"""
"Exported on;""8 March 2023","12:51:59"""
"Timezone;""UTC +1""",
"Date/Time;""Water level","Nap (cm)"""
2022-03-08 00:00:04;24,
2022-03-08 00:30:00;24,
...,...
2023-03-07 23:00:00;26,
2023-03-07 23:30:00;25,
2023-03-08 00:00:04;25,
2023-03-08 00:30:00;25,


In [6]:
glen_2 = pd.read_csv("data/asset-data-export_Zga3AM63oO-64098cbc36e23-1678347452.csv")
glen_2

Unnamed: 0,Asset name;NU4201 Spektrumgatan
Exported on;9 March 2023,08:37:32
Timezone;UTC +1,
;,
Date/Time;Water level,Nap (cm)
2022-03-09 00:00;5,
...,...
2023-03-08 23:00;1,
2023-03-08 23:30;1,
2023-03-09 00:00;1,
2023-03-09 00:30;1,


In [12]:
glen_3 = pd.read_csv("data/asset-data-export_vEm3Jd5916-64098d190a5ca-1678347545.csv")
glen_3.head()

Unnamed: 0,Asset name;NU3328 Strandvägen
Exported on;9 March 2023,08:39:05
Timezone;UTC +1,
;,
Date/Time;Water level,Nap (cm)
2022-04-22 12:47;11,
