In [42]:
#!/usr/bin/env python
# coding: utf-8

import sys
import os
import matplotlib.pyplot as plt
import glob
from datetime import datetime
from datetime import timedelta
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from ipywidgets import interactive, HBox, VBox
# import keras
import pandas as pd
import numpy as np
import random


class interactive_data_chooser:
    """
    Interactive scatter plot for hand-labelling rows of a dataframe.

    The dataframe passed in is modified in place: two integer flag columns
    are added (``manual_outlier`` and ``model_outlier``) and, when the plot
    is activated, its index is reset to 0..n-1.

    Attributes
    ----------
    df : pandas.DataFrame
        The caller's dataframe (same object, not a copy).
    columns : sequence of str
        Column names offered in the x/y axis dropdowns.
    outlier_df : pandas.DataFrame
        Accumulated copies of the rows picked so far with box/lasso select.
        Same columns as ``df``; empty until something is selected.
    f : plotly.graph_objs.FigureWidget
        The figure widget; only exists after ``activate_plot()`` was called.

    ``manual_outlier`` values as written by this class: -1 is the initial
    value, 1 is written when a selected row still held -1, and 0 is written
    when a selected row held anything else.
    """
    def __init__(self, df, columns):
        """
        Parameters
        ----------
        df : pandas.DataFrame
            Data to plot. Mutated in place (flag columns are added/reset).
        columns : sequence of str
            Names of the columns of ``df`` selectable as plot axes.
        """
        self.df = df
        self.columns = columns
        self.df["manual_outlier"] = -1
        self.df["model_outlier"] = 0

        # Start with an empty selection that has the same schema as df.
        # (pd.DataFrame(df, columns) would pass `columns` as the *index*
        # argument and produce all-NaN rows instead of an empty frame.)
        self.outlier_df = pd.DataFrame(columns=self.df.columns.values)
    
    def activate_plot(self):
        """
        Build the interactive plot and return it as a widget.

        Data points can be selected using box select or lasso select;
        every selection is handled by ``selection_fn`` and the selected rows
        are accumulated in the instance attribute ``outlier_df``.

        Returns
        -------
        ipywidgets.VBox
            The axis/color dropdowns stacked on top of the figure widget.
        """
        # Point indices reported by plotly are positions in the trace, so
        # keep the dataframe index aligned with row positions.
        self.df.reset_index(inplace=True, drop=True)

        numeric_columns = list(self.df.select_dtypes(include=np.number).columns)
        # List "manual_outlier" first: the first option is the dropdown's
        # default, and the figure is initially colored by that column.
        color_options = ["manual_outlier"] + [
            name for name in numeric_columns if name != "manual_outlier"
        ]

        # Both axis dropdowns default to the first entry of self.columns,
        # so the initial trace uses that column on both axes.
        first_column = self.columns[0]
        self.f = go.FigureWidget([go.Scatter(
            x=self.df[first_column],
            y=self.df[first_column],
            mode='markers',
            selected_marker_color="red",
            marker=dict(color=self.df["manual_outlier"],
                        opacity=0.5,
                        colorbar=dict(thickness=10)),
        )])
        # Label the axes right away so the figure matches the dropdowns
        # before the user changes anything.
        self.f.layout.xaxis.title = first_column
        self.f.layout.yaxis.title = first_column

        scatter = self.f.data[0]
        axis_dropdowns = interactive(self.update_axes, yaxis=self.columns,
                                     xaxis=self.columns, color=color_options)
        scatter.on_selection(self.selection_fn)

        # Put everything together
        return VBox((HBox(axis_dropdowns.children), self.f))
    
    def update_axes(self, xaxis, yaxis, color):
        """
        Dropdown callback: re-plot with the chosen columns.

        Parameters
        ----------
        xaxis, yaxis : str
            Column names of ``df`` to show on the x and y axis.
        color : str
            Column name of ``df`` used as marker color.

        Note: changing any dropdown also discards the current selection
        (``outlier_df`` is replaced by an empty frame).
        """
        scatter = self.f.data[0]
        # Apply trace and layout changes as one batched widget update.
        with self.f.batch_update():
            scatter.x = self.df[xaxis]
            scatter.y = self.df[yaxis]
            scatter.marker.color = self.df[color]
            self.f.layout.xaxis.title = xaxis
            self.f.layout.yaxis.title = yaxis

        self.outlier_df = pd.DataFrame(columns=self.df.columns.values)

    def selection_fn(self, trace, points, selector):
        """
        Selection callback registered on the scatter trace.

        Updates ``manual_outlier`` for every selected row (-1 becomes 1,
        anything else becomes 0), appends a copy of those rows to
        ``outlier_df`` and prints how many rows were added.

        Parameters
        ----------
        trace, selector
            Passed by plotly; not used here.
        points
            Object with a ``point_inds`` sequence holding the positions of
            the selected points within the trace.
        """
        selected = list(points.point_inds)
        old_selected_number = len(self.outlier_df)

        if selected:
            # point_inds are positions, so translate them to index labels.
            rows = self.df.index[selected]
            labels = self.df.loc[rows, "manual_outlier"]
            self.df.loc[rows, "manual_outlier"] = np.where(labels == -1, 1, 0)

            # Copy the rows *after* the update so the stored selection
            # carries the label that was just assigned.
            picked = self.df.loc[rows]
            if self.outlier_df.empty:
                # Avoid concatenating onto an empty object-dtype frame,
                # which would upcast the numeric columns.
                self.outlier_df = picked.reset_index(drop=True)
            else:
                self.outlier_df = pd.concat([self.outlier_df, picked], ignore_index=True, axis=0)

        print(f"Selected {len(self.outlier_df) - old_selected_number} new points. Total: {len(self.outlier_df)}")

    def clear_selection(self):
        """Empty ``outlier_df`` while keeping its columns."""
        self.outlier_df = self.outlier_df.iloc[0:0]
    
    def show_selected(self):
        """
        Show one matplotlib figure per selected row.

        Each row of ``outlier_df`` must provide the columns ``file`` (path
        readable by ``plt.imread``), ``time``, ``wl``, ``turb_sensor`` and
        ``turb_post``; a missing column raises ``KeyError``.
        """
        for _, row in self.outlier_df.iterrows():
            plt.figure()
            plt.imshow(plt.imread(row['file']))
            plt.title(f"{row['time']}, wl: {row['wl']}, turb_s: {row['turb_sensor']}, turb_p: {row['turb_post']}")

    # create train model function based on outlier status in self.df

    # visualize result in graph

    # function to mark point as non-outlier

def create_fake_df(n):
    """
    Build a dataframe of random test data.

    The frame has n rows and the three columns "x", "y1" and "y2"; every
    value is an integer drawn with random.randint(0, 100), i.e. from 0 to
    100 inclusive.
    """
    # One list per column; dict insertion order gives the column order.
    data = {"x": [], "y1": [], "y2": []}

    for _ in range(n):
        # Draw row by row (x, then y1, then y2) so the sequence of RNG
        # calls is the same for a given seed.
        for values in data.values():
            values.append(random.randint(0, 100))

    return pd.DataFrame(data)

In [44]:
#!/usr/bin/env python
# coding: utf-8


from dash import Dash, dcc, html, Input, Output
import plotly.express as px


# Seed the stdlib RNG used by create_fake_df() so the demo data is the same
# on every Restart & Run All.
SEED = 42
random.seed(SEED)

# 33 rows of random integers in the columns "x", "y1" and "y2".
df = create_fake_df(33)

# df.columns is read before the chooser adds its flag columns, so only the
# three data columns are offered in the axis dropdowns.
chooser = interactive_data_chooser(df, df.columns)
# Last expression of the cell: the returned widget is what gets displayed.
chooser.activate_plot()

VBox(children=(HBox(children=(Dropdown(description='xaxis', options=('x', 'y1', 'y2'), value='x'), Dropdown(de…

points: Points(point_inds=[10],
       xs=[99],
       ys=[100],
       trace_name='trace 0',
       trace_index=0)
temp_df.iterrows()
10
point x
99
point y
100
     x   y1   y2  manual_outlier  model_outlier
0    1   68   32              -1              0
1   38   62  100              -1              0
2   77   50   29              -1              0
3   51    4   70              -1              0
4   49   99    5              -1              0
5   94   23   70              -1              0
6   82   50   11              -1              0
7   73   62    9              -1              0
8   75   33   36              -1              0
9   19   59   30              -1              0
10  99  100   90               1              0
11  65   58   37              -1              0
12  23   49   51              -1              0
13  28   68   46              -1              0
14  37   54   57              -1              0
15  45   62   70              -1              0
16  90   59   92       

In [17]:
# Preview the first rows of df (shown via the cell's rich display).
# NOTE(review): the recorded output lists only x, y1, y2 and this cell's
# execution count (17) is lower than the cells above (42, 44), so it looks
# like a stale run from before the chooser added its flag columns. Re-run
# top to bottom to confirm.
df.head()

Unnamed: 0,x,y1,y2
0,6,97,5
1,98,73,50
2,17,83,42
3,55,95,50
4,35,67,56
