In [1]:
import os
import glob
from pathlib import Path

import requests
import difflib
import random


import json
import csv
import numpy as np
import pandas as pd 
import sklearn as sk 
import umap
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN
from sklearn import preprocessing
%matplotlib notebook  
import matplotlib.pyplot as plt
import mplcursors
from ipywidgets import widgets
from ipywidgets import GridspecLayout
from ipywidgets import interactive

In [2]:
# Load the aggregated per-player statistics table.
# (The exploratory os.getcwd()/os.listdir() calls were removed: their results
# were discarded mid-cell, so they neither displayed nor affected anything.)
filepath = "./transformed-data/aggdf.csv"
df = pd.read_csv(filepath)
print(df.columns)

Index(['player', 'aces', 'bk_pts', 'bp_saved', 'crosscourt', 'deep', 'dfs',
       'down_middle', 'down_the_line', 'err_deep', 'err_net', 'err_wide',
       'err_wide_deep', 'first_aces', 'first_forced', 'first_in', 'first_pts',
       'first_pts_won', 'first_unret', 'first_won', 'first_won_lte_3_shots',
       'forced_err', 'in_play', 'in_play_won', 'induced_forced', 'inside_in',
       'inside_out', 'net_pts', 'net_unforced', 'net_winner', 'passed_at_net',
       'passing_shot_induced_forced', 'pt_ending', 'pts', 'pts_won',
       'pts_won_lte_3_shots', 'return_forced', 'return_pts', 'return_pts_won',
       'returnable', 'returnable_won', 'second_aces', 'second_forced',
       'second_in', 'second_pts', 'second_pts_won', 'second_unret',
       'second_won', 'second_won_lte_3_shots', 'serve_pts', 'serve_return',
       'shallow', 'shots', 'shots_in_pts_lost', 'shots_in_pts_won', 'snv_pts',
       'total_shots', 'unforced', 'unforced_bh', 'unforced_fh', 'unret',
       'very_deep', 'w

In [3]:
# The 'player' column identifies each row; every other column is a feature.
labels = df['player']
X = df.drop(columns=['player'])

# clustering parameters
min_samples = 5
eps = 0.025

In [8]:
class State_Component:
    """Interactive DBSCAN + PCA explorer (ipywidgets controls + matplotlib scatter).

    The component keeps all UI and analysis state on the instance: the feature
    checkboxes, the DBSCAN parameter sliders, the player search box, the
    scatter plot of the 2-D projection and the widget listing outlier players.
    Constructing an instance displays the controls and the (initially
    placeholder) plot; clicking "Confirm Params" runs scale -> cluster ->
    reduce -> redraw on the currently ticked features.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table, one row per player (no label column).
    labels : pandas.Series
        Player names, positionally aligned with the rows of ``data``.
    """

    # Cluster label that marks a point as an outlier in `get_outliers`
    # (DBSCAN reports noise points with this label).
    NOISE_LABEL = -1

    def __init__(self,data,labels):
        self.data = data
        self.transformed_data = data.to_numpy()
        self.clusterlabels = [0]*len(data)
        self.labels = labels
        # Feature names come from the data passed in, not from a module global.
        self.options = data.columns

        # make menu widgets
        self._build_widgets()
        self.multi_checkbox_widget()

        #widget events
        # NOTE(review): `Text.on_submit` is flagged as deprecated in recent
        # ipywidgets releases; it is kept here so the handler signature stays
        # unchanged -- confirm against the installed version.
        self.text.on_submit(self.handle_text_submit)
        self.button.on_click(self.handle_button_event)

        #draw grid
        self.grid()

        #draw plot
        self._build_plot()

        #draw outlier players
        display(self.outlier_player_widget)

    def _build_widgets(self):
        """Create the search box, parameter sliders, confirm button and outlier list."""
        self.text = widgets.Text(description="PlayerSearch")
        self.textresult = widgets.HTML(
                                        value="[ ]",
                                        placeholder='[ ]',
                                        description='Player Loc:',
                                      )
        self.combined = widgets.VBox([self.text,self.textresult])

        self.min_samples_widget = widgets.IntSlider(description='min samples',
                                                    value=7,
                                                    min=1,
                                                    max=100,
                                                    step=1,
                                                    orientation='horizontal',
                                                    readout=True,
                                                    readout_format='d'
                                                   )
        self.eps_widget = widgets.FloatSlider(description='eps',
                                                    value=0.5,
                                                    min=0.001,
                                                    max=1,
                                                    step=0.001,
                                                    orientation='horizontal',
                                                    readout=True,
                                                    readout_format='f'
                                                   )

        self.outlier_player_widget = widgets.HTML(
                                        value="[ ]",
                                        placeholder='[ ]',
                                        description='outliers:',
                                      )

        # Not placed in the grid; retained because it is a public attribute.
        self.param_widget = widgets.VBox([self.min_samples_widget,self.eps_widget])

        self.button = widgets.Button(
                        description='Confirm Params',
                        disabled=False,
                        button_style='', # 'success', 'info', 'warning', 'danger' or ''
                        tooltip='Click me',
                        icon='check' # (FontAwesome names without the `fa-` prefix)
                    )
        self.slider_button = widgets.VBox([self.min_samples_widget,
                                           self.eps_widget,
                                           self.button
                                          ])

    def _build_plot(self):
        """Create the figure, a one-point placeholder scatter and the hover annotation."""
        self.norm = plt.Normalize(1,4)
        self.cmap = plt.cm.Spectral

        self.fig,self.ax = plt.subplots()
        self.callback = self.fig.canvas.mpl_connect("motion_notify_event", self.hover)
        self.c = [-1]
        # Draw on this component's own axes rather than pyplot's "current" axes.
        self.sc = self.ax.scatter([0],[0],c=self.c, s=100, cmap=self.cmap, norm=self.norm)
        self.annot = self.ax.annotate("", xy=(0,0), xytext=(20,20),textcoords="offset points",
                            bbox=dict(boxstyle="round", fc="w"),
                            arrowprops=dict(arrowstyle="->"))

        self.annot.set_visible(False)
        self.fig.set_size_inches(9,8)
        self.ax.set_xlabel('x1')
        self.ax.set_ylabel('x2')
        self.ax.set_title('PCA test for reduction to 2 components')

    def handle_button_event(self,sender):
        """Re-run scaling, clustering and projection on the ticked features, then redraw.

        ``sender`` is the clicked button (unused). When no checkbox is ticked,
        every feature is used.
        """
        # Read the full checkbox list, not the GridBox children: the latter is
        # narrowed by the search field and would hide ticked boxes.
        checkboxes = self.option_checkboxes
        selected_options = [box.description for box in checkboxes if box.value]
        if not selected_options:
            selected_options = [box.description for box in checkboxes] # use everything if empty

        self.X = self.data[selected_options]
        # Scale the *selected* features (previously the global X was scaled,
        # which made the checkbox selection a no-op).
        min_max_scaler = preprocessing.MinMaxScaler()
        self.X_scaled = pd.DataFrame(min_max_scaler.fit_transform(self.X))

        #cluster
        self.cluster()

        #dimension reduction
        self.reduce()

        # update plot
        self.update()

    def multi_checkbox_widget(self):
        """Build a search field plus one checkbox per feature name in ``self.options``.

        Sets ``self.search_widget``, ``self.multi_select`` (the VBox holding
        both) and ``self.option_checkboxes`` (every checkbox, independent of
        the current search filter).
        """
        descriptions = list(self.options)
        self.search_widget = widgets.Text()
        options_dict = {description: widgets.Checkbox(description=description, value=False,indent=False)
                        for description in descriptions}
        self.option_checkboxes = [options_dict[description] for description in descriptions]
        options_widget = widgets.GridBox(list(self.option_checkboxes),
                                         layout=widgets.Layout(grid_template_columns="repeat(2, 150px)"))
        self.multi_select = widgets.VBox([self.search_widget, options_widget])

        # Wire the search field to the checkboxes
        def on_text_change(change):
            search_input = change['new']
            if search_input == '':
                # Reset search field
                new_options = list(self.option_checkboxes)
            else:
                # Filter by search field using difflib. No `n` is passed, so
                # difflib's default cap on the number of matches applies.
                close_matches = difflib.get_close_matches(search_input, descriptions, cutoff=0.0)
                new_options = [options_dict[description] for description in close_matches]
            options_widget.children = new_options

        self.search_widget.observe(on_text_change, names='value')

    def handle_text_submit(self,sender):
        """Show the projected coordinates of the player typed in the search box.

        Writes ``str(point)`` to ``self.textresult`` -- ``'None'`` when no
        label equals the submitted text. ``sender`` is unused.
        """
        # Positional lookup: rows of transformed_data are aligned with labels
        # by position, so the Series' index labels must not be used here.
        hits = np.flatnonzero((self.labels == self.text.value).to_numpy())
        point = None
        if hits.size > 0:
            point = self.transformed_data[hits[0]]
        self.textresult.value = str(point)

    # update annotation
    def update_annot(self,ind):
        """Move the annotation to the hovered point and label it ``name:cluster``.

        ``ind`` is the details dict returned by ``self.sc.contains``; its
        ``"ind"`` entry holds the positions of the points under the cursor.
        """
        hovered = ind["ind"]
        first = hovered[0]
        self.annot.xy = self.sc.get_offsets()[first]
        # .iloc: scatter points are positional, independent of the Series index.
        text = " ".join(self.labels.iloc[n] + ':' + str(self.c[n]) for n in hovered)

        self.annot.set_text(text)
        bbox = self.annot.get_bbox_patch()
        bbox.set_facecolor(self.cmap(self.norm(self.c[first])))
        bbox.set_alpha(0.4)

    def hover(self,event):
        """Mouse-move callback: show the annotation over a point, hide it otherwise."""
        if event.inaxes != self.ax:
            return
        was_visible = self.annot.get_visible()
        cont, ind = self.sc.contains(event)
        if cont:
            self.update_annot(ind)
            self.annot.set_visible(True)
            self.fig.canvas.draw_idle()
        elif was_visible:
            self.annot.set_visible(False)
            self.fig.canvas.draw_idle()

    def grid(self):
        """Lay out feature picker, parameter controls and player search side by side."""
        grid = GridspecLayout(1, 3)
        grid[0, 0] = self.multi_select
        grid[0, 1] = self.slider_button
        grid[0, 2] = self.combined
        display(grid)

    def cluster(self):
        """Run DBSCAN on ``self.X_scaled`` with the slider values; store labels."""
        dbscan = DBSCAN(eps=self.eps_widget.value,
                        min_samples=self.min_samples_widget.value
                       )
        self.clusterlabels = dbscan.fit_predict(self.X_scaled)

    def reduce(self):
        """Project ``self.X_scaled`` to two PCA components into ``self.transformed_data``."""
        # Import the submodule explicitly: `import sklearn as sk` alone does
        # not guarantee that `sk.decomposition` is loaded.
        from sklearn.decomposition import PCA

        # PCA cannot return more components than there are features; with a
        # single selected feature, project to 1-D and pad a zero second axis
        # so the scatter plot still receives two columns.
        n_components = min(2, self.X_scaled.shape[1])
        projected = PCA(n_components=n_components).fit_transform(self.X_scaled)
        if projected.shape[1] < 2:
            projected = np.column_stack([projected, np.zeros(len(projected))])
        self.transformed_data = projected

    def get_outliers(self):
        """Write the names of players labelled as noise to the outlier widget."""
        # Boolean mask instead of an index list: an empty index list becomes a
        # float array, which numpy refuses to index with.
        is_outlier = np.asarray(self.clusterlabels) == self.NOISE_LABEL
        outstanding_players = self.labels.to_numpy()[is_outlier]
        self.outlier_player_widget.value = str(outstanding_players)

    def update(self):
        """Replace the scatter with the current projection, coloured by cluster label."""
        self.sc.remove()
        points = self.transformed_data
        self.c = self.clusterlabels
        self.sc = self.ax.scatter(points[:,0], points[:,1], c=self.c, s=100,
                                  cmap=self.cmap, norm=self.norm)
        # Stretch the colour range over the labels actually present; the norm
        # object is shared with update_annot, so annotation colours follow.
        self.sc.set_clim(np.min(self.c), np.max(self.c))
        self.get_outliers()
        self.fig.canvas.draw_idle()
        

In [9]:
# Instantiate the explorer; the constructor itself displays the widget grid,
# the figure and the outlier list, so this cell produces the whole UI.
comp = State_Component(X,labels)

GridspecLayout(children=(VBox(children=(Text(value=''), GridBox(children=(Checkbox(value=False, description='a…

<IPython.core.display.Javascript object>

HTML(value='[ ]', description='outlier players:', placeholder='[ ]')

Sources
<br>
Thanks to React's state-component idea, which inspired how state is handled in this notebook.
<br>
https://stackoverflow.com/questions/55866439/why-doesnt-mpl-connect-work-when-called-in-the-init-of-a-class
<br>
https://matplotlib.org/users/event_handling.html
<br>
https://stackoverflow.com/questions/43923313/canvas-mpl-connect-in-jupyter-notebook (debug events)
<br>
https://ipywidgets.readthedocs.io/en/latest/examples/Widget_List.html
<br>
https://medium.com/@shahinrostami/jupyter-notebook-and-updating-plots-f1ec4cdc354b
<br>
https://chrisalbon.com/python/basics/set_the_color_of_a_matplotlib/
<br>
https://matplotlib.org/3.3.0/tutorials/colors/colormaps.html
<br>
https://www.geeksforgeeks.org/matplotlib-figure-figure-clear-in-python/ (not proper clearing of figure.)
<br>
https://stackoverflow.com/questions/32801990/how-to-clear-all-dynamically-plotted-points-on-pyplot-scatter-graph (remove old points)
<br>
https://stackoverflow.com/questions/43545050/using-matplotlib-notebook-after-matplotlib-inline-in-jupyter-notebook-doesnt (sequence bug when launching notebook first time with magic command)