# Interactive analysis of the football dataset
The idea is to build an interactive dashboard in which users can view the players who are good at certain skills, for example the list of players who are good at crossing, shooting, etc.

### Import necessary libraries

In [1]:
import sqlite3
import pandas as pd

# Location of the SQLite database file, relative to the working directory.
DB_PATH = '../input/database.sqlite'

# Open the connection that the table-loading queries below read from.
cnx = sqlite3.connect(DB_PATH)

### Load the players data

In [2]:
# Pull every row of the Player table into a DataFrame.
players = pd.read_sql_query("SELECT * from Player", cnx)

# Sanity check: print the row count and the distinct-id count so they can be compared.
player_ids = players['player_api_id']
for id_count in (len(player_ids), len(player_ids.unique())):
    print(id_count)

11060
11060


### Load the player attributes data

In [3]:
# Read the full Player_Attributes table and preview its first rows.
attributes_query = "SELECT * from Player_Attributes"
player_attributes = pd.read_sql_query(attributes_query, cnx)
player_attributes.head()

Unnamed: 0,id,player_fifa_api_id,player_api_id,date,overall_rating,potential,preferred_foot,attacking_work_rate,defensive_work_rate,crossing,...,vision,penalties,marking,standing_tackle,sliding_tackle,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes
0,1,218353,505942,2016-02-18 00:00:00,67.0,71.0,right,medium,medium,49.0,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
1,2,218353,505942,2015-11-19 00:00:00,67.0,71.0,right,medium,medium,49.0,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
2,3,218353,505942,2015-09-21 00:00:00,62.0,66.0,right,medium,medium,49.0,...,54.0,48.0,65.0,66.0,69.0,6.0,11.0,10.0,8.0,8.0
3,4,218353,505942,2015-03-20 00:00:00,61.0,65.0,right,medium,medium,48.0,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0
4,5,218353,505942,2007-02-22 00:00:00,61.0,65.0,right,medium,medium,48.0,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0


Here is a glimpse of what the data looks like. It appears that the player attributes were updated regularly, so each player has several dated records. Let us obtain the most recent record for each player.


In [4]:
# Parse the date strings so the records can be ordered chronologically.
player_attributes['date'] = pd.to_datetime(player_attributes['date'])

# Keep only the key columns. The explicit .copy() makes this an independent frame,
# so modifying it later is unambiguous and does not trigger pandas'
# SettingWithCopyWarning.
player_attribute_dates = player_attributes[['id', 'player_api_id', 'date']].copy()

In [5]:
# Silence pandas' chained-assignment warning for the rest of the session.
pd.options.mode.chained_assignment = None

# Rank each player's records from newest (1.0) to oldest; method="first" breaks
# ties by row order, so no two records of one player share a rank.
recency_rank = (
    player_attribute_dates
    .groupby("player_api_id")["date"]
    .rank(method="first", ascending=False)
)
player_attribute_dates = player_attribute_dates.assign(rank=recency_rank)

# Retain only the most recent record of every player.
is_latest = player_attribute_dates['rank'] == 1.0
player_attribute_dates = player_attribute_dates[is_latest]

In [6]:
# There must be exactly one latest-record row per distinct player in the attributes table.
distinct_player_count = player_attributes['player_api_id'].nunique(dropna=False)
latest_row_count = len(player_attribute_dates['player_api_id'])
assert distinct_player_count == latest_row_count
player_attributes.head()

Unnamed: 0,id,player_fifa_api_id,player_api_id,date,overall_rating,potential,preferred_foot,attacking_work_rate,defensive_work_rate,crossing,...,vision,penalties,marking,standing_tackle,sliding_tackle,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes
0,1,218353,505942,2016-02-18,67.0,71.0,right,medium,medium,49.0,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
1,2,218353,505942,2015-11-19,67.0,71.0,right,medium,medium,49.0,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
2,3,218353,505942,2015-09-21,62.0,66.0,right,medium,medium,49.0,...,54.0,48.0,65.0,66.0,69.0,6.0,11.0,10.0,8.0,8.0
3,4,218353,505942,2015-03-20,61.0,65.0,right,medium,medium,48.0,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0
4,5,218353,505942,2007-02-22,61.0,65.0,right,medium,medium,48.0,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0


### Get the player info

In [7]:
# Attach the complete attribute row to each latest-record key (id, player, date).
players_attrs = pd.merge(
    player_attribute_dates,
    player_attributes,
    how='left',
    on=['id', 'player_api_id', 'date'],
)

# Add the columns of the Player table (name, birthday, height, weight, ...).
# NOTE(review): id_y is rendered as a float in the saved output, which suggests that
# some rows found no match on this two-column key — TODO confirm.
player_info = players_attrs.merge(
    players,
    how='left',
    on=['player_api_id', 'player_fifa_api_id'],
)
player_info.head()

Unnamed: 0,id_x,player_api_id,date,rank,player_fifa_api_id,overall_rating,potential,preferred_foot,attacking_work_rate,defensive_work_rate,...,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes,id_y,player_name,birthday,height,weight
0,1,505942,2016-02-18,1.0,218353,67.0,71.0,right,medium,medium,...,6.0,11.0,10.0,8.0,8.0,1.0,Aaron Appindangoye,1992-02-29 00:00:00,182.88,187.0
1,6,155782,2016-04-21,1.0,189615,74.0,76.0,left,high,medium,...,14.0,7.0,9.0,9.0,12.0,2.0,Aaron Cresswell,1989-12-15 00:00:00,170.18,146.0
2,39,162549,2016-01-07,1.0,186170,65.0,67.0,right,medium,medium,...,16.0,11.0,12.0,9.0,13.0,3.0,Aaron Doran,1991-05-13 00:00:00,170.18,163.0
3,65,30572,2016-04-21,1.0,140161,69.0,69.0,right,medium,medium,...,15.0,12.0,13.0,12.0,11.0,4.0,Aaron Galindo,1982-05-08 00:00:00,182.88,198.0
4,88,23780,2015-12-24,1.0,17725,70.0,70.0,right,medium,medium,...,8.0,6.0,16.0,12.0,11.0,5.0,Aaron Hughes,1979-11-08 00:00:00,182.88,154.0


### Build an interactive dashboard

Install necessary libraries

In [8]:
%%capture
!jupyter lab clean
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

In [9]:
%matplotlib notebook
%matplotlib inline
import seaborn as sns
from ipywidgets import *
import numpy as np
import functools
import matplotlib.pyplot as plt

In [10]:
# Columns that are always shown in the result table.
required_columns = ['player_name', 'height', 'weight']

# Numeric attributes that get a threshold slider and can be selected as extra
# table columns. Every name appears exactly once so that the selection widgets
# built from this list contain no duplicate entries.
required_numeric_columns = ['overall_rating',
       'potential', 'crossing', 'finishing', 'heading_accuracy',
       'short_passing', 'volleys', 'dribbling', 'curve', 'free_kick_accuracy',
       'long_passing', 'ball_control', 'acceleration', 'sprint_speed',
       'agility', 'reactions', 'balance', 'shot_power', 'jumping', 'stamina',
       'strength', 'long_shots', 'aggression', 'interceptions', 'positioning',
       'vision', 'penalties', 'marking', 'standing_tackle', 'sliding_tackle',
       'gk_diving', 'gk_handling', 'gk_kicking', 'gk_positioning',
       'gk_reflexes']

In [11]:
def conjunction(*conditions):
    """Combine boolean conditions element-wise with logical AND.

    Args:
        *conditions: One or more boolean arrays/Series (or scalars) of
            broadcast-compatible shape.

    Returns:
        The element-wise AND of all conditions. A single condition is
        returned unchanged.

    Raises:
        TypeError: If called without any condition (``functools.reduce``
            on an empty sequence with no initial value).
    """
    # A conjunction holds only where *every* condition holds, hence logical_and.
    return functools.reduce(np.logical_and, conditions)

In [12]:
def get_desc():
    """Return the HTML help text displayed at the top of the dashboard."""
    # Listed line by line so that trailing spaces and blank lines stay explicit.
    desc_lines = [
        '',
        'This is an interactive dashboard to visualize the football dataset. ',
        'The output is a simple table with the player name, height and weight.',
        '',
        'The default value for all the sliders are set to 0. <br>',
        '',
        '<h4>How to use this dashboard?</h4>',
        '<h7> Let us say you want to know the list of players whose crossing rating is above 90 and whose overall rating is above 70.',
        'To find that, move the slider for crossing to 90 and the slider for overall rating to 70. The table below will change dynamically.',
        'The table is sorted in descending order with the column/label that has the maximum value. In this example, the output is',
        'sorted in descending order with the "crossing" column followed by the "overall rating" column. <br>',
        '',
        'By default the output table will show the player_name, height, weight and overall_rating. You can add more columns to show by ',
        'choosing the columns in the multiple selection box using the "ctrl" or "shift" key.',
        '',
    ]
    return '\n'.join(desc_lines)

In [13]:
def common_function(data, frame=None):
    """Filter and sort players according to the dashboard's widget values.

    Args:
        data: Mapping of widget name to current value. The entries
            ``'columns_to_show'`` and ``'filter_by'`` are ignored; every
            other key is treated as a column name whose value is an
            exclusive lower bound for that column.
        frame: DataFrame to filter. Defaults to the notebook-level
            ``player_info`` frame.

    Returns:
        A new DataFrame holding the rows whose value is strictly greater
        than the bound in every filtered column, sorted in descending order
        by those columns, with the column that has the highest bound first.
        Neither ``data`` nor ``frame`` is modified.
    """
    if frame is None:
        frame = player_info

    non_filter_keys = ('columns_to_show', 'filter_by')
    thresholds = {key: value for key, value in data.items() if key not in non_filter_keys}

    # Start from an all-True mask so that an empty filter set keeps every row.
    mask = pd.Series(True, index=frame.index)
    for column, threshold in thresholds.items():
        mask &= frame[column] > threshold
    selected = frame[mask]

    if not thresholds:
        # Nothing to sort by; boolean indexing already produced a new frame.
        return selected

    # Columns with the highest bound take precedence in the sort order
    # (ties keep the order in which the keys appear in ``data``).
    sort_columns = sorted(thresholds, key=thresholds.get, reverse=True)
    return selected.sort_values(sort_columns, ascending=False)


In [22]:
def plot(**data):
    """Display the table of players matching the current widget values.

    Args:
        **data: Widget values keyed by widget name. Must contain
            ``'columns_to_show'`` (an iterable of extra column names); the
            whole mapping is handed to ``common_function``.
    """
    # Read the column selection before handing ``data`` to common_function.
    extra_columns = list(data['columns_to_show'])
    visible_columns = required_columns + extra_columns
    matching_players = common_function(data)
    display(matching_players[visible_columns])
        
def _count_plot(df, column, title):
    """Draw a count plot of ``df[column]`` with rotated x tick labels and a title."""
    chart = sns.countplot(data=df, x=column)
    chart.set_xticklabels(chart.get_xticklabels(), rotation=65, horizontalalignment='right')
    chart.set_title(title)


def height_plot(**data):
    """Plot how many of the players matching the widget values have each height.

    Args:
        **data: Widget values keyed by widget name, handed to ``common_function``.
    """
    _count_plot(common_function(data), 'height', "Height Distribution")


def weight_plot(**data):
    """Plot how many of the players matching the widget values have each weight.

    Args:
        **data: Widget values keyed by widget name, handed to ``common_function``.
    """
    _count_plot(common_function(data), 'weight', "Weight Distribution")


sliders = {}
plt.style.use('seaborn')
%config InlineBackend.figure_format = 'svg'

style = {'description_width': 'initial'}
for column in required_numeric_columns:
    sliders[column] = IntSlider(description=f'{column}', min=0, max=100, step=1, value=0, style=style)
slider_displays = widgets.VBox(list(sliders.values()))

# Multi-select listing every numeric attribute; the chosen entries are passed to the
# plot callbacks under the 'columns_to_show' key.
columns_to_show = widgets.SelectMultiple(
    options=required_numeric_columns,
    value=['overall_rating'],
    rows=10,
    description='Columns',
    disabled=False,
    # Layout only offers the `margin` shorthand; a `margin_left` keyword is not a
    # Layout trait and would be ignored with a deprecation warning, so it is not passed.
    layout=widgets.Layout(border='1px solid black',
                          align_items='stretch')
)


# NOTE(review): these checkboxes are not referenced by the layout code visible in
# this cell; kept in case a later cell uses them — TODO confirm and remove if unused.
columns_to_show_checkboxes = [widgets.Checkbox(value=False, description=column, disabled=False,  style=style) 
                              for column in required_numeric_columns]

# Registered alongside the sliders so the callbacks receive the selection as well.
sliders['columns_to_show'] = columns_to_show

# Help text rendered beneath the dashboard title.
dashboard_desc = get_desc()


# Static HTML fragments used by the dashboard.
title = widgets.HTML(
    value='<H2 style="font-family:Verdana"><center>Interactive visualization of the Football dataset</center></H2>',
)
description = widgets.HTML(
    value='<p style="font-family:Arial">' + dashboard_desc + '</p><br>',
)

columns_to_show_title = widgets.HTML(
    value='<H5 style="font-family:Verdana"><left>Columns to show</left></H5>',
)
break_widget = widgets.HTML(
    value='<br>',
)

def _framed(widget):
    """Display ``widget`` inside a new Output area with a thin black border and return that area."""
    # Layout only offers the `margin` shorthand; a `margin_left` keyword is not a
    # Layout trait and would be ignored with a deprecation warning, so it is not passed.
    frame = Output(layout=Layout(border='1px solid black', align_items='stretch'))
    with frame:
        display(widget)
    return frame


# Each interactive_output re-runs its callback whenever one of the widgets in
# ``sliders`` changes.
plot_output = widgets.interactive_output(plot, sliders)
out = _framed(plot_output)

avg_height_plot = widgets.interactive_output(height_plot, sliders)
h_out = _framed(avg_height_plot)

avg_weight_plot = widgets.interactive_output(weight_plot, sliders)
w_out = _framed(avg_weight_plot)

# Left column: the threshold sliders.
column_1 = VBox([slider_displays])
# Right column: result table next to the column picker, then the two count plots.
# (Layout has no `margin_left` trait, so only the border is configured.)
column_2 = VBox(
    [HBox([out, columns_to_show], layout=Layout(flex_flow='row wrap')), h_out, w_out],
    layout=Layout(border='1px solid black'),
)
# Empty bordered Output placed between the description and the two columns.
line_break = widgets.Output(layout={'border': '1px solid black'})
title_widget = widgets.HBox([title])
description_widget = widgets.HBox([description])
dashboard = widgets.VBox([
    title_widget,
    description_widget,
    line_break,
    HBox([column_1, column_2])])
display(dashboard)

VBox(children=(HBox(children=(HTML(value='<H2 style="font-family:Verdana"><center>Interactive visualization of…