#### To generate the interactive visualization, open the Cell menu and select 'Run All Below'. It might take a few seconds!

In [1]:
from IPython.display import HTML

# Emit a script that hides every notebook input area (`div.input`) once the page is ready,
# together with a link that calls `code_toggle()` to show or hide the code again.
# `code_show` starts out true, so the first `code_toggle` call (on document ready) hides the code.
# NOTE(review): the script relies on `$` - presumably the jQuery shipped with the notebook
# front end; confirm it exists wherever this notebook is rendered.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code in this notebook has been hidden. In case you would like to see the code, click <a href="javascript:code_toggle()">here</a>.''')

In [3]:
from plotly import __version__
import pandas as pd
import plotly
from ipywidgets import widgets
import plotly.graph_objs as go
import plotlywidget
from difflib import get_close_matches

# notebook-wide setup: plotly offline mode on, pandas chained-assignment warnings off
plotly.offline.init_notebook_mode(connected=True)
pd.options.mode.chained_assignment = None

# read the two raw CSV files
compressed_fifa_ratings = pd.read_csv('data/compressed_player_rating_dataset.csv', encoding='latin-1')
team_mapping = pd.read_csv('data/team_mappings.csv', encoding='latin-1')

# attach the team mapping to every rating row (the ratings file itself carries no league information)
compressed_fifa_ratings = compressed_fifa_ratings.merge(team_mapping, left_on='team', right_on='Team')

# split the space-separated position string; the first entry becomes the player's 'main position'
parsed_positions = compressed_fifa_ratings['position'].str.split(' ', expand=True)
parsed_positions.columns = ['main_position', 'position_2', 'position_3', 'position_4']
compressed_fifa_ratings = pd.concat([compressed_fifa_ratings, parsed_positions], axis=1)

# the CSV index column and the unsplit position string are no longer needed
compressed_fifa_ratings = compressed_fifa_ratings.drop(columns=['Unnamed: 0', 'position'])

# keep only the columns the visualization uses
just_overall_ratings = compressed_fifa_ratings[
    ['name', 'age', 'main_position', 'nationality', 'team', 'League', 'overall']
].drop_duplicates()

# pair each (player, age) row with the same player's row one year older
just_overall_ratings['age_next_year'] = just_overall_ratings['age'] + 1
year_over_year_ratings = just_overall_ratings.merge(
    just_overall_ratings,
    left_on=['name', 'age_next_year'],
    right_on=['name', 'age'],
    how='left',
).drop_duplicates()

# merged column -> final column name; position/age/rating come from the earlier year (_x),
# next-year rating, nationality, team and league from the later year (_y)
pairing_columns = {
    'name': 'name',
    'main_position_x': 'main_position',
    'age_x': 'age',
    'age_next_year_x': 'age_next_year',
    'overall_x': 'overall',
    'overall_y': 'next_year_overall',
    'nationality_y': 'nationality',
    'team_y': 'team',
    'League_y': 'league',
}
year_over_year_ratings = year_over_year_ratings[list(pairing_columns)].rename(columns=pairing_columns)
year_over_year_ratings['diff'] = year_over_year_ratings['next_year_overall'] - year_over_year_ratings['overall']
year_over_year_ratings['name'] = year_over_year_ratings['name'].map(lambda player_name: player_name.strip())

# fold the less common position codes into their standard equivalents
position_mapping = {'CF': 'ST', 'RWB': 'RB', 'LF': 'LW', 'RF': 'RW', 'SW': 'CB', 'LWB': 'LB'}
year_over_year_ratings['main_position'] = year_over_year_ratings['main_position'].replace(position_mapping)

# pairings for ages below 16 and above 38 have very few data points, so only ages 16-38 are kept
year_over_year_ratings = year_over_year_ratings[year_over_year_ratings['age'].between(16, 38)]

# rows with any missing value (e.g. no rating for the following year) are discarded
year_over_year_ratings = year_over_year_ratings.dropna()

# choices offered by the drop-down boxes: 'All' first, then the distinct values in sorted order
all_nationalities = ['All'] + sorted(set(year_over_year_ratings['nationality']))
all_positions = ['All'] + sorted(set(year_over_year_ratings['main_position']))
all_teams = ['All'] + sorted(set(year_over_year_ratings['team']))
all_players = ['All'] + sorted(set(year_over_year_ratings['name']))
all_leagues = ['All'] + sorted(set(year_over_year_ratings['league']))

all_ages = sorted(set(year_over_year_ratings['age_next_year']))

def _all_dropdown(description, options):
    """Build a drop-down with the given label and choices, preselected on 'All'."""
    return widgets.Dropdown(description=description, value='All', options=options)


def _player_text_box():
    """Build the free-text player box, prefilled with 'All'."""
    return widgets.Text(description='Player:', value='All')


# filter widgets for the first series
nationalities_1 = _all_dropdown('Nationality:   ', all_nationalities)
positions_1 = _all_dropdown('Position:', all_positions)
teams_1 = _all_dropdown('Team:', all_teams)
league_1 = _all_dropdown('League:', all_leagues)
players_1 = _player_text_box()

# filter widgets for the second series
nationalities_2 = _all_dropdown('Nationality:   ', all_nationalities)
positions_2 = _all_dropdown('Position:', all_positions)
teams_2 = _all_dropdown('Team:', all_teams)
league_2 = _all_dropdown('League:', all_leagues)
players_2 = _player_text_box()


# define the function used to filter the dataset based on the selection made using the widgets
def filtered_aging_curves(year_over_year_ratings, chosen_items):
    """Filter the year-over-year ratings and build the cumulative aging curve.

    Parameters
    ----------
    year_over_year_ratings : DataFrame
        Needs the columns 'nationality', 'main_position', 'team', 'league',
        'name', 'age', 'age_next_year' and 'diff'. It is not modified.
    chosen_items : dict
        One entry per filter column ('nationality', 'main_position', 'team',
        'league', 'name'); the value 'All' leaves that column unfiltered.

    Returns
    -------
    tuple
        (list of 'age_next_year' values, list holding the running sum of the
        mean 'diff' per age pairing, label such as '12 players selected').
    """
    selection = year_over_year_ratings
    # apply the filters one after another, in a fixed order
    for column in ('nationality', 'main_position', 'team', 'league', 'name'):
        wanted = chosen_items[column]
        if wanted != 'All':
            selection = selection.loc[selection[column] == wanted]

    # a player contributes at most one row per age pairing
    selection = selection.drop_duplicates(subset=['name', 'age', 'age_next_year'])
    player_label = str(len(set(selection['name']))) + ' players selected'

    # mean rating change per age pairing, accumulated in age order
    mean_change = selection.groupby(['age', 'age_next_year'], as_index=False)['diff'].mean()
    mean_change.columns = ['age', 'age_next_year', 'diff']
    mean_change['cum_sum'] = mean_change['diff'].cumsum()

    return list(mean_change['age_next_year']), list(mean_change['cum_sum']), player_label


# unfiltered curve: both traces start out identical until a selection is made
default_age_series, default_aging_series, nr_players = filtered_aging_curves(
    year_over_year_ratings,
    {'nationality': 'All', 'main_position': 'All', 'team': 'All', 'league': 'All', 'name': 'All'},
)
first_series, second_series = (
    go.Scatter(x=default_age_series, y=default_aging_series, mode='lines', name=trace_name)
    for trace_name in ('Selection 1', 'Selection 2')
)

# figure layout: fixed size and axis ranges, legend anchored at the bottom-left corner
layout = go.Layout(
    title='FIFA Player Aging Curves',
    legend={'x': 0, 'y': 0},
    width=500,
    height=530,
    xaxis={
        'range': [16, 36],
        'title': 'Age',
        'nticks': 10,
        'showticklabels': True,
    },
    yaxis={
        'range': [-5, 30],
        'zerolinecolor': '#969696',
        'zerolinewidth': 4,
        'title': 'Cumulative Change in Player Rating',
    },
    margin={'t': 60, 'b': 60, 'l': 80, 'r': 0},
    scene={
        'xaxis': {
            'gridcolor': 'rgb(255, 255, 255)',
            'zerolinecolor': 'rgb(255, 255, 255)',
            'showbackground': True,
            'backgroundcolor': 'rgb(230, 230,230)',
        },
        'yaxis': {
            'gridcolor': 'rgb(255, 255, 255)',
            'zerolinecolor': 'rgb(255, 255, 255)',
            'showbackground': True,
            'backgroundcolor': 'rgb(230, 230, 230)',
        },
    },
)

# per-selection labels: player count (seeded with the unfiltered count) and name suggestion (blank)
nr_players_1 = widgets.Label(value=nr_players)
suggested_player_1 = widgets.Label(value='')

nr_players_2 = widgets.Label(value=nr_players)
suggested_player_2 = widgets.Label(value='')

# the interactive figure holding both traces
g = go.FigureWidget(data=[first_series, second_series], layout=layout)

def validate_1():
    """Return True when each first-series drop-down holds one of its known options."""
    dropdown_checks = (
        (nationalities_1, all_nationalities),
        (positions_1, all_positions),
        (teams_1, all_teams),
        (league_1, all_leagues),
    )
    return all(dropdown.value in options for dropdown, options in dropdown_checks)


def response_1(change):
    """Refresh the first trace and its labels after a first-series widget changes.

    Used as the `observe` callback of the first-series widgets. `change` is the
    notification passed in by the widget and is not used; the current widget
    values are read directly.

    Does nothing when `validate_1()` fails. Otherwise the first trace of `g` is
    redrawn from the filtered data. If the typed player name is known, the
    player-count label is updated and the suggestion cleared; if it is unknown,
    the suggestion label shows the closest known name, or a plain
    "not valid" message when nothing is close enough.
    """
    if not validate_1():
        return

    changes = {'nationality': nationalities_1.value, 'main_position': positions_1.value, 'team': teams_1.value, 'league': league_1.value, 'name': players_1.value}
    x, y, player_count = filtered_aging_curves(year_over_year_ratings, changes)
    with g.batch_update():
        g.data[0].x = x
        g.data[0].y = y

    if players_1.value in all_players:
        nr_players_1.value = player_count
        suggested_player_1.value = ''
        return

    # Unknown name: check for an empty result explicitly instead of catching every
    # exception, so real errors surface and a stale suggestion is never left on screen.
    close_matches = get_close_matches(players_1.value, all_players)
    if close_matches:
        suggested_player_1.value = 'Not a valid selection. Did you mean ' + close_matches[0] + '?'
    else:
        suggested_player_1.value = 'Not a valid selection.'

            
def validate_2():
    """Return True when each second-series drop-down holds one of its known options.

    Checks nationality, position, team and league, mirroring `validate_1`.
    The free-text player box is not checked here: `response_2` deals with
    unknown names itself by offering the closest match.
    """
    return (
        nationalities_2.value in all_nationalities
        and positions_2.value in all_positions
        and teams_2.value in all_teams
        and league_2.value in all_leagues
    )

    
def response_2(change):
    """Refresh the second trace and its labels after a second-series widget changes.

    Used as the `observe` callback of the second-series widgets. `change` is the
    notification passed in by the widget and is not used; the current widget
    values are read directly.

    Does nothing when `validate_2()` fails. Otherwise the second trace of `g` is
    redrawn from the filtered data. If the typed player name is known, the
    player-count label is updated and the suggestion cleared; if it is unknown,
    the suggestion label shows the closest known name, or a plain
    "not valid" message when nothing is close enough.
    """
    if not validate_2():
        return

    changes = {'nationality': nationalities_2.value, 'main_position': positions_2.value, 'team': teams_2.value, 'league': league_2.value, 'name': players_2.value}
    x, y, player_count = filtered_aging_curves(year_over_year_ratings, changes)
    with g.batch_update():
        g.data[1].x = x
        g.data[1].y = y

    if players_2.value in all_players:
        nr_players_2.value = player_count
        suggested_player_2.value = ''
        return

    # Unknown name: check for an empty result explicitly instead of catching every
    # exception, so real errors surface and a stale suggestion is never left on screen.
    close_matches = get_close_matches(players_2.value, all_players)
    if close_matches:
        suggested_player_2.value = 'Not a valid selection. Did you mean ' + close_matches[0] + '?'
    else:
        suggested_player_2.value = 'Not a valid selection.'

# Re-run the matching callback whenever the value of any filter widget changes.
for control in (nationalities_1, positions_1, teams_1, league_1, players_1):
    control.observe(response_1, names="value")

for control in (nationalities_2, positions_2, teams_2, league_2, players_2):
    control.observe(response_2, names="value")

# Shared box styling for the two filter panels.
selection_box_layout = widgets.Layout(width='340px', border='1px solid grey', padding='1%', align_items='center')

# Both panel titles are plain labels. A `font_size='10px'` keyword used to be passed to the
# first one, but Label declares no such trait, so it never affected the rendering and only
# triggered traitlets' "unrecognized arguments" deprecation warning; it has been dropped.
title_selection_1 = widgets.Label(value='Selection 1')
title_selection_2 = widgets.Label(value='Selection 2')

# Each panel stacks: title, the four drop-downs, the player box, the suggestion and the player count.
filters_1 = widgets.VBox([title_selection_1, nationalities_1, positions_1, teams_1, league_1, players_1, suggested_player_1, nr_players_1], layout=selection_box_layout)
filters_2 = widgets.VBox([title_selection_2, nationalities_2, positions_2, teams_2, league_2, players_2, suggested_player_2, nr_players_2], layout=selection_box_layout)

# Panels on the left (one above the other), figure on the right.
filters = widgets.VBox([filters_1, filters_2], layout=widgets.Layout(padding='1%'))
visualization = widgets.HBox([filters, g])
display(visualization)

HBox(children=(VBox(children=(VBox(children=(Label(value='Selection 1'), Dropdown(description='Nationality:   …