In [1]:
# imports
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Do not truncate wide frames: show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

In [2]:
# Initialise Plotly's notebook mode so the figures built below render inside the notebook
init_notebook_mode(connected=True)

In [3]:
# Load the player statistics from the CSV file into a DataFrame.
# NOTE(review): accented names in the displayed rows come out garbled (e.g. "Thomas DÃ¤hne"),
# which suggests the file holds UTF-8 bytes that are being decoded as ISO-8859-1 —
# confirm the file's real encoding before relying on the player names.
stats = pd.read_csv('player_stats.csv', encoding='ISO-8859-1')

### Data Cleaning

In [4]:
# Preview the first 5 rows of the data frame
stats.head()

Unnamed: 0,player,country,height,weight,age,club,ball_control,dribbling,marking,slide_tackle,stand_tackle,aggression,reactions,att_position,interceptions,vision,composure,crossing,short_pass,long_pass,acceleration,stamina,strength,balance,sprint_speed,agility,jumping,heading,shot_power,finishing,long_shots,curve,fk_acc,penalties,volleys,gk_positioning,gk_diving,gk_handling,gk_kicking,gk_reflexes,value
0,Cristian Castro Devenish,Colombia,192,84,22,Atl. Nacional,55,43,,68,73,72,68,30,65,30,50,33,64,49,41,55,86,40,52,43,51,64,54,30,31,32,34,41,33,10,11,6,7,9,$1.400.000
1,Silaldo Taffarel,Brazil,181,80,31,Corinthians,69,70,,56,58,62,70,69,70,64,54,60,63,63,64,87,81,42,67,65,65,54,60,64,68,65,62,48,46,12,15,14,8,14,$975.00
2,Thomas DÃ¤hne,Germany,193,84,29,Holstein Kiel,25,12,,13,16,27,65,17,20,49,48,14,35,18,46,38,68,41,48,36,60,17,51,14,20,20,15,26,16,64,74,65,68,74,$1.100.000
3,Michael Sollbauer,Austria,187,86,33,SK Rapid Wien,46,48,,66,69,71,64,48,66,29,70,44,58,53,35,73,82,56,63,57,80,67,32,24,33,25,13,22,19,10,10,8,14,9,$650.00
4,Diego Segovia,Uruguay,191,80,23,Independiente,14,8,,14,16,28,50,10,12,38,34,11,23,20,38,28,64,24,31,34,27,13,48,4,6,9,10,16,5,61,59,62,64,64,$300.00


In [5]:
# Shape of the data frame as (number of rows, number of columns)
stats.shape

(5682, 41)

In [6]:
# Per-column overview of the data frame: column name, non-null count and dtype
stats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5682 entries, 0 to 5681
Data columns (total 41 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   player          5682 non-null   object 
 1   country         5682 non-null   object 
 2   height          5682 non-null   int64  
 3   weight          5682 non-null   int64  
 4   age             5682 non-null   int64  
 5   club            5682 non-null   object 
 6   ball_control    5682 non-null   int64  
 7   dribbling       5682 non-null   int64  
 8   marking         0 non-null      float64
 9   slide_tackle    5682 non-null   int64  
 10  stand_tackle    5682 non-null   int64  
 11  aggression      5682 non-null   int64  
 12  reactions       5682 non-null   int64  
 13  att_position    5682 non-null   int64  
 14  interceptions   5682 non-null   int64  
 15  vision          5682 non-null   int64  
 16  composure       5682 non-null   int64  
 17  crossing        5682 non-null   i

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
stats.describe()

Unnamed: 0,height,weight,age,ball_control,dribbling,marking,slide_tackle,stand_tackle,aggression,reactions,att_position,interceptions,vision,composure,crossing,short_pass,long_pass,acceleration,stamina,strength,balance,sprint_speed,agility,jumping,heading,shot_power,finishing,long_shots,curve,fk_acc,penalties,volleys,gk_positioning,gk_diving,gk_handling,gk_kicking,gk_reflexes
count,5682.0,5682.0,5682.0,5682.0,5682.0,0.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0,5682.0
mean,181.670539,75.282295,26.316262,58.912179,56.128476,,46.728441,48.820838,56.324006,61.959345,50.724921,47.389124,54.465681,58.620908,49.790391,59.333333,53.910771,64.754664,63.377332,65.381204,64.070398,64.958993,63.780535,65.085005,52.245336,58.182682,46.354277,47.164203,48.099789,43.299542,48.174938,42.93189,16.18761,16.43805,16.191834,16.071982,16.570222
std,6.829238,6.998971,4.729967,16.567068,18.772075,,20.51943,20.975966,16.84641,8.893309,19.780636,20.451327,13.705286,12.024102,17.898054,14.326017,14.601361,15.2986,16.105897,12.624053,14.495444,15.114044,14.857875,12.274525,17.359459,12.971923,19.822642,19.463368,18.086672,17.086473,15.781737,17.912619,17.146572,17.63526,16.993154,16.808674,17.942154
min,156.0,54.0,17.0,8.0,5.0,,7.0,7.0,11.0,32.0,3.0,3.0,10.0,13.0,6.0,11.0,9.0,12.0,14.0,25.0,20.0,13.0,21.0,23.0,6.0,20.0,4.0,5.0,7.0,4.0,8.0,4.0,2.0,2.0,2.0,2.0,3.0
25%,177.0,70.0,23.0,55.0,51.0,,27.0,30.0,45.0,56.0,40.0,28.0,45.0,52.0,39.0,55.0,46.0,57.0,56.0,58.0,56.0,57.0,56.0,57.0,45.0,49.0,31.0,33.0,36.0,32.0,39.0,30.0,8.0,8.0,8.0,8.0,8.0
50%,182.0,75.0,26.0,63.0,62.0,,54.0,57.0,60.0,62.0,56.0,54.0,56.0,60.0,54.0,63.0,57.0,68.0,66.0,66.5,66.0,68.0,66.0,66.0,55.0,59.0,51.0,51.0,50.0,43.0,49.0,44.0,11.0,11.0,11.0,11.0,11.0
75%,186.0,80.0,30.0,69.0,68.0,,64.0,66.0,69.0,68.0,65.0,64.0,65.0,67.0,63.0,68.0,64.0,75.0,74.0,74.0,74.0,75.0,74.0,73.0,64.0,68.0,62.0,62.0,62.0,56.0,60.0,57.0,14.0,14.0,14.0,14.0,14.0
max,204.0,102.0,41.0,94.0,95.0,,87.0,91.0,96.0,93.0,93.0,89.0,94.0,96.0,94.0,93.0,93.0,97.0,95.0,96.0,95.0,97.0,93.0,95.0,93.0,94.0,94.0,91.0,93.0,94.0,92.0,90.0,90.0,90.0,87.0,90.0,89.0


In [8]:
# Let's have a look at the columns the data set contains
stats.columns

Index(['player', 'country', 'height', 'weight', 'age', 'club', 'ball_control',
       'dribbling', 'marking', 'slide_tackle', 'stand_tackle', 'aggression',
       'reactions', 'att_position', 'interceptions', 'vision', 'composure',
       'crossing', 'short_pass', 'long_pass', 'acceleration', 'stamina',
       'strength', 'balance', 'sprint_speed', 'agility', 'jumping', 'heading',
       'shot_power', 'finishing', 'long_shots', 'curve', 'fk_acc', 'penalties',
       'volleys', 'gk_positioning', 'gk_diving', 'gk_handling', 'gk_kicking',
       'gk_reflexes', 'value'],
      dtype='object')

In [9]:
# Let's count the null values in each column of the data frame
stats.isnull().sum()

player               0
country              0
height               0
weight               0
age                  0
club                 0
ball_control         0
dribbling            0
marking           5682
slide_tackle         0
stand_tackle         0
aggression           0
reactions            0
att_position         0
interceptions        0
vision               0
composure            0
crossing             0
short_pass           0
long_pass            0
acceleration         0
stamina              0
strength             0
balance              0
sprint_speed         0
agility              0
jumping              0
heading              0
shot_power           0
finishing            0
long_shots           0
curve                0
fk_acc               0
penalties            0
volleys              0
gk_positioning       0
gk_diving            0
gk_handling          0
gk_kicking           0
gk_reflexes          0
value                0
dtype: int64

In [10]:
# The null counts show that 'marking' holds no data at all (every value is null),
# so remove that column and preview the result
stats = stats.drop(columns=['marking'])
stats.head()

Unnamed: 0,player,country,height,weight,age,club,ball_control,dribbling,slide_tackle,stand_tackle,aggression,reactions,att_position,interceptions,vision,composure,crossing,short_pass,long_pass,acceleration,stamina,strength,balance,sprint_speed,agility,jumping,heading,shot_power,finishing,long_shots,curve,fk_acc,penalties,volleys,gk_positioning,gk_diving,gk_handling,gk_kicking,gk_reflexes,value
0,Cristian Castro Devenish,Colombia,192,84,22,Atl. Nacional,55,43,68,73,72,68,30,65,30,50,33,64,49,41,55,86,40,52,43,51,64,54,30,31,32,34,41,33,10,11,6,7,9,$1.400.000
1,Silaldo Taffarel,Brazil,181,80,31,Corinthians,69,70,56,58,62,70,69,70,64,54,60,63,63,64,87,81,42,67,65,65,54,60,64,68,65,62,48,46,12,15,14,8,14,$975.00
2,Thomas DÃ¤hne,Germany,193,84,29,Holstein Kiel,25,12,13,16,27,65,17,20,49,48,14,35,18,46,38,68,41,48,36,60,17,51,14,20,20,15,26,16,64,74,65,68,74,$1.100.000
3,Michael Sollbauer,Austria,187,86,33,SK Rapid Wien,46,48,66,69,71,64,48,66,29,70,44,58,53,35,73,82,56,63,57,80,67,32,24,33,25,13,22,19,10,10,8,14,9,$650.00
4,Diego Segovia,Uruguay,191,80,23,Independiente,14,8,14,16,28,50,10,12,38,34,11,23,20,38,28,64,24,31,34,27,13,48,4,6,9,10,16,5,61,59,62,64,64,$300.00


In [11]:
# Let's confirm that no null values remain in any column
stats.isnull().sum()

player            0
country           0
height            0
weight            0
age               0
club              0
ball_control      0
dribbling         0
slide_tackle      0
stand_tackle      0
aggression        0
reactions         0
att_position      0
interceptions     0
vision            0
composure         0
crossing          0
short_pass        0
long_pass         0
acceleration      0
stamina           0
strength          0
balance           0
sprint_speed      0
agility           0
jumping           0
heading           0
shot_power        0
finishing         0
long_shots        0
curve             0
fk_acc            0
penalties         0
volleys           0
gk_positioning    0
gk_diving         0
gk_handling       0
gk_kicking        0
gk_reflexes       0
value             0
dtype: int64

In [12]:
# Show the rows that exactly repeat an earlier row
stats[stats.duplicated()]

Unnamed: 0,player,country,height,weight,age,club,ball_control,dribbling,slide_tackle,stand_tackle,aggression,reactions,att_position,interceptions,vision,composure,crossing,short_pass,long_pass,acceleration,stamina,strength,balance,sprint_speed,agility,jumping,heading,shot_power,finishing,long_shots,curve,fk_acc,penalties,volleys,gk_positioning,gk_diving,gk_handling,gk_kicking,gk_reflexes,value
158,Maxime CrÃ©peau,Canada,180,88,29,LAFC,45,19,17,17,34,78,7,20,55,60,19,34,46,50,33,69,40,48,43,67,14,53,17,14,16,14,23,19,70,72,70,70,75,$2.200.000
159,Stefan Thesker,Germany,190,84,32,Holstein Kiel,55,45,64,70,69,54,38,69,37,60,42,57,68,43,63,78,45,54,50,66,71,58,29,39,33,31,42,40,14,13,7,10,12,$700.00
160,Cameron John,England,181,78,24,Rochdale,55,53,57,60,61,58,45,59,44,56,47,58,53,70,65,70,62,75,68,75,55,42,29,36,34,25,32,28,8,13,9,7,8,$350.00


In [13]:
# Look at one duplicated player to see both copies of the row
stats[stats['player'] == 'Stefan Thesker']

Unnamed: 0,player,country,height,weight,age,club,ball_control,dribbling,slide_tackle,stand_tackle,aggression,reactions,att_position,interceptions,vision,composure,crossing,short_pass,long_pass,acceleration,stamina,strength,balance,sprint_speed,agility,jumping,heading,shot_power,finishing,long_shots,curve,fk_acc,penalties,volleys,gk_positioning,gk_diving,gk_handling,gk_kicking,gk_reflexes,value
156,Stefan Thesker,Germany,190,84,32,Holstein Kiel,55,45,64,70,69,54,38,69,37,60,42,57,68,43,63,78,45,54,50,66,71,58,29,39,33,31,42,40,14,13,7,10,12,$700.00
159,Stefan Thesker,Germany,190,84,32,Holstein Kiel,55,45,64,70,69,54,38,69,37,60,42,57,68,43,63,78,45,54,50,66,71,58,29,39,33,31,42,40,14,13,7,10,12,$700.00


In [14]:
# Let's drop the duplicated rows
stats = stats.drop_duplicates()

In [15]:
# Let's confirm that no duplicated rows remain (expected count: 0)
stats.duplicated().sum()

0

In [16]:
# Let's check the dtype of each column
stats.dtypes

player            object
country           object
height             int64
weight             int64
age                int64
club              object
ball_control       int64
dribbling          int64
slide_tackle       int64
stand_tackle       int64
aggression         int64
reactions          int64
att_position       int64
interceptions      int64
vision             int64
composure          int64
crossing           int64
short_pass         int64
long_pass          int64
acceleration       int64
stamina            int64
strength           int64
balance            int64
sprint_speed       int64
agility            int64
jumping            int64
heading            int64
shot_power         int64
finishing          int64
long_shots         int64
curve              int64
fk_acc             int64
penalties          int64
volleys            int64
gk_positioning     int64
gk_diving          int64
gk_handling        int64
gk_kicking         int64
gk_reflexes        int64
value             object


In [17]:
# From the above we can see that the 'value' column has dtype object even though it holds
# monetary amounts; let's look at the raw strings
stats['value'].head()


0    $1.400.000
1      $975.00 
2    $1.100.000
3      $650.00 
4      $300.00 
Name: value, dtype: object

In [18]:
# Let's convert 'value' to dtype float.
# First strip the '$' sign and the '.' separators so only the digits are left.
# regex=False forces literal matching: '$' and '.' are both regex metacharacters, and the
# default of the `regex` argument is not the same in every pandas version.
# NOTE(review): removing every '.' turns "$975.00" into 97500 while "$1.400.000" becomes
# 1400000 — confirm whether the '.' in the shorter values is a thousands separator
# (i.e. the source strings are truncated) before relying on the absolute amounts.
stats['value'] = stats['value'].str.replace('$', '', regex=False)
stats['value'] = stats['value'].str.replace('.', '', regex=False)
stats['value'].head()

0    1400000
1     97500 
2    1100000
3     65000 
4     30000 
Name: value, dtype: object

In [19]:
# Cast the cleaned digit strings to floating point, then check the resulting dtype
stats['value'] = stats['value'].astype('float64')
stats['value'].dtype

dtype('float64')

In [20]:
# Rename the 'value' column to 'value($)' so the currency is visible in the header
stats = stats.rename({'value': 'value($)'}, axis='columns')
stats.head()

Unnamed: 0,player,country,height,weight,age,club,ball_control,dribbling,slide_tackle,stand_tackle,aggression,reactions,att_position,interceptions,vision,composure,crossing,short_pass,long_pass,acceleration,stamina,strength,balance,sprint_speed,agility,jumping,heading,shot_power,finishing,long_shots,curve,fk_acc,penalties,volleys,gk_positioning,gk_diving,gk_handling,gk_kicking,gk_reflexes,value($)
0,Cristian Castro Devenish,Colombia,192,84,22,Atl. Nacional,55,43,68,73,72,68,30,65,30,50,33,64,49,41,55,86,40,52,43,51,64,54,30,31,32,34,41,33,10,11,6,7,9,1400000.0
1,Silaldo Taffarel,Brazil,181,80,31,Corinthians,69,70,56,58,62,70,69,70,64,54,60,63,63,64,87,81,42,67,65,65,54,60,64,68,65,62,48,46,12,15,14,8,14,97500.0
2,Thomas DÃ¤hne,Germany,193,84,29,Holstein Kiel,25,12,13,16,27,65,17,20,49,48,14,35,18,46,38,68,41,48,36,60,17,51,14,20,20,15,26,16,64,74,65,68,74,1100000.0
3,Michael Sollbauer,Austria,187,86,33,SK Rapid Wien,46,48,66,69,71,64,48,66,29,70,44,58,53,35,73,82,56,63,57,80,67,32,24,33,25,13,22,19,10,10,8,14,9,65000.0
4,Diego Segovia,Uruguay,191,80,23,Independiente,14,8,14,16,28,50,10,12,38,34,11,23,20,38,28,64,24,31,34,27,13,48,4,6,9,10,16,5,61,59,62,64,64,30000.0


In [21]:
# List the column names of the cleaned data frame
stats.columns

Index(['player', 'country', 'height', 'weight', 'age', 'club', 'ball_control',
       'dribbling', 'slide_tackle', 'stand_tackle', 'aggression', 'reactions',
       'att_position', 'interceptions', 'vision', 'composure', 'crossing',
       'short_pass', 'long_pass', 'acceleration', 'stamina', 'strength',
       'balance', 'sprint_speed', 'agility', 'jumping', 'heading',
       'shot_power', 'finishing', 'long_shots', 'curve', 'fk_acc', 'penalties',
       'volleys', 'gk_positioning', 'gk_diving', 'gk_handling', 'gk_kicking',
       'gk_reflexes', 'value($)'],
      dtype='object')

In [22]:
# Count the distinct values in each column and add those per-column counts together
stats.nunique().sum()

9371

### Data Analysis

#### 1. Let's check the distribution of players by age, height and weight

In [23]:
# Histogram of the players' heights
px.histogram(
    stats,
    x='height',
    title='Height Distribution',
    template='plotly_dark',
    color_discrete_sequence=['purple'],
)

In [24]:
# Histogram of the players' weights
# (title spelling fixed; colour name written like the other histograms)
px.histogram(
    stats,
    x='weight',
    title='Weight Distribution',
    template='plotly_dark',
    color_discrete_sequence=['purple'],
)

In [25]:
# Histogram of the players' ages
px.histogram(
    stats,
    x='age',
    title='Age Distribution',
    color_discrete_sequence=['purple'],
    template='plotly_dark',
)

In [26]:
# The oldest players in the data are 41 years old — list them
stats.query('age == 41')

Unnamed: 0,player,country,height,weight,age,club,ball_control,dribbling,slide_tackle,stand_tackle,aggression,reactions,att_position,interceptions,vision,composure,crossing,short_pass,long_pass,acceleration,stamina,strength,balance,sprint_speed,agility,jumping,heading,shot_power,finishing,long_shots,curve,fk_acc,penalties,volleys,gk_positioning,gk_diving,gk_handling,gk_kicking,gk_reflexes,value($)
1574,AntolÃ­n AlcÃ¡raz,Paraguay,187,78,41,Olimpia,61,57,73,74,72,71,56,70,44,80,47,67,48,33,51,82,53,30,50,72,74,54,38,42,32,35,48,35,16,8,11,10,11,32500.0
3930,Pepe Reina,Spain,188,92,41,Villarreal CF,23,10,24,22,32,72,13,22,65,64,11,37,39,43,34,70,60,49,50,71,13,59,14,15,11,11,24,11,79,75,77,79,76,27500.0
4422,Diego LÃ³pez,Spain,196,89,41,Rayo Vallecano,25,11,14,13,40,74,11,16,55,65,11,28,21,44,40,73,40,42,43,66,10,51,12,10,10,11,21,14,80,76,78,68,76,45000.0
5409,Enrique Bologna,Argentina,189,87,41,Defensa,32,22,17,22,25,69,15,17,53,65,21,34,21,44,33,73,47,40,43,69,10,53,13,15,16,18,25,10,70,73,66,71,73,5000.0
5681,Zlatan Ibrahimovi?,Sweden,195,102,41,Milan,85,75,24,37,84,77,88,20,83,90,71,77,72,55,34,85,51,61,67,77,82,86,84,85,79,74,80,87,9,13,15,10,12,10500000.0


Let's check the top ten countries with the highest number of players

In [27]:
# Count the players per country and keep the 10 largest groups.
# rename_axis / reset_index(name=...) name the two columns explicitly, because the column
# names produced by a bare value_counts().reset_index() are not the same in every pandas
# version, and the plot below needs 'country' and 'count' to exist.
top = (
    stats['country']
    .value_counts()
    .head(10)
    .rename_axis('country')
    .reset_index(name='count')
)

# Plot the countries as bars, coloured by their player count
px.bar(top, x='country', y='count', title='Top 10 Countries with Most Players',
       template='plotly_dark', color='count', color_continuous_scale='Cividis')

In [28]:
# Let's look at the distribution of players around the world.
# The columns are named explicitly ('country', 'count') because the names produced by a
# bare value_counts().reset_index() are not the same in every pandas version.
total_dist = (
    stats['country']
    .value_counts()
    .rename_axis('country')
    .reset_index(name='count')
)

# Plot a choropleth: each country (matched by its name) is shaded by its player count
px.choropleth(total_dist,
              locations='country',
              locationmode='country names',
              color='count',
              template='plotly',
              color_continuous_scale='Plasma',
              labels={'count': 'Number of Players'})

#### 2. Let's look at the top clubs in terms of number of players and average skill rating

In [29]:
# Top clubs in terms of number of players.
# The columns are named explicitly ('club', 'count') because the names produced by a
# bare value_counts().reset_index() are not the same in every pandas version.
top_club = (
    stats['club']
    .value_counts()
    .head(10)
    .rename_axis('club')
    .reset_index(name='count')
)

# Plot the top clubs as bars, coloured by their player count
px.bar(top_club, x='club', y='count', title='Top 10 Clubs with Most Players',
       template='plotly_dark', color='count', color_continuous_scale='Cividis')

In [30]:
# Average skill rating: list the columns first to see which ones can feed the average
stats.columns

Index(['player', 'country', 'height', 'weight', 'age', 'club', 'ball_control',
       'dribbling', 'slide_tackle', 'stand_tackle', 'aggression', 'reactions',
       'att_position', 'interceptions', 'vision', 'composure', 'crossing',
       'short_pass', 'long_pass', 'acceleration', 'stamina', 'strength',
       'balance', 'sprint_speed', 'agility', 'jumping', 'heading',
       'shot_power', 'finishing', 'long_shots', 'curve', 'fk_acc', 'penalties',
       'volleys', 'gk_positioning', 'gk_diving', 'gk_handling', 'gk_kicking',
       'gk_reflexes', 'value($)'],
      dtype='object')

In [31]:
# Add a 'rating' column: the mean of each player's skill columns, rescaled to 0-100.

# Columns that must stay out of the average.
# 'rating' is listed as well so that re-running this cell does not fold the previously
# computed rating back into the new average.
exclude_col = ['player', 'country', 'height', 'weight', 'age', 'club', 'value($)', 'rating']

# Every remaining column takes part in the average.
# NOTE(review): this includes the gk_* columns for every player — confirm that mixing
# goalkeeper and outfield skills in one average is intended.
cols_to_average = [col for col in stats.columns if col not in exclude_col]

# Row-wise mean: one average per player
stats['rating'] = stats[cols_to_average].mean(axis=1)

# Min-max rescale so the lowest-rated player gets 0 and the highest-rated gets 100
max_rating = stats['rating'].max()
min_rating = stats['rating'].min()

stats['rating'] = 100 * (stats['rating'] - min_rating) / (max_rating - min_rating)

# Plot the 10 clubs whose players have the highest mean rating
top_clubs = stats.groupby('club')['rating'].mean().sort_values(ascending=False).head(10).reset_index()

fig = px.bar(top_clubs, x='club', y='rating', title='Top 10 Clubs with Highest Rated Players',
             template='plotly_dark', color_discrete_sequence=['purple'],
             labels={'rating': 'Average Rating'}, text='rating')
# Show the average above each bar, formatted with the '.2s' specifier
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.show()

#### 3. How do different skills correlate with each other?

In [32]:
# List the available columns before picking skills to compare
stats.columns

Index(['player', 'country', 'height', 'weight', 'age', 'club', 'ball_control',
       'dribbling', 'slide_tackle', 'stand_tackle', 'aggression', 'reactions',
       'att_position', 'interceptions', 'vision', 'composure', 'crossing',
       'short_pass', 'long_pass', 'acceleration', 'stamina', 'strength',
       'balance', 'sprint_speed', 'agility', 'jumping', 'heading',
       'shot_power', 'finishing', 'long_shots', 'curve', 'fk_acc', 'penalties',
       'volleys', 'gk_positioning', 'gk_diving', 'gk_handling', 'gk_kicking',
       'gk_reflexes', 'value($)', 'rating'],
      dtype='object')

In [33]:
def corr_skills(skill1, skill2):
    """Scatter one skill against another and annotate their correlation.

    Reads the notebook-level ``stats`` DataFrame, draws ``skill1`` (x-axis)
    against ``skill2`` (y-axis) with an OLS trendline, writes the correlation
    coefficient (two decimals) above the plot area and displays the figure.

    Parameters
    ----------
    skill1 : str
        Name of the ``stats`` column plotted on the x-axis.
    skill2 : str
        Name of the ``stats`` column plotted on the y-axis.

    Returns
    -------
    None
        The figure is shown as a side effect.
    """
    # Correlation between the two columns, as computed by Series.corr
    r_value = stats[skill1].corr(stats[skill2])

    # Scatter plot with a green OLS trendline over purple markers
    axis_labels = {skill1: skill1, skill2: skill2}
    scatter_fig = px.scatter(
        stats,
        x=skill1,
        y=skill2,
        trendline='ols',
        labels=axis_labels,
        template='plotly_dark',
        color_discrete_sequence=['purple'],
        trendline_color_override='green',
    )

    # Annotation centred horizontally, just above the plot area (paper coordinates)
    corr_note = dict(
        x=0.5,
        y=1.05,
        xref='paper',
        yref='paper',
        text=f'Correlation Coefficient: {r_value:.2f}',
        showarrow=False,
        font=dict(size=12),
    )
    scatter_fig.update_layout(
        title=f'Scatter Plot of {skill1} vs {skill2}',
        annotations=[corr_note],
    )

    scatter_fig.show()


In [34]:
# Correlation between 'interceptions' and 'vision'
corr_skills('interceptions','vision')

In [35]:
# Correlation between 'composure' and 'long_pass'
corr_skills('composure','long_pass')

In [36]:
# Correlation between 'dribbling' and 'ball_control'
corr_skills('dribbling','ball_control')

#### 4. Let's look at how skills evolve with age

In [37]:
def age_skill(skill):
    """Scatter a skill against player age and annotate their correlation.

    Reads the notebook-level ``stats`` DataFrame, draws ``age`` (x-axis)
    against ``skill`` (y-axis) with an OLS trendline, writes the correlation
    coefficient (two decimals) above the plot area and displays the figure.

    Parameters
    ----------
    skill : str
        Name of the ``stats`` column plotted on the y-axis.

    Returns
    -------
    None
        The figure is shown as a side effect.
    """
    # Correlation between age and the chosen skill, as computed by Series.corr
    r_value = stats['age'].corr(stats[skill])

    # Scatter plot with a green OLS trendline over purple markers
    scatter_fig = px.scatter(
        stats,
        x='age',
        y=skill,
        trendline='ols',
        template='plotly_dark',
        color_discrete_sequence=['purple'],
        trendline_color_override='green',
    )

    # Annotation centred horizontally, just above the plot area (paper coordinates)
    corr_note = dict(
        x=0.5,
        y=1.05,
        xref='paper',
        yref='paper',
        text=f'Correlation Coefficient: {r_value:.2f}',
        showarrow=False,
        font=dict(size=12),
    )
    scatter_fig.update_layout(
        title=f'Scatter Plot of Age vs {skill}',
        annotations=[corr_note],
    )

    scatter_fig.show()

In [38]:
# Age vs Composure
age_skill('composure')

In [39]:
# Age vs Acceleration: scatter plot with trendline and correlation coefficient
age_skill('acceleration')

#### 5. Let's look at the top 10 most valuable and least valuable players

In [40]:
# Pick the ten players with the largest and the ten with the smallest 'value($)'
highest_valuable_players = stats.nlargest(10, 'value($)')
lowest_valuable_players = stats.nsmallest(10, 'value($)')

# One row with two side-by-side panels, one per group
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=['Top 10 Highest Valuable Players', 'Top 10 Lowest Valuable Players'],
)

# Left panel: the most valuable players (hover shows only the y value)
fig.add_bar(
    x=highest_valuable_players['player'],
    y=highest_valuable_players['value($)'],
    name='Highest Valuable Players',
    hoverinfo='y',
    marker={'color': 'purple'},
    row=1,
    col=1,
)

# Right panel: the least valuable players (hover shows only the y value)
fig.add_bar(
    x=lowest_valuable_players['player'],
    y=lowest_valuable_players['value($)'],
    name='Lowest Valuable Players',
    hoverinfo='y',
    marker={'color': 'orange'},
    row=1,
    col=2,
)

# Dark theme, no legend (the subplot titles already name each panel), fixed height
fig.update_layout(template='plotly_dark', showlegend=False, height=400)

# Both panels use the same axis titles, so set them on every x and y axis at once
fig.update_xaxes(title_text='Player')
fig.update_yaxes(title_text='Value ($)')

# Show the plot
fig.show()

