## Players And Their Attributes

In [2]:
# Import packages
import pandas as pd
from datetime import date

In [3]:
# Read the two raw tables from the local data/ folder:
#   df_pla      - contents of Player.csv
#   df_pla_attr - contents of Player_Attributes.csv
df_pla = pd.read_csv('data/Player.csv')
df_pla_attr = pd.read_csv('data/Player_Attributes.csv')

In [4]:
# Keep only the most recent attribute record per player.
#
# sort_values() returns a NEW frame unless inplace=True is passed, so the
# sorted result must be assigned back; otherwise drop_duplicates() would keep
# whichever row happens to come first in the CSV rather than the newest one.
# Sorting by date descending puts the latest entry first, and keep='first'
# then retains exactly that row for every player_fifa_api_id.
#
# NOTE(review): duplicates are removed per 'player_fifa_api_id', but the later
# merge with the Player table joins on 'player_api_id'. If the two ids are not
# one-to-one, a player can still appear more than once after the merge - confirm.
df_pla_attr = (
    df_pla_attr
    .sort_values(by='date', ascending=False)
    .drop_duplicates(subset='player_fifa_api_id', keep='first')
)

In [5]:
# Report how many rows (one per retained attribute record) and how many
# columns the de-duplicated attributes table has.
n_rows, n_cols = df_pla_attr.shape[0], df_pla_attr.shape[1]
print('The number of players in the dataset is', n_rows,
      '\nand the number of columns is', n_cols)

The number of players in the dataset is 11062 
and the number of columns is 42


In [6]:
# Join the Player table with the attribute records on the shared
# 'player_api_id' key (default inner join; overlapping column names
# receive the default '_x' / '_y' suffixes).
df_players = df_pla.merge(df_pla_attr, on='player_api_id')

In [7]:
# Discard the columns that are not needed in the final table.
# The list is assembled from three groups purely for readability.

# Row ids, the duplicated FIFA id and the attribute date left over from the merge
merge_leftovers = ['id_x', 'id_y', 'player_fifa_api_id_y', 'date']

# Individual skill attributes
skill_attributes = [
    'crossing', 'finishing', 'heading_accuracy', 'short_passing', 'volleys',
    'dribbling', 'curve', 'free_kick_accuracy', 'long_passing',
    'acceleration', 'sprint_speed', 'agility', 'reactions', 'balance',
    'shot_power', 'jumping', 'stamina', 'strength', 'long_shots',
    'aggression', 'interceptions', 'positioning', 'vision', 'penalties',
    'marking', 'standing_tackle', 'sliding_tackle',
]

# Goalkeeper-specific attributes
goalkeeper_attributes = [
    'gk_diving', 'gk_handling', 'gk_kicking', 'gk_positioning', 'gk_reflexes',
]

columns = merge_leftovers + skill_attributes + goalkeeper_attributes

df_players = df_players.drop(columns=columns)

In [8]:
# Calculate Age (in completed years) from birthday.
#
# Age is derived from calendar fields instead of `days // 365`: dividing the
# day count by 365 ignores leap days, so a player would be reported one year
# older several days before the actual birthday. It also required casting the
# raw day count to int16, which overflows above 32767 days.
# NOTE: the result depends on the date the notebook is run.
today = date.today()

birthdays = pd.to_datetime(df_players['birthday'])

# True where this year's birthday has not been reached yet
birthday_pending = (birthdays.dt.month > today.month) | (
    (birthdays.dt.month == today.month) & (birthdays.dt.day > today.day)
)

# Store the birthday as plain date objects (no time component), as before
df_players['birthday'] = birthdays.dt.date
df_players['Age'] = (
    today.year - birthdays.dt.year - birthday_pending.astype(int)
).astype('int16')

In [9]:
# Body-mass index: weight[kg] / height[m]^2, rounded to two decimals.
# Weight is stored in pounds, so it is converted to kilograms first.
# Dividing twice by the height and scaling by 10000 (= 100^2) is the
# centimetre-based form of the formula.
wei_kg = df_players['weight'] * 0.45359237
height = df_players['height']

df_players['BMI'] = (wei_kg / height / height * 10000).round(2)

In [10]:
# Preview the first rows of the merged table, including the new Age and BMI columns
df_players.head()

Unnamed: 0,player_api_id,player_name,player_fifa_api_id_x,birthday,height,weight,overall_rating,potential,preferred_foot,attacking_work_rate,defensive_work_rate,ball_control,Age,BMI
0,505942,Aaron Appindangoye,218353,1992-02-29,182.88,187,67.0,71.0,right,medium,medium,49.0,32,25.36
1,155782,Aaron Cresswell,189615,1989-12-15,170.18,146,74.0,76.0,left,high,medium,71.0,34,22.87
2,162549,Aaron Doran,186170,1991-05-13,170.18,163,65.0,67.0,right,medium,medium,67.0,33,25.53
3,30572,Aaron Galindo,140161,1982-05-08,182.88,198,69.0,69.0,right,medium,medium,62.0,42,26.85
4,23780,Aaron Hughes,17725,1979-11-08,182.88,154,70.0,70.0,right,medium,medium,58.0,44,20.89


In [11]:
# Summary statistics of the numeric columns for all players
df_players.describe()

Unnamed: 0,player_api_id,player_fifa_api_id_x,height,weight,overall_rating,potential,ball_control,Age,BMI
count,11062.0,11062.0,11062.0,11062.0,11060.0,11060.0,11060.0,11062.0,11062.0
mean,156569.025493,165654.051799,181.868317,168.385102,67.961935,71.037432,62.099005,36.89821,23.066492
std,160704.573209,58652.843912,6.367879,14.988696,6.316694,6.214862,15.309941,5.460441,1.316831
min,2625.0,2.0,157.48,117.0,42.0,49.0,9.0,25.0,17.59
25%,35554.5,151874.5,177.8,159.0,64.0,67.0,58.0,33.0,22.17
50%,96619.5,184656.0,182.88,168.0,68.0,71.0,65.0,36.0,23.06
75%,212460.75,203882.0,185.42,179.0,72.0,75.0,72.0,41.0,23.87
max,750584.0,234141.0,208.28,243.0,94.0,94.0,96.0,57.0,30.87


In [12]:
# Summary statistics restricted to players whose preferred foot is 'right'
is_right_footed = df_players['preferred_foot'].eq('right')
df_players.loc[is_right_footed].describe()

Unnamed: 0,player_api_id,player_fifa_api_id_x,height,weight,overall_rating,potential,ball_control,Age,BMI
count,8373.0,8373.0,8373.0,8373.0,8373.0,8373.0,8373.0,8373.0,8373.0
mean,155603.27529,165483.43915,182.101285,168.959393,67.942434,71.015407,61.425296,36.953063,23.086213
std,160825.374993,58758.997623,6.40155,15.02306,6.372629,6.212234,15.891784,5.495302,1.317375
min,2625.0,6.0,157.48,117.0,42.0,49.0,9.0,25.0,17.59
25%,35417.0,150594.0,177.8,159.0,64.0,67.0,57.0,33.0,22.18
50%,95336.0,184484.0,182.88,168.0,68.0,71.0,65.0,37.0,23.1
75%,210691.0,203863.0,187.96,179.0,72.0,75.0,72.0,41.0,23.87
max,750584.0,234141.0,208.28,243.0,93.0,94.0,93.0,57.0,30.87


In [13]:
# Summary statistics restricted to players whose preferred foot is 'left'
is_left_footed = df_players['preferred_foot'].eq('left')
df_players.loc[is_left_footed].describe()

Unnamed: 0,player_api_id,player_fifa_api_id_x,height,weight,overall_rating,potential,ball_control,Age,BMI
count,2687.0,2687.0,2687.0,2687.0,2687.0,2687.0,2687.0,2687.0,2687.0
mean,159581.794194,166198.49907,181.135936,166.588016,68.022702,71.106066,64.198362,36.729066,23.005601
std,160375.914006,58348.460408,6.203949,14.743058,6.139894,6.223702,13.11762,5.349468,1.313817
min,2768.0,2.0,160.02,121.0,46.0,51.0,11.0,25.0,18.41
25%,36556.5,154346.0,177.8,157.0,64.0,67.0,60.0,33.0,22.15
50%,103139.0,185323.0,180.34,165.0,68.0,71.0,66.0,36.0,23.06
75%,215416.5,203950.5,185.42,176.0,72.0,75.0,72.0,41.0,23.87
max,744907.0,233911.0,203.2,225.0,94.0,94.0,96.0,54.0,29.57


In [14]:
# Write the final players table to an Excel workbook (single sheet named
# 'Players', without the DataFrame index).
# NOTE(review): the file name spells "thier"; it is left unchanged here
# because other code may depend on this exact path - confirm before renaming.
df_players.to_excel(
    'data/Players_and_thier_Attributes.xlsx',
    sheet_name='Players',
    index=False,
)