# Lectura del archivo

In [1]:
# Librerías
import pandas as pd
import numpy as np
import plotly.express as px

# Fixed seed so the preview below shows the same rows on every
# Restart Kernel -> Run All instead of a different random draw each time.
SEED = 42

# Load the FIFA dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('fifa_eda.csv')

# Preview five randomly chosen rows (reproducible thanks to SEED).
df.sample(5, random_state=SEED)

Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,Preferred Foot,International Reputation,Skill Moves,Position,Joined,Contract Valid Until,Height,Weight,Release Clause
9869,245237,I. Saavedra,19,Chile,66,74,Universidad Católica,900.0,2.0,Right,1.0,3.0,RCM,2016,2020-01-01,5.666667,150.0,1400.0
17348,245560,M. Venturi,19,Italy,54,66,Carpi,120.0,1.0,Right,1.0,2.0,CM,2018,2019-01-01,6.166667,165.0,210.0
4635,230367,Tony Muttinho,34,Brazil,71,71,Paraná,675.0,13.0,Right,1.0,2.0,RB,2018,2018-01-01,6.0,176.0,1300.0
8037,226390,R. Vera,35,Argentina,67,67,Patronato,180.0,4.0,Right,1.0,2.0,RCB,2016,2020-01-01,6.0,190.0,261.0
10247,200780,G. Bigirimana,24,England,65,69,Motherwell,700.0,2.0,Right,1.0,2.0,RCM,2017,2019-01-01,5.666667,157.0,1300.0


# Matriz de correlación

In [2]:
# Dtype names treated as "numeric" when selecting columns; reused by later cells.
numerics = [
    'int16', 'int32', 'int64',
    'float16', 'float32', 'float64',
]

# Pairwise correlation between every column whose dtype is in `numerics`.
numeric_df = df.select_dtypes(include=numerics)
numeric_df.corr()

Unnamed: 0,ID,Age,Overall,Potential,Value,Wage,International Reputation,Skill Moves,Joined,Height,Weight,Release Clause
ID,1.0,-0.739208,-0.417025,0.047074,-0.139837,-0.20461,-0.356191,-0.056914,0.206749,-0.09009,-0.191193,-0.121297
Age,-0.739208,1.0,0.45235,-0.253312,0.078315,0.141145,0.253765,0.027649,-0.202658,0.082506,0.22994,0.058672
Overall,-0.417025,0.45235,1.0,0.660939,0.631848,0.571926,0.499491,0.414463,-0.169281,0.038527,0.154557,0.597821
Potential,0.047074,-0.253312,0.660939,1.0,0.579608,0.486413,0.372993,0.35429,-0.047661,-0.009791,-0.006935,0.562346
Value,-0.139837,0.078315,0.631848,0.579608,1.0,0.858086,0.656158,0.317246,-0.115991,0.002827,0.046702,0.97331
Wage,-0.20461,0.141145,0.571926,0.486413,0.858086,1.0,0.668635,0.263205,-0.142337,0.019638,0.064764,0.828161
International Reputation,-0.356191,0.253765,0.499491,0.372993,0.656158,0.668635,1.0,0.208153,-0.133009,0.034881,0.08834,0.620863
Skill Moves,-0.056914,0.027649,0.414463,0.35429,0.317246,0.263205,0.208153,1.0,0.020692,-0.422753,-0.351209,0.297471
Joined,0.206749,-0.202658,-0.169281,-0.047661,-0.115991,-0.142337,-0.133009,0.020692,1.0,0.001188,-0.028274,-0.115374
Height,-0.09009,0.082506,0.038527,-0.009791,0.002827,0.019638,0.034881,-0.422753,0.001188,1.0,0.754678,0.001835


In [52]:
# Heatmap of the correlation matrix of the numeric columns.
# The ID column is dropped before computing the correlations.
corr_without_id = df.select_dtypes(include=numerics).drop(columns='ID').corr()

fig = px.imshow(
    corr_without_id,
    text_auto=True,  # print each coefficient inside its cell
    title='Correlación entre variables numéricas de archivo FIFA',
    width=500,
)
fig.show()

# Relación entre Edad y Overall

In [4]:
# Keep only the two columns under study; later cells reuse this frame.
df_age_overall = df.loc[:, ['Age', 'Overall']]

# 2x2 correlation matrix between Age and Overall.
df_age_overall.corr()

Unnamed: 0,Age,Overall
Age,1.0,0.45235
Overall,0.45235,1.0


In [6]:
# Render the Age/Overall correlation matrix as an annotated heatmap.
age_overall_corr = df_age_overall.corr()
fig = px.imshow(
    age_overall_corr,
    text_auto=True,
    title='Relación entre Edad y Overall de jugadores',
)
fig.show()

In [53]:
# Density heatmap of players over the (Age, Overall) plane.
fig = px.density_heatmap(
    df_age_overall,
    x='Age',
    y='Overall',
    title='Edad vs. Overall',
    width=500,
)
fig.show()

# Jugadores por Club

In [10]:
# Count the rows belonging to each club and expose the count as a
# 'Players' column next to the 'Club' column.
club_sizes = df.groupby('Club').size()
players_per_club = club_sizes.reset_index(name='Players')
players_per_club

Unnamed: 0,Club,Players
0,SSV Jahn Regensburg,29
1,1. FC Heidenheim 1846,28
2,1. FC Kaiserslautern,26
3,1. FC Köln,28
4,1. FC Magdeburg,26
...,...,...
646,Zagłębie Sosnowiec,25
647,Çaykur Rizespor,30
648,Örebro SK,27
649,Östersunds FK,22


In [11]:
# Bar chart with one bar per club found in `players_per_club`.
fig = px.bar(
    players_per_club,
    x='Club',
    y='Players',
    title='Jugadores por Club de Fútbol',
)
fig.show()

In [22]:
# There are too many clubs to read in a single chart, so keep only 20 of
# them: the 10 with the most players and the 10 with the fewest.
# `sorted_df1` (clubs ordered by player count, descending) is reused by later cells.
sorted_df1 = players_per_club.sort_values(by='Players', ascending=False)

# head(10) -> largest squads, tail(10) -> smallest squads.
top_and_bottom_clubs = pd.concat([sorted_df1.head(10), sorted_df1.tail(10)])

# The title names the subset being plotted so this figure is not confused
# with the all-clubs chart, which previously shared the same title.
fig = px.bar(
    top_and_bottom_clubs,
    x='Club',
    y='Players',
    title='Los 10 clubes con más y los 10 con menos jugadores',
)
fig.show()

In [23]:
# The 25 clubs with the most players (`sorted_df1` is ordered by player
# count, descending, so these are its first 25 rows).
# The title states the subset so the figure stands on its own; it previously
# reused the generic title shared with the other club charts.
fig = px.bar(
    sorted_df1.head(25),
    x='Club',
    y='Players',
    title='Los 25 clubes con más jugadores',
)
fig.show()

In [25]:
# The 25 clubs with the fewest players (`sorted_df1` is ordered by player
# count, descending, so these are its last 25 rows).
# The title states the subset so the figure stands on its own; it previously
# reused the generic title shared with the other club charts.
fig = px.bar(
    sorted_df1.tail(25),
    x='Club',
    y='Players',
    title='Los 25 clubes con menos jugadores',
)
fig.show()

# Relación entre altura y skill moves por pie

In [26]:
# Strip plot of Height against Skill Moves, with one panel per value of
# 'Preferred Foot'.
fig = px.strip(
    df,
    x='Height',
    y='Skill Moves',
    facet_col='Preferred Foot',
    title='Altura vs. Skill Moves por pie preferido',
)
fig.show()

# Posibles cracks

In [30]:
# Correlation matrix of the numeric columns, with rows ordered from the
# strongest to the weakest correlation with 'Potential'.
numeric_corr = df.select_dtypes(include=numerics).corr()
numeric_corr.sort_values(by='Potential', ascending=False)

Unnamed: 0,ID,Age,Overall,Potential,Value,Wage,International Reputation,Skill Moves,Joined,Height,Weight,Release Clause
Potential,0.047074,-0.253312,0.660939,1.0,0.579608,0.486413,0.372993,0.35429,-0.047661,-0.009791,-0.006935,0.562346
Overall,-0.417025,0.45235,1.0,0.660939,0.631848,0.571926,0.499491,0.414463,-0.169281,0.038527,0.154557,0.597821
Value,-0.139837,0.078315,0.631848,0.579608,1.0,0.858086,0.656158,0.317246,-0.115991,0.002827,0.046702,0.97331
Release Clause,-0.121297,0.058672,0.597821,0.562346,0.97331,0.828161,0.620863,0.297471,-0.115374,0.001835,0.038103,1.0
Wage,-0.20461,0.141145,0.571926,0.486413,0.858086,1.0,0.668635,0.263205,-0.142337,0.019638,0.064764,0.828161
International Reputation,-0.356191,0.253765,0.499491,0.372993,0.656158,0.668635,1.0,0.208153,-0.133009,0.034881,0.08834,0.620863
Skill Moves,-0.056914,0.027649,0.414463,0.35429,0.317246,0.263205,0.208153,1.0,0.020692,-0.422753,-0.351209,0.297471
ID,1.0,-0.739208,-0.417025,0.047074,-0.139837,-0.20461,-0.356191,-0.056914,0.206749,-0.09009,-0.191193,-0.121297
Weight,-0.191193,0.22994,0.154557,-0.006935,0.046702,0.064764,0.08834,-0.351209,-0.028274,0.754678,1.0,0.038103
Height,-0.09009,0.082506,0.038527,-0.009791,0.002827,0.019638,0.034881,-0.422753,0.001188,1.0,0.754678,0.001835


In [46]:
# Players ordered from highest to lowest Potential; reset_index() keeps the
# original row label as an 'index' column and renumbers the rows from 0.
players_by_potential = df.sort_values(by='Potential', ascending=False)
players_by_potential.reset_index()

Unnamed: 0,index,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,Preferred Foot,International Reputation,Skill Moves,Position,Joined,Contract Valid Until,Height,Weight,Release Clause
0,25,231747,K. Mbappé,19,France,88,95,Paris Saint-Germain,81000.0,100.0,Right,3.0,5.0,RM,2018,2022-01-01,5.833333,161.0,166100.000000
1,0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,110500.0,565.0,Left,5.0,4.0,RF,2004,2021-01-01,5.583333,159.0,226500.000000
2,15,211110,P. Dybala,24,Argentina,89,94,Juventus,89000.0,205.0,Left,3.0,4.0,LF,2015,2022-01-01,5.833333,165.0,153500.000000
3,1,20801,Cristiano Ronaldo,33,Portugal,94,94,Juventus,77000.0,405.0,Right,5.0,5.0,ST,2018,2022-01-01,6.166667,183.0,127100.000000
4,2,190871,Neymar Jr,26,Brazil,92,93,Paris Saint-Germain,118500.0,290.0,Right,5.0,5.0,LW,2017,2022-01-01,5.750000,150.0,228100.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18202,17988,182478,Wang Xuanhong,28,China PR,51,51,Beijing Renhe FC,40.0,2.0,Right,1.0,2.0,CM,2017,2019-01-01,6.000000,172.0,86.000000
18203,18043,237917,A. Suzuki,31,Japan,50,50,Yokohama F. Marinos,20.0,1.0,Right,1.0,1.0,GK,2017,2021-01-01,6.000000,176.0,25.000000
18204,18025,211490,J. Miszczuk,27,Poland,50,50,Jagiellonia Białystok,20.0,1.0,Right,1.0,1.0,GK,2018,2019-01-01,6.083333,168.0,27.000000
18205,18183,53748,K. Pilkington,44,England,48,48,Cambridge United,,1.0,Right,1.0,1.0,GK,2018,2023-01-01,6.166667,190.0,4585.060806


In [47]:
# Scatter of current rating (Overall) against Potential, one panel per
# preferred foot, with points coloured by Skill Moves.
fig = px.scatter(
    df,
    x='Overall',
    y='Potential',
    title='Posibles cracks',
    facet_col='Preferred Foot',
    color='Skill Moves',
)
fig.show()