In [None]:
import pandas as pd 
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt # for visualization
from IPython.display import display,HTML # for visualization
import plotly.express as ex # for visualization

# Lift pandas' display limits (rows, columns, line width) by setting each to None,
# so frames and series are rendered without truncation.
for option_name in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(option_name, None)

In [None]:
# Load the player table from the CSV file sitting next to the notebook.
fifa = pd.read_csv(filepath_or_buffer="players_21.csv")

In [None]:
# Preview the first rows of the loaded data.
fifa.head()

In [None]:
# Relationship between a player's age and overall rating, one point per player.
# A scatter is used because a line plot would join players in file order,
# which carries no meaning and renders as a scribble.
x = fifa['age']
y = fifa['overall']

plt.figure(figsize=(8, 5))
plt.scatter(x, y, s=5, alpha=0.3)  # small translucent markers so dense regions stay readable
plt.xlabel('Age')
plt.ylabel('Overall rating')
plt.title('Overall rating vs age')
plt.show()

In [None]:
# Gather every column label of the dataset into a plain Python list and show it.
skills = list(fifa.columns)

skills

In [None]:
# (rows, columns) of the loaded frame.
fifa.shape

In [None]:
# Number of players per nationality.
fifa['nationality'].value_counts()

In [None]:
# First ten entries of the per-nationality player counts.
fifa['nationality'].value_counts().head(10)

In [None]:
# Bar chart of the first five entries of the per-nationality player counts.
top_five_nations = fifa['nationality'].value_counts().iloc[:5]

plt.figure(figsize=(8,5))
plt.bar(list(top_five_nations.index), list(top_five_nations), color="g")
plt.show()

In [None]:
# Two-column view of the data: player short name and wage (EUR).
player_salary = fifa.loc[:, ['short_name', 'wage_eur']]

In [None]:
# Preview the first rows of the name/wage frame.
player_salary.head()

In [None]:
# Order players from highest to lowest wage, then show the top of the list.
player_salary = player_salary.sort_values(by='wage_eur', ascending=False)
player_salary.head()

In [None]:
# Per-column count of missing values.
fifa.isnull().sum()

# SELECTING A SUBSET OF COLUMNS FOR MISSING-VALUE IMPUTATION


In [None]:
# Replace missing values in the six headline stat columns with each column's mean.
stat_columns = ['pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic']

# Sub-frame of the columns to impute, taken before filling.
fill_values = fifa.loc[:, stat_columns]

# Assign the filled columns back in one step. Calling
# `fifa[col].fillna(..., inplace=True)` acts on an intermediate selection:
# it warns in recent pandas and leaves `fifa` untouched under Copy-on-Write.
# `fill_values.mean()` is a Series keyed by column name, so each column is
# filled with its own mean.
fifa[stat_columns] = fill_values.fillna(fill_values.mean())

In [None]:
# Per-column count of missing values.
fifa.isnull().sum()

# Plotting count of players based on their height

In [None]:
# Number of players at each distinct height_cm value.
plt.figure(figsize=(22,8))
ax = sns.countplot(data=fifa, x='height_cm')
# The title previously read "Count of table ..."; the bars count players.
ax.set_title(label="Count of players based on their heights", fontsize=20)
ax.set_xlabel(xlabel='Height', fontsize=16)
ax.set_ylabel(ylabel='No. of Players', fontsize=16)
plt.show()

# HEIGHT VS DRIBBLING

In [None]:
# Dribbling aggregated per height (seaborn's default barplot estimator),
# drawn from the data ordered by height.
players_by_height = fifa.sort_values('height_cm')

plt.figure(figsize=(32,18))
sns.barplot(data=players_by_height, x='height_cm', y='dribbling')
plt.xlabel('Height', fontsize=21)
plt.ylabel('Dribbling', fontsize=21)
plt.show()

# WEIGHT VS DRIBBLING

In [None]:
# Dribbling aggregated per weight (seaborn's default barplot estimator),
# drawn from the data ordered by weight.
players_by_weight = fifa.sort_values('weight_kg')

plt.figure(figsize=(32,18))
sns.barplot(data=players_by_weight, x='weight_kg', y='dribbling')
plt.xlabel('Weight', fontsize=21)
plt.ylabel('Dribbling', fontsize=21)
plt.show()

# Messi vs Ronaldo

In [None]:
# Attribute columns used for the player-vs-player comparison, grouped by family.
skills = [
    # headline stats
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
    # attacking
    'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
    'attacking_short_passing', 'attacking_volleys',
    # movement
    'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
    'movement_reactions', 'movement_balance',
    # power
    'power_shot_power', 'power_jumping', 'power_stamina',
    'power_strength', 'power_long_shots',
]

In [None]:
# Pull each player's row by short name and keep only the `skills` columns.
is_messi = fifa['short_name'] == 'L. Messi'
messi = fifa.loc[is_messi].reindex(columns=skills)

is_ronaldo = fifa['short_name'] == 'Cristiano Ronaldo'
ronaldo = fifa.loc[is_ronaldo].reindex(columns=skills)

In [None]:
# Show Messi's row restricted to the `skills` columns.
messi

In [None]:
# Show Ronaldo's row restricted to the `skills` columns.
ronaldo

In [None]:
# Overlay both players' skill values on one figure: Messi in blue, Ronaldo in red.
plt.figure(figsize=(30,10))
for player_stats, line_colour in ((messi, 'blue'), (ronaldo, 'red')):
    sns.pointplot(data=player_stats, color=line_colour)

plt.title("Messi vs Ronaldo", fontsize=40)
plt.xlabel("skills", fontsize=20)
plt.ylabel("Skills value", fontsize=20)
plt.xticks(rotation=90)  # skill names are long; turn them vertical
plt.grid()

# Top 10 players based on overall rating

In [None]:
# Ten highest-rated players (name and overall), rendered as an HTML table without the index.
top_ten_overall = (
    fifa.sort_values('overall', ascending=False)
        .loc[:, ['short_name', 'overall']]
        .head(10)
)
display(HTML(top_ten_overall.to_html(index=False)))

# Top 5 nations with overall best players

In [None]:
# For each nationality take its highest `overall` value, then keep the five
# nationalities where that maximum is largest.
best_overall_per_nation = fifa.groupby('nationality')['overall'].max()
top_nations = best_overall_per_nation.sort_values(ascending=False).head(5)

In [None]:
# Show the per-nation maxima; `top_nations` was already cut to five rows.
top_nations.head()

# Top 5 clubs with overall best players

In [None]:
# For each club take its highest `overall` value, then keep the five clubs
# where that maximum is largest.
best_overall_per_club = fifa.groupby('club_name')['overall'].max()
top_clubs = best_overall_per_club.sort_values(ascending=False).head(5)

In [None]:
# Show the five clubs whose best player has the highest overall rating.
top_clubs

# Top overall player at a certain position

In [None]:
# Highest-rated player(s) whose `player_positions` value is exactly 'RB'.
# NOTE(review): this is an exact string match; if the column can hold several
# positions in one value, those rows are excluded - confirm that is intended.
rb_player = fifa.loc[fifa['player_positions'] == 'RB']
top_rb_rating = rb_player['overall'].max()
best_rb = rb_player.loc[rb_player['overall'] == top_rb_rating]  # keeps ties
print(best_rb[['short_name', 'overall']])

In [None]:
# Highest-rated player(s) whose `player_positions` value is exactly 'CF'.
# NOTE(review): this is an exact string match; if the column can hold several
# positions in one value, those rows are excluded - confirm that is intended.
cf_player = fifa.loc[fifa['player_positions'] == 'CF']
top_cf_rating = cf_player['overall'].max()
best_cf = cf_player.loc[cf_player['overall'] == top_cf_rating]  # keeps ties
print(best_cf[['short_name', 'overall']])

In [None]:
# Highest-rated player(s) whose `player_positions` value is exactly 'GK'.
# NOTE(review): this is an exact string match; if the column can hold several
# positions in one value, those rows are excluded - confirm that is intended.
gk_player = fifa.loc[fifa['player_positions'] == 'GK']
top_gk_rating = gk_player['overall'].max()
best_gk = gk_player.loc[gk_player['overall'] == top_gk_rating]  # keeps ties
print(best_gk[['short_name', 'overall']])

# AGE DISTRIBUTION OF PLAYERS IN COUNTRIES

In [None]:
# Age distribution of players from a handful of countries.
countries_names = ('Argentina', 'Portugal', 'Belgium', 'Brazil', 'India')

# Keep players of the chosen nationalities that have an age recorded.
# Both sides of `&` must be boolean: `isin(...) & fifa['age']` bitwise-ANDs the
# flag with the numeric age and casts the result back to bool, which (for an
# integer age column) silently keeps only players whose age is odd.
countries = fifa.loc[fifa['nationality'].isin(countries_names) & fifa['age'].notna()]

fig, ax = plt.subplots()
fig.set_size_inches(20, 10)
ax = sns.boxenplot(x='nationality', y='age', data=countries)
plt.grid()

# AGE DISTRIBUTION OF PLAYERS IN THE CLUBS

In [None]:
# Age distribution of players at a handful of clubs.
clubs_names = ('FC Barcelona', 'Juventus', 'Manchester City', 'Real Madrid', 'Paris Saint-Germain')

# Keep players of the chosen clubs that have an age recorded.
# Both sides of `&` must be boolean: `isin(...) & fifa['age']` bitwise-ANDs the
# flag with the numeric age and casts the result back to bool, which (for an
# integer age column) silently keeps only players whose age is odd.
clubs = fifa.loc[fifa['club_name'].isin(clubs_names) & fifa['age'].notna()]

fig, ax = plt.subplots()
fig.set_size_inches(20, 10)
ax = sns.boxenplot(x='club_name', y='age', data=clubs)
plt.grid()

# Nation-wise player counts and average overall rating

In [None]:
# Per-nationality aggregates of the `overall` rating.
# Built-in groupby reductions replace `apply(lambda ...)` over whole sub-frames:
# they avoid the deprecated apply-on-grouping-columns path, and `np.average`
# of a single count was a no-op anyway.
overall_by_nation = fifa.groupby('nationality')['overall']

# Mean overall rating per nationality.
avg_overall_nationality = overall_by_nation.mean().reset_index(name='Overall Ratings')

# Number of players (non-null `overall` values) per nationality.
nation_player_count = overall_by_nation.count().reset_index(name='Player Counts')

In [None]:
# Mean `overall` per nationality (column 'Overall Ratings').
avg_overall_nationality

In [None]:
# Count of `overall` values per nationality (column 'Player Counts').
nation_player_count

In [38]:
# Combine the per-nation average rating and player count into one frame.
ovr_ratings_plus_counts = pd.merge(
    avg_overall_nationality,
    nation_player_count,
    how='inner',
    on='nationality',
)

# Keep nations with at least 100 players and order them best-first.
# `sort_values` returns a new frame, so the result has to be assigned;
# the original call discarded it and left the frame unsorted.
has_enough_players = ovr_ratings_plus_counts['Player Counts'] >= 100
best_avg_top = (
    ovr_ratings_plus_counts[has_enough_players]
    .sort_values(by=['Overall Ratings', 'Player Counts'], ascending=False)
)

# Interactive scatter: average rating vs squad size, nationality shown on hover.
ex.scatter(best_avg_top, x='Overall Ratings', y='Player Counts', color='Player Counts', hover_data=['nationality'])

# Top footballers in the FIFA 21 game

In [42]:
# Name, rating, age and club of every player, ordered from highest to lowest overall.
# `sort_values` returns a new frame; the original call discarded the result, so the
# "top" slice below was simply the first rows of the file.
top_play = (
    fifa[['short_name', 'overall', 'age', 'club_name']]
    .sort_values(by='overall', ascending=False)
)

# NOTE: despite its name this holds the first 100 rows (name kept for compatibility).
top_30_play = top_play[:100]

# Plotly draws its own figure, so no matplotlib `plt.figure(...)` is needed here
# (it only produced an empty "<Figure ... with 0 Axes>" output).
fig = ex.scatter(top_30_play, x='age', y='overall', color='age', size='overall', hover_data=['short_name', 'club_name'])
fig.show()

<Figure size 1800x720 with 0 Axes>

# Finding best players for each performance criteria

In [43]:
# Performance criteria for which the single best player is reported.
pr_cols = [
    'pace',
    'shooting',
    'passing',
    'dribbling',
    'defending',
    'physic',
    'attacking_crossing',
    'attacking_finishing',
    'attacking_heading_accuracy',
    'attacking_short_passing',
    'attacking_volleys',
    'skill_dribbling',
    'skill_curve',
    'skill_fk_accuracy',
    'skill_long_passing',
    'skill_ball_control',
    'movement_acceleration',
    'movement_sprint_speed',
    'movement_agility',
    'movement_reactions',
    'movement_balance',
    'power_shot_power',
    'power_jumping',
    'power_stamina',
    'power_strength',
    'power_long_shots',
]

# For each criterion, find the row label of its maximum and print that player's name.
# The name is read by column label: the previous `[2]` was a positional lookup on a
# label-indexed Series, which is deprecated and depends on the column order.
for criterion in pr_cols:
    best_row_label = fifa[criterion].idxmax()
    print('Best {0} : {1}'.format(criterion, fifa.loc[best_row_label, 'short_name']))

Best pace : K. Mbappé
Best shooting : Cristiano Ronaldo
Best passing : K. De Bruyne
Best dribbling : L. Messi
Best defending : V. van Dijk
Best physic : Casemiro
Best attacking_crossing : K. De Bruyne
Best attacking_finishing : L. Messi
Best attacking_heading_accuracy : L. de Jong
Best attacking_short_passing : K. De Bruyne
Best attacking_volleys : L. Suárez
Best skill_dribbling : L. Messi
Best skill_curve : Quaresma
Best skill_fk_accuracy : L. Messi
Best skill_long_passing : K. De Bruyne
Best skill_ball_control : L. Messi
Best movement_acceleration : Adama Traoré
Best movement_sprint_speed : K. Mbappé
Best movement_agility : Neymar Jr
Best movement_reactions : Cristiano Ronaldo
Best movement_balance : E. Oztumer
Best power_shot_power : A. Kolarov
Best power_jumping : Cristiano Ronaldo
Best power_stamina : V. Darida
Best power_strength : A. Akinfenwa
Best power_long_shots : L. Messi
