# Exploratory Data Analysis on FIFA 21 Dataset

### Import necessary modules

In [155]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

### Read the Dataset

In [156]:
# Read the player table and discard the 'Unnamed: 0' column in one chained step,
# then preview the first rows.
df = pd.read_csv("FIFA21_players.csv").drop(columns=['Unnamed: 0'])
df.head()

Unnamed: 0,PlayerName,PlayerHeight,PlayerWeight,PotentialRating,BestPositions,BestOverallRating,PlayerValue,Wage,Nationality,Crossing,...,SkillMoves,InternationalReputations,WorkRate,BodyType,Dribbling,Curve,FreekickAccuracy,LongPassing,BallControl,Age
0,Lionel Andrés Messi Cuccittini,170,72,93,RW,93,103500000,560000,Argentina,85,...,4,5,Medium/ Low,Unique,96,93,94,91,96,35
1,Cristiano Ronaldo dos Santos Aveiro,187,83,92,ST,92,63000000,220000,Portugal,84,...,5,5,High/ Low,Unique,88,81,76,77,92,37
2,Jan Oblak,188,87,93,GK,91,120000000,125000,Slovenia,13,...,1,3,Medium/ Medium,Unique,12,13,14,40,30,29
3,Kevin De Bruyne,181,70,91,CAM,91,129000000,370000,Belgium,94,...,4,4,High/ High,Unique,88,85,83,93,92,31
4,Neymar da Silva Santos Júnior,175,68,91,LW,91,132000000,270000,Brazil,85,...,5,5,High/ Medium,Unique,95,88,89,81,95,30


### Check the columns

In [157]:
# Display the column labels of the loaded frame.
df.columns

Index(['PlayerName', 'PlayerHeight', 'PlayerWeight', 'PotentialRating',
       'BestPositions', 'BestOverallRating', 'PlayerValue', 'Wage',
       'Nationality', 'Crossing', 'FinishingAccuracy', 'HeadingAccuracy',
       'ShortPassing', 'Volleys', 'DefensiveAwareness', 'StandingTackle',
       'SlidingTackle', 'Diving', 'Handling', 'Kicking', 'GkPositioning',
       'Reflexes', 'Aggression', 'Interceptions', 'Positioning', 'Vision',
       'Penalties', 'Composure', 'Acceleration', 'SprintSpeed', 'Agility',
       'Reactions', 'Balance', 'ShotPower', 'JumpingPower', 'Stamina',
       'Strength', 'LongShots', 'PreferredFoot', 'WeakFoot', 'SkillMoves',
       'InternationalReputations', 'WorkRate', 'BodyType', 'Dribbling',
       'Curve', 'FreekickAccuracy', 'LongPassing', 'BallControl', 'Age'],
      dtype='object')

### Shape of the dataset

In [158]:
# (rows, columns) of the loaded frame.
df.shape

(19002, 50)

### Check whether columns have missing values

In [159]:
# Show only the labels of columns that contain at least one missing value
# (df.isnull().sum() would give the per-column counts instead).
df.columns[df.isna().any(axis=0)]

Index([], dtype='object')

### Rename the columns properly

In [160]:
# NOTE: this rename is intentionally disabled. The frame loaded above already
# exposes the friendly names on the right-hand side of this mapping (see the
# df.columns output), so the mapping is kept only as a record of the raw names.
# df = df.rename(columns = {'int_player_id': 'PlayerId', 'str_player_name': 'PlayerName', 'str_positions': 'Positions','dt_date_of_birth': 'D.O.B', 'int_height': 'PlayerHeight',
# 'int_weight': 'PlayerWeight', 'int_overall_rating': 'OverallRating',
# 'int_potential_rating': 'PotentialRating', 'str_best_position': 'BestPositions',
# 'int_best_overall_rating': 'BestOverallRating', 'int_value': 'PlayerValue',
# 'int_wage': 'Wage', 'int_team_id': 'TeamId', 'str_nationality': 'Nationality',
# 'int_crossing': 'Crossing', 'int_finishing': 'FinishingAccuracy',
# 'int_heading_accuracy': 'HeadingAccuracy', 'int_short_passing': 'ShortPassing',
# 'int_volleys': 'Volleys', 'int_defensive_awareness': 'DefensiveAwareness',
# 'int_standing_tackle': 'StandingTackle', 'int_sliding_tackle': 'SlidingTackle',
# 'int_diving': 'Diving', 'int_handling': 'Handling',
# 'int_kicking': 'Kicking', 'int_gk_positioning': 'GkPositioning',
# 'int_reflexes': 'Reflexes', 'int_aggression': 'Aggression',
# 'int_interceptions': 'Interceptions', 'int_positioning': 'Positioning',
# 'int_vision': 'Vision', 'int_penalties': 'Penalties',
# 'int_composure': 'Composure', 'int_acceleration': 'Acceleration',
# 'int_sprint_speed': 'SprintSpeed', 'int_agility': 'Agility',
# 'int_reactions': 'Reactions', 'int_balance': 'Balance',
# 'int_shot_power': 'ShotPower', 'int_jumping': 'JumpingPower',
# 'int_stamina': 'Stamina', 'int_strength': 'Strength',
# 'int_long_shots': 'LongShots', 'str_preferred_foot': 'PreferredFoot',
# 'int_weak_foot': 'WeakFoot', 'int_skill_moves': 'SkillMoves',
# 'int_international_reputations': 'InternationalReputations', 'str_work_rate': 'WorkRate',
# 'str_body_type': 'BodyType', 'int_dribbling': 'Dribbling',
# 'int_curve': 'Curve', 'int_fk_accuracy': 'FreekickAccuracy',
# 'int_long_passing': 'LongPassing', 'int_ball_control': 'BallControl', 'str_player_speciality':'PlayerSpeciality', 'str_trait': 'Trait'}, inplace = False)

### Getting Information about the datatypes

In [161]:
# Print per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19002 entries, 0 to 19001
Data columns (total 50 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   PlayerName                19002 non-null  object
 1   PlayerHeight              19002 non-null  int64 
 2   PlayerWeight              19002 non-null  int64 
 3   PotentialRating           19002 non-null  int64 
 4   BestPositions             19002 non-null  object
 5   BestOverallRating         19002 non-null  int64 
 6   PlayerValue               19002 non-null  int64 
 7   Wage                      19002 non-null  int64 
 8   Nationality               19002 non-null  object
 9   Crossing                  19002 non-null  int64 
 10  FinishingAccuracy         19002 non-null  int64 
 11  HeadingAccuracy           19002 non-null  int64 
 12  ShortPassing              19002 non-null  int64 
 13  Volleys                   19002 non-null  int64 
 14  DefensiveAwareness    

### Statistical Analysis

In [162]:
# Summary statistics with every value rendered as a fixed-point string, so
# large figures are shown without scientific notation.
df.describe().apply(lambda column: column.map('{:f}'.format))

Unnamed: 0,PlayerHeight,PlayerWeight,PotentialRating,BestOverallRating,PlayerValue,Wage,Crossing,FinishingAccuracy,HeadingAccuracy,ShortPassing,...,LongShots,WeakFoot,SkillMoves,InternationalReputations,Dribbling,Curve,FreekickAccuracy,LongPassing,BallControl,Age
count,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,...,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0,19002.0
mean,181.22566,75.046311,71.148932,66.784286,2891449.005368,9113.167035,49.690454,45.877329,51.970056,58.808494,...,46.812967,2.942217,2.36554,1.090569,55.607462,47.271603,42.394432,52.779971,58.552416,27.592464
std,6.847472,7.078378,6.115352,6.733926,7733189.305884,19735.324238,18.141514,19.580879,17.323647,14.51706,...,19.307572,0.669568,0.766687,0.359019,18.786894,18.217325,17.240399,15.172601,16.58012,4.715048
min,155.0,50.0,48.0,48.0,0.0,0.0,6.0,3.0,5.0,7.0,...,4.0,1.0,1.0,1.0,5.0,4.0,5.0,5.0,5.0,19.0
25%,176.0,70.0,67.0,62.0,475000.0,1000.0,38.0,30.0,44.0,54.0,...,32.0,3.0,2.0,1.0,49.0,35.0,31.0,43.0,54.0,24.0
50%,181.0,75.0,71.0,67.0,950000.0,3000.0,54.0,50.0,55.0,62.0,...,51.0,3.0,2.0,1.0,61.0,49.0,41.0,56.0,63.0,27.0
75%,186.0,80.0,75.0,71.0,2000000.0,8000.0,63.0,62.0,64.0,68.0,...,62.0,3.0,3.0,1.0,68.0,61.0,55.0,64.0,69.0,31.0
max,206.0,110.0,95.0,93.0,185500000.0,560000.0,94.0,95.0,93.0,94.0,...,94.0,5.0,5.0,5.0,96.0,94.0,94.0,93.0,96.0,55.0


# Answer the following questions based on the dataset given

### Q1 Fastest Players for FIFA 2021

##### To find the fastest players, we sort the players by acceleration or sprint speed in descending order. (Acceleration is used here.)

In [163]:
# Ten players with the highest Acceleration, indexed by name. SprintSpeed is
# carried along for comparison only; it does not affect the ranking.
# nlargest(10, ...) already limits the result to ten rows, so no extra
# .head(10) is needed.
fastest = (
    df[["Acceleration", "PlayerName", "BestPositions", 'Age', 'Nationality', 'SprintSpeed']]
    .nlargest(10, ['Acceleration'])
    .set_index('PlayerName')
)
fastest

Unnamed: 0_level_0,Acceleration,BestPositions,Age,Nationality,SprintSpeed
PlayerName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Adama Traoré Diarra,97,RM,26,Spain,96
Kylian Mbappé Lottin,96,ST,24,France,96
Raheem Shaquille Sterling,96,LW,28,England,90
Moussa Diaby,96,LM,23,France,90
Alphonso Davies,96,LB,22,Canada,96
Daniel James,96,RM,25,Wales,95
Jérémy Doku,96,RW,20,Belgium,91
Sadio Mané,95,LW,30,Senegal,93
Lucas Rodrigues Moura da Silva,95,RM,30,Brazil,88
Vinícius José Paixão de Oliveira Júnior,95,RM,22,Brazil,95


### Q2 Tallest Players in FIFA 2021

##### In order to get the tallest players, we need to sort the players according to their height in desc order.

In [164]:
# Ten tallest players (by PlayerHeight), indexed by name for display.
tallest = (
    df.loc[:, ['PlayerHeight', 'PlayerName', 'PlayerWeight', 'BestPositions', 'Age', 'Nationality']]
    .nlargest(n=10, columns='PlayerHeight')
    .set_index('PlayerName')
)
tallest

Unnamed: 0_level_0,PlayerHeight,PlayerWeight,BestPositions,Age,Nationality
PlayerName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Tomáš Holý,206,102,GK,31,Czech Republic
Costel Fane Pantilimon,203,96,GK,35,Romania
Abdoul Bocar Bâ,203,94,CB,28,Mauritania
Aaron James Chapman,203,92,GK,32,England
Vanja Milinković-Savić,202,92,GK,25,Serbia
Kjell Scherpen,202,85,GK,22,Netherlands
Stefan Maierhofer,202,98,ST,40,Austria
Demba Thiam Ngagne,202,87,GK,24,Senegal
Lovre Kalinić,201,99,GK,32,Croatia
Fraser Forster,201,93,GK,34,England


### Q3 Best Defender in FIFA 2021

##### In order to get the best defenders, we need to sort the players according to their DefensiveAwareness in desc order.

In [165]:
# Ten players with the highest DefensiveAwareness, indexed by name.
defence = (
    df.loc[:, ["DefensiveAwareness", "PlayerName", "BestPositions", 'Age', 'Nationality']]
    .nlargest(n=10, columns='DefensiveAwareness')
    .set_index('PlayerName')
)
defence

Unnamed: 0_level_0,DefensiveAwareness,BestPositions,Age,Nationality
PlayerName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Giorgio Chiellini,94,CB,38,Italy
Virgil van Dijk,93,CB,31,Netherlands
Milan Škriniar,92,CB,27,Slovakia
Kalidou Koulibaly,91,CB,31,Senegal
Mats Hummels,90,CB,34,Germany
Clément Nicolas Laurent Lenglet,90,CB,27,France
Leonardo Bonucci,90,CB,35,Italy
Diego Roberto Godín Leal,90,CB,36,Uruguay
N'Golo Kanté,89,CDM,31,France
Aymeric Laporte,89,CB,28,France


### Q4 Best Players with Long Passes in FIFA 2021

##### In order to get the players with best Long Passes, we need to sort the players according to their LongPassing ability in desc order.

In [166]:
# Ten players ranked by LongPassing; ShortPassing is shown alongside for
# comparison but plays no part in the ordering.
long_pass = (
    df.loc[:, ["LongPassing", 'ShortPassing', "PlayerName", "BestPositions", 'Age', 'Nationality']]
    .nlargest(n=10, columns='LongPassing')
    .set_index('PlayerName')
)
long_pass

Unnamed: 0_level_0,LongPassing,ShortPassing,BestPositions,Age,Nationality
PlayerName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Kevin De Bruyne,93,94,CAM,31,Belgium
Toni Kroos,93,93,CM,32,Germany
Lionel Andrés Messi Cuccittini,91,91,RW,35,Argentina
Paul Pogba,91,86,CM,29,France
Daniel Parejo Muñoz,90,92,CM,33,Spain
Trent Alexander-Arnold,89,85,RB,24,England
Luka Modrić,89,91,CM,37,Croatia
Marco Verratti,89,90,CM,30,Italy
Hakim Ziyech,89,86,CAM,29,Morocco
Luis Alberto Romero Alconchel,89,90,CAM,30,Spain


### Q5 Best Players with Short Passes in FIFA 2021

##### In order to get the players with best Short Passes, we need to sort the players according to their ShortPassing ability in desc order.

In [167]:
# Ten players ranked by ShortPassing, indexed by name. LongPassing is shown
# alongside for comparison only.
# Stored under its own name so the long-pass ranking computed for Q4 is not
# silently overwritten by this cell.
short_pass = (
    df[["LongPassing", 'ShortPassing', "PlayerName", "BestPositions", 'Age', 'Nationality']]
    .nlargest(10, ['ShortPassing'])
    .set_index('PlayerName')
)
short_pass

Unnamed: 0_level_0,LongPassing,ShortPassing,BestPositions,Age,Nationality
PlayerName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Kevin De Bruyne,93,94,CAM,31,Belgium
Toni Kroos,93,93,CM,32,Germany
David Josué Jiménez Silva,84,92,CAM,36,Spain
Daniel Parejo Muñoz,90,92,CM,33,Spain
Lionel Andrés Messi Cuccittini,91,91,RW,35,Argentina
Luka Modrić,89,91,CM,37,Croatia
Marco Verratti,89,90,CM,30,Italy
Frenkie de Jong,86,90,CM,25,Netherlands
Luis Alberto Romero Alconchel,89,90,CAM,30,Spain
Christian Dannemann Eriksen,87,90,CAM,30,Denmark


### Q6 Most Paid Players

##### In order to get the most paid players, we need to sort the players according to their wages in desc order.

In [168]:
# Ten highest weekly wages, with value, rating and reputation columns kept for
# context; indexed by player name.
paid_players = (
    df.loc[:, ["Wage", "PlayerName", "PlayerValue", "BestOverallRating", 'Age',
               'Nationality', 'PotentialRating', 'InternationalReputations']]
    .nlargest(n=10, columns='Wage')
    .set_index('PlayerName')
)
paid_players

Unnamed: 0_level_0,Wage,PlayerValue,BestOverallRating,Age,Nationality,PotentialRating,InternationalReputations
PlayerName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Lionel Andrés Messi Cuccittini,560000,103500000,93,35,Argentina,93,5
Kevin De Bruyne,370000,129000000,91,31,Belgium,91,4
Karim Benzema,350000,83500000,89,35,France,89,4
Eden Hazard,350000,89500000,88,31,Belgium,88,4
Carlos Henrique Venancio Casimiro,310000,90500000,89,30,Brazil,89,3
Toni Kroos,310000,87500000,88,32,Germany,88,4
Sergio Ramos García,300000,33500000,89,36,Spain,89,4
Sergio Leonel Agüero del Castillo,300000,83500000,89,34,Argentina,89,4
Antoine Griezmann,290000,79500000,87,31,France,87,4
Neymar da Silva Santos Júnior,270000,132000000,91,30,Brazil,91,5


### Q7 Best Goalkeepers by Reflexes

##### In order to get the goalkeepers with best Reflexes, we need to sort the players according to their Reflexes ability in desc order.

In [169]:
# Ten players with the highest Reflexes rating, indexed by name. Kicking and
# Handling are displayed for context only.
# NOTE(review): no position filter is applied here; the ranking relies on the
# Reflexes score alone.
goal_keeper = (
    df.loc[:, ["Reflexes", "PlayerName", "Kicking", "Handling", 'Age', 'Nationality']]
    .nlargest(n=10, columns='Reflexes')
    .set_index('PlayerName')
)
goal_keeper

Unnamed: 0_level_0,Reflexes,Kicking,Handling,Age,Nationality
PlayerName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Jan Oblak,90,78,92,29,Slovenia
Marc-André ter Stegen,90,88,85,30,Germany
Keylor Navas Gamboa,90,75,81,36,Costa Rica
Hugo Lloris,90,68,82,36,France
Alisson Ramsés Becker,89,85,88,30,Brazil
Manuel Neuer,89,91,87,36,Germany
Samir Handanovič,89,73,85,38,Slovenia
David De Gea Quintana,89,78,81,32,Spain
Gianluigi Donnarumma,89,76,81,23,Italy
Kasper Schmeichel,89,83,77,36,Denmark


In [176]:
def getPlayer(data, playerName):
    """Return every row of ``data`` whose ``PlayerName`` equals ``playerName``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``PlayerName`` column.
    playerName : str
        Name to match; the comparison is plain equality against the column.

    Returns
    -------
    pandas.DataFrame
        The matching rows with the same columns as ``data``, renumbered from 0.
        Empty when no row matches.
    """
    matches = data[data['PlayerName'] == playerName]
    # drop=True discards the old row labels instead of inserting them as a
    # stray 'index' column in front of the player attributes.
    return matches.reset_index(drop=True)

In [179]:
# Fetch the row(s) for one player by exact name via getPlayer and display them.
playerData = getPlayer(df, 'Lionel Andrés Messi Cuccittini')
playerData

Unnamed: 0,PlayerName,PlayerHeight,PlayerWeight,PotentialRating,BestPositions,BestOverallRating,PlayerValue,Wage,Nationality,Crossing,...,SkillMoves,InternationalReputations,WorkRate,BodyType,Dribbling,Curve,FreekickAccuracy,LongPassing,BallControl,Age
0,Lionel Andrés Messi Cuccittini,170,72,93,RW,93,103500000,560000,Argentina,85,...,4,5,Medium/ Low,Unique,96,93,94,91,96,35


In [185]:
# Count players per preferred foot once, then split the result into the label
# and count arrays used for plotting. Taking both from the same Series keeps
# labels and values aligned and avoids computing value_counts() twice.
preferred_foot_counts = df["PreferredFoot"].value_counts()
preferred_foot_labels = preferred_foot_counts.index    # category labels, most frequent first
preferred_foot_values = preferred_foot_counts.values   # matching player counts

In [186]:
# Inspect the preferred-foot category labels.
preferred_foot_labels

Index(['Right', 'Left'], dtype='object')

In [189]:
# Pie chart of the preferred-foot split. Sector sizes and labels come directly
# from the precomputed count/label arrays, so no data_frame is passed: the full
# player frame has a different length and is not what is being plotted.
fig = px.pie(values=preferred_foot_values, names=preferred_foot_labels, title='Preferred Foot')
fig.show()