# FIFA dataset analysis:

In [32]:
import pandas as pd

# ; separator
fifa_players = pd.read_csv("fifa21.csv", sep=";")

print("columns: ", fifa_players.columns, "\n")

print("index: ", fifa_players.index, "\n")

print("shape: ", fifa_players.shape, "\n")

print("dtypes: ", fifa_players.dtypes, "\n")



columns:  Index(['player_id', 'name', 'nationality', 'position', 'overall', 'age',
       'hits', 'potential', 'team'],
      dtype='object') 

index:  RangeIndex(start=0, stop=17981, step=1) 

shape:  (17981, 9) 

dtypes:  player_id       int64
name           object
nationality    object
position       object
overall         int64
age             int64
hits            int64
potential       int64
team           object
dtype: object 



In [38]:
fifa_players.head()

Unnamed: 0,player_id,name,nationality,position,overall,age,hits,potential,team
0,158023,Lionel Messi,Argentina,ST|CF|RW,94,33,299,94,FC Barcelona
1,20801,Cristiano Ronaldo,Portugal,ST|LW,93,35,276,93,Juventus
2,190871,Neymar Jr,Brazil,CAM|LW,92,28,186,92,Paris Saint-Germain
3,203376,Virgil van Dijk,Netherlands,CB,91,29,127,92,Liverpool
4,200389,Jan Oblak,Slovenia,GK,91,27,47,93,Atlético Madrid



## Collect all possible positions

In [82]:
fifa_players.position

from itertools import chain

positions_split = list(map(lambda pos: pos.split('|'), list(fifa_players.position)))

unique_positions = set(chain(*positions_split))

print("positions: ", unique_positions, "\n")

print("position_count: ", len(unique_positions), "\n")

positions:  {'RM', 'CM', 'LWB', 'LW', 'LM', 'ST', 'RB', 'RW', 'CAM', 'GK', 'CB', 'CDM', 'LB', 'CF', 'RWB'} 

position_count:  15 



## Find all players with the position "ST":

In [85]:
label = list(unique_positions)[5]

fifa_players[(fifa_players["position"] == "ST")]

Unnamed: 0,player_id,name,nationality,position,overall,age,hits,potential,team
6,188545,Robert Lewandowski,Poland,ST,91,31,89,91,FC Bayern München
12,153079,Sergio Agüero,Argentina,ST,90,32,50,90,Manchester City
15,202126,Harry Kane,England,ST,89,27,64,91,Tottenham Hotspur
19,176580,Luis Suárez,Uruguay,ST,89,33,54,89,FC Barcelona
43,192387,Ciro Immobile,Italy,ST,87,30,130,87,Lazio
...,...,...,...,...,...,...,...,...,...
17951,208821,Jonte Smith,Bermuda,ST,57,26,0,61,Cheltenham Town
17956,198487,Luka Mijaljevic,Sweden,ST,57,29,0,57,AFC Eskilstuna
17969,256314,Gautier Ott,France,ST,56,18,0,75,AS Nancy Lorraine
17976,256093,Jaime Ortíz,Ecuador,ST,56,21,0,64,Sociedad Deportiva Aucas


## Find a club name (suggest we don't know the actual name):

In [117]:
teams = set(fifa_players.team)

number_of_teams = len(teams)

print("number of teams: ", number_of_teams, "\n")

number of teams:  713 



By the example of "Chelsea"

In [119]:
chelsea_players = fifa_players[fifa_players.team.str.contains("Chelsea")]

print(chelsea_players, "\n")

print("number of chelsea players: ", len(chelsea_players), "\n")

       player_id                 name    nationality    position  overall  \
14        215914         N'Golo Kanté         France      CDM|CM       89   
127       207410        Mateo Kovacic        Croatia          CM       84   
128       205498             Jorginho          Italy      CDM|CM       84   
146       184432          Azpilicueta          Spain    RB|LB|CB       84   
180       206585                 Kepa          Spain          GK       83   
271       205452      Antonio Rüdiger        Germany          CB       82   
295       180403              Willian         Brazil    RM|RW|LW       82   
359       204311           Kurt Zouma         France          CB       81   
378       192638        Marcos Alonso          Spain   LB|LWB|LW       81   
436       227796    Christian Pulisic  United States    LM|RW|LW       80   
463       213661  Andreas Christensen        Denmark          CB       80   
515       189505                Pedro          Spain       RW|LW       80   

Find the chelsea player with the best "overall" rating

In [123]:
chelsea_players.sort_values(by="overall", ascending=False)

Unnamed: 0,player_id,name,nationality,position,overall,age,hits,potential,team
14,215914,N'Golo Kanté,France,CDM|CM,89,29,75,89,Chelsea
128,205498,Jorginho,Italy,CDM|CM,84,28,29,84,Chelsea
146,184432,Azpilicueta,Spain,RB|LB|CB,84,30,25,84,Chelsea
127,207410,Mateo Kovacic,Croatia,CM,84,26,43,87,Chelsea
180,206585,Kepa,Spain,GK,83,25,60,87,Chelsea
271,205452,Antonio Rüdiger,Germany,CB,82,27,24,84,Chelsea
295,180403,Willian,Brazil,RM|RW|LW,82,31,53,82,Chelsea
359,204311,Kurt Zouma,France,CB,81,25,56,84,Chelsea
378,192638,Marcos Alonso,Spain,LB|LWB|LW,81,29,26,81,Chelsea
436,227796,Christian Pulisic,United States,LM|RW|LW,80,21,139,86,Chelsea


## Find the average overall rating for each team:

1. Map each team to an array of all overall ratings
2. reduce each array to the average rating

## By the example of chelsea:

In [133]:
print("mean: ", chelsea_players["overall"].mean(), "\n")

result = dict()

result.update({"chelsea": chelsea_players["overall"].mean()})

print(result, "\n")

mean:  75.21875 

{'chelsea': 75.21875} 



## Now for all teams:

In [221]:
from functools import reduce

result = dict()

teams_grouped = fifa_players.groupby("team").agg(average_overall=("overall","mean"))

teams_grouped.sort_values(by="average_overall", ascending=False)


Unnamed: 0_level_0,average_overall
team,Unnamed: 1_level_1
Juventus,83.043478
FC Bayern München,81.565217
Napoli,79.730769
Real Madrid,79.606061
Paris Saint-Germain,78.866667
...,...
Bury,58.888889
Cork City,58.777778
Waterford FC,58.750000
UCD AFC,58.000000
