# Analysis of Players in the 2022 World Cup Tournament

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
data = pd.read_csv("WorldCup2022Players_final2.csv")
print(len(data))
data.head()

831


Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country
0,1,Hernan,Galindez,"Galindez, Hernan",30-Mar-87,30,March,1987,35,GK,Ecuador,12,0,Aucas,Ecuadorian Serie A,Ecuador
1,2,Felix,Torres,"Torres, Felix",11-Jan-97,11,January,1997,25,DF,Ecuador,17,2,Santos Laguna,Mexican Liga MX,Mexico
2,3,Piero,Hincapie,"Hincapie, Piero",9-Jan-02,9,January,2002,20,DF,Ecuador,21,1,Bayer Leverkusen,German 1. Bundesliga,Germany
3,4,Robert,Arboleda,"Arboleda, Robert",22-Oct-91,22,October,1991,31,DF,Ecuador,33,2,São Paulo,Campeonato Brasileiro Série A,Brazil
4,5,Jose,Cifuentes,"Cifuentes, Jose",12-Mar-99,12,March,1999,23,MF,Ecuador,11,0,Los Angeles FC,USA Major League Soccer,United States


## Age Analysis

In [3]:
# Average age of Players
ages = data["Age"]
average = sum(ages) / len(ages)
print("Average age of World Cup 2022 players is: " + str(average) + ".")

Average age of World Cup 2022 players is: 26.948255114320098.


In [4]:
# Oldest player
oldest_player = data.loc[data.Age == max(ages), :]
display(oldest_player)

Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country
233,234,Alfredo,Talavera,"Talavera, Alfredo",18-Sep-82,18,September,1982,40,GK,Mexico,40,0,Juárez,Mexican Liga MX,Mexico


Oldest Player is Alfredo Talavera of Mexico. 40 years old. I thought it was Dani Alves.

In [5]:
# Youngest Player
youngest_player = data.loc[data.Age == min(ages), :]
display(youngest_player)

Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country
331,332,Garang,Kuol,"Kuol, Garang",15-Sep-04,15,September,2004,18,FW,Australia,1,0,Central Coast Mariners,Australian Hyundai A-League,Australia
423,424,Jewison,Bennette,"Bennette, Jewison",15-Jun-04,15,June,2004,18,MF,Costa Rica,7,2,Sunderland,English League One,England
466,467,Youssoufa,Moukoko,"Moukoko, Youssoufa",20-Nov-04,20,November,2004,18,FW,Germany,1,0,Borussia Dortmund,German 1. Bundesliga,Germany
501,502,Gavi,,Gavi,5-Aug-04,5,August,2004,18,MF,Spain,13,2,Barcelona,Spain Primera Division,Spain
619,620,Bilal,Khannous,"Khannous, Bilal",10-May-04,10,May,2004,18,MF,Morocco,0,0,Genk,Belgian Jupiler Pro League,Belgium
733,734,Abdul Fatawu,Issahaku,"Issahaku, Abdul Fatawu",8-Mar-04,8,March,2004,18,MF,Ghana,13,1,Sporting CP,Portuguese Liga ZON SAGRES,Portugal


There are six 18 year-old players at the world cup. Who is the youngest?

In [6]:
# youngest 18 year old
y_dob = youngest_player["DOB"]
youngest = youngest_player.loc[youngest_player.DOB == min(y_dob), :]
display(youngest)

Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country
619,620,Bilal,Khannous,"Khannous, Bilal",10-May-04,10,May,2004,18,MF,Morocco,0,0,Genk,Belgian Jupiler Pro League,Belgium


The youngest player at the world cup is Moroccan Bilal Khannous (18 years old). Born on 10 May, 2004.

## International Statistics

In [8]:
# International Caps
caps = data["International Caps"]
quantiles = np.quantile(caps, [0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65 ,0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1])
# print(quantiles)
print(len(quantiles))
# Plotting International Caps
print(quantiles[18])
fig_plt = px.bar(data.loc[data["International Caps"] >= quantiles[18], :].sort_values("International Caps"), x="Full Name", y="International Caps", hover_name="Full Name", hover_data=["Age","Position", "Nationality", "International Caps", "International Goals", "Club", "League"], title="International Caps Above 0.9 Quantile")
fig_plt.show()

20
108.5


In [9]:
# International Goals
# Plotting International Goals
goals = data["International Goals"]
goals_quantiles = np.quantile(goals, [0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65 ,0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1])
print(goals_quantiles)
fig_plt_goals = px.bar(data.loc[data["International Goals"] >= goals_quantiles[18], :].sort_values("International Goals"), x="Full Name", y="International Goals", hover_name="Full Name", hover_data=["Age","Position", "Nationality", "International Caps", "International Goals", "Club", "League"], title="International Goals Above 0.9 Quantile" )
fig_plt_goals.show()

[  0.    0.    0.    0.    0.    0.    0.    0.    1.    1.    1.    2.
   2.5   3.    5.    6.    8.   12.   23.  117. ]


In [10]:
# Players with no international caps
print(len(data.loc[data["International Caps"] == 0, :]))
display(data.loc[data["International Caps"] == 0, :])

17


Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country
5,6,William,Pacho,"Pacho, William",16-Oct-01,16,October,2001,21,DF,Ecuador,0,0,Antwerp,Belgian Jupiler Pro League,Belgium
48,49,Andries,Noppert,"Noppert, Andries",7-Apr-94,7,April,1994,28,GK,Netherlands,0,0,Heerenveen,Holland Eredivisie,Netherlands
50,51,Xavi,Simons,"Simons, Xavi",21-Apr-03,21,April,2003,19,MF,Netherlands,0,0,PSV Eindhoven,Holland Eredivisie,Netherlands
51,52,Jeremie,Frimpong,"Frimpong, Jeremie",10-Dec-00,10,December,2000,21,DF,Netherlands,0,0,Bayer Leverkusen,German 1. Bundesliga,Germany
76,77,Jassem,Gaber,"Gaber, Jassem",20-Feb-02,20,February,2002,20,MF,Qatar,0,0,Al-Arabi,Qatar Stars League,Qatar
84,85,Nicolas,Jackson,"Jackson, Nicolas",20-Jun-01,20,June,2001,21,FW,Senegal,0,0,Villarreal,Spain Primera Division,Spain
87,88,Moussa,Ndiaye,"Ndiaye, Moussa",18-Jun-02,18,June,2002,20,DF,Senegal,0,0,Anderlecht,Belgian Jupiler Pro League,Belgium
306,307,Nawaf,Aqidi,"Aqidi, Nawaf",10-May-00,10,May,2000,22,GK,Saudi Arabia,0,0,Al-Nassr,Saudi Abdul L. Jameel League,Saudi Arabia
365,366,Axel,Disasi,"Disasi, Axel",11-Mar-98,11,March,1998,24,DF,France,0,0,Monaco,French Ligue 1,France
506,507,Alejandro,Balde,"Balde, Alejandro",18-Oct-03,18,October,2003,19,DF,Spain,0,0,Barcelona,Spain Primera Division,Spain


In [12]:
# International goals to caps ratio
data["GoalsToCaps"] = ( data["International Goals"] / data["International Caps"])
fig_plt_capsgoals = px.bar(data.loc[data["International Goals"] >= quantiles[9], :].sort_values("GoalsToCaps"), x="Full Name", y="GoalsToCaps", hover_name="Full Name", hover_data=["Age","Position", "Nationality", "International Caps", "International Goals", "Club", "League","GoalsToCaps"], title="Goals To Caps Ratio")
fig_plt_capsgoals.show()

Neymar of Brazil, Sardar Azmoun of Iran, Aleksander Mitrovic of Serbia, Romelu Lukaku of Belgium and Harry Kane of England score more often for their national teams than Cristiano Ronaldo.

## Football Leagues Representation

In [13]:
football_clubs = list(set(data["Club"]))
# print(football_clubs)
club_count = []
club_country = []
leagues = []
for football_club in football_clubs:
    d = data.loc[data["Club"] == football_club, :]
    league = d.iloc[0]["League"]
    leagues.append(league)
    league_country = d.iloc[0]["Club Country"]
    club_country.append(league_country)
    count = 0
    for index in data.index:
        if data["Club"][index] == football_club:
            count += 1
    club_count.append(count)
# print(len(football_clubs))
# print(len(club_count)
# print(len(club_country))
club_rep_df = pd.DataFrame(list(zip(football_clubs, club_count, leagues, club_country)), columns=["Club Name", "Representation", "League", "League Country"])
club_rep_df = club_rep_df.sort_values("Representation")
club_rep_df.head()

Unnamed: 0,Club Name,Representation,League,League Country
0,Cádiz,1,Spanish Primera Division,Spain
213,Omonia,1,Cypriot First Division,Cyprus
211,Coton Sport,1,Cameroon Premeir League,Cameroon
208,Daegu FC,1,Korean K League 1,South Korea
207,Osijek,1,Croatia Premeir League,Croatia


In [14]:
club_rep_plt = px.bar(club_rep_df, x = club_rep_df["Club Name"], y = club_rep_df["Representation"], hover_data=["Club Name", "Representation", "League", "League Country"], title="Football Clubs Representation")
club_rep_plt.show()

Players of the most represented club at the World Cup

In [15]:
print("Highest club representation: ")
print(len(data.loc[data["Club"] == "Barcelona", :]))
display(data.loc[data["Club"] == "Barcelona", :])

Highest club representation: 
17


Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country,GoalsToCaps
35,36,Memphis,Depay,"Depay, Memphis",13-Feb-94,13,February,1994,28,FW,Netherlands,81,42,Barcelona,Spain Primera Division,Spain,0.518519
46,47,Frenkie de,Jong,"Jong, Frenkie de",12-May-97,12,May,1997,25,MF,Netherlands,45,1,Barcelona,Spain Primera Division,Spain,0.022222
267,268,Robert,Lewandowski,"Lewandowski, Robert",21-Aug-88,21,August,1988,34,FW,Poland,134,76,Barcelona,Spain Primera Division,Spain,0.567164
342,343,Andreas,Christensen,"Christensen, Andreas",10-Apr-96,10,April,1996,26,DF,Denmark,58,2,Barcelona,Spain Primera Division,Spain,0.034483
367,368,Jules,Kounde,"Kounde, Jules",12-Nov-98,12,November,1998,24,DF,France,12,0,Barcelona,Spain Primera Division,Spain,0.0
373,374,Ousmane,Dembele,"Dembele, Ousmane",15-May-97,15,May,1997,25,FW,France,28,4,Barcelona,Spain Primera Division,Spain,0.142857
462,463,Marc-Andre,Ter Stegen,"Ter Stegen, Marc-Andre",30-Apr-92,30,April,1992,30,GK,Germany,30,0,Barcelona,Spain Primera Division,Spain,0.0
495,496,Eric,Garcia,"Garcia, Eric",9-Jan-01,9,January,2001,21,DF,Spain,19,0,Barcelona,Spain Primera Division,Spain,0.0
497,498,Sergio,Busquets,"Busquets, Sergio",16-Jul-88,16,July,1988,34,MF,Spain,139,2,Barcelona,Spain Primera Division,Spain,0.014388
501,502,Gavi,,Gavi,5-Aug-04,5,August,2004,18,MF,Spain,13,2,Barcelona,Spain Primera Division,Spain,0.153846


Players of the second most represented team (Bayern Munich) at the world cup.

In [16]:
display(data.loc[data["Club"] == "Bayern Munich", :])

Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country,GoalsToCaps
28,29,Matthijs de,Ligt,"Ligt, Matthijs de",12-Aug-99,12,August,1999,23,DF,Netherlands,38,2,Bayern Munich,German 1. Bundesliga,Germany,0.052632
364,365,Benjamin,Pavard,"Pavard, Benjamin",28-Mar-96,28,March,1996,26,DF,France,46,2,Bayern Munich,German 1. Bundesliga,Germany,0.043478
380,381,Dayot,Upamecano,"Upamecano, Dayot",27-Oct-98,27,October,1998,24,DF,France,7,1,Bayern Munich,German 1. Bundesliga,Germany,0.142857
382,383,Kingsley,Coman,"Coman, Kingsley",13-Jun-96,13,June,1996,26,FW,France,40,5,Bayern Munich,German 1. Bundesliga,Germany,0.125
383,384,Lucas,Hernandez,"Hernandez, Lucas",14-Feb-96,14,February,1996,26,DF,France,32,0,Bayern Munich,German 1. Bundesliga,Germany,0.0
441,442,Manuel,Neuer,"Neuer, Manuel",27-Mar-86,27,March,1986,36,GK,Germany,114,0,Bayern Munich,German 1. Bundesliga,Germany,0.0
446,447,Joshua,Kimmich,"Kimmich, Joshua",8-Feb-95,8,February,1995,27,MF,Germany,71,5,Bayern Munich,German 1. Bundesliga,Germany,0.070423
448,449,Leon,Goretzka,"Goretzka, Leon",6-Feb-95,6,February,1995,27,MF,Germany,45,14,Bayern Munich,German 1. Bundesliga,Germany,0.311111
450,451,Serge,Gnabry,"Gnabry, Serge",14-Jul-95,14,July,1995,27,FW,Germany,36,20,Bayern Munich,German 1. Bundesliga,Germany,0.555556
453,454,Thomas,Muller,"Muller, Thomas",13-Sep-89,13,September,1989,33,MF,Germany,118,44,Bayern Munich,German 1. Bundesliga,Germany,0.372881


Players of the second most represnted team (Manchester City) at the world cup.

In [17]:
display(data.loc[data["Club"] == "Manchester City", :])

Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country,GoalsToCaps
30,31,Nathan,Ake,"Ake, Nathan",18-Feb-95,18,February,1995,27,DF,Netherlands,29,3,Manchester City,English Premier League,England,0.103448
105,106,Kyle,Walker,"Walker, Kyle",28-May-90,28,May,1990,32,DF,England,70,0,Manchester City,English Premier League,England,0.0
108,109,John,Stones,"Stones, John",28-May-94,28,May,1994,28,DF,England,59,3,Manchester City,English Premier League,England,0.050847
110,111,Jack,Grealish,"Grealish, Jack",10-Sep-95,10,September,1995,27,FW,England,24,1,Manchester City,English Premier League,England,0.041667
117,118,Kalvin,Phillips,"Phillips, Kalvin",2-Dec-95,2,December,1995,26,MF,England,23,0,Manchester City,English Premier League,England,0.0
123,124,Phil,Foden,"Foden, Phil",28-May-00,28,May,2000,22,MF,England,18,2,Manchester City,English Premier League,England,0.111111
215,216,Julian,Alvarez,"Alvarez, Julian",31-Jan-00,31,January,2000,22,FW,Argentina,12,3,Manchester City,English Premier League,England,0.25
461,462,Ilkay,Gundogan,"Gundogan, Ilkay",24-Oct-90,24,October,1990,32,MF,Germany,63,16,Manchester City,English Premier League,England,0.253968
508,509,Rodri,,Rodri,22-Jun-96,22,June,1996,26,MF,Spain,35,1,Manchester City,English Premier League,England,0.028571
516,517,Aymeric,Laporte,"Laporte, Aymeric",27-May-94,27,May,1994,28,DF,Spain,16,1,Manchester City,English Premier League,England,0.0625


Because I am biased, Arsenal players at the world cup.

In [18]:
display(data.loc[data["Club"] == "Arsenal", :])

Unnamed: 0,No,First Name,Other Names,Full Name,DOB,Day of Birth,Month of Birth,Year of Birth,Age,Position,Nationality,International Caps,International Goals,Club,League,Club Country,GoalsToCaps
120,121,Bukayo,Saka,"Saka, Bukayo",5-Sep-01,5,September,2001,21,FW,England,20,4,Arsenal,English Premier League,England,0.2
124,125,Ben,White,"White, Ben",8-Oct-97,8,October,1997,25,DF,England,4,0,Arsenal,English Premier League,England,0.0
126,127,Aaron,Ramsdale,"Ramsdale, Aaron",14-May-98,14,May,1998,24,GK,England,3,0,Arsenal,English Premier League,England,0.0
155,156,Matt,Turner,"Turner, Matt",24-Jun-94,24,June,1994,28,GK,United States,20,0,Arsenal,English Premier League,England,0.0
379,380,William,Saliba,"Saliba, William",24-Mar-01,24,March,2001,21,DF,France,7,0,Arsenal,English Premier League,England,0.0
482,483,Takehiro,Tomiyasu,"Tomiyasu, Takehiro",5-Nov-98,5,November,1998,24,DF,Japan,29,1,Arsenal,English Premier League,England,0.034483
640,641,Gabriel,Jesus,"Jesus, Gabriel",3-Apr-97,3,April,1997,25,FW,Brazil,56,19,Arsenal,English Premier League,England,0.339286
648,649,Gabriel,Martinelli,"Martinelli, Gabriel",18-Jun-01,18,June,2001,21,FW,Brazil,3,0,Arsenal,English Premier League,England,0.0
710,711,Granit,Xhaka,"Xhaka, Granit",27-Sep-92,27,September,1992,30,MF,Switzerland,107,12,Arsenal,English Premier League,England,0.11215
731,732,Thomas,Partey,"Partey, Thomas",13-Jun-93,13,June,1993,29,MF,Ghana,40,13,Arsenal,English Premier League,England,0.325


Representation in the 0.75 Quantile

In [19]:
rep = club_rep_df["Representation"]
rep_quantiles = np.quantile(rep, [0, 0.25, 0.5, 0.75, 1])
# print(quantiles)
# Plotting International Caps
rep_plt = px.bar(club_rep_df.loc[club_rep_df["Representation"] >= quantiles[3], :], x="Club Name", y="Representation", hover_data=["Club Name", "Representation", "League", "League Country"], title="Football Club Representation Above 0.75 Quantile")
rep_plt.show()

In [20]:
# Most represented Nation in terms of players that for a club in a certain nation
leagues = list(set(data["League"]))
lgs_count = []
for league in leagues:
    count = 0
    for index in data.index:
        if data["League"][index] == league:
            count += 1
    lgs_count.append(count)
# print(leagues)
# print(len(lgs_count))
# print(len(leagues))
leagues_rep_df = pd.DataFrame(list(zip(leagues, lgs_count)), columns=["League", "Representation"])
leagues_rep_df.head()
leagues_rep_fig = px.bar(leagues_rep_df, x="League", y="Representation", title="Football Leagues Representation")
leagues_rep_fig.show()

English Premier League has the highest representation at 125 players. The English Championship has more representation than French League 1, 33 to 30.

In [21]:
# Most represented Nation in terms of players that for a club in a certain nation
league_countries = list(set(data["Club Country"]))
lg_cntry_count = []
for country in league_countries:
    count = 0
    for index in data.index:
        if data["Club Country"][index] == country:
            count += 1
    lg_cntry_count.append(count)
# print(len(lg_cntry_count))
# print(len(league_countries))
most_plyr_percent = (max(lg_cntry_count)/len(data))*100
print(most_plyr_percent)
second_most_plyr_percent = (87/len(data))*100
print(second_most_plyr_percent)
league_cntry_rep_df = pd.DataFrame(list(zip(league_countries, lg_cntry_count)), columns=["League Country", "Representation"])
league_cntry_rep_df.head()
league_cntry_rep_fig = px.bar(league_cntry_rep_df, x="League Country", y="Representation", title="Football Leagues' Country Representation")
league_cntry_rep_fig.show()

19.133574007220215
10.469314079422382


159 players at the 2022 World Cup play for football clubs in England. English clubs are the most represented at the World Club, that is 19.33% of players at the World Cup. Second is SpanishClubs with 87 players, that is 10.47% of players at the World Cup. 2 Tunisian plays play for clubs in Egypt. Dejan Lovren of Croatia and Moumi Ngameleu of Cameroon represent the Russian Premier League.

In [22]:
# Players who play for their local leagues
nations = list(set(data["Nationality"]))
no_players = []
local_players = []
# print(nations)
for nation in nations:
    nation_df = data.loc[data["Nationality"] == nation, :]
    no_players.append(len(nation_df))
    local_cnt = 0
    for index in nation_df.index:
        if nation_df["Club Country"][index] == nation:
            local_cnt += 1
    local_players.append(local_cnt)

print(len(nations))
print(len(no_players))
print(len(local_players))

local_rep_df = pd.DataFrame(list(zip(nations, local_players, no_players)), columns=["Country", "Local Players", "Total Players"])
local_rep_df["Local Percentage Representation"] = (local_rep_df["Local Players"]/local_rep_df["Total Players"])*100
local_rep_df.head()
local_rep_fig = px.bar(local_rep_df, x="Country", y="Local Players", hover_data=["Country", "Local Players", "Local Percentage Representation"], title="Number of National Players Playing For Teams in Their Country")
local_rep_fig.show()

32
32
32


All players in the Saudi Arabia and Qatar national teams play for football clubs in their respective countries. All players in the England national team except one play in the Premier League, Jude Bellingham of Borrusia Dortmund. Senegal have no players the play in their local leagues.