In [1]:
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline


# STATISTICAL DATA ANALYSIS OF FIFA 19 PLAYER RATING

In [2]:
#load data
# Reads the FIFA 19 player table from a CSV addressed by a relative path, so the
# file has to sit in the notebook's working directory. The recorded run of this
# cell ended in FileNotFoundError, i.e. the file was not there at the time.
data=pd.read_csv("FIFA 19.csv")
# Last expression of the cell: displays the loaded DataFrame.
data

FileNotFoundError: File b'FIFA 19.csv' does not exist

# EXPLORING THE DATA

In [None]:
data.describe()

In [None]:
(data.columns)

In [None]:
data.info()

In [None]:
#looking at the total number of players in the dataset
players=data["Name"].count()
print("There are",players,"players in the dataset")

In [None]:
#work rate of players
work_rate=data["Work Rate"]
work_rate.value_counts()

In [None]:
#looking at the body type of players
body_type=data["Body Type"]
body_type.value_counts()

In [None]:
skill_moves=data["Skill Moves"].value_counts()
skill_moves

# COUNTRIES WITH MOST PLAYERS

In [None]:
# Counter is imported once, in the notebook's import cell.
# most_common() yields (nationality, number of players) pairs, most frequent first.
countries=Counter(data["Nationality"].values).most_common()
print("the number of players in each country are", countries)

In [None]:
#alternative way of looking at the number of players in each country
data["Nationality"].value_counts()

# ANALYSIS OF PLAYERS PER EACH POSITION

In [None]:
#Best players in each position based on their overall scores
# idxmax() gives, for every position, the row label of its highest-rated player.
# The lookup is done once; previously the whole selection was computed twice and
# the first result thrown away.
best_per_position=data.groupby("Position")["Overall"].idxmax()
data.loc[best_per_position, ['Position', 'Name', "Age", "Nationality",'Club','Overall']]

In [None]:
#worst players in each of the positions 
data.loc[data.groupby(data["Position"])["Overall"].idxmin()][['Position','Name','Age','Nationality','Club',"Overall"]]

# DETERMINING THE CORRELATION OF THE VARIOUS FEATURES 

In [None]:
# Pairwise correlation table of the player attributes.
# 'Value' and 'Wage' are text (they are cleaned with .str elsewhere in this notebook)
# and 'Work Rate' / 'Body Type' are presumably text as well, so only the numeric
# columns are correlated. pandas >= 2.0 raises on non-numeric input to corr()
# instead of silently dropping it, which is what older versions did.
data[['Age','Crossing', 'International Reputation',
       'Skill Moves', 'Work Rate', 'Body Type','Overall', 'Potential', 'Value', 'Wage',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']].select_dtypes(include="number").corr()

In [None]:
#looking at the heatmap for all features
feature_columns=['Age','Crossing', 'International Reputation',
       'Skill Moves', 'Work Rate', 'Body Type','Overall', 'Potential', 'Value', 'Wage',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']
# Correlate the numeric columns only: text columns in the list make corr() raise
# on pandas >= 2.0 (older versions dropped them silently).
feature_corr=data[feature_columns].select_dtypes(include="number").corr()
plt.figure(figsize=(10,8))
plt.title("Heatmap for all features")
sns.heatmap(feature_corr, linewidths=.5)
plt.show()
print("this heatmap shows how various features of the players are correlated. From the graph, a white box signifies "
     "a stronger correlation between two intersecting features. A black box shows a weaker correlation whiles orange or red "
     "shows neither positive nor negative correlation.  The diagonal white line shows self correlation, that is, correlation "
     "between same variables, which is 1. for eg, from the heatmap, looking at where marking and sliding tackle intersect is "
     "white meaning a player who marks very well also slides very well as well which in real football matches, its always the "
     "case.  Also where Age and potential intersect, it shows no correlation. which means that a player's age does not determine "
     "his potential which is also the case. Mbappe wouldn't have got that high potential")

In [None]:
#age distribution of players
age=data["Age"]
plt.title('age distribution of players')
plt.hist(age)
plt.xlabel("age")
plt.ylabel("Number of players")
plt.show()

In [None]:
#histogram of the overall rating across all players, followed by the mean rating
overall=data["Overall"]
plt.hist(overall)
plt.title("distribution of players by overall ratings ")
plt.xlabel("overall rating")
plt.ylabel("number of players ")
plt.show()
mean_overall=np.mean(data["Overall"])
print("The average overall rating of a player is ", mean_overall)

In [None]:
#looking at the relationship between age and overall ratings to see if a persons age has impact on his overall rating
# Both series are read from `data` inside this cell, so the plot does not depend
# on an `age` variable that some other cell happened to leave in the kernel
# (a stale `age` of a different length makes scatter() fail).
age=data["Age"]
overall=data["Overall"]
plt.title("Relationship between age and overall rating")
plt.scatter(age, overall)
plt.xlabel("Age")
plt.ylabel("overall rating")
plt.show()

In [None]:
#want to determine if a there is a relationship between a players value and overall rating
# The money columns are text with a euro sign and an optional K (thousand) or
# M (million) suffix -- assumed from the characters the earlier cleanup stripped.
# The suffix is turned into a multiplier so every amount ends up in euros;
# removing it without scaling would put 500K and 500M at the same position.
# Entries that cannot be parsed become NaN rather than aborting the cell.
value=data["Value"].str.replace("[€,]", "", regex=True)
value_scale=value.str[-1].map({"K": 1e3, "M": 1e6}).fillna(1)
new_value=pd.to_numeric(value.str.rstrip("KM"), errors="coerce")*value_scale
overall=data["Overall"]
plt.title("Relationship between player's value and overall rating")
plt.scatter(new_value, overall)
plt.xlabel("Value of player")
plt.ylabel("Overall rating")
plt.show()

#determining the relationship between a players wage and overall rating
# Same euro/K/M parsing as for the value column above.
wage=data["Wage"].str.replace("[€,]", "", regex=True)
wage_scale=wage.str[-1].map({"K": 1e3, "M": 1e6}).fillna(1)
new_wage=pd.to_numeric(wage.str.rstrip("KM"), errors="coerce")*wage_scale
plt.title("Relationship between player's wage and overall rating")
plt.scatter(new_wage, overall)
plt.xlabel("wage of player")
plt.ylabel("Overall rating")
plt.show()


In [None]:
#looking at some hidden facts in the dataset
# One entry per fact: (opening of the sentence, column to inspect, which extreme).
# The loop below replaces 26 hand-copied lookup/print pairs, so a new fact is a
# one-line addition and the lookup and the printed number cannot drift apart.
player_facts=[
    ("the oldest player is", "Age", "max"),
    ("the youngest player is", "Age", "min"),
    ("the best player has an overall rating of", "Overall", "max"),
    ("The worst player has an overall rating of", "Overall", "min"),
    ("The fastest player has a sprint speed of", "SprintSpeed", "max"),
    ("The slowest player has a sprint speed of", "SprintSpeed", "min"),
    ("The best dribbler has a dribbling ability of", "Dribbling", "max"),
    ("The player who plays volley more than anyone has a Volley play ability of", "Volleys", "max"),
    ("The player with the best jump has a jumping rate of", "Jumping", "max"),
    ("The best penalty taker has a penalty taking ability of", "Penalties", "max"),
    ("The worst penalty taker has a penalty taking ability of", "Penalties", "min"),
    ("The most aggressive player has an aggression rating of", "Aggression", "max"),
    ("The player with the best interception has an intercepting ability of", "Interceptions", "max"),
    ("The player with the best vision has a vision ability of", "Vision", "max"),
    ("The goalkeeper with good reflexes has a reflex ability of", "GKReflexes", "max"),
    ("The player with high international reputation has an international rating of", "International Reputation", "max"),
    ("The goalkeeper with the highest diving ability has a diving ability of", "GKDiving", "max"),
    ("The player with the highest agility has agility rating of", "Agility", "max"),
    ("The player with the highest stamina has a stamina rating of", "Stamina", "max"),
    ("The best controller of the ball has a ball control rate of", "BallControl", "max"),
    ("The player with the most powerful shot has a shooting power of", "ShotPower", "max"),
    ("The player with the highest acceleration has an acceleration rating of", "Acceleration", "max"),
    ("The player who marks very well has a marking rating of", "Marking", "max"),
    ("The best freekick taker has an accuracy of", "FKAccuracy", "max"),
    ("The best long ball passer of the ball has a passing accuracy of", "LongPassing", "max"),
    ("The best finisher has a finishing rating of", "Finishing", "max"),
]
for opening, column, extreme in player_facts:
    ratings=data[column]
    # idxmax()/idxmin() return the row label of the first player holding the extreme.
    row=data.loc[ratings.idxmax() if extreme=="max" else ratings.idxmin()]
    print(opening, row[column], ".", "His name is", row["Name"], "from", row["Nationality"],
          "and plays for", row["Club"])



# LOOKING AT PREFERRED FOOT OF PLAYERS

In [None]:
#preferred foot of players
# value_counts() skips missing entries by itself, so no separate dropna() pass is needed.
preferred_foot=data["Preferred Foot"].value_counts()
plt.title("Distribution of players according to their preferred foot")
# Wedge labels come from the counted categories themselves; a hard-coded
# ("Right","Left") tuple is attached to the wrong wedge whenever the count order differs.
plt.pie(preferred_foot, labels=preferred_foot.index, autopct='%1.1f%%')
plt.show()

# ANALYSING THE CLUBS

In [None]:
# Number of clubs and mean squad size.
# unique() reports a missing club (NaN) as one more value, which would be counted
# as a club; it is dropped first. The average is taken over players that actually
# have a club, so club-less players do not inflate the squad size.
clubs=data["Club"].dropna().unique()
club_size=len(clubs)
total_players=data["Club"].count()
average_players=total_players/club_size
print('There are a total of',club_size,'clubs in the dataset.','and an average of',average_players,'players in each club')


In [None]:
# Counter is imported once, in the notebook's import cell.
# (club, number of players) pairs, largest squads first.
all_clubs=Counter(data["Club"].values).most_common()
all_clubs

# ANALYSIS OF SOME SELECTED NATIONAL TEAMS 

In [None]:
#a hand-picked set of the strongest football nations, reduced to a few columns
powerhouse_nations=['England','Germany','Spain','Argentina','Brazil','Netherlands','Portugal','France','Italy','Uruguay','Belgium']
from_powerhouse=data["Nationality"].isin(powerhouse_nations)
countries=data.loc[from_powerhouse][['Name',"Nationality",'Age','Overall','Wage',"International Reputation"]]
countries

In [None]:
#looking at the distribution of wages of players in their countries to see which country's players have the highest wages
nationality=countries['Nationality']
# Wages are text with a euro sign and an optional K/M suffix (assumed from the
# characters the earlier cleanup stripped). The suffix becomes a multiplier so all
# wages are in euros; unparseable entries become NaN instead of aborting the cell.
wage=countries['Wage'].str.replace("[€,]", "", regex=True)
wage_scale=wage.str[-1].map({"K": 1e3, "M": 1e6}).fillna(1)
country_wage=pd.to_numeric(wage.str.rstrip("KM"), errors="coerce")*wage_scale
plt.title("Distribution of wages of players by country")
plt.bar(nationality,country_wage)
plt.xticks(rotation=60)
plt.show()

#distribution of players international reputation in their respective countries. some big national teams
intl_reputation=countries["International Reputation"]
plt.bar(nationality,intl_reputation)
plt.title("Distribution of players international reputations by country")
plt.xticks(rotation=60)
plt.show()

#distribution of players overall ratings with respect to their countries
overall=countries["Overall"]
plt.title('distribution of players overall ratings by country')
plt.bar(nationality,overall)
plt.xticks(rotation=60)
plt.show()



# POPULAR AFRICAN NATIONAL TEAMS

In [None]:
#players from a hand-picked set of strong African football nations
african_nations=["Ghana", 'Nigeria','Cameroon','Egypt','Ivory Coast','Senegal','Algeria','Morocco','Tunisia','South Africa','Mali']
is_african=data['Nationality'].isin(african_nations)
africa=data.loc[is_african]
africa

In [None]:
africa.shape

In [None]:
africa.describe()

In [None]:
#looking at the number of players in each of the selected African countries
more=africa['Nationality'].value_counts()
more

In [None]:
#looking at which African country's players have the highest wage
# Wages are text with a euro sign and an optional K/M suffix (assumed from the
# characters the earlier cleanup stripped). The suffix is applied as a multiplier
# so that thousands and millions are not plotted on the same footing.
wage=africa['Wage'].str.replace("[€,]", "", regex=True)
wage_scale=wage.str[-1].map({"K": 1e3, "M": 1e6}).fillna(1)
africa_wage=pd.to_numeric(wage.str.rstrip("KM"), errors="coerce")*wage_scale
nation=africa['Nationality']
plt.title("Distribution of African player's wages with respect to national teams")
plt.bar(nation,africa_wage)
plt.xticks(rotation=60)
plt.show()


In [None]:
#looking at which African country has the highest overall rating 
overall=africa['Overall']
nations=africa['Nationality']
plt.title('Distribution of African players overall rating with respect to their countries')
plt.bar(nations,overall)
plt.xlabel('African countries')
plt.ylabel("Overall Ratings of players")
plt.xticks(rotation=60)
plt.show()

In [None]:
#looking at the best players in Africa with respect to their positions
africa.loc[africa.groupby(africa['Position'])["Overall"].idxmax()][['Name','Position','Age','Nationality','Club','Overall']]

In [None]:
#looking at the overall best player in Africa
best=africa.loc[africa['Overall'].idxmax()]
#looking at the oldest and the youngest player in Africa
oldest=africa.loc[africa["Age"].idxmax()]
young=africa.loc[africa.Age.idxmin()]
# Each of these is a single player row, so the rating/age is read directly;
# calling .max()/.min() on one value was redundant and only works while that
# value happens to be a NumPy scalar.
print('The best player in Africa has an overall rating of',best['Overall'],".", 'His name is',best['Name'],'from',
     best['Nationality'], 'and plays for',best['Club'])
print('The oldest player in Africa is',oldest['Age'],".", 'His name is',oldest['Name'],'from',
     oldest['Nationality'], 'and plays for',oldest['Club'])
print('The youngest player in Africa is',young['Age'],".", 'His name is',young['Name'],'from',
     young['Nationality'], 'and plays for',young['Club'])

In [None]:
#looking at the worst players in Africa with their respective position
africa.loc[africa.groupby(africa['Position'])["Overall"].idxmin()][['Name','Position','Age','Nationality','Club','Overall']]

# GHANAIAN PLAYERS ANALYSIS

In [None]:
Ghana=data.loc[data["Nationality"]=="Ghana"]
Ghana


In [None]:
Gh=Ghana["Name"].count()
print("There are", Gh, "Ghanaian players in the dataset")

In [None]:
Ghana.describe()

In [None]:
# Pairwise correlation table for the Ghanaian players.
# Only numeric columns are correlated: text columns in the list ('Value' and
# 'Wage' at least) make corr() raise on pandas >= 2.0, where they are no longer
# dropped silently.
Ghana[['Age','Crossing', 'International Reputation',
       'Skill Moves', 'Work Rate', 'Body Type','Overall', 'Potential', 'Value', 'Wage',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']].select_dtypes(include="number").corr()

In [None]:
# Correlation heatmap for the Ghanaian players.
ghana_feature_columns=['Age','Crossing', 'International Reputation',
       'Skill Moves', 'Work Rate', 'Body Type','Overall', 'Potential', 'Value', 'Wage',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']
# Numeric columns only: text columns make corr() raise on pandas >= 2.0.
ghana_corr=Ghana[ghana_feature_columns].select_dtypes(include="number").corr()
plt.figure(figsize=(10,8))
plt.title("Heatmap for Ghanaian players ")
# `linewidths` is the keyword seaborn documents for the cell borders.
sns.heatmap(ghana_corr, linewidths=.5)
plt.show()

In [None]:
# Record holders among the Ghanaian players.
# One entry per fact: (opening of the sentence, column to inspect, which extreme).
# Every lookup goes through `Ghana`; the hand-copied version fetched the best
# long passer from `data` instead of the Ghana subset.
ghana_facts=[
    ("the oldest player is", "Age", "max"),
    ("the youngest player is", "Age", "min"),
    ("the best player has an overall rating of", "Overall", "max"),
    ("The worst player has an overall rating of", "Overall", "min"),
    ("The fastest player has a sprint speed of", "SprintSpeed", "max"),
    ("The slowest player has a sprint speed of", "SprintSpeed", "min"),
    ("The best dribbler has a dribbling ability of", "Dribbling", "max"),
    ("The player who plays volley more than anyone has a Volley play ability of", "Volleys", "max"),
    ("The player with the best jump has a jumping rate of", "Jumping", "max"),
    ("The best penalty taker has a penalty taking ability of", "Penalties", "max"),
    ("The worst penalty taker has a penalty taking ability of", "Penalties", "min"),
    ("The most aggressive player has an aggression rating of", "Aggression", "max"),
    ("The player with the best interception has an intercepting ability of", "Interceptions", "max"),
    ("The player with the best vision has a vision ability of", "Vision", "max"),
    ("The goalkeeper with good reflexes has a reflex ability of", "GKReflexes", "max"),
    ("The player with high international reputation has an international rating of", "International Reputation", "max"),
    ("The goalkeeper with the highest diving ability has a diving ability of", "GKDiving", "max"),
    ("The player with the highest agility has agility rating of", "Agility", "max"),
    ("The player with the highest stamina has a stamina rating of", "Stamina", "max"),
    ("The best controller of the ball has a ball control rate of", "BallControl", "max"),
    ("The player with the most powerful shot has a shooting power of", "ShotPower", "max"),
    ("The player with the highest acceleration has an acceleration rating of", "Acceleration", "max"),
    ("The player who marks very well has a marking rating of", "Marking", "max"),
    ("The best freekick taker has an accuracy of", "FKAccuracy", "max"),
    ("The best long ball passer of the ball has a passing accuracy of", "LongPassing", "max"),
    ("The best finisher has a finishing rating of", "Finishing", "max"),
]
for opening, column, extreme in ghana_facts:
    ratings=Ghana[column]
    # idxmax()/idxmin() return the row label of the first player holding the extreme.
    row=Ghana.loc[ratings.idxmax() if extreme=="max" else ratings.idxmin()]
    print(opening, row[column], ".", "His name is", row["Name"], "from", row["Nationality"],
          "and plays for", row["Club"])



In [None]:
total_players=Ghana["Name"].count()
print("There are",total_players,"players in Ghana from the FIFA 19 dataset")

# AGE DISTRIBUTION OF GHANAIAN PLAYERS

In [None]:
# Age histogram of the Ghanaian players, followed by their mean age.
# Only `agegh` is bound here. The cell used to rebind the notebook-wide `age`
# name to the Ghana subset as well, although it never read it; that left a
# shorter series behind for any full-dataset cell that uses `age`.
agegh=Ghana["Age"]
plt.title("age distribution of Ghanaian players")
plt.hist(agegh)
plt.xlabel("Age")
plt.ylabel("Number of players")
plt.show()
print("the average age of Ghanaian players on FIFA 19 is ", np.mean(Ghana["Age"]))

# ANALYSIS BASED ON LEAGUE

# 1. THE ENGLISH PREMIER LEAGUE

In [None]:
# Club names treated as Premier League sides; EPL keeps the players of those clubs.
epl_club_names=[
    "Chelsea", "Manchester United", "Manchester City", "Everton", "Tottenham Hotspur",
    "Liverpool", "Arsenal", "Wolverhampton Wanderers", "Brighton & Hove Albion",
    "Crystal Palace", 'Sheffield United', "Fulham", "West Bromwich Albion", "Burnley",
    "Leeds United", "West Ham United", "Newcastle United", "Leicester City",
    "Aston Villa", "Southampton",
]
plays_in_epl=data["Club"].isin(epl_club_names)
EPL=data.loc[plays_in_epl]
EPL

In [None]:
EPL["Club"].value_counts()

In [None]:
#number of different nationalities in the EPL
epl=EPL["Nationality"].value_counts()
epl

In [None]:
epl_intl=epl.count()
print("There are", epl_intl, "countries whose players plays in the EPL")

In [None]:
total_epl=EPL["Name"].count()
total_clubs=EPL["Club"].unique()
average_players=(total_epl)/len(total_clubs)
print("There are a total of", total_epl,"players in the EPL",".","on average there are",average_players,"in each team in the EPL")

In [None]:
#looking at the statistical relationship of the various features of epl players
# Only numeric columns are correlated: text columns in the list ('Value' and
# 'Wage' at least) make corr() raise on pandas >= 2.0, where they are no longer
# dropped silently.
EPL[['Age','Crossing', 'International Reputation',
       'Skill Moves', 'Work Rate', 'Body Type','Overall', 'Potential', 'Value', 'Wage',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']].select_dtypes(include="number").corr()

In [None]:
# Correlation heatmap for the EPL players.
epl_feature_columns=['Age','Crossing', 'International Reputation',
       'Skill Moves', 'Work Rate', 'Body Type','Overall', 'Potential', 'Value', 'Wage',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']
# Numeric columns only: text columns make corr() raise on pandas >= 2.0.
epl_corr=EPL[epl_feature_columns].select_dtypes(include="number").corr()
plt.figure(figsize=(10,8))
plt.title("Heatmap showing the relationship of the various features of players in EPL")
# `linewidths` is the keyword seaborn documents for the cell borders.
sns.heatmap(epl_corr, linewidths=.5)
plt.show()

In [None]:
#looking at the wages of the teams In EPL
# Wages are text with a euro sign and an optional K/M suffix (assumed from the
# characters the earlier cleanup stripped); the suffix is applied as a multiplier
# so all wages are in euros.
wages=EPL["Wage"].str.replace("[€,]", "", regex=True)
wage_scale=wages.str[-1].map({"K": 1e3, "M": 1e6}).fillna(1)
new_wages=pd.to_numeric(wages.str.rstrip("KM"), errors="coerce")*wage_scale
# The club column gets its own name. Rebinding EPL to this Series would destroy
# the EPL player frame: later cells such as EPL.describe() would describe club
# names, and re-running this cell would fail on EPL["Wage"].
club_per_player=EPL['Club']
plt.title("Distribution of the wages of the teams in EPL")
plt.bar(club_per_player,new_wages)
plt.xticks(rotation=90)
plt.show()

# ANALYSIS OF LA LIGA TEAMS

In [None]:
#creating a list of laliga teams
# A misspelled name matches no row in data["Club"], silently dropping that squad,
# so 'Villarreal CF' and 'Atlético Madrid' are corrected; the repeated 'RC Celta'
# entry is removed.
# NOTE(review): spellings follow the usual FIFA 19 club names -- confirm against
# data["Club"].unique().
laliga_club_names=["FC Barcelona", "Real Madrid","CA Osasuna", "Villarreal CF", "Real Sociedad", "Real Betis", "Deportivo Alavés",
        'Athletic Club de Bilbao',"Sevilla FC","RC Celta","Real Valladolid CF",'Valencia CF',
        "SD Huesca","SD Eibar", "Elche CF",
        "Granada CF", 'Getafe CF',"Levante UD",'RCD Espanyol','Cádiz CF','Atlético Madrid']
laliga=data[data["Club"].isin(laliga_club_names)]


laliga

In [None]:
#teams with their number of players 
laliga.Club.value_counts()

In [None]:
#looking at the number of p[layers in each of the countries  whose players are in the LA Liga 
liga=laliga["Nationality"].value_counts()
liga

In [None]:
# `liga` holds one entry per nationality, so its length is the number of distinct
# countries represented in La Liga -- not a number of players, as the message
# used to say.
liga_intl=liga.count()
print("there are", liga_intl, "countries whose players play in LA Liga ")

In [None]:
#total and average number of players in LA liga
total_players=laliga["Name"].count()
teams=laliga["Club"].unique()
average_players_laliga=total_players/len(teams)
print("There are a total of",total_players, "players in laliga",".""on average, there are",average_players_laliga, "in each team in laliga")

In [None]:
#statistical representation of LA Liga 
laliga.describe()

In [None]:
#heatmap of LA Liga players
laliga_feature_columns=['Age','Crossing', 'International Reputation',
       'Skill Moves', 'Work Rate', 'Body Type','Overall', 'Potential', 'Value', 'Wage',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']
# Numeric columns only: text columns make corr() raise on pandas >= 2.0.
laliga_corr=laliga[laliga_feature_columns].select_dtypes(include="number").corr()
plt.figure(figsize=(10,8))
plt.title("Heatmap showing the relationship of the various features of players in LA liga")
# `linewidths` is the keyword seaborn documents for the cell borders.
sns.heatmap(laliga_corr, linewidths=.5)
plt.show()

In [None]:
plt.scatter(laliga["Acceleration"], laliga["LongPassing"])
plt.title("Distribution of the relationship between acceleration and long passing")
plt.xlabel("Acceleration")
plt.ylabel("LongPassing")
plt.show()
print("from the graph, it can be deduced that, acceleration and long passing are positively correlated. which means that a player "
       "who has a high acceleration is also good in giving long passes")

In [None]:
#looking at the wages of the teams In laliga
# Wages are text with a euro sign and an optional K/M suffix (assumed from the
# characters the earlier cleanup stripped); the suffix is applied as a multiplier
# so all wages are in euros.
wages=laliga["Wage"].str.replace("[€,]", "", regex=True)
wage_scale=wages.str[-1].map({"K": 1e3, "M": 1e6}).fillna(1)
new_wages=pd.to_numeric(wages.str.rstrip("KM"), errors="coerce")*wage_scale
laliga_teams=laliga['Club']
plt.title("Distribution of the wages of the teams in La Liga")
plt.bar(laliga_teams, new_wages)
plt.xticks(rotation=90)
plt.show()

In [None]:
#looking at the overall rating of the teams In laliga

laliga_teams=laliga['Club']
overall=laliga['Overall']
# The title names what is plotted (overall ratings); it previously repeated the
# wage plot's title.
plt.title("Distribution of the overall ratings of the teams in La Liga")
plt.bar(laliga_teams, overall)
plt.xticks(rotation=90)
plt.show()

# COMPARISON BETWEEN LALIGA AND EPL PLAYERS(age and rating)

In [None]:
#age distribution of players in LA liga
plt.title("age distribution of LA Liga players")
laliga_age=laliga["Age"]
plt.hist(laliga_age)
plt.xlabel("Age")
plt.ylabel("number of players")
plt.show()


In [None]:
#LaliGA overall rating
overall=laliga["Overall"]
plt.title("Distribution of overall ratings of LA Liga players")
plt.hist(overall)
plt.xlabel("overall rating")
plt.ylabel("Number of players")
plt.show()

In [None]:
laliga_shot=laliga["ShotPower"]
plt.title("Distribution of shot power in LA liga")
plt.hist(laliga_shot)
plt.show()



In [None]:
#statistical description of LA Liga players 
laliga.describe()

In [None]:
#statistical description of EPL players
EPL.describe()

In [None]:
# Row of the La Liga player with the highest Penalties rating (first one on ties).
# The three bare pen[...] expressions that stood here had no effect: only the last
# expression of a cell is displayed, and that is the print below.
pen=laliga.loc[laliga["Penalties"].idxmax()]
print("The best penalty taker in laliga is", pen["Name"], "plays for",pen["Club"],"and from",pen["Nationality"])

In [None]:
# All players whose listed position is goalkeeper.
keeper_positions=["GK"]
is_keeper=data["Position"].isin(keeper_positions)
goalkeepers=data[is_keeper]
goalkeepers

In [None]:
# Number of goalkeepers (rows of `goalkeepers` with a non-missing name).
# The message now says what is being counted.
total_keepers=goalkeepers["Name"].count()
print("There are a total of",total_keepers,"goalkeepers in the FIFA 19 dataset")

In [None]:
#altenative way of selecting only keepers
data.loc[data["Position"]=="GK"]

In [None]:
goalkeepers.describe()

In [None]:
goalkeepers_age=goalkeepers["Age"]
plt.title("Age distribution of goalkepeers worldwide based on FIFA 19 player ratings")
plt.hist(goalkeepers_age)
plt.xlabel("Age of goalkeeper")
plt.ylabel("Number of goalkeepers")
plt.show()

In [None]:
# Histogram of the "Overall" rating across goalkeepers
overall = goalkeepers["Overall"]
plt.hist(overall)
plt.gca().set(
    title="Distribution of goalkeeper  overall rating",
    xlabel="Rating",
    ylabel="Number of goalkeepers",
)
plt.show()

In [None]:
# Oldest and youngest goalkeeper: fetch the full row at the extreme "Age" values
goalkeeper_ages = goalkeepers["Age"]
old_GK = goalkeepers.loc[goalkeeper_ages.idxmax()]
young_GK = goalkeepers.loc[goalkeeper_ages.idxmin()]

print(
    "The oldest goalkeeper is", old_GK["Name"],
    "He is", goalkeeper_ages.max(),
    "from", old_GK["Nationality"],
    "and plays for", old_GK["Club"],
    "He has a rating of", old_GK["Overall"],
)
print(
    "The youngest goalkeeper is", young_GK["Name"],
    "He is", goalkeeper_ages.min(),
    "from", young_GK["Nationality"],
    "and plays for", young_GK["Club"],
    "He has a rating of", young_GK["Overall"],
)

In [None]:
# Pairwise correlation of the five goalkeeping attributes, drawn as a heatmap
goalkeeper_features = ['GKDiving', 'GKHandling', 'GKKicking', 'GKPositioning', 'GKReflexes']
goalkeeper_corr = goalkeepers[goalkeeper_features].corr()
sns.heatmap(goalkeeper_corr, linewidth=.5)
plt.title("Heatmap of goalkeepers features")
plt.show()

In [None]:
# Correlation table for the five goalkeeping attributes
goalkeeper_features = ['GKDiving', 'GKHandling', 'GKKicking', 'GKPositioning', 'GKReflexes']
goalkeepers.loc[:, goalkeeper_features].corr()

In [None]:
# Share of goalkeepers among all rows of the dataset, in percent
total_rows = len(data)
goalkeepers_percentage = total_keepers / total_rows * 100
print("There is a percentage of", goalkeepers_percentage, " % goalkeepers in the FIFA 19 dataset")

# ANALYSIS OF DEFENDERS

In [None]:
# Keep only the rows of `data` whose Position is one of the defensive roles
defender_positions = ['LWB', 'RWB', 'LB', 'LCB', 'CB', 'RCB', 'RB']
is_defender = data["Position"].isin(defender_positions)
defenders = data[is_defender]
defenders

In [None]:
# Summary statistics, as produced by DataFrame.describe(), for the defenders subset
defenders.describe()

In [None]:
# Number of defenders, counted as the non-null entries of the "Name" column
total_defenders = defenders["Name"].count()
# The sentence names what is being counted (defenders, not all players)
print("There are a total of", total_defenders, "defenders in the FIFA 19 dataset")
# Share of defenders among all rows of the dataset, in percent
defenders_percentage = (total_defenders)/len(data["Name"])*100
print("The percentage of defenders is", defenders_percentage, "%")

In [None]:
# Histogram of defender ages
defenders_age = defenders["Age"]
plt.hist(defenders_age)
plt.gca().set(
    title="The distribution of the ages of defenders according to FIFA 19 player ratings",
    xlabel="Age",
    ylabel="Number of players",
)
plt.show()

# Histogram of the "Overall" rating across defenders
overall = defenders["Overall"]
plt.hist(overall)
plt.gca().set(
    title="Distribution of defenders overall rating",
    xlabel="Rating",
    ylabel="Number of defenders",
)
plt.show()

In [None]:
# Correlation table for the attributes examined for defenders
defender_features = [
    'Agility', 'Reactions', 'Balance',
    'Jumping', 'Stamina', 'Strength', 'Aggression',
    'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
    'Marking', 'StandingTackle', 'SlidingTackle',
]
defenders.loc[:, defender_features].corr()

In [None]:
# Correlation heatmap for the attributes examined for defenders.
defender_features = [
    'Agility', 'Reactions', 'Balance',
    'Jumping', 'Stamina', 'Strength', 'Aggression',
    'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
    'Marking', 'StandingTackle', 'SlidingTackle',
]
# The figure must exist before anything is drawn: plt.figure() always opens a
# NEW figure, so calling it after sns.heatmap() would leave the heatmap at the
# default size and add an empty second figure.
plt.figure(figsize=(8,8))
sns.heatmap(defenders[defender_features].corr(), linewidth=.5)
plt.title("Heatmap showing the relationship between the various defending features in FIFA 19")
plt.show()

In [None]:
# Oldest and youngest defender: fetch the full row at the extreme "Age" values
defender_ages = defenders['Age']
old_DF = defenders.loc[defender_ages.idxmax()]
young_DF = defenders.loc[defender_ages.idxmin()]

print(
    "The oldest defender is", defender_ages.max(), "years old", ".",
    "He is", old_DF["Name"],
    "from", old_DF["Nationality"],
    "plays for", old_DF["Club"],
    "and has a rating of", old_DF["Overall"],
)
print(
    "The youngest defender is", defender_ages.min(), "years old", ".",
    "He is", young_DF["Name"],
    "from", young_DF["Nationality"],
    "plays for", young_DF["Club"],
    "and has a rating of", young_DF["Overall"],
)


# ANALYSIS OF MIDFIELDERS

In [None]:
# Keep only the rows of `data` whose Position is one of the midfield roles
midfielder_positions = ['LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM', 'RM', 'LDM', 'CDM', 'RDM']
is_midfielder = data['Position'].isin(midfielder_positions)
midfielders = data.loc[is_midfielder]
midfielders

In [None]:
# Summary statistics, as produced by DataFrame.describe(), for the midfielders subset
midfielders.describe()

In [None]:
# Histogram of midfielder ages
midfielders_age = midfielders.Age
plt.hist(midfielders_age)
# Title spelling corrected ("midfielders")
plt.title("Distribution of midfielders age")
plt.xlabel("Age")
plt.ylabel("Number of players")
plt.show()

# Histogram of the "Overall" rating across midfielders
midfielders_overall = midfielders.Overall
plt.hist(midfielders_overall)
# Title spelling corrected ("midfielders")
plt.title("Distribution of midfielders overall rating")
plt.xlabel("overall")
plt.ylabel("Number of players")
plt.show()

In [None]:
# Correlation heatmap and correlation table for the midfielder attributes.
# A single column list feeds both views so they always describe the same set
# of attributes; it contains every column that either view used before
# (including 'Volleys', 'ShotPower' and 'Jumping').
midfielder_features = [
    'Crossing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
    'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
    'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
    'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
    'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
    'Marking', 'StandingTackle',
]
# Compute the correlation matrix once and reuse it for the plot and the table
midfielders_corr = midfielders[midfielder_features].corr()

sns.heatmap(midfielders_corr, linewidth=.5)
plt.title("Heatmap of midfielders")
plt.show()

# Last expression of the cell: rendered as the correlation table
midfielders_corr

In [None]:
# Oldest and youngest midfielder: fetch the full row at the extreme "Age" values
midfielder_ages = midfielders["Age"]
old_MD = midfielders.loc[midfielder_ages.idxmax()]
young_MD = midfielders.loc[midfielder_ages.idxmin()]

print(
    "The oldest midfielder is", midfielder_ages.max(),
    "His name is", old_MD["Name"],
    "from", old_MD["Nationality"],
    "plays for", old_MD["Club"],
    "and has a rating of", old_MD["Overall"],
)
print(
    "The youngest midfielder is", midfielder_ages.min(),
    "His name is", young_MD["Name"],
    "from", young_MD["Nationality"],
    "plays for", young_MD["Club"],
    "and has a rating of", young_MD["Overall"],
)

In [None]:
# Number of midfielders (non-null names) and their share of all rows, in percent
total_midfielders = midfielders["Name"].count()
total_rows = len(data)
midfielders_percentage = total_midfielders / total_rows * 100
print("the percentage of midfielders is", midfielders_percentage, "%")

# ANALYSIS OF STRIKERS

In [None]:
# Keep only the rows of `data` whose Position is one of the attacking roles
striker_positions = ['LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW']
is_striker = data["Position"].isin(striker_positions)
strikers = data[is_striker]
strikers

In [None]:
# Summary statistics, as produced by DataFrame.describe(), for the strikers subset
strikers.describe()

In [None]:
# Histogram of striker ages
strikers_age = strikers["Age"]
plt.hist(strikers_age)
plt.gca().set(
    title="Distribution of strikers age",
    xlabel="Age",
    ylabel="Number of players",
)
plt.show()

# Histogram of the "Overall" rating across strikers
strikers_overall = strikers["Overall"]
plt.hist(strikers_overall)
plt.gca().set(
    title="Distribution of strikers overall rating",
    xlabel="overall",
    ylabel="Number of players",
)
plt.show()

In [None]:
# Correlation heatmap and correlation table for the striker attributes.
# 'Work Rate' is intentionally not in the list: DataFrame.corr() is only
# defined for numeric columns, and recent pandas versions raise on text
# columns instead of silently dropping them.
# NOTE(review): 'Work Rate' presumably holds text labels - confirm with data.info().
striker_features = [
    'Potential', 'Crossing',
    'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
    'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
    'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
    'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
    'Positioning', 'Vision', 'Penalties', 'Composure',
]
# Compute the correlation matrix once and reuse it for the plot and the table
strikers_corr = strikers[striker_features].corr()

# The figure must be created before drawing: plt.figure() always opens a NEW
# figure, so calling it after sns.heatmap() would not resize the heatmap and
# would add an empty second figure.
plt.figure(figsize=(10,10))
sns.heatmap(strikers_corr, linewidth=.5)
plt.title("Heatmap of strikers")
plt.show()

# Last expression of the cell: rendered as the correlation table
strikers_corr

In [None]:
# Number of strikers (non-null names) and their share of all rows, in percent
total_strikers = strikers["Name"].count()
total_rows = len(data)
strikers_percentage = total_strikers / total_rows * 100
print(
    'There is a total of', total_strikers,
    "strikers in FIFA 19, with a percentage of", strikers_percentage, "%",
)

In [None]:
# Pie chart of the four position-group percentages computed in earlier cells
positions = (
    goalkeepers_percentage,
    defenders_percentage,
    midfielders_percentage,
    strikers_percentage,
)
position_labels = (
    "goalkeepers_percentage",
    "defenders_percentage",
    "midfielders_percentage",
    "strikers_percentage",
)
plt.pie(positions, labels=position_labels, autopct='%1.1f%%')
plt.title("A Pie chart showing the percentage of the various positions")
plt.show()

# CLUB ANALYSIS


In [None]:
#looking at some big teams in the world 

In [None]:
# Players belonging to a hand-picked list of big clubs.
# isin() matches club names exactly and silently ignores names that do not
# occur in data["Club"], so a misspelled name drops that club without error.
# Fixed here: 'Atlético Madrd' -> 'Atlético Madrid'; the misspelled duplicate
# 'Burrossia Dortmund' is removed ('Borussia Dortmund' is already listed).
# NOTE(review): confirm the remaining spellings (e.g. 'Olympique Lyon')
# against data["Club"].unique().
big_team_names = ['FC Barcelona','Real Madrid','Atlético Madrid', 'Sevilla FC','Liverpool','Chelsea','Manchester City',
          'Manchester United','Borussia Dortmund',"Arsenal",'Milan','Inter','Juventus','Napoli','Paris Saint-Germain',
          'Olympique Lyon','FC Bayern München']
big_teams = data.loc[data["Club"].isin(big_team_names)]
big_teams

In [None]:
# Number of rows (players) per club within the big-teams subset
big_teams["Club"].value_counts()

In [None]:
# Comparing the wages of players in the big teams.
# The pattern is a regex character class (strip '€', ',', 'M' and 'K'), so
# regex=True must be passed explicitly: pandas >= 2.0 treats the pattern as a
# literal string by default, nothing would be stripped and astype('int')
# would fail on the raw text.
# NOTE(review): dropping the 'K'/'M' suffix assumes every wage uses the same
# unit - confirm against the raw 'Wage' values.
wages = big_teams['Wage'].str.replace(r'[€MK,]', '', regex=True)
new_wages = wages.astype('int')
teams = big_teams["Club"]

plt.bar(teams, new_wages)
plt.title("Distribution of wages of the big teams in the world ")
plt.xlabel("Teams")
plt.ylabel("Wages")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Overall rating of the players in the big teams, plotted against their club
teams = big_teams["Club"]
overall = big_teams["Overall"]

plt.bar(teams, overall)
plt.gca().set(
    title="distribution to compare the overall ratings of players in the big teams",
    xlabel="Team",
    ylabel="Overall rating",
)
plt.xticks(rotation=90)
plt.show()