# Attribute Evaluation

https://www.goal.com/en-us/news/fifa-player-ratings-explained-how-are-the-card-number--stats-decided/1hszd2fgr7wgf1n2b2yjdpgynu

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [3]:
df = pd.read_csv('dataset.csv')
df = df[df['Position'] != 'Goalkeeper']
df = df .drop('index', axis = 1)
df = df.drop('level_0', axis = 1)
df.reset_index(drop = True, inplace = True)
df

Unnamed: 0,Player,Age,Matches Played,Start,Minutes,Goals,Assists,Penalties Made,Yellow Cards,Red Cards,...,Successful Challenge %,Blocks,Interceptions,Tackles + Interceptions,Clearances,Errors,Club,Position,Nation,Value
0,Bruno Fernandes,27,37,37,3316,8.0,8.0,2.0,6.0,0.0,...,42.3,37.0,25,93.0,40.0,1.0,Manchester United,midfield,Portugal,74900000.0
1,Randal Kolo Muani,23,32,31,2631,15.0,11.0,2.0,6.0,1.0,...,11.1,19.0,3,17.0,13.0,0.0,Eintracht Frankfurt,Attack,France,64000000.0
2,Victor Osimhen,23,32,30,2566,26.0,4.0,2.0,4.0,0.0,...,0.0,6.0,6,16.0,29.0,0.0,SSC Napoli,Attack,Nigeria,55000000.0
3,Bukayo Saka,20,38,37,3181,14.0,11.0,2.0,6.0,0.0,...,45.5,38.0,14,77.0,19.0,0.0,Arsenal FC,Attack,England,55000000.0
4,Vinicius Junior,22,33,32,2823,10.0,9.0,0.0,10.0,1.0,...,35.9,24.0,6,36.0,1.0,0.0,Real Madrid,Attack,Brazil,50000000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1600,Carlos Soler,25,26,14,1187,3.0,2.0,0.0,0.0,0.0,...,52.6,12.0,4,24.0,6.0,0.0,Paris Saint-Germain,midfield,Spain,-25000000.0
1601,Ansu Fati,19,36,12,1393,7.0,3.0,0.0,3.0,0.0,...,27.8,19.0,5,21.0,6.0,0.0,FC Barcelona,Attack,Spain,-25000000.0
1602,Romelu Lukaku,29,25,19,1660,10.0,6.0,2.0,1.0,0.0,...,40.0,3.0,0,3.0,16.0,0.0,Chelsea FC,Attack,Belgium,-30000000.0
1603,Jadon Sancho,22,26,21,1686,6.0,3.0,0.0,0.0,0.0,...,28.6,25.0,7,23.0,6.0,0.0,Manchester United,Attack,England,-30000000.0


In [247]:
df.columns

Index(['Player', 'Age', 'Matches Played', 'Start', 'Minutes', 'Goals',
       'Assists', 'Penalties Made', 'Yellow Cards', 'Red Cards', 'Shots',
       'Shots On Target', 'Shots On Target %', 'Goals Per Shot',
       'Goals Per Shot On Target', 'Average Shot Distance',
       'Shots From Freekicks', 'Passes Completed', 'Passes',
       'Pass Completion %', 'Total Passing Distance',
       'Progressive Passing Distance', 'Shot-Creating Actions',
       'Shot-Creating Actions/90', 'SCA Pass Live', 'SCA Pass Dead',
       'SCA Take-ons', 'SCA Shots', 'SCA Fouls', 'SCA Defense',
       'Goal-Creating Actions', 'Goal-Creating Actions/90', 'Goal Pass Live',
       'Goal Pass Dead', 'Goal Take-ons', 'Goal Shots', 'Goal Fouls',
       'Goal Defense', 'Tackles', 'Tackles Won', 'Challenges',
       'Successful Challenge %', 'Blocks', 'Interceptions',
       'Tackles + Interceptions', 'Clearances', 'Errors', 'Club', 'Position',
       'Nation', 'Value'],
      dtype='object')

In [248]:
df['Shooting'] = 0.35*df['Shots On Target %'] + 0.35*df['Goals'] / df['Matches Played'] + 0.20*df['Goals Per Shot'] + 0.05*df['Penalties Made'] / df['Matches Played'] + 0.05*df['Shots From Freekicks'] / df['Matches Played']
df['Passing'] = 0.45*df['Assists'] / df['Matches Played'] + 0.025*df['Pass Completion %'] + 0.025*df['Passes Completed'] / df['Matches Played'] + 0.025*df['Progressive Passing Distance'] / df['Matches Played'] + 0.025*df['Total Passing Distance'] / df['Matches Played'] + 0.45*df['Shot-Creating Actions/90']
df['Dribbling'] = 0.40*df['SCA Take-ons'] / df['Matches Played'] + 0.40*df['Goal Take-ons'] / df['Matches Played'] + 0.10*df['Shot-Creating Actions/90'] + 0.10*df['Goal-Creating Actions/90']
df['Defending'] = 0.25*df['Successful Challenge %'] + 0.25*df['Tackles + Interceptions'] / df['Matches Played'] + 0.20*df['Clearances'] / df['Matches Played'] + 0.25*df['Blocks'] / df['Matches Played'] - 0.05*df['Errors'] / df['Matches Played']
df["Physical"] = 0.20*df['Yellow Cards'] / df['Matches Played'] + 0.20*df['Red Cards'] / df['Matches Played'] + 0.20*df['Tackles']/ df['Matches Played'] + 0.20*df['Challenges'] / df['Matches Played'] + 0.10*df['SCA Defense'] / df['Matches Played'] + 0.10*df['Goal Defense'] / df['Matches Played']

In [249]:
selected_columns = ['Player', 'Age', 'Position', 'Club', 'Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
rating = df[selected_columns].copy()
rating

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical
0,Bruno Fernandes,27,midfield,Manchester United,12.560486,28.842324,0.729486,11.668243,0.562162
1,Randal Kolo Muani,23,Attack,Eintracht Frankfurt,14.201188,8.920344,0.734000,3.137500,0.137500
2,Victor Osimhen,23,Attack,SSC Napoli,14.115063,6.750625,0.374500,0.353125,0.093750
3,Bukayo Saka,20,Attack,Arsenal FC,11.955895,17.283316,0.640263,12.231579,0.476316
4,Vinicius Junior,22,Attack,Real Madrid,16.512061,15.732591,0.955758,9.435606,0.339394
...,...,...,...,...,...,...,...,...,...
1600,Carlos Soler,25,midfield,Paris Saint-Germain,21.100385,12.802731,0.319000,13.542308,0.230769
1601,Ansu Fati,19,Attack,FC Barcelona,10.419222,9.125444,0.468000,7.261111,0.136111
1602,Romelu Lukaku,29,Attack,Chelsea FC,14.456000,8.057000,0.584000,10.188000,0.048000
1603,Jadon Sancho,22,Attack,Manchester United,15.153769,14.158577,0.604308,7.657692,0.184615


In [250]:
rating.replace([np.inf, -np.inf], 0, inplace=True)

In [251]:
selected_columns = ['Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
selected = rating[selected_columns]
scaler = MinMaxScaler(feature_range=(0, 99))
selected_scaled = pd.DataFrame(scaler.fit_transform(selected), columns=selected_columns)
rating[selected_columns] = selected_scaled
rating.head(25)

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical
0,Bruno Fernandes,27,midfield,Manchester United,35.297121,52.294243,32.097405,44.036081,41.621622
1,Randal Kolo Muani,23,Attack,Eintracht Frankfurt,39.907772,16.173545,32.296,11.978332,10.180288
2,Victor Osimhen,23,Attack,SSC Napoli,39.665746,12.239611,16.478,1.514907,6.941106
3,Bukayo Saka,20,Attack,Arsenal FC,33.598115,31.336515,28.171579,46.153044,35.265688
4,Vinicius Junior,22,Attack,Real Madrid,46.401722,28.524883,42.053333,35.646035,25.128205
5,Alejandro Balde,18,Defender,FC Barcelona,20.811023,43.689992,16.102667,57.777836,31.634615
6,Jamal Musiala,19,midfield,Bayern Munich,37.735749,29.868396,36.934667,37.473745,22.435897
7,Martin Odegaard,23,midfield,Arsenal FC,33.275393,44.376598,24.694703,29.997247,22.211538
8,Rodrygo,21,Attack,Real Madrid,31.816431,26.593972,41.655059,17.971643,11.105769
9,Alexander Isak,22,Attack,Newcastle United,33.4298,10.626625,5.192,1.409216,7.403846


In [252]:
rating[rating['Shooting'] == 99]

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical
1428,Pol Lirola,24,Defender,Olympique Marseille,99.0,25.62571,13.434667,60.768505,27.147436


# It looks like I didn't factor in the amount of games people played. Looks like people who played less games and got lucky with their stats are superior

In [253]:
df['Shooting'] = 0.35*df['Shots On Target %'] + 0.35*df['Goals'] / df['Matches Played'] + 0.20*df['Goals Per Shot'] + 0.05*df['Penalties Made'] / df['Matches Played'] + 0.05*df['Shots From Freekicks'] / df['Matches Played']
df['Passing'] = 0.45*df['Assists'] / df['Matches Played'] + 0.025*df['Pass Completion %'] + 0.025*df['Passes Completed'] / df['Matches Played'] + 0.025*df['Progressive Passing Distance'] / df['Matches Played'] + 0.025*df['Total Passing Distance'] / df['Matches Played'] + 0.45*df['Shot-Creating Actions/90']
df['Dribbling'] = 0.40*df['SCA Take-ons'] / df['Matches Played'] + 0.40*df['Goal Take-ons'] / df['Matches Played'] + 0.10*df['Shot-Creating Actions/90'] + 0.10*df['Goal-Creating Actions/90']
df['Defending'] = 0.25*df['Successful Challenge %'] + 0.25*df['Tackles + Interceptions'] / df['Matches Played'] + 0.20*df['Clearances'] / df['Matches Played'] + 0.25*df['Blocks'] / df['Matches Played'] - 0.05*df['Errors'] / df['Matches Played']
df["Physical"] = 0.20*df['Yellow Cards'] / df['Matches Played'] + 0.20*df['Red Cards'] / df['Matches Played'] + 0.20*df['Tackles']/ df['Matches Played'] + 0.20*df['Challenges'] / df['Matches Played'] + 0.10*df['SCA Defense'] / df['Matches Played'] + 0.10*df['Goal Defense'] / df['Matches Played']

In [254]:
df['Shooting'] = 0.50*df['Matches Played'] + 0.50*df['Shooting']
df['Passing'] = 0.50*df['Matches Played'] + 0.50*df['Passing']
df['Dribbling'] = 0.50*df['Matches Played'] + 0.50*df['Dribbling']
df['Defending'] = 0.50*df['Matches Played'] + 0.50*df['Defending']
df["Physical"] = 0.50*df['Matches Played'] + 0.50*df['Physical']

In [255]:
selected_columns = ['Player', 'Age', 'Position', 'Club', 'Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
rating = df[selected_columns].copy()
rating.head()

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical
0,Bruno Fernandes,27,midfield,Manchester United,24.780243,32.921162,18.864743,24.334122,18.781081
1,Randal Kolo Muani,23,Attack,Eintracht Frankfurt,23.100594,20.460172,16.367,17.56875,16.06875
2,Victor Osimhen,23,Attack,SSC Napoli,23.057531,19.375312,16.18725,16.176562,16.046875
3,Bukayo Saka,20,Attack,Arsenal FC,24.977947,27.641658,19.320132,25.115789,19.238158
4,Vinicius Junior,22,Attack,Real Madrid,24.75603,24.366295,16.977879,21.217803,16.669697


In [256]:
rating.replace([np.inf, -np.inf], 0, inplace=True)

In [257]:
selected_columns = ['Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
selected = rating[selected_columns]
scaler = MinMaxScaler(feature_range=(0, 99))
selected_scaled = pd.DataFrame(scaler.fit_transform(selected), columns=selected_columns)
rating[selected_columns] = selected_scaled
rating.head(25)

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical
0,Bruno Fernandes,27,midfield,Manchester United,84.289402,76.410509,96.431512,83.095757,95.771379
1,Randal Kolo Muani,23,Attack,Eintracht Frankfurt,78.458461,47.042326,83.316101,59.533545,81.561952
2,Victor Osimhen,23,Attack,SSC Napoli,78.308969,44.485519,82.372251,54.684881,81.447352
3,Bukayo Saka,20,Attack,Arsenal FC,84.975736,63.967722,98.822713,85.818124,98.165924
4,Vinicius Junior,22,Attack,Real Madrid,84.205346,56.248316,86.523767,72.242346,84.710207
5,Alejandro Balde,18,Defender,FC Barcelona,68.398717,66.104673,84.975307,82.498039,84.940398
6,Jamal Musiala,19,midfield,Bayern Munich,78.852627,57.121514,86.21834,73.089292,84.614956
7,Martin Odegaard,23,midfield,Arsenal FC,83.040639,71.264542,95.989798,76.590278,95.084668
8,Rodrygo,21,Attack,Real Madrid,76.932198,56.171751,89.125455,65.793563,86.833519
9,Alexander Isak,22,Attack,Newcastle United,22.384375,8.085036,2.935256,2.394402,2.881354


In [258]:
rating.sort_values(by = 'Shooting', ascending=False).head()

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical
1177,Juan Bernat,29,Defender,Paris Saint-Germain,99.0,53.995436,71.579204,71.605064,71.734478
478,Felipe Anderson,29,Attack,SS Lazio,98.525087,62.765439,98.439673,82.026032,98.489904
667,Tim Ream,34,Defender,Fulham FC,96.198666,85.135286,84.21138,80.327906,84.543518
441,Serano Seymor,20,Defender,Excelsior Rotterdam,95.669364,41.957104,52.614063,67.246423,53.186544
1311,Aissa Mandi,30,Defender,Villarreal CF,95.466858,58.352924,52.68495,58.846321,53.011916


# Still doesn't make sense. Looks like any stats with '%' have a major flaw. If the players do something 1 time and it is successful, they automatically have 100% which is an issue.

## Replacing all the % with the actual stat / total games played. For example, Shots on target% -> Shots on target / matches played

In [35]:
selected_columns = ['Goals',
       'Assists', 'Penalties Made', 'Yellow Cards', 'Red Cards', 'Shots',
       'Shots On Target', 'Shots On Target %', 'Goals Per Shot',
       'Goals Per Shot On Target', 'Average Shot Distance',
       'Shots From Freekicks', 'Passes Completed', 'Passes',
       'Pass Completion %', 'Total Passing Distance',
       'Progressive Passing Distance', 'Shot-Creating Actions',
       'Shot-Creating Actions/90', 'SCA Pass Live', 'SCA Pass Dead',
       'SCA Take-ons', 'SCA Shots', 'SCA Fouls', 'SCA Defense',
       'Goal-Creating Actions', 'Goal-Creating Actions/90', 'Goal Pass Live',
       'Goal Pass Dead', 'Goal Take-ons', 'Goal Shots', 'Goal Fouls',
       'Goal Defense', 'Tackles', 'Tackles Won', 'Challenges',
       'Successful Challenge %', 'Blocks', 'Interceptions',
       'Tackles + Interceptions', 'Clearances', 'Errors']
selected = df[selected_columns]
scaler = MinMaxScaler(feature_range=(0, 99))
selected_scaled = pd.DataFrame(scaler.fit_transform(selected), columns=selected_columns)
df[selected_columns] = selected_scaled

In [36]:
df['Shooting'] = 0.30*df['Shots On Target'] / df['Matches Played'] + 0.45*df['Goals'] / df['Matches Played'] + 0.15*df['Goals Per Shot'] + 0.05*df['Penalties Made'] / df['Matches Played'] + 0.05*df['Shots From Freekicks'] / df['Matches Played']
df['Passing'] = 0.45*df['Assists'] / df['Matches Played'] + 0.05*df['Passes Completed'] / df['Matches Played'] + 0.025*df['Progressive Passing Distance'] / df['Matches Played'] + 0.025*df['Total Passing Distance'] / df['Matches Played'] + 0.45*df['Shot-Creating Actions/90']
df['Dribbling'] = 0.40*df['SCA Take-ons'] / df['Matches Played'] + 0.40*df['Goal Take-ons'] / df['Matches Played'] + 0.10*df['Shot-Creating Actions/90'] + 0.10*df['Goal-Creating Actions/90']
df['Defending'] = 0.20*df['Successful Challenge %']*df['Challenges'] / df['Matches Played'] + 0.20*df['Tackles Won'] / df['Matches Played'] + 0.20*df['Interceptions'] / df['Matches Played'] + 0.15*df['Clearances'] / df['Matches Played'] + 0.20*df['Blocks'] / df['Matches Played'] - 0.05*df['Errors'] / df['Matches Played']
df["Physical"] = 0.20*df['Yellow Cards'] / df['Matches Played'] + 0.20*df['Red Cards'] / df['Matches Played'] + 0.20*df['Tackles'] / df['Matches Played'] + 0.20*df['Challenges'] / df['Matches Played'] + 0.10*df['SCA Defense'] / df['Matches Played'] + 0.10*df['Goal Defense'] / df['Matches Played']

In [37]:
df['Shooting'] = 0.40*df['Matches Played'] + 0.60*df['Shooting']
df['Passing'] = 0.40*df['Matches Played'] + 0.60*df['Passing']
df['Dribbling'] = 0.40*df['Matches Played'] + 0.60*df['Dribbling']
df['Defending'] = 0.40*df['Matches Played'] + 0.60*df['Defending']
df["Physical"] = 0.40*df['Matches Played'] + 0.60*df['Physical']

In [38]:
selected_columns = ['Player', 'Age', 'Position', 'Club', 'Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
rating = df[selected_columns].copy()
rating.replace([np.inf, -np.inf], 0, inplace=True)
rating.tail()

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical
1600,Carlos Soler,25,midfield,Paris Saint-Germain,13.215006,15.15548,11.74376,13.770759,10.523913
1601,Ansu Fati,19,Attack,FC Barcelona,15.658316,20.309955,16.206521,15.14169,14.56193
1602,Romelu Lukaku,29,Attack,Chelsea FC,11.941648,17.452682,12.367703,10.555241,10.056484
1603,Jadon Sancho,22,Attack,Manchester United,13.239746,18.891688,12.887364,12.021274,10.49913
1604,Sadio Mane,30,Attack,Bayern Munich,11.395168,17.197732,12.165491,10.177824,10.109867


In [39]:
selected_columns = ['Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
selected = df[selected_columns]
scaler = MinMaxScaler(feature_range=(0, 99))
selected_scaled = pd.DataFrame(scaler.fit_transform(selected), columns=selected_columns)
rating[selected_columns] = selected_scaled

In [40]:
rating

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical
0,Bruno Fernandes,27,midfield,Manchester United,86.989626,91.171735,96.903033,60.923267,94.777124
1,Randal Kolo Muani,23,Attack,Eintracht Frankfurt,81.780802,69.467210,85.583627,38.536189,81.388143
2,Victor Osimhen,23,Attack,SSC Napoli,84.692298,63.153320,77.828189,38.284471,80.635528
3,Bukayo Saka,20,Attack,Arsenal FC,93.145659,84.074420,97.948382,57.021780,97.577449
4,Vinicius Junior,22,Attack,Real Madrid,81.439475,80.432157,91.919860,46.789164,85.715518
...,...,...,...,...,...,...,...,...,...
1600,Carlos Soler,25,midfield,Paris Saint-Germain,72.226724,52.944508,63.905008,40.778732,64.760786
1601,Ansu Fati,19,Attack,FC Barcelona,85.997473,71.439406,89.045948,44.782011,90.591231
1602,Romelu Lukaku,29,Attack,Chelsea FC,65.049942,61.187154,67.419986,31.389042,61.770732
1603,Jadon Sancho,22,Attack,Manchester United,72.366159,66.350488,70.347492,35.670033,64.602257


In [41]:
selected_columns = ['Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
rating['Overall'] = rating[selected_columns].apply(lambda x: x.nlargest(4).mean(), axis=1)

In [42]:
rating.sort_values(by = 'Defending', ascending=False).head(20)

Unnamed: 0,Player,Age,Position,Club,Shooting,Passing,Dribbling,Defending,Physical,Overall
1081,Andrei Girotto,30,Defender,FC Nantes,81.304015,56.531065,81.315435,99.0,94.684857,89.076077
1189,Melvin Bard,21,Defender,OGC Nice,72.225532,53.220119,74.712904,96.248637,86.206118,82.348298
73,Joao Palhinha,27,midfield,Fulham FC,81.38093,54.57057,78.556374,93.537967,94.290934,86.941552
488,Danilo,31,Defender,Juventus FC,86.226284,63.729277,85.353523,92.208527,95.576365,89.841175
405,Reinildo Mandava,28,Defender,Atlético de Madrid,47.343285,37.597111,49.968644,90.673315,58.390322,61.593891
1163,Nicolas Otamendi,34,Defender,SL Benfica,69.674222,50.047615,70.033573,90.526052,82.702741,78.234147
1293,Benjamin Andre,31,midfield,LOSC Lille,76.690243,61.773127,79.745943,90.204327,90.710299,84.337703
769,Juan Foyth,24,Defender,Villarreal CF,55.771079,41.864854,55.0629,87.637254,63.534537,65.501443
846,Bryan Cristante,27,midfield,AS Roma,81.221514,58.932488,82.371377,87.448373,94.656951,86.424554
838,Vincent Le Goff,32,Defender,FC Lorient,81.383024,58.599697,83.702818,86.209051,94.763114,86.514502


In [43]:
rating['Value'] = df['Value']

# Scaling Defenders Stats

In [44]:
for i in range(len(rating['Position'])):
    if rating.loc[i, 'Position'] == 'Defender':
        rating.loc[i, 'Shooting'] = 0.80 * rating.loc[i, 'Shooting']
        rating.loc[i, 'Dribbling'] = 0.90 * rating.loc[i, 'Dribbling']
        rating.loc[i, 'Passing'] = 1.05 * rating.loc[i, 'Passing']

def normalize_physical_stat(value):
    if value < 75 and value > 50:
        return value + 20
    if value < 50:
        return value + 45
    else:
        return value

rating.loc[(rating['Position'] == 'Defender'), 'Physical'] = rating.loc[(rating['Position'] == 'Defender'), 'Physical'].apply(normalize_physical_stat)

## Scaling Midfielders Stats

In [45]:
for i in range(len(rating['Position'])):
    if rating.loc[i, 'Position'] == 'midfield':
        rating.loc[i, 'Passing'] = 1.03 * rating.loc[i, 'Passing']

# Assigning teams into leagues

In [46]:
England =  [
        'Manchester United', 'Arsenal FC', 'Newcastle United', 'Everton FC',
        'Manchester City', 'Chelsea FC', 'Nottingham Forest', 'Liverpool FC',
        'AFC Bournemouth', 'West Ham United', 'Crystal Palace', 'Wolverhampton Wanderers',
        'Tottenham Hotspur', 'Aston Villa', 'Fulham FC', 'Brentford FC', 'Burnley FC'
    ]

Spain = [
        'Real Madrid', 'FC Barcelona', 'Athletic Bilbao', 'Villarreal CF',
        'Atlético de Madrid', 'Celta de Vigo', 'RCD Mallorca', 'Real Betis Balompié',
        'Valencia CF', 'CA Osasuna', 'Deportivo Alavés', 'Sevilla FC', 'Getafe CF',
        'Cádiz CF', 'UD Las Palmas', 'Real Sociedad', 'Girona FC', 'Rayo Vallecano'
    ]

Germany = [
        'Bayern Munich', 'RB Leipzig', 'Bayer 04 Leverkusen', 'Borussia Dortmund',
        'SC Freiburg', 'SV Werder Bremen', 'FC Augsburg', 'VfL Wolfsburg', 'VfB Stuttgart',
        'Borussia Mönchengladbach', '1.FSV Mainz 05', '1.FC Köln', 'VfL Bochum',
        '1.FC Heidenheim 1846', 'Eintracht Frankfurt', '1.FC Union Berlin', 'TSG 1899 Hoffenheim'
    ]

Italy =  [
        'SSC Napoli', 'Atalanta BC', 'Juventus FC', 'Torino FC', 'AC Milan',
        'ACF Fiorentina', 'Udinese Calcio', 'Inter Milan', 'AS Roma', 'Hellas Verona',
        'Genoa CFC', 'SS Lazio', 'FC Empoli', 'US Salernitana 1919', 'US Sassuolo', 'Bologna FC 1909',
        'US Lecce', 'AC Monza', 'Frosinone Calcio'
    ]

France =  [
        'Paris Saint-Germain', 'RC Lens', 'LOSC Lille', 'Stade Rennais FC',
        'OGC Nice', 'Montpellier HSC', 'AS Monaco', 'Olympique Lyon', 'Olympique Marseille',
        'Clermont Foot 63', 'Stade Reims', 'UD Almería', 'FC Toulouse', 'FC Nantes',
        'RC Strasbourg Alsace', 'FC Lorient', 'Stade Brestois 29', 'FC Metz'
    ]

Netherlands =  [
        'Feyenoord Rotterdam', 'AZ Alkmaar', 'Ajax Amsterdam', 'PSV Eindhoven',
        'Sparta Rotterdam', 'RKC Waalwijk', 'NEC Nijmegen', 'Go Ahead Eagles',
        'FC Utrecht', 'Vitesse Arnhem', 'Fortuna Sittard', 'Excelsior Rotterdam', 'FC Volendam',
        'SC Heerenveen', 'Twente Enschede FC'
    ]
Portugal =  [
        'SL Benfica', 'FC Porto', 'Sporting CP', 'FC Famalicão', 'Rio Ave FC',
        'Gil Vicente FC', 'GD Estoril Praia', 'FC Arouca', 'Portimonense SC', 'SC Braga',
        'Vitória Guimarães SC', 'SC Farense', 'Casa Pia AC', 'Boavista FC', 'FC Vizela',
        'GD Chaves', 'CF Estrela Amadora SAD'
    ]

Belgium = [
        'RWD Molenbeek', 'RSC Anderlecht', 'Royal Antwerp FC', 'KAS Eupen',
        'Cercle Brugge', 'Club Brugge KV', 'KAA Gent', 'Charleroi SC','Cercle Brugge',
        'RSC Charleroi', 'Cercle Brugge '
    ]

Turkey = [
        'Fenerbahce', 'Kasimpasa', 'Galatasaray', 'Alanyaspor', 'Hatayspor',
        'MKE Ankaragücü'
    ]

In [47]:
LeagueList = []
for i in rating['Club']:
    if i in England:
        LeagueList.append('Premier League')
    elif i in Spain:
        LeagueList.append('La Liga')
    elif i in Italy:
        LeagueList.append('Serie A')
    elif i in Germany:
        LeagueList.append('Bundesliga')
    elif i in France:
        LeagueList.append('Ligue 1')
    elif i in Netherlands:
        LeagueList.append('Eredivisie')
    elif i in Portugal:
        LeagueList.append('Primeira Liga')
    elif i in Belgium:
        LeagueList.append('Jupiler Pro League')
    elif i in Turkey:
        LeagueList.append('Super Lig ')
    else:
        LeagueList.append("NA")
        
rating['League'] = LeagueList

# Scaling League Weights for Defense Rating with Hakeem Number

In [48]:
selected = rating[(rating['Position'] == 'Defender') & (rating['League'] == 'Premier League')]['Defending']
min_defending_stat = min(selected)
max_defending_stat = max(selected)

def normalize_defending_stat(value):
    if value < 70 and value > 50:
        return value + 20
    if value < 50:
        return value + 48
    else:
        return value

def normalize_passing_stat(value):
    if value < 70 and value > 50:
        return value + 10
    if value < 50:
        return value + 35
    else:
        return value

# Use .loc to assign the scaled defending stat to the DataFrame
rating.loc[(rating['Position'] == 'Defender') & (rating['League'] == 'Premier League'), 'Defending'] = selected.apply(normalize_defending_stat)
rating.loc[(rating['Position'] == 'Defender') & (rating['League'] == 'Premier League'), 'Passing'] = selected.apply(normalize_passing_stat)


In [49]:
selected = rating[(rating['Position'] == 'Defender') & (rating['League'] == 'La Liga')]['Defending']
min_defending_stat = min(selected)
max_defending_stat = max(selected)

def normalize_defending_stat(value):
    if value < 70 and value > 50:
        return value + 20
    if value < 50:
        return value + 48
    else:
        return value
    
def normalize_passing_stat(value):
    if value < 70 and value > 50:
        return value + 10
    if value < 50:
        return value + 35
    else:
        return value

# Use .loc to assign the scaled defending stat to the DataFrame
rating.loc[(rating['Position'] == 'Defender') & (rating['League'] == 'La Liga'), 'Defending'] = selected.apply(normalize_defending_stat)
rating.loc[(rating['Position'] == 'Defender') & (rating['League'] == 'La Liga'), 'Passing'] = selected.apply(normalize_passing_stat)


In [50]:
selected = rating[(rating['Position'] == 'Defender') & (rating['League'] == 'Bundesliga')]['Defending']
min_defending_stat = min(selected)
max_defending_stat = max(selected)

def normalize_defending_stat(value):
    if value < 70 and value > 50:
        return value + 27.5
    if value < 50:
        return value + 48
    else:
        return value
    
def normalize_passing_stat(value):
    if value < 70 and value > 50:
        return value + 10
    if value < 50:
        return value + 35
    else:
        return value

# Use .loc to assign the scaled defending stat to the DataFrame
rating.loc[(rating['Position'] == 'Defender') & (rating['League'] == 'Bundesliga'), 'Defending'] = selected.apply(normalize_defending_stat)
rating.loc[(rating['Position'] == 'Defender') & (rating['League'] == 'Bundesliga'), 'Passing'] = selected.apply(normalize_passing_stat)


In [51]:
selected = rating[(rating['Position'] == 'Defender') & (rating['League'] == 'Serie A')]['Defending']
min_defending_stat = min(selected)
max_defending_stat = max(selected)

def normalize_defending_stat(value):
    if value < 70 and value > 50:
        return value + 20
    if value < 50:
        return value + 48
    else:
        return value

def normalize_passing_stat(value):
    if value < 70 and value > 50:
        return value + 10
    if value < 50:
        return value + 35
    else:
        return value

# Use .loc to assign the scaled defending stat to the DataFrame
rating.loc[(rating['Position'] == 'Defender') & (rating['League'] == 'Serie A'), 'Defending'] = selected.apply(normalize_defending_stat)
rating.loc[(rating['Position'] == 'Defender') & (rating['League'] == 'Serie A'), 'Passing'] = selected.apply(normalize_passing_stat)


## Re-Compute Overall

In [52]:
selected_columns = ['Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
rating['Overall'] = rating[selected_columns].apply(lambda x: x.nlargest(4).mean(), axis=1)

In [53]:
rating = rating[['Player', 'Age', 'League', 'Club', 'Position', 'Value', 'Overall', 'Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']]

In [54]:
selected_columns = ['Overall', 'Shooting', 'Passing', 'Dribbling', 'Defending', 'Physical']
rating[selected_columns] = rating[selected_columns].round(2)

In [55]:
rating.sort_values(by = 'Overall', ascending=False).head(20)

Unnamed: 0,Player,Age,League,Club,Position,Value,Overall,Shooting,Passing,Dribbling,Defending,Physical
1516,Antoine Griezmann,31,La Liga,Atlético de Madrid,Attack,-10000000.0,94.47,93.79,88.31,99.0,53.43,96.77
3,Bukayo Saka,20,Premier League,Arsenal FC,Attack,55000000.0,93.19,93.15,84.07,97.95,57.02,97.58
0,Bruno Fernandes,27,Premier League,Manchester United,midfield,74900000.0,93.14,86.99,93.91,96.9,60.92,94.78
55,Eberechi Eze,24,Premier League,Crystal Palace,midfield,18000000.0,92.2,92.29,82.02,97.95,56.03,96.56
836,Harry Kane,29,Premier League,Tottenham Hotspur,Attack,0.0,91.64,99.0,75.81,95.22,48.26,96.52
7,Martin Odegaard,23,Premier League,Arsenal FC,midfield,45000000.0,91.38,92.24,85.48,93.58,49.53,94.21
1599,Mohamed Salah,30,Premier League,Liverpool FC,Attack,-25000000.0,91.36,94.3,79.53,95.45,50.07,96.16
1435,Dusan Tadic,33,Eredivisie,Ajax Amsterdam,Attack,-6000000.0,91.0,85.28,99.0,93.81,50.78,85.9
1398,Piotr Zielinski,28,Serie A,SSC Napoli,midfield,-5000000.0,90.77,85.35,89.69,94.9,51.47,93.12
840,Benjamin Bourigeaud,28,Ligue 1,Stade Rennais FC,midfield,0.0,90.76,86.48,87.26,94.21,53.47,95.1


In [56]:
rating[rating['Player'] == 'Josko Gvardiol']

Unnamed: 0,Player,Age,League,Club,Position,Value,Overall,Shooting,Passing,Dribbling,Defending,Physical
10,Josko Gvardiol,20,Bundesliga,RB Leipzig,Defender,40000000.0,76.32,55.37,77.71,61.59,90.71,75.27


In [57]:
rating.to_csv('Fifa_stats.csv')