# FIFA 19/20 Players Skills Match

## Import libraries

In [1]:
import time
import pandas as pd

## Import CSV files

In [2]:
# Locations of the two input CSV files, relative to the notebook's working directory.
FIFA_Player_Stats_filepath = './Input Files/players _ fifa20.csv'
TOP_Players_filepath = './Input Files/Player Match TM FIFA.csv'

# Load the FIFA player attributes and the list of top players to be matched.
df_player_skills = pd.read_csv(filepath_or_buffer=FIFA_Player_Stats_filepath)
df_top_players = pd.read_csv(filepath_or_buffer=TOP_Players_filepath, sep=',')

## Player Skills Dataset

In [3]:
# Preview the first three rows of the FIFA player attributes.
df_player_skills.head(3)

Unnamed: 0,sofifa_id,player_url,short_name,long_name,age,dob,height_cm,weight_kg,nationality,club,...,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona,...,68+2,66+2,66+2,66+2,68+2,63+2,52+2,52+2,52+2,63+2
1,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus,...,65+3,61+3,61+3,61+3,65+3,61+3,53+3,53+3,53+3,61+3
2,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain,...,66+3,61+3,61+3,61+3,66+3,61+3,46+3,46+3,46+3,61+3


## Top Players Dataset

In [4]:
# Preview the first three rows of the top-players file.
df_top_players.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,462250,João Félix,19,Atlético Madrid,Second Striker,242444,João Félix,19,Atlético Madrid,0,0,0.0,0
1,125781,Antoine Griezmann,28,FC Barcelona,Centre-Forward,194765,A. Griezmann,28,FC Barcelona,6,6,6.0,0
2,50202,Eden Hazard,28,Real Madrid,Left Winger,183277,E. Hazard,28,Real Madrid,3,3,3.0,0


In [5]:
# Give the 13 raw columns readable names, then keep only the two player
# identifiers and the position, which is all the matching step needs.
top_player_column_names = [
    'PlayerID', 'Name', 'Age', 'Team', 'Position',
    'PlayerID_FIFA', 'Name_FIFA', 'Age_FIFA', 'Team_FIFA',
    'd1', 'd2', 'd3', 'avg_dist',
]
df_top_players.columns = top_player_column_names
df_top_players = df_top_players[['PlayerID', 'PlayerID_FIFA', 'Position']]
df_top_players.head(n=5)

Unnamed: 0,PlayerID,PlayerID_FIFA,Position
0,462250,242444,Second Striker
1,125781,194765,Centre-Forward
2,50202,183277,Left Winger
3,177907,203263,Centre-Back
4,326031,235243,Centre-Back


## Display Transfermarkt position names

In [6]:
# Count how many top players fall under each position label.
df_top_players.loc[:, 'Position'].value_counts()

Centre-Forward        4
Centre-Back           4
Central Midfield      4
Left Winger           2
Right-Back            2
Second Striker        1
Right Winger          1
Left-Back             1
Defensive Midfield    1
Name: Position, dtype: int64

## Keep only the most relevant FIFA position name (the first one listed)

In [7]:
# A player can list several comma-separated positions; keep only the text
# before the first comma and overwrite the column with it.
primary_position = df_player_skills['player_positions'].str.split(',').str.get(0)
df_player_skills['player_positions'] = primary_position

In [8]:
# The previous cell already reduced `player_positions` to a single position
# per player, so the column can be counted directly without splitting again.
df_player_skills['player_positions'].value_counts()

CB     3162
ST     2582
CM     2193
GK     2036
CDM    1424
RB     1314
LB     1303
CAM    1146
RM     1050
LM     1049
LW      378
RW      369
CF      113
LWB      90
RWB      69
Name: 0, dtype: int64

## Mapping dictionary for position names

In [9]:
# Lookup table from the position labels found in the top-players file to the
# FIFA position codes used in `player_positions`.
# NOTE(review): 'Second Striker' maps to 'ST' and 'Centre-Forward' to 'CF', yet
# the merged preview further down shows the Second Striker row with FIFA
# position 'CF' -- confirm that this mapping is the intended one.
positions = dict([
    ('Centre-Back', 'CB'),
    ('Centre-Forward', 'CF'),
    ('Central Midfield', 'CM'),
    ('Left Winger', 'LW'),
    ('Right-Back', 'RB'),
    ('Right Winger', 'RW'),
    ('Second Striker', 'ST'),
    ('Defensive Midfield', 'CDM'),
    ('Left-Back', 'LB'),
])

## Select the relevant columns for skill comparison

In [10]:
# Keep the player id, the position and the six skill ratings used for the
# comparison, and discard every row that has a missing value in any of them.
skill_comparison_columns = [
    'sofifa_id', 'player_positions',
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
]
df_player_skills = df_player_skills[skill_comparison_columns].dropna(axis=0, how='any')
df_player_skills.head(n=5)

Unnamed: 0,sofifa_id,player_positions,pace,shooting,passing,dribbling,defending,physic
0,158023,RW,87.0,92.0,92.0,96.0,39.0,66.0
1,20801,ST,90.0,93.0,82.0,89.0,35.0,78.0
2,190871,LW,91.0,85.0,87.0,95.0,32.0,58.0
4,183277,LW,91.0,83.0,86.0,94.0,35.0,66.0
5,192985,CAM,76.0,86.0,92.0,86.0,61.0,78.0


## Merge the two datasets (TOP Players & Player Skills)

In [11]:
# Attach each top player's own FIFA position and skill ratings.
# The left frame is restricted to its three base columns first, so re-running
# this cell starts from the same input instead of merging an already-merged
# frame (which would produce duplicated `*_x` / `*_y` columns).
df_top_players = df_top_players[['PlayerID', 'PlayerID_FIFA', 'Position']].merge(
    df_player_skills,
    how='left',  # keep every top player, even when no skills row matches
    left_on='PlayerID_FIFA',
    right_on='sofifa_id',
)
df_top_players

Unnamed: 0,PlayerID,PlayerID_FIFA,Position,sofifa_id,player_positions,pace,shooting,passing,dribbling,defending,physic
0,462250,242444,Second Striker,242444,CF,82.0,80.0,74.0,81.0,40.0,70.0
1,125781,194765,Centre-Forward,194765,CF,81.0,86.0,84.0,89.0,57.0,72.0
2,50202,183277,Left Winger,183277,LW,91.0,83.0,86.0,94.0,35.0,66.0
3,177907,203263,Centre-Back,203263,CB,50.0,53.0,64.0,69.0,81.0,84.0
4,326031,235243,Centre-Back,235243,CB,67.0,58.0,66.0,68.0,83.0,84.0
5,343052,226110,Right Winger,226110,RM,91.0,81.0,75.0,85.0,33.0,67.0
6,281963,220814,Centre-Back,220814,CB,81.0,54.0,70.0,71.0,83.0,82.0
7,326330,228702,Central Midfield,228702,CM,79.0,64.0,84.0,87.0,76.0,76.0
8,357565,231866,Defensive Midfield,231866,CDM,67.0,68.0,77.0,77.0,82.0,80.0
9,182712,210514,Right-Back,210514,RB,90.0,65.0,80.0,84.0,78.0,71.0


## Skills comparison (Euclidean distance between skill ratings)

In [12]:
# For every top player, compute the Euclidean distance between their six skill
# ratings and those of every other FIFA player at the mapped position.
# One row per (top player, candidate) pair is collected in `match_list`.
SKILL_COLUMNS = ['pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic']
MATCH_COLUMNS = ['PlayerID', 'PlayerID_FIFA', 'MatchedPlayerID_FIFA', 'Distance']

match_list = []
skipped_players = []  # top players that cannot be compared (see guard below)
start_time = time.perf_counter()  # monotonic clock, suited to measuring elapsed time

# Select columns by name so the loop does not depend on column order.
top_player_rows = df_top_players[['PlayerID', 'PlayerID_FIFA', 'Position'] + SKILL_COLUMNS]

for PlayerID, PlayerID_FIFA, PlayerPosition, *PlayerSkills in top_player_rows.itertuples(index=False, name=None):

    # A position without a FIFA equivalent would raise KeyError, and missing
    # skills (no match in the left merge) would only yield NaN distances, so
    # such players are skipped and reported instead.
    fifa_position = positions.get(PlayerPosition)
    if fifa_position is None or any(pd.isna(skill) for skill in PlayerSkills):
        skipped_players.append(PlayerID)
        continue

    same_position = df_player_skills.loc[
        df_player_skills['player_positions'] == fifa_position,
        ['sofifa_id'] + SKILL_COLUMNS,
    ]

    for MatchedPlayerID_FIFA, *MatchedPlayerSkills in same_position.itertuples(index=False, name=None):

        # Never match a player with themselves.
        if MatchedPlayerID_FIFA == PlayerID_FIFA:
            continue

        # Euclidean distance: square root of the summed squared differences.
        distance = sum((a - b) ** 2 for a, b in zip(PlayerSkills, MatchedPlayerSkills)) ** (1 / 2)
        match_list.append([PlayerID, PlayerID_FIFA, MatchedPlayerID_FIFA, distance])

# Name the columns at construction so the frame is self-describing.
df = pd.DataFrame(match_list, columns=MATCH_COLUMNS)
print('Process completed in:', time.perf_counter() - start_time, 'seconds.')
if skipped_players:
    print('Skipped PlayerIDs (unmapped position or missing skills):', skipped_players)

Process completed in: 0.2330160140991211 seconds.


## Set column names

In [13]:
# Label the four columns of the pairwise-distance table.
match_column_names = ['PlayerID', 'PlayerID_FIFA', 'MatchedPlayerID_FIFA', 'Distance']
df.columns = match_column_names

## Collect the best 10 player matches (minimum distance)

In [14]:
# For each top player keep the 10 candidates with the smallest distance.
# The candidate id is moved into the index so that it survives `nsmallest`
# and comes back as a regular column after `reset_index`.
indexed_matches = df.set_index('MatchedPlayerID_FIFA')
distance_per_top_player = indexed_matches.groupby(['PlayerID', 'PlayerID_FIFA'])['Distance']
closest_matches = distance_per_top_player.nsmallest(n=10)
df = closest_matches.reset_index()

In [15]:
# Summarise the match table: row count, columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
PlayerID                200 non-null int64
PlayerID_FIFA           200 non-null int64
MatchedPlayerID_FIFA    200 non-null int64
Distance                200 non-null float64
dtypes: float64(1), int64(3)
memory usage: 6.3 KB


In [16]:
# Preview the first five rows of the match table.
df.head(n=5)

Unnamed: 0,PlayerID,PlayerID_FIFA,MatchedPlayerID_FIFA,Distance
0,50202,183277,190871,8.888194
1,50202,183277,222492,12.569805
2,50202,183277,208808,14.56022
3,50202,183277,208722,16.124515
4,50202,183277,229906,16.278821


## Export results to CSV file

In [17]:
# Write the match table to disk without the positional index column.
output_filepath = './Output Files/Player Skills Match.csv'
df.to_csv(output_filepath, index=False)