In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import re
import scipy as sp
import sympy as sym

In [2]:
df = pd.read_csv("fifa21rawdataset.csv", low_memory=False)

# Data Preprocessing Stage

The data is Addressed in Certain Areas and Altered to Reduce Structural Data Nuasance and Quirks


Let's take a look at the first five rows of the data to understand what the data columns and a few rows look like.

In [3]:
df = pd.read_csv("fifa21rawdataset.csv", low_memory=False)

# From the Observation of the Data Columns and Rows 

There is a few nuances resulting to inconsistency in ID numbering.
#let's fix that.

In [4]:
#add leading Zeros to the ID Numbers
df['ID'] = df['ID'].astype(str).str.zfill(6)

In [5]:
df.Name

0                 L. Messi
1        Cristiano Ronaldo
2                 J. Oblak
3             K. De Bruyne
4                Neymar Jr
               ...        
18974               Xia Ao
18975             B. Hough
18976          R. McKinley
18977         Wang Zhen'ao
18978            Zhou Xiao
Name: Name, Length: 18979, dtype: object

In [6]:
df.LongName

0                        Lionel Messi
1        C. Ronaldo dos Santos Aveiro
2                           Jan Oblak
3                     Kevin De Bruyne
4          Neymar da Silva Santos Jr.
                     ...             
18974                          Ao Xia
18975                       Ben Hough
18976                  Ronan McKinley
18977                    Zhen'ao Wang
18978                       Xiao Zhou
Name: LongName, Length: 18979, dtype: object

# Getting Rid of unwanted characters in the Longname column


In [7]:
df['LongName'] = df['LongName'].str.replace(r'\b\w\.', '', regex=True)

In [9]:
df = df.rename(columns={'↓OVA': 'OVA'})

In [10]:
df.describe()

Unnamed: 0,Age,OVA,POT,BOV,Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,...,GK Positioning,GK Reflexes,Total Stats,Base Stats,PAC,SHO,PAS,DRI,DEF,PHY
count,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,...,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0,18979.0
mean,25.194109,65.718636,71.136414,66.751726,248.938142,49.688392,45.842405,51.942726,58.768112,42.696507,...,16.217187,16.519627,1595.286949,355.702197,67.453975,53.457031,57.681016,62.87502,49.866221,64.368934
std,4.71052,6.968999,6.114635,6.747193,74.299428,18.131153,19.567081,17.294409,14.519106,17.646937,...,17.002239,17.854079,269.874789,40.761117,10.677859,13.827425,10.081857,9.927415,16.443213,9.601883
min,16.0,47.0,47.0,48.0,42.0,6.0,3.0,5.0,7.0,3.0,...,2.0,2.0,747.0,232.0,25.0,16.0,25.0,25.0,12.0,28.0
25%,21.0,61.0,67.0,62.0,222.0,38.0,30.0,44.0,54.0,30.0,...,8.0,8.0,1452.0,327.0,61.0,44.0,51.0,57.0,35.0,58.0
50%,25.0,66.0,71.0,67.0,263.0,54.0,49.0,55.0,62.0,44.0,...,11.0,11.0,1627.0,356.0,68.0,56.0,58.0,64.0,53.0,65.0
75%,29.0,70.0,75.0,71.0,297.0,63.0,62.0,64.0,68.0,56.0,...,14.0,14.0,1781.0,384.0,75.0,64.0,64.0,69.0,63.0,71.0
max,53.0,93.0,95.0,93.0,437.0,94.0,95.0,93.0,94.0,90.0,...,91.0,90.0,2316.0,498.0,96.0,93.0,93.0,95.0,91.0,91.0


# Getting an understanding of the data


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18979 entries, 0 to 18978
Data columns (total 77 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   ID                18979 non-null  object
 1   Name              18979 non-null  object
 2   LongName          18979 non-null  object
 3   photoUrl          18979 non-null  object
 4   playerUrl         18979 non-null  object
 5   Nationality       18979 non-null  object
 6   Age               18979 non-null  int64 
 7   OVA               18979 non-null  int64 
 8   POT               18979 non-null  int64 
 9   Club              18979 non-null  object
 10  Contract          18979 non-null  object
 11  Positions         18979 non-null  object
 12  Height            18979 non-null  object
 13  Weight            18979 non-null  object
 14  Preferred Foot    18979 non-null  object
 15  BOV               18979 non-null  int64 
 16  Best Position     18979 non-null  object
 17  Joined      

# Checking For Null Values of all Column

In [12]:
df.isna().sum().sort_values(ascending= False )

Loan Date End       17966
Hits                 2595
GK Diving               0
Sliding Tackle          0
Standing Tackle         0
                    ...  
Heading Accuracy        0
Finishing               0
Crossing                0
Attacking               0
Reactions               0
Length: 77, dtype: int64

In [13]:
df.head()

Unnamed: 0,ID,Name,LongName,photoUrl,playerUrl,Nationality,Age,OVA,POT,Club,...,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits
0,158023,L. Messi,Lionel Messi,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,33,93,93,\n\n\n\nFC Barcelona,...,Medium,Low,5 ★,85,92,91,95,38,65,771
1,20801,Cristiano Ronaldo,Ronaldo dos Santos Aveiro,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,35,92,92,\n\n\n\nJuventus,...,High,Low,5 ★,89,93,81,89,35,77,562
2,200389,J. Oblak,Jan Oblak,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia,27,91,93,\n\n\n\nAtlético Madrid,...,Medium,Medium,3 ★,87,92,78,90,52,90,150
3,192985,K. De Bruyne,Kevin De Bruyne,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,29,91,91,\n\n\n\nManchester City,...,High,High,4 ★,76,86,93,88,64,78,207
4,190871,Neymar Jr,Neymar da Silva Santos Jr.,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,28,91,91,\n\n\n\nParis Saint-Germain,...,High,Medium,5 ★,91,85,86,94,36,59,595


In [14]:
# Check for duplicates
duplicates = df.duplicated()

# Print the number of duplicates
print("Number of duplicates:", duplicates.sum())

# Print the duplicate rows
print("Duplicate rows:")
print(df[duplicates])

Number of duplicates: 0
Duplicate rows:
Empty DataFrame
Columns: [ID, Name, LongName, photoUrl, playerUrl, Nationality, Age, OVA, POT, Club, Contract, Positions, Height, Weight, Preferred Foot, BOV, Best Position, Joined, Loan Date End, Value, Wage, Release Clause, Attacking, Crossing, Finishing, Heading Accuracy, Short Passing, Volleys, Skill, Dribbling, Curve, FK Accuracy, Long Passing, Ball Control, Movement, Acceleration, Sprint Speed, Agility, Reactions, Balance, Power, Shot Power, Jumping, Stamina, Strength, Long Shots, Mentality, Aggression, Interceptions, Positioning, Vision, Penalties, Composure, Defending, Marking, Standing Tackle, Sliding Tackle, Goalkeeping, GK Diving, GK Handling, GK Kicking, GK Positioning, GK Reflexes, Total Stats, Base Stats, W/F, SM, A/W, D/W, IR, PAC, SHO, PAS, DRI, DEF, PHY, Hits]
Index: []

[0 rows x 77 columns]


In [15]:
df['Club'] = df['Club'].str.strip('*&@#\n')

print(df)

           ID               Name                    LongName  \
0      158023           L. Messi                Lionel Messi   
1      020801  Cristiano Ronaldo   Ronaldo dos Santos Aveiro   
2      200389           J. Oblak                   Jan Oblak   
3      192985       K. De Bruyne             Kevin De Bruyne   
4      190871          Neymar Jr  Neymar da Silva Santos Jr.   
...       ...                ...                         ...   
18974  247223             Xia Ao                      Ao Xia   
18975  258760           B. Hough                   Ben Hough   
18976  252757        R. McKinley              Ronan McKinley   
18977  243790       Wang Zhen'ao                Zhen'ao Wang   
18978  252520          Zhou Xiao                   Xiao Zhou   

                                               photoUrl  \
0      https://cdn.sofifa.com/players/158/023/21_60.png   
1      https://cdn.sofifa.com/players/020/801/21_60.png   
2      https://cdn.sofifa.com/players/200/389/21_60.pn

In [16]:
df.head()

Unnamed: 0,ID,Name,LongName,photoUrl,playerUrl,Nationality,Age,OVA,POT,Club,...,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits
0,158023,L. Messi,Lionel Messi,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,33,93,93,FC Barcelona,...,Medium,Low,5 ★,85,92,91,95,38,65,771
1,20801,Cristiano Ronaldo,Ronaldo dos Santos Aveiro,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,35,92,92,Juventus,...,High,Low,5 ★,89,93,81,89,35,77,562
2,200389,J. Oblak,Jan Oblak,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia,27,91,93,Atlético Madrid,...,Medium,Medium,3 ★,87,92,78,90,52,90,150
3,192985,K. De Bruyne,Kevin De Bruyne,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,29,91,91,Manchester City,...,High,High,4 ★,76,86,93,88,64,78,207
4,190871,Neymar Jr,Neymar da Silva Santos Jr.,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,28,91,91,Paris Saint-Germain,...,High,Medium,5 ★,91,85,86,94,36,59,595


The Hits column seems less likely to be feature engineered to make a good sense out of it


# Let's work on the Missing Variables



In [17]:
missing=df.isnull().sum()
missing=missing[missing>0]
missing

Loan Date End    17966
Hits              2595
dtype: int64

# Droping LOAN DATE END COLUMN FOR MISSING VALUES


the Values of Loan Date End are repeated in Contract column which would be worked on subsequently.


In [18]:
df.drop('Loan Date End', axis = 1, inplace= True)

# Handling Hits column for missing values

In [19]:
df['Hits']=df['Hits'].fillna(0)
df['Hits'].isnull().sum()

0

# Let's take a look at all the Columns


In [20]:
pd.set_option('display.max_columns', 77)
df.head()


Unnamed: 0,ID,Name,LongName,photoUrl,playerUrl,Nationality,Age,OVA,POT,Club,Contract,Positions,Height,Weight,Preferred Foot,BOV,Best Position,Joined,Value,Wage,Release Clause,Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits
0,158023,L. Messi,Lionel Messi,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,33,93,93,FC Barcelona,2004 ~ 2021,"RW, ST, CF",170cm,72kg,Left,93,RW,"Jul 1, 2004",€103.5M,€560K,€138.4M,429,85,95,70,91,88,470,96,93,94,91,96,451,91,80,91,94,95,389,86,68,72,69,94,347,44,40,93,95,75,96,91,32,35,24,54,6,11,15,14,8,2231,466,4 ★,4★,Medium,Low,5 ★,85,92,91,95,38,65,771
1,20801,Cristiano Ronaldo,Ronaldo dos Santos Aveiro,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,35,92,92,Juventus,2018 ~ 2022,"ST, LW",187cm,83kg,Right,92,ST,"Jul 10, 2018",€63M,€220K,€75.9M,437,84,95,90,82,86,414,88,81,76,77,92,431,87,91,87,95,71,444,94,95,84,78,93,353,63,29,95,82,84,95,84,28,32,24,58,7,11,15,14,11,2221,464,4 ★,5★,High,Low,5 ★,89,93,81,89,35,77,562
2,200389,J. Oblak,Jan Oblak,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia,27,91,93,Atlético Madrid,2014 ~ 2023,GK,188cm,87kg,Right,91,GK,"Jul 16, 2014",€120M,€125K,€159.4M,95,13,11,15,43,13,109,12,13,14,40,30,307,43,60,67,88,49,268,59,78,41,78,12,140,34,19,11,65,11,68,57,27,12,18,437,87,92,78,90,90,1413,489,3 ★,1★,Medium,Medium,3 ★,87,92,78,90,52,90,150
3,192985,K. De Bruyne,Kevin De Bruyne,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,29,91,91,Manchester City,2015 ~ 2023,"CAM, CM",181cm,70kg,Right,91,CAM,"Aug 30, 2015",€129M,€370K,€161M,407,94,82,55,94,82,441,88,85,83,93,92,398,77,76,78,91,76,408,91,63,89,74,91,408,76,66,88,94,84,91,186,68,65,53,56,15,13,5,10,13,2304,485,5 ★,4★,High,High,4 ★,76,86,93,88,64,78,207
4,190871,Neymar Jr,Neymar da Silva Santos Jr.,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,28,91,91,Paris Saint-Germain,2017 ~ 2022,"LW, CAM",175cm,68kg,Right,91,LW,"Aug 3, 2017",€132M,€270K,€166.5M,408,85,87,62,87,87,448,95,88,89,81,95,453,94,89,96,91,83,357,80,62,81,50,84,356,51,36,87,90,92,93,94,35,30,29,59,9,9,15,15,11,2175,451,5 ★,5★,High,Medium,5 ★,91,85,86,94,36,59,595


# Renaming a Few Columns To be Worked on

In [21]:
df.rename(columns={'Height': 'Height(cm)',
                    'Weight': 'Weight(kg)',
                    'Value': 'Value(Euro)',
                    'Wage': 'Wage(Euro)',
                    'Release Clause': 'Release Clause(Euro)',
                   'LongName':'FullName'}, inplace=True)

In [22]:
col=['Height(cm)','Weight(kg)','Value(Euro)','Wage(Euro)','Release Clause(Euro)']
for i in col:
    if i in df.columns:
        print("Column", i , ":\n", df[i].value_counts(),'\n')
    else:
        print("Column", i , "does not exist in the DataFrame")

Column Height(cm) :
 180cm    1474
178cm    1250
185cm    1182
183cm    1148
175cm    1091
         ... 
6'5"        1
157cm       1
5'4"        1
5'6"        1
155cm       1
Name: Height(cm), Length: 62, dtype: int64 

Column Weight(kg) :
 70kg      1495
75kg      1457
80kg      1108
72kg      1022
78kg       991
          ... 
190lbs       1
130lbs       1
146lbs       1
203lbs       1
157lbs       1
Name: Weight(kg), Length: 79, dtype: int64 

Column Value(Euro) :
 €1.2M     582
€1.1M     559
€1.3M     534
€1.6M     532
€1M       497
         ... 
€99M        1
€121M       1
€71.5M      1
€57M        1
€9K         1
Name: Value(Euro), Length: 255, dtype: int64 

Column Wage(Euro) :
 €2K      2899
€500     2042
€1K      1891
€3K      1823
€4K      1050
         ... 
€270K       2
€370K       1
€290K       1
€88K        1
€560K       1
Name: Wage(Euro), Length: 134, dtype: int64 

Column Release Clause(Euro) :
 €0        1261
€1.1M      609
€1.2M      504
€1.3M      443
€1.4M      385

In [23]:
df1=df
df1.head()

Unnamed: 0,ID,Name,FullName,photoUrl,playerUrl,Nationality,Age,OVA,POT,Club,Contract,Positions,Height(cm),Weight(kg),Preferred Foot,BOV,Best Position,Joined,Value(Euro),Wage(Euro),Release Clause(Euro),Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits
0,158023,L. Messi,Lionel Messi,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,33,93,93,FC Barcelona,2004 ~ 2021,"RW, ST, CF",170cm,72kg,Left,93,RW,"Jul 1, 2004",€103.5M,€560K,€138.4M,429,85,95,70,91,88,470,96,93,94,91,96,451,91,80,91,94,95,389,86,68,72,69,94,347,44,40,93,95,75,96,91,32,35,24,54,6,11,15,14,8,2231,466,4 ★,4★,Medium,Low,5 ★,85,92,91,95,38,65,771
1,20801,Cristiano Ronaldo,Ronaldo dos Santos Aveiro,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,35,92,92,Juventus,2018 ~ 2022,"ST, LW",187cm,83kg,Right,92,ST,"Jul 10, 2018",€63M,€220K,€75.9M,437,84,95,90,82,86,414,88,81,76,77,92,431,87,91,87,95,71,444,94,95,84,78,93,353,63,29,95,82,84,95,84,28,32,24,58,7,11,15,14,11,2221,464,4 ★,5★,High,Low,5 ★,89,93,81,89,35,77,562
2,200389,J. Oblak,Jan Oblak,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia,27,91,93,Atlético Madrid,2014 ~ 2023,GK,188cm,87kg,Right,91,GK,"Jul 16, 2014",€120M,€125K,€159.4M,95,13,11,15,43,13,109,12,13,14,40,30,307,43,60,67,88,49,268,59,78,41,78,12,140,34,19,11,65,11,68,57,27,12,18,437,87,92,78,90,90,1413,489,3 ★,1★,Medium,Medium,3 ★,87,92,78,90,52,90,150
3,192985,K. De Bruyne,Kevin De Bruyne,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,29,91,91,Manchester City,2015 ~ 2023,"CAM, CM",181cm,70kg,Right,91,CAM,"Aug 30, 2015",€129M,€370K,€161M,407,94,82,55,94,82,441,88,85,83,93,92,398,77,76,78,91,76,408,91,63,89,74,91,408,76,66,88,94,84,91,186,68,65,53,56,15,13,5,10,13,2304,485,5 ★,4★,High,High,4 ★,76,86,93,88,64,78,207
4,190871,Neymar Jr,Neymar da Silva Santos Jr.,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,28,91,91,Paris Saint-Germain,2017 ~ 2022,"LW, CAM",175cm,68kg,Right,91,LW,"Aug 3, 2017",€132M,€270K,€166.5M,408,85,87,62,87,87,448,95,88,89,81,95,453,94,89,96,91,83,357,80,62,81,50,84,356,51,36,87,90,92,93,94,35,30,29,59,9,9,15,15,11,2175,451,5 ★,5★,High,Medium,5 ★,91,85,86,94,36,59,595


In [24]:
# get the column names of the DataFrame
cols = df1.columns.tolist()

# move columns 4 and 5 to the end of the list
cols = cols[:3] + cols[6:] + cols[3:6]

# select the columns in the new order
df1 = df[cols]

print(df)

           ID               Name                    FullName  \
0      158023           L. Messi                Lionel Messi   
1      020801  Cristiano Ronaldo   Ronaldo dos Santos Aveiro   
2      200389           J. Oblak                   Jan Oblak   
3      192985       K. De Bruyne             Kevin De Bruyne   
4      190871          Neymar Jr  Neymar da Silva Santos Jr.   
...       ...                ...                         ...   
18974  247223             Xia Ao                      Ao Xia   
18975  258760           B. Hough                   Ben Hough   
18976  252757        R. McKinley              Ronan McKinley   
18977  243790       Wang Zhen'ao                Zhen'ao Wang   
18978  252520          Zhou Xiao                   Xiao Zhou   

                                               photoUrl  \
0      https://cdn.sofifa.com/players/158/023/21_60.png   
1      https://cdn.sofifa.com/players/020/801/21_60.png   
2      https://cdn.sofifa.com/players/200/389/21_60.pn

# Data Description for the version 2 and 1

Here is a brief documentation for each column name in the given dataset:

photoUrl: The URL of the player's photo.

LongName: The full name of the player.

playerUrl: The URL of the player's page on http://sofifa.com.

Nationality: The nationality of the player.

Positions: The positions the player can play.

Name: The short name of the player.

Age: The age of the player.

OVA: The overall rating of the player in FIFA 21.

POT: The potential rating of the player in FIFA 21.

Team & Contract: The team the player is playing for in FIFA 21, along with their contract details.

ID: The unique identifier for the player.

Height: The height of the player in feet and inches.

Weight: The weight of the player in pounds.

foot: The preferred foot of the player.

BOV: The best overall rating the player has achieved in their career.

BP: The best position the player has played in their career.

Growth: The difference between the potential rating and overall rating of the player.

Joined: The date the player joined their current team in FIFA 21.

Loan Date End: The date the player's loan contract ends.

Value: The market value of the player in FIFA 21.

Wage: The weekly wage of the player in FIFA 21.

Release Clause: The release clause value of the player in FIFA 21.

Attacking: The attacking attributes of the player.

Crossing: The crossing attribute of the player.

Finishing: The finishing attribute of the player.

Heading Accuracy: The heading accuracy attribute of the player.

Short Passing: The short passing attribute of the player.

Volleys: The volleys attribute of the player.

Skill: The skill attributes of the player.

Dribbling: The dribbling attribute of the player.

Curve: The curve attribute of the player.

FK Accuracy: The free kick accuracy attribute of the player.

Long Passing: The long passing attribute of the player.

Ball Control: The ball control attribute of the player.

Movement: The movement attributes of the player.

Acceleration: The acceleration attribute of the player.

Sprint Speed: The sprint speed attribute of the player.

Agility: The agility attribute of the player.

Reactions: The reactions attribute of the player.

Balance: The balance attribute of the player.

Power: The power attributes of the player.

Shot Power: The shot power attribute of the player.

Jumping: The jumping attribute of the player.

Stamina: The stamina attribute of the player.

Strength: The strength attribute of the player.

Long Shots: The long shots attribute of the player.

Mentality: The mentality attributes of the player.

Aggression: The aggression attribute of the player.

Interceptions: The interceptions attribute of the player.

Positioning: The positioning attribute of the player.

Vision: The vision attribute of the player.

Penalties: The penalties attribute of the player.

Composure: The composure attribute of the player.

Defending: The defending attributes of the player.

Marking: The marking attribute of the player.

Standing Tackle: The standing tackle attribute of the player.

Sliding Tackle: The sliding tackle attribute of the player.

Goalkeeping: The goalkeeping attributes of the player.

GK Diving: The goalkeeper diving attribute of the player.

GK Handling: The goalkeeper handling attribute of the player.

GK Kicking: The goalkeeper kicking attribute of the player.

GK Positioning: The goalkeeper positioning attribute of the player.

GK Reflexes: This refers to the goalkeeper's ability to react and make saves quickly.

Total Stats: This refers to the overall rating of the player based on their performance in all areas of the game.

Base Stats: This refers to the player's rating in the six main areas of the game: Pace, Shooting, Passing, Dribbling, Defending, and Physicality.

W/F: This refers to the player's weaker foot ability.

SM: This refers to the player's skill moves ability.

A/W: This refers to the player's attacking work rate. It measures how frequently the player participates in attacking actions, such as making runs or positioning themselves in the opponent's half.

D/W: This refers to the player's defensive work rate. It measures how frequently the player participates in defensive actions, such as tracking back or making tackles.

IR: This refers to the player's injury resistance. It measures the player's ability to avoid injuries and how quickly they recover from them.

PAC: This refers to the player's pace or speed attribute. It measures how quickly the player can move with and without the ball.

SHO: This refers to the player's shooting ability. It measures the player's accuracy and power when shooting the ball.

PAS: This refers to the player's passing ability. It measures the player's accuracy and range when passing the ball.

DRI: This refers to the player's dribbling ability. It measures the player's agility, balance, and ball control when dribbling the ball.

DEF: This refers to the player's defensive ability. It measures the player's ability to tackle, intercept, and defend against opposing players.

PHY: This refers to the player's physicality or strength. It measures the player's ability to win physical battles and maintain possession of the ball.

Hits: This refers to the number of times the player's profile has been viewed on the website

In [25]:
df1.head()

Unnamed: 0,ID,Name,FullName,Age,OVA,POT,Club,Contract,Positions,Height(cm),Weight(kg),Preferred Foot,BOV,Best Position,Joined,Value(Euro),Wage(Euro),Release Clause(Euro),Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits,photoUrl,playerUrl,Nationality
0,158023,L. Messi,Lionel Messi,33,93,93,FC Barcelona,2004 ~ 2021,"RW, ST, CF",170cm,72kg,Left,93,RW,"Jul 1, 2004",€103.5M,€560K,€138.4M,429,85,95,70,91,88,470,96,93,94,91,96,451,91,80,91,94,95,389,86,68,72,69,94,347,44,40,93,95,75,96,91,32,35,24,54,6,11,15,14,8,2231,466,4 ★,4★,Medium,Low,5 ★,85,92,91,95,38,65,771,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina
1,20801,Cristiano Ronaldo,Ronaldo dos Santos Aveiro,35,92,92,Juventus,2018 ~ 2022,"ST, LW",187cm,83kg,Right,92,ST,"Jul 10, 2018",€63M,€220K,€75.9M,437,84,95,90,82,86,414,88,81,76,77,92,431,87,91,87,95,71,444,94,95,84,78,93,353,63,29,95,82,84,95,84,28,32,24,58,7,11,15,14,11,2221,464,4 ★,5★,High,Low,5 ★,89,93,81,89,35,77,562,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal
2,200389,J. Oblak,Jan Oblak,27,91,93,Atlético Madrid,2014 ~ 2023,GK,188cm,87kg,Right,91,GK,"Jul 16, 2014",€120M,€125K,€159.4M,95,13,11,15,43,13,109,12,13,14,40,30,307,43,60,67,88,49,268,59,78,41,78,12,140,34,19,11,65,11,68,57,27,12,18,437,87,92,78,90,90,1413,489,3 ★,1★,Medium,Medium,3 ★,87,92,78,90,52,90,150,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia
3,192985,K. De Bruyne,Kevin De Bruyne,29,91,91,Manchester City,2015 ~ 2023,"CAM, CM",181cm,70kg,Right,91,CAM,"Aug 30, 2015",€129M,€370K,€161M,407,94,82,55,94,82,441,88,85,83,93,92,398,77,76,78,91,76,408,91,63,89,74,91,408,76,66,88,94,84,91,186,68,65,53,56,15,13,5,10,13,2304,485,5 ★,4★,High,High,4 ★,76,86,93,88,64,78,207,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium
4,190871,Neymar Jr,Neymar da Silva Santos Jr.,28,91,91,Paris Saint-Germain,2017 ~ 2022,"LW, CAM",175cm,68kg,Right,91,LW,"Aug 3, 2017",€132M,€270K,€166.5M,408,85,87,62,87,87,448,95,88,89,81,95,453,94,89,96,91,83,357,80,62,81,50,84,356,51,36,87,90,92,93,94,35,30,29,59,9,9,15,15,11,2175,451,5 ★,5★,High,Medium,5 ★,91,85,86,94,36,59,595,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil


In [26]:
df2=df1

In [27]:
df2[['Height(cm)', 'Weight(kg)']].dtypes

Height(cm)    object
Weight(kg)    object
dtype: object

In [28]:
df2['Height(cm)'] = df2['Height(cm)'].str.replace('cm', '')

In [29]:
df2['Weight(kg)'] = df2['Weight(kg)'].str.replace('kg', '')

In [30]:
df2['Value(Euro)'] = df2['Value(Euro)'].str.replace('€', '')

In [31]:
df2['Release Clause(Euro)'] = df2['Release Clause(Euro)'].str.replace('€', '')

In [32]:
df2['Wage(Euro)'] = df2['Wage(Euro)'].str.replace('€', '')

# Extract Player's End of contract date from contract column

In [33]:
for i, row in df2.iterrows():
    if "~" in row['Contract']:
        split_str = row['Contract'].split("~", 1)
        df2.at[i, 'Contract_End'] = split_str[1]
    elif "on Loan" in row['Contract']:
            split_str = row['Contract'].strip("on Loan")
            df2.at[i,'Contract_End'] = split_str
    elif row['Contract']== "Free":
            df2.at[i, 'Contract_End']=np.nan
        
        

# Categorising Contract Column

In [34]:
def contract_conv(df, col):
    for i, row in df2.iterrows():
        val = row[col]
        if "~" in val:
            df2.at[i,col]="Full"
        elif "on Loan" in val:
            df2.at[i,col]="On Loan" 
        else:       
            df2.at[i,col]="Free"
contract_conv(df2,'Contract')               
                    
                

In [35]:
df2['Contract']=df2['Contract'].astype('category')

# changing contract column to contract(type)

In [36]:
df2.rename(columns={'Contract': 'Contract(type)'}, inplace = True)
df2['Contract(type)'].unique()

['Full', 'Free']
Categories (2, object): ['Free', 'Full']

In [37]:
df2.head()

Unnamed: 0,ID,Name,FullName,Age,OVA,POT,Club,Contract(type),Positions,Height(cm),Weight(kg),Preferred Foot,BOV,Best Position,Joined,Value(Euro),Wage(Euro),Release Clause(Euro),Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits,photoUrl,playerUrl,Nationality,Contract_End
0,158023,L. Messi,Lionel Messi,33,93,93,FC Barcelona,Full,"RW, ST, CF",170,72,Left,93,RW,"Jul 1, 2004",103.5M,560K,138.4M,429,85,95,70,91,88,470,96,93,94,91,96,451,91,80,91,94,95,389,86,68,72,69,94,347,44,40,93,95,75,96,91,32,35,24,54,6,11,15,14,8,2231,466,4 ★,4★,Medium,Low,5 ★,85,92,91,95,38,65,771,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,2021
1,20801,Cristiano Ronaldo,Ronaldo dos Santos Aveiro,35,92,92,Juventus,Full,"ST, LW",187,83,Right,92,ST,"Jul 10, 2018",63M,220K,75.9M,437,84,95,90,82,86,414,88,81,76,77,92,431,87,91,87,95,71,444,94,95,84,78,93,353,63,29,95,82,84,95,84,28,32,24,58,7,11,15,14,11,2221,464,4 ★,5★,High,Low,5 ★,89,93,81,89,35,77,562,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,2022
2,200389,J. Oblak,Jan Oblak,27,91,93,Atlético Madrid,Full,GK,188,87,Right,91,GK,"Jul 16, 2014",120M,125K,159.4M,95,13,11,15,43,13,109,12,13,14,40,30,307,43,60,67,88,49,268,59,78,41,78,12,140,34,19,11,65,11,68,57,27,12,18,437,87,92,78,90,90,1413,489,3 ★,1★,Medium,Medium,3 ★,87,92,78,90,52,90,150,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia,2023
3,192985,K. De Bruyne,Kevin De Bruyne,29,91,91,Manchester City,Full,"CAM, CM",181,70,Right,91,CAM,"Aug 30, 2015",129M,370K,161M,407,94,82,55,94,82,441,88,85,83,93,92,398,77,76,78,91,76,408,91,63,89,74,91,408,76,66,88,94,84,91,186,68,65,53,56,15,13,5,10,13,2304,485,5 ★,4★,High,High,4 ★,76,86,93,88,64,78,207,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,2023
4,190871,Neymar Jr,Neymar da Silva Santos Jr.,28,91,91,Paris Saint-Germain,Full,"LW, CAM",175,68,Right,91,LW,"Aug 3, 2017",132M,270K,166.5M,408,85,87,62,87,87,448,95,88,89,81,95,453,94,89,96,91,83,357,80,62,81,50,84,356,51,36,87,90,92,93,94,35,30,29,59,9,9,15,15,11,2175,451,5 ★,5★,High,Medium,5 ★,91,85,86,94,36,59,595,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,2022


In [38]:

print(df2.columns)






Index(['ID', 'Name', 'FullName', 'Age', 'OVA', 'POT', 'Club', 'Contract(type)',
       'Positions', 'Height(cm)', 'Weight(kg)', 'Preferred Foot', 'BOV',
       'Best Position', 'Joined', 'Value(Euro)', 'Wage(Euro)',
       'Release Clause(Euro)', 'Attacking', 'Crossing', 'Finishing',
       'Heading Accuracy', 'Short Passing', 'Volleys', 'Skill', 'Dribbling',
       'Curve', 'FK Accuracy', 'Long Passing', 'Ball Control', 'Movement',
       'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
       'Power', 'Shot Power', 'Jumping', 'Stamina', 'Strength', 'Long Shots',
       'Mentality', 'Aggression', 'Interceptions', 'Positioning', 'Vision',
       'Penalties', 'Composure', 'Defending', 'Marking', 'Standing Tackle',
       'Sliding Tackle', 'Goalkeeping', 'GK Diving', 'GK Handling',
       'GK Kicking', 'GK Positioning', 'GK Reflexes', 'Total Stats',
       'Base Stats', 'W/F', 'SM', 'A/W', 'D/W', 'IR', 'PAC', 'SHO', 'PAS',
       'DRI', 'DEF', 'PHY', 'Hits', 'photoUrl',

# Checking if K and M are consistent all the way

In [39]:
nominals = df2['Value(Euro)'].str[-1]
nominals.value_counts()

K    9431
M    9300
0     248
Name: Value(Euro), dtype: int64

# hahaha, the column gives a strange output of variables.
whew Cleaning this set is hell and fun as well

In [40]:
df2['Value(Euro)'].str.contains('K|M|0').sum()

18979

In [41]:
def convert_Value_to_numerals(value_str):
    value_str = value_str.replace('€', '') # remove currency symbol
    if 'M' in value_str:
        return float(value_str.replace('M', '')) * 1000000
    elif 'K' in value_str:
        return float(value_str.replace('K', '')) * 1000
    else:
        return float(value_str)

In [42]:
df2[['Release Clause(Euro)','Wage(Euro)','Value(Euro)']] = df2[['Release Clause(Euro)','Wage(Euro)','Value(Euro)']].applymap(convert_Value_to_numerals)


In [43]:
df2.head()

Unnamed: 0,ID,Name,FullName,Age,OVA,POT,Club,Contract(type),Positions,Height(cm),Weight(kg),Preferred Foot,BOV,Best Position,Joined,Value(Euro),Wage(Euro),Release Clause(Euro),Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits,photoUrl,playerUrl,Nationality,Contract_End
0,158023,L. Messi,Lionel Messi,33,93,93,FC Barcelona,Full,"RW, ST, CF",170,72,Left,93,RW,"Jul 1, 2004",103500000.0,560000.0,138400000.0,429,85,95,70,91,88,470,96,93,94,91,96,451,91,80,91,94,95,389,86,68,72,69,94,347,44,40,93,95,75,96,91,32,35,24,54,6,11,15,14,8,2231,466,4 ★,4★,Medium,Low,5 ★,85,92,91,95,38,65,771,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,2021
1,20801,Cristiano Ronaldo,Ronaldo dos Santos Aveiro,35,92,92,Juventus,Full,"ST, LW",187,83,Right,92,ST,"Jul 10, 2018",63000000.0,220000.0,75900000.0,437,84,95,90,82,86,414,88,81,76,77,92,431,87,91,87,95,71,444,94,95,84,78,93,353,63,29,95,82,84,95,84,28,32,24,58,7,11,15,14,11,2221,464,4 ★,5★,High,Low,5 ★,89,93,81,89,35,77,562,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,2022
2,200389,J. Oblak,Jan Oblak,27,91,93,Atlético Madrid,Full,GK,188,87,Right,91,GK,"Jul 16, 2014",120000000.0,125000.0,159400000.0,95,13,11,15,43,13,109,12,13,14,40,30,307,43,60,67,88,49,268,59,78,41,78,12,140,34,19,11,65,11,68,57,27,12,18,437,87,92,78,90,90,1413,489,3 ★,1★,Medium,Medium,3 ★,87,92,78,90,52,90,150,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia,2023
3,192985,K. De Bruyne,Kevin De Bruyne,29,91,91,Manchester City,Full,"CAM, CM",181,70,Right,91,CAM,"Aug 30, 2015",129000000.0,370000.0,161000000.0,407,94,82,55,94,82,441,88,85,83,93,92,398,77,76,78,91,76,408,91,63,89,74,91,408,76,66,88,94,84,91,186,68,65,53,56,15,13,5,10,13,2304,485,5 ★,4★,High,High,4 ★,76,86,93,88,64,78,207,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,2023
4,190871,Neymar Jr,Neymar da Silva Santos Jr.,28,91,91,Paris Saint-Germain,Full,"LW, CAM",175,68,Right,91,LW,"Aug 3, 2017",132000000.0,270000.0,166500000.0,408,85,87,62,87,87,448,95,88,89,81,95,453,94,89,96,91,83,357,80,62,81,50,84,356,51,36,87,90,92,93,94,35,30,29,59,9,9,15,15,11,2175,451,5 ★,5★,High,Medium,5 ★,91,85,86,94,36,59,595,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,2022


In [44]:
df2[['W/F','SM','IR']].head()

Unnamed: 0,W/F,SM,IR
0,4 ★,4★,5 ★
1,4 ★,5★,5 ★
2,3 ★,1★,3 ★
3,5 ★,4★,4 ★
4,5 ★,5★,5 ★


# Removing the Special Characters From this Columns

In [45]:
df2['W/F'] = df2['W/F'].str.replace('★', '')
df2['SM'] = df2['SM'].str.replace('★', '')
df2['IR'] = df2['IR'].str.replace('★', '')

# Viewing the Columns

In [46]:
df2[['W/F','SM','IR']].head()

Unnamed: 0,W/F,SM,IR
0,4,4,5
1,4,5,5
2,3,1,3
3,5,4,4
4,5,5,5


In [47]:
df2.head()

Unnamed: 0,ID,Name,FullName,Age,OVA,POT,Club,Contract(type),Positions,Height(cm),Weight(kg),Preferred Foot,BOV,Best Position,Joined,Value(Euro),Wage(Euro),Release Clause(Euro),Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits,photoUrl,playerUrl,Nationality,Contract_End
0,158023,L. Messi,Lionel Messi,33,93,93,FC Barcelona,Full,"RW, ST, CF",170,72,Left,93,RW,"Jul 1, 2004",103500000.0,560000.0,138400000.0,429,85,95,70,91,88,470,96,93,94,91,96,451,91,80,91,94,95,389,86,68,72,69,94,347,44,40,93,95,75,96,91,32,35,24,54,6,11,15,14,8,2231,466,4,4,Medium,Low,5,85,92,91,95,38,65,771,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,2021
1,20801,Cristiano Ronaldo,Ronaldo dos Santos Aveiro,35,92,92,Juventus,Full,"ST, LW",187,83,Right,92,ST,"Jul 10, 2018",63000000.0,220000.0,75900000.0,437,84,95,90,82,86,414,88,81,76,77,92,431,87,91,87,95,71,444,94,95,84,78,93,353,63,29,95,82,84,95,84,28,32,24,58,7,11,15,14,11,2221,464,4,5,High,Low,5,89,93,81,89,35,77,562,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,2022
2,200389,J. Oblak,Jan Oblak,27,91,93,Atlético Madrid,Full,GK,188,87,Right,91,GK,"Jul 16, 2014",120000000.0,125000.0,159400000.0,95,13,11,15,43,13,109,12,13,14,40,30,307,43,60,67,88,49,268,59,78,41,78,12,140,34,19,11,65,11,68,57,27,12,18,437,87,92,78,90,90,1413,489,3,1,Medium,Medium,3,87,92,78,90,52,90,150,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia,2023
3,192985,K. De Bruyne,Kevin De Bruyne,29,91,91,Manchester City,Full,"CAM, CM",181,70,Right,91,CAM,"Aug 30, 2015",129000000.0,370000.0,161000000.0,407,94,82,55,94,82,441,88,85,83,93,92,398,77,76,78,91,76,408,91,63,89,74,91,408,76,66,88,94,84,91,186,68,65,53,56,15,13,5,10,13,2304,485,5,4,High,High,4,76,86,93,88,64,78,207,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,2023
4,190871,Neymar Jr,Neymar da Silva Santos Jr.,28,91,91,Paris Saint-Germain,Full,"LW, CAM",175,68,Right,91,LW,"Aug 3, 2017",132000000.0,270000.0,166500000.0,408,85,87,62,87,87,448,95,88,89,81,95,453,94,89,96,91,83,357,80,62,81,50,84,356,51,36,87,90,92,93,94,35,30,29,59,9,9,15,15,11,2175,451,5,5,High,Medium,5,91,85,86,94,36,59,595,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,2022


# drop the name column to keep only the Fullname

In [48]:
df2.drop('Name', axis=1, inplace=True)


In [50]:
df2.head()

Unnamed: 0,ID,FullName,Age,OVA,POT,Club,Contract(type),Positions,Height(cm),Weight(kg),Preferred Foot,BOV,Best Position,Joined,Value(Euro),Wage(Euro),Release Clause(Euro),Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits,photoUrl,playerUrl,Nationality,Contract_End
0,158023,Lionel Messi,33,93,93,FC Barcelona,Full,"RW, ST, CF",170,72,Left,93,RW,"Jul 1, 2004",103500000.0,560000.0,138400000.0,429,85,95,70,91,88,470,96,93,94,91,96,451,91,80,91,94,95,389,86,68,72,69,94,347,44,40,93,95,75,96,91,32,35,24,54,6,11,15,14,8,2231,466,4,4,Medium,Low,5,85,92,91,95,38,65,771,https://cdn.sofifa.com/players/158/023/21_60.png,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,2021
1,20801,Ronaldo dos Santos Aveiro,35,92,92,Juventus,Full,"ST, LW",187,83,Right,92,ST,"Jul 10, 2018",63000000.0,220000.0,75900000.0,437,84,95,90,82,86,414,88,81,76,77,92,431,87,91,87,95,71,444,94,95,84,78,93,353,63,29,95,82,84,95,84,28,32,24,58,7,11,15,14,11,2221,464,4,5,High,Low,5,89,93,81,89,35,77,562,https://cdn.sofifa.com/players/020/801/21_60.png,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,2022
2,200389,Jan Oblak,27,91,93,Atlético Madrid,Full,GK,188,87,Right,91,GK,"Jul 16, 2014",120000000.0,125000.0,159400000.0,95,13,11,15,43,13,109,12,13,14,40,30,307,43,60,67,88,49,268,59,78,41,78,12,140,34,19,11,65,11,68,57,27,12,18,437,87,92,78,90,90,1413,489,3,1,Medium,Medium,3,87,92,78,90,52,90,150,https://cdn.sofifa.com/players/200/389/21_60.png,http://sofifa.com/player/200389/jan-oblak/210006/,Slovenia,2023
3,192985,Kevin De Bruyne,29,91,91,Manchester City,Full,"CAM, CM",181,70,Right,91,CAM,"Aug 30, 2015",129000000.0,370000.0,161000000.0,407,94,82,55,94,82,441,88,85,83,93,92,398,77,76,78,91,76,408,91,63,89,74,91,408,76,66,88,94,84,91,186,68,65,53,56,15,13,5,10,13,2304,485,5,4,High,High,4,76,86,93,88,64,78,207,https://cdn.sofifa.com/players/192/985/21_60.png,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,2023
4,190871,Neymar da Silva Santos Jr.,28,91,91,Paris Saint-Germain,Full,"LW, CAM",175,68,Right,91,LW,"Aug 3, 2017",132000000.0,270000.0,166500000.0,408,85,87,62,87,87,448,95,88,89,81,95,453,94,89,96,91,83,357,80,62,81,50,84,356,51,36,87,90,92,93,94,35,30,29,59,9,9,15,15,11,2175,451,5,5,High,Medium,5,91,85,86,94,36,59,595,https://cdn.sofifa.com/players/190/871/21_60.png,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,2022


In [51]:
df2.to_csv('RealFifadata3Cleaned', index=False)