# Pre Process The Data

## Table of Contents

* [Add Labels](#Add-Labels)
* [Combine All Datasets To 1](#Combine-All-Datasets-To-1)
* [Drop Unused Columns](#Drop-Unused-Columns)
* [Convert Wage to int](#Convert-Wage-to-int)
* [Handling Missing Values](#Handling-Missing-Values)
* [Split to Positions](#Split-to-Positions)
* [FIFA23 - Prep](#FIFA23-Prep)
* [Export](#Export)

## Imports

In [1]:
import pandas as pd
import csv
import numpy as np
import matplotlib.pyplot as plt

In [3]:
path_to_raw = "Dataset/Raw-Data"
output_folder = "Dataset/After-PreProcess"

In [6]:
f_2017 = pd.read_csv(path_to_raw+"/FIFA17_official_data.csv" ,low_memory = False)
f_2018 = pd.read_csv(path_to_raw+"/FIFA18_official_data.csv" ,low_memory = False)
f_2019 = pd.read_csv(path_to_raw+"/FIFA19_official_data.csv" ,low_memory = False)
f_2020 = pd.read_csv(path_to_raw+"/FIFA20_official_data.csv" ,low_memory = False)
f_2021 = pd.read_csv(path_to_raw+"/FIFA21_official_data.csv" ,low_memory = False)
f_2022 = pd.read_csv(path_to_raw+"/FIFA22_official_data.csv" ,low_memory = False)


In [None]:
f_2023 = pd.read_csv(path_to_raw+"/FIFA23_official_data.csv" ,low_memory = False)

In [4]:
f_2017["version"]= 2017
f_2018["version"]= 2018
f_2019["version"]= 2019
f_2020["version"]= 2020
f_2021["version"]= 2021
f_2022["version"]= 2022
f_2023["version"]= 2023

In [5]:
f_2017

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,version
0,176580,L. Suárez,29,https://cdn.sofifa.com/players/176/580/17_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,92,92,FC Barcelona,https://cdn.sofifa.com/teams/241/light_30.png,...,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,88.0,2017
1,178518,R. Nainggolan,28,https://cdn.sofifa.com/players/178/518/17_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,86,86,Roma,https://cdn.sofifa.com/teams/52/light_30.png,...,85.0,88.0,11.0,11.0,14.0,8.0,11.0,CDM,84.0,2017
2,181872,A. Vidal,29,https://cdn.sofifa.com/players/181/872/17_60.png,Chile,https://cdn.sofifa.com/flags/cl.png,87,87,FC Bayern München,https://cdn.sofifa.com/teams/21/light_30.png,...,89.0,84.0,4.0,2.0,4.0,2.0,4.0,CDM,85.0,2017
3,197445,D. Alaba,24,https://cdn.sofifa.com/players/197/445/17_60.png,Austria,https://cdn.sofifa.com/flags/at.png,86,89,FC Bayern München,https://cdn.sofifa.com/teams/21/light_30.png,...,83.0,83.0,5.0,7.0,14.0,15.0,9.0,LB,84.0,2017
4,195864,P. Pogba,23,https://cdn.sofifa.com/players/195/864/17_60.png,France,https://cdn.sofifa.com/flags/fr.png,88,94,Manchester United,https://cdn.sofifa.com/teams/11/light_30.png,...,73.0,73.0,5.0,6.0,2.0,4.0,3.0,CAM,85.0,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17555,233215,C. Addai,18,https://cdn.sofifa.com/players/233/215/17_60.png,Ghana,https://cdn.sofifa.com/flags/gh.png,54,72,Coventry City,https://cdn.sofifa.com/teams/1800/light_30.png,...,13.0,13.0,51.0,51.0,48.0,59.0,60.0,GK,53.0,2017
17556,232911,R. Kamiyama,31,https://cdn.sofifa.com/players/232/911/17_60.png,Japan,https://cdn.sofifa.com/flags/jp.png,57,57,Avispa Fukuoka,https://cdn.sofifa.com/teams/113186/light_30.png,...,10.0,11.0,59.0,46.0,41.0,62.0,59.0,GK,56.0,2017
17557,235387,B. Gaye,18,https://cdn.sofifa.com/players/235/387/17_60.png,Germany,https://cdn.sofifa.com/flags/de.png,53,67,DSC Arminia Bielefeld,https://cdn.sofifa.com/teams/159/light_30.png,...,13.0,12.0,54.0,50.0,49.0,54.0,58.0,GK,52.0,2017
17558,237904,K. Olifirenko,18,https://cdn.sofifa.com/players/237/904/17_60.png,Russia,https://cdn.sofifa.com/flags/ru.png,50,66,FC Tom Tomsk,https://cdn.sofifa.com/teams/110233/light_30.png,...,13.0,13.0,47.0,59.0,52.0,44.0,49.0,GK,49.0,2017


In [7]:
def addNextGradeToDf (prev_year ,  next_year):
    grade_dict = dict(zip(next_year['ID'], next_year['Overall']))
    prev_year['Next_Overall'] = prev_year['ID'].map(grade_dict)
    return prev_year


In [8]:
def addNextClubToDf (prev_year ,  next_year):
    grade_dict = dict(zip(next_year['ID'], next_year['Club']))
    prev_year['Next_Year_Club'] = prev_year['ID'].map(grade_dict)
    return prev_year

## Add Labels 

In [9]:
addNextGradeToDf(f_2017, f_2018)
addNextClubToDf(f_2017, f_2018)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,version,Next_Overall,Next_Year_Club
0,176580,L. Suárez,29,https://cdn.sofifa.com/players/176/580/17_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,92,92,FC Barcelona,https://cdn.sofifa.com/teams/241/light_30.png,...,27.0,25.0,31.0,33.0,37.0,ST,88.0,2017,92.0,FC Barcelona
1,178518,R. Nainggolan,28,https://cdn.sofifa.com/players/178/518/17_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,86,86,Roma,https://cdn.sofifa.com/teams/52/light_30.png,...,11.0,11.0,14.0,8.0,11.0,CDM,84.0,2017,86.0,Roma
2,181872,A. Vidal,29,https://cdn.sofifa.com/players/181/872/17_60.png,Chile,https://cdn.sofifa.com/flags/cl.png,87,87,FC Bayern München,https://cdn.sofifa.com/teams/21/light_30.png,...,4.0,2.0,4.0,2.0,4.0,CDM,85.0,2017,86.0,FC Bayern München
3,197445,D. Alaba,24,https://cdn.sofifa.com/players/197/445/17_60.png,Austria,https://cdn.sofifa.com/flags/at.png,86,89,FC Bayern München,https://cdn.sofifa.com/teams/21/light_30.png,...,5.0,7.0,14.0,15.0,9.0,LB,84.0,2017,85.0,FC Bayern München
4,195864,P. Pogba,23,https://cdn.sofifa.com/players/195/864/17_60.png,France,https://cdn.sofifa.com/flags/fr.png,88,94,Manchester United,https://cdn.sofifa.com/teams/11/light_30.png,...,5.0,6.0,2.0,4.0,3.0,CAM,85.0,2017,88.0,Manchester United
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17555,233215,C. Addai,18,https://cdn.sofifa.com/players/233/215/17_60.png,Ghana,https://cdn.sofifa.com/flags/gh.png,54,72,Coventry City,https://cdn.sofifa.com/teams/1800/light_30.png,...,51.0,51.0,48.0,59.0,60.0,GK,53.0,2017,,
17556,232911,R. Kamiyama,31,https://cdn.sofifa.com/players/232/911/17_60.png,Japan,https://cdn.sofifa.com/flags/jp.png,57,57,Avispa Fukuoka,https://cdn.sofifa.com/teams/113186/light_30.png,...,59.0,46.0,41.0,62.0,59.0,GK,56.0,2017,,
17557,235387,B. Gaye,18,https://cdn.sofifa.com/players/235/387/17_60.png,Germany,https://cdn.sofifa.com/flags/de.png,53,67,DSC Arminia Bielefeld,https://cdn.sofifa.com/teams/159/light_30.png,...,54.0,50.0,49.0,54.0,58.0,GK,52.0,2017,60.0,DSC Arminia Bielefeld
17558,237904,K. Olifirenko,18,https://cdn.sofifa.com/players/237/904/17_60.png,Russia,https://cdn.sofifa.com/flags/ru.png,50,66,FC Tom Tomsk,https://cdn.sofifa.com/teams/110233/light_30.png,...,47.0,59.0,52.0,44.0,49.0,GK,49.0,2017,,


In [10]:
addNextGradeToDf(f_2018, f_2019)
addNextClubToDf(f_2018, f_2019)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause,version,Next_Overall,Next_Year_Club
0,176580,L. Suárez,30,https://cdn.sofifa.com/players/176/580/18_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,92,92,FC Barcelona,https://cdn.sofifa.com/teams/241/light_30.png,...,25.0,31.0,33.0,37.0,CF,88.0,€198.9M,2018,91.0,FC Barcelona
1,178518,R. Nainggolan,29,https://cdn.sofifa.com/players/178/518/18_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,86,86,Roma,https://cdn.sofifa.com/teams/52/light_30.png,...,11.0,14.0,8.0,11.0,CDM,84.0,€72.3M,2018,85.0,Inter
2,203551,A. Florenzi,26,https://cdn.sofifa.com/players/203/551/18_60.png,Italy,https://cdn.sofifa.com/flags/it.png,82,83,Roma,https://cdn.sofifa.com/teams/52/light_30.png,...,8.0,10.0,11.0,10.0,CF,81.0,€37.3M,2018,82.0,Roma
3,197445,D. Alaba,25,https://cdn.sofifa.com/players/197/445/18_60.png,Austria,https://cdn.sofifa.com/flags/at.png,85,87,FC Bayern München,https://cdn.sofifa.com/teams/21/light_30.png,...,7.0,14.0,15.0,9.0,LB,82.0,€66.4M,2018,84.0,FC Bayern München
4,195864,P. Pogba,24,https://cdn.sofifa.com/players/195/864/18_60.png,France,https://cdn.sofifa.com/flags/fr.png,88,92,Manchester United,https://cdn.sofifa.com/teams/11/light_30.png,...,6.0,2.0,4.0,3.0,CM,85.0,€146.3M,2018,88.0,Manchester United
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17922,240757,A. Viscovo,18,https://cdn.sofifa.com/players/240/757/18_60.png,Italy,https://cdn.sofifa.com/flags/it.png,51,70,Crotone,https://cdn.sofifa.com/teams/110734/light_30.png,...,46.0,57.0,52.0,47.0,GK,50.0,€146K,2018,,
17923,237463,A. Kelsey,17,https://cdn.sofifa.com/players/237/463/18_60.png,England,https://cdn.sofifa.com/flags/gb-eng.png,46,63,Scunthorpe United,https://cdn.sofifa.com/teams/1949/light_30.png,...,47.0,49.0,42.0,48.0,GK,45.0,€119K,2018,,
17924,240790,A. Giacomel,18,https://cdn.sofifa.com/players/240/790/18_60.png,Italy,https://cdn.sofifa.com/flags/it.png,49,64,Empoli,https://cdn.sofifa.com/teams/1746/light_30.png,...,55.0,48.0,45.0,56.0,GK,48.0,€111K,2018,,
17925,241206,D. Maekawa,22,https://cdn.sofifa.com/players/241/206/18_60.png,Japan,https://cdn.sofifa.com/flags/jp.png,54,61,Vissel Kobe,https://cdn.sofifa.com/teams/101146/light_30.png,...,51.0,44.0,55.0,59.0,GK,53.0,€102K,2018,60.0,Vissel Kobe


In [11]:
addNextGradeToDf(f_2019, f_2020)
addNextClubToDf(f_2019, f_2020)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause,version,Next_Overall,Next_Year_Club
0,176580,L. Suárez,31,https://cdn.sofifa.com/players/176/580/19_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,91,91,FC Barcelona,https://cdn.sofifa.com/teams/241/light_30.png,...,25.0,31.0,33.0,37.0,ST,87.0,€164M,2019,89.0,FC Barcelona
1,194765,A. Griezmann,27,https://cdn.sofifa.com/players/194/765/19_60.png,France,https://cdn.sofifa.com/flags/fr.png,89,89,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,...,8.0,14.0,13.0,14.0,LW,87.0,€145.6M,2019,89.0,FC Barcelona
2,177003,L. Modrić,32,https://cdn.sofifa.com/players/177/003/19_60.png,Croatia,https://cdn.sofifa.com/flags/hr.png,91,91,Real Madrid,https://cdn.sofifa.com/teams/243/light_30.png,...,9.0,7.0,14.0,9.0,CM,88.0,€137.4M,2019,89.0,Real Madrid
3,224334,M. Acuña,26,https://cdn.sofifa.com/players/224/334/19_60.png,Argentina,https://cdn.sofifa.com/flags/ar.png,81,81,Sporting CP,https://cdn.sofifa.com/teams/237/light_30.png,...,14.0,13.0,13.0,14.0,LM,81.0,€38M,2019,82.0,Sporting CP
4,192985,K. De Bruyne,27,https://cdn.sofifa.com/players/192/985/19_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,...,13.0,5.0,10.0,13.0,CAM,89.0,€172.1M,2019,91.0,Manchester City
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17938,233940,M. Brzozowski,19,https://cdn.sofifa.com/players/233/940/19_60.png,Poland,https://cdn.sofifa.com/flags/pl.png,49,60,Queens Park Rangers,https://cdn.sofifa.com/teams/15/light_30.png,...,52.0,52.0,44.0,51.0,GK,48.0,€84K,2019,,
17939,245370,E. Destanoglu,17,https://cdn.sofifa.com/players/245/370/19_60.png,Turkey,https://cdn.sofifa.com/flags/tr.png,50,68,Beşiktaş JK,https://cdn.sofifa.com/teams/327/light_30.png,...,52.0,50.0,47.0,52.0,GK,49.0,€126K,2019,62.0,Beşiktaş JK
17940,233215,C. Addai,20,https://cdn.sofifa.com/players/233/215/19_60.png,Ghana,https://cdn.sofifa.com/flags/gh.png,51,61,Coventry City,https://cdn.sofifa.com/teams/1800/light_30.png,...,48.0,40.0,54.0,56.0,GK,50.0,€98K,2019,,
17941,246648,O. Battersby,16,https://cdn.sofifa.com/players/246/648/19_60.png,England,https://cdn.sofifa.com/flags/gb-eng.png,50,70,Grimsby Town,https://cdn.sofifa.com/teams/92/light_30.png,...,51.0,50.0,52.0,53.0,GK,49.0,€138K,2019,,


In [12]:
addNextGradeToDf(f_2020, f_2021)
addNextClubToDf(f_2020, f_2021)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause,DefensiveAwareness,version,Next_Overall,Next_Year_Club
0,176580,L. Suárez,32,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,89,89,FC Barcelona,https://cdn.sofifa.com/teams/241/light_30.png,...,31.0,33.0,37.0,ST,89.0,€108.7M,57.0,2020,87.0,Atlético Madrid
1,194765,A. Griezmann,28,https://cdn.sofifa.com/players/194/765/20_60.png,France,https://cdn.sofifa.com/flags/fr.png,89,89,FC Barcelona,https://cdn.sofifa.com/teams/241/light_30.png,...,14.0,13.0,14.0,CF,89.0,€141.5M,59.0,2020,87.0,FC Barcelona
2,212198,Bruno Fernandes,24,https://cdn.sofifa.com/players/212/198/20_60.png,Portugal,https://cdn.sofifa.com/flags/pt.png,86,89,Manchester United,https://cdn.sofifa.com/teams/11/light_30.png,...,15.0,8.0,14.0,CAM,87.0,€111.6M,72.0,2020,87.0,Manchester United
3,192985,K. De Bruyne,28,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,...,5.0,10.0,13.0,CAM,91.0,€166.5M,68.0,2020,91.0,Manchester City
4,224334,M. Acuña,27,https://cdn.sofifa.com/players/224/334/20_60.png,Argentina,https://cdn.sofifa.com/flags/ar.png,82,82,Sporting CP,https://cdn.sofifa.com/teams/237/light_30.png,...,13.0,13.0,14.0,LB,82.0,€40M,79.0,2020,83.0,Sevilla FC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17099,251433,B. Voll,18,https://cdn.sofifa.com/players/251/433/20_60.png,Germany,https://cdn.sofifa.com/flags/de.png,51,63,FC Hansa Rostock,https://cdn.sofifa.com/teams/27/light_30.png,...,52.0,42.0,57.0,GK,51.0,€83K,5.0,2020,51.0,FC Hansa Rostock
17100,239426,18 N. Krucker,18,https://cdn.sofifa.com/players/239/426/18_60.png,Switzerland,https://cdn.sofifa.com/flags/ch.png,50,61,FC St. Gallen,https://cdn.sofifa.com/teams/898/light_30.png,...,55.0,49.0,51.0,GK,49.0,€66K,,2020,,
17101,240757,18 A. Viscovo,18,https://cdn.sofifa.com/players/240/757/18_60.png,Italy,https://cdn.sofifa.com/flags/it.png,51,70,Crotone,https://cdn.sofifa.com/teams/110734/light_30.png,...,57.0,52.0,47.0,GK,50.0,€146K,,2020,,
17102,248182,H. Sveijer,17,https://cdn.sofifa.com/players/248/182/20_60.png,Sweden,https://cdn.sofifa.com/flags/se.png,48,63,IK Sirius,https://cdn.sofifa.com/teams/113458/light_30.png,...,48.0,47.0,51.0,GK,48.0,€94K,8.0,2020,49.0,IK Sirius


In [13]:
addNextGradeToDf(f_2021, f_2022)
addNextClubToDf(f_2021, f_2022)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause,DefensiveAwareness,version,Next_Overall,Next_Year_Club
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,...,31.0,33.0,37.0,ST,87.0,€64.6M,57.0,2021,88.0,Atlético de Madrid
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,...,5.0,10.0,13.0,CAM,91.0,€161M,68.0,2021,91.0,Manchester City
2,212198,Bruno Fernandes,25,https://cdn.sofifa.com/players/212/198/20_60.png,Portugal,https://cdn.sofifa.com/flags/pt.png,87,90,Manchester United,https://cdn.sofifa.com/teams/11/light_30.png,...,15.0,8.0,14.0,CAM,88.0,€124.4M,72.0,2021,88.0,Manchester United
3,194765,A. Griezmann,29,https://cdn.sofifa.com/players/194/765/20_60.png,France,https://cdn.sofifa.com/flags/fr.png,87,87,FC Barcelona,https://cdn.sofifa.com/teams/241/light_30.png,...,14.0,13.0,14.0,ST,87.0,€103.5M,59.0,2021,85.0,Atlético de Madrid
4,224334,M. Acuña,28,https://cdn.sofifa.com/players/224/334/20_60.png,Argentina,https://cdn.sofifa.com/flags/ar.png,83,83,Sevilla FC,https://cdn.sofifa.com/teams/481/light_30.png,...,13.0,13.0,14.0,LB,83.0,€46.2M,79.0,2021,84.0,Sevilla FC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17103,247866,19 C. Miszta,16,https://cdn.sofifa.com/players/247/866/19_60.png,Poland,https://cdn.sofifa.com/flags/pl.png,50,70,Legia Warszawa,https://cdn.sofifa.com/teams/1871/light_30.png,...,56.0,40.0,56.0,GK,49.0,€120K,,2021,59.0,Legia Warszawa
17104,251433,B. Voll,19,https://cdn.sofifa.com/players/251/433/20_60.png,Germany,https://cdn.sofifa.com/flags/de.png,51,63,FC Hansa Rostock,https://cdn.sofifa.com/teams/27/light_30.png,...,52.0,42.0,57.0,GK,51.0,€83K,5.0,2021,58.0,F.C. Hansa Rostock
17105,252420,T. Parker,18,https://cdn.sofifa.com/players/252/420/20_60.png,Northern Ireland,https://cdn.sofifa.com/flags/gb-nir.png,51,70,Luton Town,https://cdn.sofifa.com/teams/1923/light_30.png,...,50.0,53.0,55.0,GK,51.0,€149K,8.0,2021,,
17106,248182,H. Sveijer,18,https://cdn.sofifa.com/players/248/182/20_60.png,Sweden,https://cdn.sofifa.com/flags/se.png,49,63,IK Sirius,https://cdn.sofifa.com/teams/113458/light_30.png,...,49.0,50.0,51.0,GK,49.0,€94K,8.0,2021,,


In [14]:
addNextGradeToDf(f_2022, f_2023)
addNextClubToDf(f_2022, f_2023)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause,DefensiveAwareness,version,Next_Overall,Next_Year_Club
0,212198,Bruno Fernandes,26,https://cdn.sofifa.com/players/212/198/22_60.png,Portugal,https://cdn.sofifa.com/flags/pt.png,88,89,Manchester United,https://cdn.sofifa.com/teams/11/30.png,...,15.0,8.0,14.0,CAM,88.0,€206.9M,72.0,2022,86.0,Manchester United
1,209658,L. Goretzka,26,https://cdn.sofifa.com/players/209/658/22_60.png,Germany,https://cdn.sofifa.com/flags/de.png,87,88,FC Bayern München,https://cdn.sofifa.com/teams/21/30.png,...,15.0,11.0,9.0,CM,87.0,€160.4M,74.0,2022,87.0,FC Bayern München
2,176580,L. Suárez,34,https://cdn.sofifa.com/players/176/580/22_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,88,88,Atlético de Madrid,https://cdn.sofifa.com/teams/240/30.png,...,31.0,33.0,37.0,ST,88.0,€91.2M,42.0,2022,84.0,Club Nacional de Football
3,192985,K. De Bruyne,30,https://cdn.sofifa.com/players/192/985/22_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/30.png,...,5.0,10.0,13.0,CM,91.0,€232.2M,68.0,2022,91.0,Manchester City
4,224334,M. Acuña,29,https://cdn.sofifa.com/players/224/334/22_60.png,Argentina,https://cdn.sofifa.com/flags/ar.png,84,84,Sevilla FC,https://cdn.sofifa.com/teams/481/30.png,...,13.0,13.0,14.0,LB,84.0,€77.7M,80.0,2022,85.0,Sevilla FC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16705,240558,18 L. Clayton,17,https://cdn.sofifa.com/players/240/558/18_60.png,England,https://cdn.sofifa.com/flags/gb-eng.png,53,70,Cheltenham Town,https://cdn.sofifa.com/teams/1936/30.png,...,52.0,50.0,59.0,GK,52.0,€238K,,2022,,
16706,262846,�. Dobre,20,https://cdn.sofifa.com/players/262/846/22_60.png,Romania,https://cdn.sofifa.com/flags/ro.png,53,63,FC Academica Clinceni,https://cdn.sofifa.com/teams/113391/30.png,...,53.0,48.0,58.0,GK,53.0,€279K,5.0,2022,55.0,FC Academica Clinceni
16707,241317,21 Xue Qinghao,19,https://cdn.sofifa.com/players/241/317/21_60.png,China PR,https://cdn.sofifa.com/flags/cn.png,47,60,Shanghai Shenhua FC,https://cdn.sofifa.com/teams/110955/30.png,...,45.0,38.0,52.0,GK,47.0,€223K,21.0,2022,,
16708,259646,A. Shaikh,18,https://cdn.sofifa.com/players/259/646/22_60.png,India,https://cdn.sofifa.com/flags/in.png,47,67,ATK Mohun Bagan FC,https://cdn.sofifa.com/teams/113146/30.png,...,39.0,45.0,49.0,GK,47.0,€259K,7.0,2022,,


## Combine All Datasets To 1

In [15]:
df = pd.concat([f_2017 , f_2018 , f_2019 , f_2020 , f_2021, f_2022])


In [16]:
df

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,...,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,version,Next_Overall,Next_Year_Club,Release Clause,DefensiveAwareness
0,176580,L. Suárez,29,https://cdn.sofifa.com/players/176/580/17_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,92,92,FC Barcelona,https://cdn.sofifa.com/teams/241/light_30.png,...,31.0,33.0,37.0,ST,88.0,2017,92.0,FC Barcelona,,
1,178518,R. Nainggolan,28,https://cdn.sofifa.com/players/178/518/17_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,86,86,Roma,https://cdn.sofifa.com/teams/52/light_30.png,...,14.0,8.0,11.0,CDM,84.0,2017,86.0,Roma,,
2,181872,A. Vidal,29,https://cdn.sofifa.com/players/181/872/17_60.png,Chile,https://cdn.sofifa.com/flags/cl.png,87,87,FC Bayern München,https://cdn.sofifa.com/teams/21/light_30.png,...,4.0,2.0,4.0,CDM,85.0,2017,86.0,FC Bayern München,,
3,197445,D. Alaba,24,https://cdn.sofifa.com/players/197/445/17_60.png,Austria,https://cdn.sofifa.com/flags/at.png,86,89,FC Bayern München,https://cdn.sofifa.com/teams/21/light_30.png,...,14.0,15.0,9.0,LB,84.0,2017,85.0,FC Bayern München,,
4,195864,P. Pogba,23,https://cdn.sofifa.com/players/195/864/17_60.png,France,https://cdn.sofifa.com/flags/fr.png,88,94,Manchester United,https://cdn.sofifa.com/teams/11/light_30.png,...,2.0,4.0,3.0,CAM,85.0,2017,88.0,Manchester United,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16705,240558,18 L. Clayton,17,https://cdn.sofifa.com/players/240/558/18_60.png,England,https://cdn.sofifa.com/flags/gb-eng.png,53,70,Cheltenham Town,https://cdn.sofifa.com/teams/1936/30.png,...,52.0,50.0,59.0,GK,52.0,2022,,,€238K,
16706,262846,�. Dobre,20,https://cdn.sofifa.com/players/262/846/22_60.png,Romania,https://cdn.sofifa.com/flags/ro.png,53,63,FC Academica Clinceni,https://cdn.sofifa.com/teams/113391/30.png,...,53.0,48.0,58.0,GK,53.0,2022,55.0,FC Academica Clinceni,€279K,5.0
16707,241317,21 Xue Qinghao,19,https://cdn.sofifa.com/players/241/317/21_60.png,China PR,https://cdn.sofifa.com/flags/cn.png,47,60,Shanghai Shenhua FC,https://cdn.sofifa.com/teams/110955/30.png,...,45.0,38.0,52.0,GK,47.0,2022,,,€223K,21.0
16708,259646,A. Shaikh,18,https://cdn.sofifa.com/players/259/646/22_60.png,India,https://cdn.sofifa.com/flags/in.png,47,67,ATK Mohun Bagan FC,https://cdn.sofifa.com/teams/113146/30.png,...,39.0,45.0,49.0,GK,47.0,2022,,,€259K,7.0


## Drop Unused Columns 

In [17]:
df.columns

Index(['ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall',
       'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special',
       'Preferred Foot', 'International Reputation', 'Weak Foot',
       'Skill Moves', 'Work Rate', 'Body Type', 'Real Face', 'Position',
       'Jersey Number', 'Joined', 'Loaned From', 'Contract Valid Until',
       'Height', 'Weight', 'Crossing', 'Finishing', 'HeadingAccuracy',
       'ShortPassing', 'Volleys', 'Dribbling', 'Curve', 'FKAccuracy',
       'LongPassing', 'BallControl', 'Acceleration', 'SprintSpeed', 'Agility',
       'Reactions', 'Balance', 'ShotPower', 'Jumping', 'Stamina', 'Strength',
       'LongShots', 'Aggression', 'Interceptions', 'Positioning', 'Vision',
       'Penalties', 'Composure', 'Marking', 'StandingTackle', 'SlidingTackle',
       'GKDiving', 'GKHandling', 'GKKicking', 'GKPositioning', 'GKReflexes',
       'Best Position', 'Best Overall Rating', 'version', 'Next_Overall',
       'Next_Year_Club', 'Release Clause', '

In [18]:
df =df.drop([ 'Best Overall Rating' ,
             'Release Clause', 'Loaned From', 'Contract Valid Until', 'Jersey Number', 'Joined'
             , 'Nationality', 'Real Face', 'Photo', 'Flag', 'Club Logo', 'Preferred Foot',
            'Real Face', 'Special' , 'Body Type', "Marking" ], axis=1)

In [171]:
df

Unnamed: 0,ID,Name,Age,Overall,Potential,Club,Value,Wage,International Reputation,Weak Foot,...,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,version,Next_Overall,Next_Year_Club,DefensiveAwareness
0,176580,L. Suárez,29,92,92,FC Barcelona,€83M,€525K,5.0,4.0,...,27.0,25.0,31.0,33.0,37.0,ST,2017,92.0,FC Barcelona,
1,178518,R. Nainggolan,28,86,86,Roma,€37.5M,€130K,3.0,3.0,...,11.0,11.0,14.0,8.0,11.0,CDM,2017,86.0,Roma,
2,181872,A. Vidal,29,87,87,FC Bayern München,€41.5M,€180K,4.0,4.0,...,4.0,2.0,4.0,2.0,4.0,CDM,2017,86.0,FC Bayern München,
3,197445,D. Alaba,24,86,89,FC Bayern München,€41.5M,€140K,4.0,4.0,...,5.0,7.0,14.0,15.0,9.0,LB,2017,85.0,FC Bayern München,
4,195864,P. Pogba,23,88,94,Manchester United,€71.5M,€225K,4.0,4.0,...,5.0,6.0,2.0,4.0,3.0,CAM,2017,88.0,Manchester United,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16705,240558,18 L. Clayton,17,53,70,Cheltenham Town,€100K,€1K,1.0,2.0,...,55.0,54.0,52.0,50.0,59.0,GK,2022,,,
16706,262846,�. Dobre,20,53,63,FC Academica Clinceni,€180K,€550,1.0,2.0,...,57.0,52.0,53.0,48.0,58.0,GK,2022,55.0,FC Academica Clinceni,5.0
16707,241317,21 Xue Qinghao,19,47,60,Shanghai Shenhua FC,€100K,€700,1.0,2.0,...,49.0,48.0,45.0,38.0,52.0,GK,2022,,,21.0
16708,259646,A. Shaikh,18,47,67,ATK Mohun Bagan FC,€110K,€500,1.0,3.0,...,49.0,41.0,39.0,45.0,49.0,GK,2022,,,7.0


## Convert Wage to int

In [172]:
def convert_wage_to_int(df, column_name):
    for index, row in df.iterrows():
        wage = row[column_name]
        wage = wage.replace('€', '') # Remove euro symbol
        if wage[-1] == "M":
            df.at[index, column_name] = int(float(wage[0:-1]) * 1000000)
        elif wage[-1] == "K":
            df.at[index, column_name] = int(float(wage[0:-1]) * 1000)
        else:
            df.at[index, column_name] = int(float(wage))
    return df


In [67]:
df = convert_wage_to_int(df , "Wage")
df = convert_wage_to_int(df , "Value")

In [24]:
nan_cols = df.isna().any()

# print list of columns with NaN values
print(nan_cols[nan_cols == True].index.tolist())

['Club', 'Position', 'Volleys', 'Curve', 'Agility', 'Balance', 'Jumping', 'Interceptions', 'Positioning', 'Vision', 'Composure', 'Marking', 'SlidingTackle', 'Next_Overall', 'Next_Year_Club']


#### Handling Missing Values

In [186]:
fifa_rating_categories = {
    'Pace': ['SprintSpeed', 'Acceleration'],
    'Shooting': ['Positioning', 'Finishing', 'ShotPower', 'LongShots', 'Volleys', 'Penalties'],
    'Passing': ['Vision', 'Crossing', 'FKAccuracy', 'ShortPassing', 'LongPassing', 'Curve'],
    'Dribbling': ['Agility', 'Balance', 'Reactions', 'BallControl', 'Dribbling', 'Composure'],
    'Defending': ['Interceptions', 'HeadingAccuracy', 'StandingTackle', 'SlidingTackle' , 'DefensiveAwareness'],
    'Physicality': ['Jumping', 'Stamina', 'Strength', 'Aggression']
}


In [187]:
# Dropping Unknown Players For Next Fifa
df = df.dropna(subset=['Next_Overall'])

In [188]:
def drop_rows_with_missing_ratings(df, fifa_rating_categories):
    """
    Drops rows with 2 or more missing sub-ratings in a given FIFA rating category.

    Args:
        df (pandas.DataFrame): The FIFA dataframe.
        fifa_rating_categories (dict): A dictionary mapping each FIFA rating category
            to its sub-ratings.

    Returns:
        pandas.DataFrame: The cleaned FIFA dataframe.
    """
    for i, row in df.iterrows():
        # Check each rating category
        for category, sub_ratings in fifa_rating_categories.items():
            missing_sub_ratings = 0
            for sub in sub_ratings:
                # Count the number of missing sub-ratings
                missing_sub_ratings = missing_sub_ratings +1
                # If there are 2 or more missing sub-ratings, drop the row
                if missing_sub_ratings >= 2:
                    df.drop(i)
                    break  # Move on to the next row
    return df


In [189]:
def drop_rows_with_nans(df, columns_dict):
    for cols in columns_dict.values():
#         print (cols)
        nan_counts = df[cols].isna().sum(axis=1)
        drop_rows = df[nan_counts >= 2].index
        df.drop(drop_rows, inplace=True)
    return df


In [190]:
def impute_missing_category_average(df, category_dict):
    for category, cols in category_dict.items():
        for col in cols:
            category_avg = df.groupby(category)[col].transform('mean')
            df[col] = df[col].fillna(category_avg)
    return df


In [191]:
df2 = drop_rows_with_nans(df.copy(), fifa_rating_categories)
df2

Unnamed: 0,ID,Name,Age,Overall,Potential,Club,Value,Wage,International Reputation,Weak Foot,...,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,version,Next_Overall,Next_Year_Club,DefensiveAwareness
0,176580,L. Suárez,29,92,92,FC Barcelona,€83M,€525K,5.0,4.0,...,27.0,25.0,31.0,33.0,37.0,ST,2017,92.0,FC Barcelona,
1,178518,R. Nainggolan,28,86,86,Roma,€37.5M,€130K,3.0,3.0,...,11.0,11.0,14.0,8.0,11.0,CDM,2017,86.0,Roma,
2,181872,A. Vidal,29,87,87,FC Bayern München,€41.5M,€180K,4.0,4.0,...,4.0,2.0,4.0,2.0,4.0,CDM,2017,86.0,FC Bayern München,
3,197445,D. Alaba,24,86,89,FC Bayern München,€41.5M,€140K,4.0,4.0,...,5.0,7.0,14.0,15.0,9.0,LB,2017,85.0,FC Bayern München,
4,195864,P. Pogba,23,88,94,Manchester United,€71.5M,€225K,4.0,4.0,...,5.0,6.0,2.0,4.0,3.0,CAM,2017,88.0,Manchester United,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16698,254562,A. Abaz,19,50,70,FC St. Gallen 1879,€120K,€500,1.0,2.0,...,49.0,53.0,50.0,48.0,50.0,GK,2022,50.0,FC Winterthur,7.0
16699,262373,F. Chalupniczak,20,51,63,Sutton United,€120K,€800,1.0,2.0,...,50.0,52.0,51.0,51.0,52.0,GK,2022,51.0,Sutton United,9.0
16701,263373,J. Searle,20,51,64,Swansea City,€120K,€2K,1.0,3.0,...,51.0,53.0,52.0,49.0,50.0,GK,2022,51.0,Barnsley,7.0
16703,259718,F. Gebhardt,19,52,66,FC Basel 1893,€170K,€650,1.0,3.0,...,53.0,45.0,47.0,52.0,57.0,GK,2022,58.0,Hallescher FC,6.0


In [204]:
import math
def impute_with_overall(df, categories_dict):
    for category in categories_dict.values():
        df[category] = df[category].fillna(df['Overall'])
    return df

def impute_nans_with_overall(df, categories_dict):
    """
    Imputes every NaN value in the categories_dict values with that row's "overall" value.

    Args:
    df (pandas.DataFrame): The dataframe to impute the missing values in.
    categories_dict (dict): The dictionary containing lists of column names to impute.

    Returns:
    pandas.DataFrame: The dataframe with the missing values imputed.
    """
    for category, columns in categories_dict.items():
        category_cols = [col for col in df.columns if col in columns]
        for col in category_cols:
#             df[col].fillna(df["Overall"], inplace=True)
            category_cols_without_col = [c for c in category_cols if c != col]
            df[col] = df.apply(lambda x: math.floor((x[col] if not pd.isna(x[col]) else (x[category_cols_without_col].mean()))), axis=1)
#             df[col].fillna(df[[x for x in category_cols if x != col]].mean(), inplace=True)
    return df


In [205]:
df3 = impute_nans_with_overall(df2.copy(), fifa_rating_categories)
df3

Unnamed: 0,ID,Name,Age,Overall,Potential,Club,Value,Wage,International Reputation,Weak Foot,...,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,version,Next_Overall,Next_Year_Club,DefensiveAwareness
0,176580,L. Suárez,29,92,92,FC Barcelona,€83M,€525K,5.0,4.0,...,27.0,25.0,31.0,33.0,37.0,ST,2017,92.0,FC Barcelona,50
1,178518,R. Nainggolan,28,86,86,Roma,€37.5M,€130K,3.0,3.0,...,11.0,11.0,14.0,8.0,11.0,CDM,2017,86.0,Roma,79
2,181872,A. Vidal,29,87,87,FC Bayern München,€41.5M,€180K,4.0,4.0,...,4.0,2.0,4.0,2.0,4.0,CDM,2017,86.0,FC Bayern München,85
3,197445,D. Alaba,24,86,89,FC Bayern München,€41.5M,€140K,4.0,4.0,...,5.0,7.0,14.0,15.0,9.0,LB,2017,85.0,FC Bayern München,81
4,195864,P. Pogba,23,88,94,Manchester United,€71.5M,€225K,4.0,4.0,...,5.0,6.0,2.0,4.0,3.0,CAM,2017,88.0,Manchester United,72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16698,254562,A. Abaz,19,50,70,FC St. Gallen 1879,€120K,€500,1.0,2.0,...,49.0,53.0,50.0,48.0,50.0,GK,2022,50.0,FC Winterthur,7
16699,262373,F. Chalupniczak,20,51,63,Sutton United,€120K,€800,1.0,2.0,...,50.0,52.0,51.0,51.0,52.0,GK,2022,51.0,Sutton United,9
16701,263373,J. Searle,20,51,64,Swansea City,€120K,€2K,1.0,3.0,...,51.0,53.0,52.0,49.0,50.0,GK,2022,51.0,Barnsley,7
16703,259718,F. Gebhardt,19,52,66,FC Basel 1893,€170K,€650,1.0,3.0,...,53.0,45.0,47.0,52.0,57.0,GK,2022,58.0,Hallescher FC,6


In [206]:
nan_cols = df3.isna().any()

# print list of columns with NaN values
print(nan_cols[nan_cols == True].index.tolist())

['Club', 'Position', 'Next_Year_Club']


In [207]:
df = convert_wage_to_int(df3 , "Wage")
df = convert_wage_to_int(df , "Value")

## Split to Positions

In [10]:
map_position= {
    'GK': 'Goalkeeper',
    'CB': 'Defender',
    'RB': 'Defender',
    'LB': 'Defender',
    'RWB': 'Defender',
    'LWB': 'Defender',
    'CM': 'Midfielder',
    'CDM': 'Midfielder',
    'CAM': 'Midfielder',
    'RM': 'Midfielder',
    'LM': 'Midfielder',
    'ST': 'Forward',
    'CF': 'Forward',
    'RF': 'Forward',
    'LF': 'Forward',
    'RW': 'Forward',
    'LW': 'Forward'
}

In [209]:
df['Position'] = df['Best Position'].map(map_position)


In [210]:
df['Position'] = df['Best Position'].map(map_position)
position_groups = df.groupby('Position')


In [211]:
type(position_groups)

pandas.core.groupby.generic.DataFrameGroupBy

In [212]:
positions = {}
for position, group in position_groups:
    positions[str(position)] = pd.DataFrame(group)

In [213]:
positions.keys()

dict_keys(['Defender', 'Forward', 'Goalkeeper', 'Midfielder'])

In [214]:
defenders_df = positions['Defender']
forward_df = positions['Forward']
gk_df = positions['Goalkeeper']
midfielders_df = positions['Midfielder']

In [321]:
defenders_df

Unnamed: 0,ID,Name,Age,Overall,Potential,Value,Wage,International Reputation,Weak Foot,Skill Moves,...,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,version,Next_Overall
3,197445,D. Alaba,24,86,89,107500000,350000,4.0,4.0,3.0,...,83.0,83.0,5.0,7.0,14.0,15.0,9.0,LB,2017,85.0
6,203551,A. Florenzi,25,82,85,55500000,220000,3.0,3.0,3.0,...,79.0,77.0,9.0,8.0,10.0,11.0,10.0,RB,2017,82.0
7,163631,L. Baines,31,83,83,28500000,61000,3.0,3.0,3.0,...,82.0,82.0,8.0,15.0,10.0,13.0,12.0,LWB,2017,80.0
12,189332,Jordi Alba,27,86,86,77500000,90000,3.0,3.0,3.0,...,84.0,83.0,13.0,15.0,13.0,6.0,13.0,LWB,2017,87.0
13,146530,Dani Alves,33,84,84,47000000,49000,4.0,3.0,3.0,...,80.0,84.0,5.0,11.0,9.0,6.0,7.0,RWB,2017,83.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15794,164945,07 Nuno Mendes,28,71,70,70000,950,2.0,3.0,1.0,...,64.0,,11.0,12.0,26.0,13.0,13.0,CB,2022,71.0
15799,252561,Zhang Yu,18,50,69,40000,1000,1.0,2.0,2.0,...,53.0,50.0,5.0,13.0,12.0,5.0,15.0,CB,2022,
15817,262041,P. Maguire,18,49,67,110000,500,1.0,3.0,2.0,...,53.0,54.0,13.0,5.0,15.0,7.0,6.0,CB,2022,49.0
15869,239427,21 A. Ajeti,23,54,61,1600000,5000,1.0,3.0,2.0,...,57.0,59.0,14.0,13.0,10.0,9.0,10.0,CB,2022,


## FIFA23 Prep

In [4]:
f_2023 = pd.read_csv(path_to_raw+"/Fifa 23 Players Data.csv" ,low_memory = False)

In [5]:
f_2023

Unnamed: 0,Known As,Full Name,Overall,Potential,Value(in Euro),Positions Played,Best Position,Nationality,Image Link,Age,...,LM Rating,CM Rating,RM Rating,LWB Rating,CDM Rating,RWB Rating,LB Rating,CB Rating,RB Rating,GK Rating
0,L. Messi,Lionel Messi,91,91,54000000,RW,CAM,Argentina,https://cdn.sofifa.net/players/158/023/23_60.png,35,...,91,88,91,67,66,67,62,53,62,22
1,K. Benzema,Karim Benzema,91,91,64000000,"CF,ST",CF,France,https://cdn.sofifa.net/players/165/153/23_60.png,34,...,89,84,89,67,67,67,63,58,63,21
2,R. Lewandowski,Robert Lewandowski,91,91,84000000,ST,ST,Poland,https://cdn.sofifa.net/players/188/545/23_60.png,33,...,86,83,86,67,69,67,64,63,64,22
3,K. De Bruyne,Kevin De Bruyne,91,91,107500000,"CM,CAM",CM,Belgium,https://cdn.sofifa.net/players/192/985/23_60.png,31,...,91,91,91,82,82,82,78,72,78,24
4,K. Mbappé,Kylian Mbappé,91,95,190500000,"ST,LW",ST,France,https://cdn.sofifa.net/players/231/747/23_60.png,23,...,92,84,92,70,66,70,66,57,66,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18534,D. Collins,Darren Collins,47,56,110000,"ST,RM",CAM,Republic of Ireland,https://cdn.sofifa.net/players/243/725/23_60.png,21,...,50,44,50,41,38,41,40,36,40,15
18535,Yang Dejiang,Dejiang Yang,47,57,90000,CDM,CDM,China PR,https://cdn.sofifa.net/players/261/933/23_60.png,17,...,45,45,45,47,48,47,49,49,49,15
18536,L. Mullan,Liam Mullan,47,67,130000,CM,RM,Northern Ireland,https://cdn.sofifa.net/players/267/823/23_60.png,18,...,52,49,52,46,44,46,46,42,46,17
18537,D. McCallion,Daithí McCallion,47,61,100000,CB,CB,Republic of Ireland,https://cdn.sofifa.net/players/267/824/23_60.png,17,...,33,33,33,44,42,44,47,49,47,15


In [12]:
f_2023['Position'] = f_2023['Best Position'].map(map_position)


In [13]:
f_2023["Wage"] = f_2023["Wage(in Euro)"]
f_2023["Value"]= f_2023["Value(in Euro)"]

In [14]:
position_groups = f_2023.groupby('Position')


In [18]:
positions = {}
for position, group in position_groups:
    positions[str(position)] = pd.DataFrame(group)

In [19]:
defenders_df = positions['Defender']
forward_df = positions['Forward']
gk_df = positions['Goalkeeper']
midfielders_df = positions['Midfielder']

In [21]:
f_2023.to_csv(output_folder+'/predict-by-rating/f23-full.csv',mode='w',header=True, index=True)


In [20]:
defenders_df.to_csv(output_folder+'/predict-by-rating/f23-defenders_df.csv',mode='w',header=True, index=True)
gk_df.to_csv(output_folder+'/predict-by-rating/f23-gk_df.csv',mode='w',header=True, index=True)
midfielders_df.to_csv(output_folder+'/predict-by-rating/f23-midfielders_df.csv',mode='w',header=True, index=True)
forward_df.to_csv(output_folder+'/predict-by-rating/f23-forward_df.csv',mode='w',header=True, index=True)


In [9]:
f_2023.columns

Index(['Known As', 'Full Name', 'Overall', 'Potential', 'Value(in Euro)',
       'Positions Played', 'Best Position', 'Nationality', 'Image Link', 'Age',
       'Height(in cm)', 'Weight(in kg)', 'TotalStats', 'BaseStats',
       'Club Name', 'Wage(in Euro)', 'Release Clause', 'Club Position',
       'Contract Until', 'Club Jersey Number', 'Joined On', 'On Loan',
       'Preferred Foot', 'Weak Foot Rating', 'Skill Moves',
       'International Reputation', 'National Team Name',
       'National Team Image Link', 'National Team Position',
       'National Team Jersey Number', 'Attacking Work Rate',
       'Defensive Work Rate', 'Pace Total', 'Shooting Total', 'Passing Total',
       'Dribbling Total', 'Defending Total', 'Physicality Total', 'Crossing',
       'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys',
       'Dribbling', 'Curve', 'Freekick Accuracy', 'LongPassing', 'BallControl',
       'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
       'Shot Powe

In [8]:
f_2022.columns

Index(['ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall',
       'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special',
       'Preferred Foot', 'International Reputation', 'Weak Foot',
       'Skill Moves', 'Work Rate', 'Body Type', 'Real Face', 'Position',
       'Jersey Number', 'Joined', 'Loaned From', 'Contract Valid Until',
       'Height', 'Weight', 'Crossing', 'Finishing', 'HeadingAccuracy',
       'ShortPassing', 'Volleys', 'Dribbling', 'Curve', 'FKAccuracy',
       'LongPassing', 'BallControl', 'Acceleration', 'SprintSpeed', 'Agility',
       'Reactions', 'Balance', 'ShotPower', 'Jumping', 'Stamina', 'Strength',
       'LongShots', 'Aggression', 'Interceptions', 'Positioning', 'Vision',
       'Penalties', 'Composure', 'Marking', 'StandingTackle', 'SlidingTackle',
       'GKDiving', 'GKHandling', 'GKKicking', 'GKPositioning', 'GKReflexes',
       'Best Position', 'Best Overall Rating', 'Release Clause',
       'DefensiveAwareness'],
      dtype='object')

# Export

In [249]:
df.to_csv(output_folder+'/predict-by-rating/Combined_Data.csv',mode='w',header=True, index=True)


In [246]:
defenders_df.to_csv(output_folder+'/predict-by-rating/defenders_df.csv',mode='w',header=True, index=True)
gk_df.to_csv(output_folder+'/predict-by-rating/gk_df.csv',mode='w',header=True, index=True)
midfielders_df.to_csv(output_folder+'/predict-by-rating/midfielders_df.csv',mode='w',header=True, index=True)
forward_df.to_csv(output_folder+'/predict-by-rating/forward_df.csv',mode='w',header=True, index=True)
