In [42]:
import kagglehub
from pathlib import Path
import shutil

In [43]:
# Kaggle dataset handles to fetch, in the order the rest of the notebook expects.
# Each download result is treated further down as a local directory path.
dataset_handles = (
    "stefanoleone992/ea-sports-fc-24-complete-player-dataset",
    "nyagami/ea-sports-fc-25-database-ratings-and-stats",
    "hubertsidorowicz/football-players-stats-2024-2025",
)
paths = [kagglehub.dataset_download(handle) for handle in dataset_handles]

In [44]:
# Mirror the top-level files of every downloaded dataset into ../data/raw/<folder name>.
# Sub-directories inside a download are not copied.
#
# NOTE(review): the target folder is named after the LAST component of the download
# path. The loading cell reads from raw/4 and raw/5, so that component is presumably
# the Kaggle dataset version number - two datasets sharing a version number would be
# copied into the same folder. TODO confirm and consider a per-dataset folder name.
raw_dir = Path("../data/raw")
for path in paths:
    download_dir = Path(path)
    if download_dir.exists():
        dataset_raw_dir = raw_dir / download_dir.name
        dataset_raw_dir.mkdir(parents=True, exist_ok=True)
        # copy2 keeps file metadata (timestamps) along with the contents.
        for item in filter(Path.is_file, download_dir.iterdir()):
            shutil.copy2(item, dataset_raw_dir / item.name)
    else:
        print(f"Download directory {download_dir} does not exist, skipping.")

In [45]:
import pandas as pd
from pathlib import Path

In [46]:
# Locations of the two player-ratings CSVs copied into the raw data folder.
# NOTE(review): the "4" / "5" folder names are hard-coded; they come from the folder
# names produced by the copy step and will change if those folder names change.
ratings_24_path = Path("../data/raw/4/male_players.csv")
ratings_25_path = Path("../data/raw/5/male_players.csv")

# The recorded output of this cell shows a warning pointing at the 2024 read -
# presumably pandas' DtypeWarning for columns with mixed types (TODO confirm).
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which gives consistent dtypes for such columns.
ratings_24_df = pd.read_csv(ratings_24_path, low_memory=False)
ratings_25_df = pd.read_csv(ratings_25_path)

  ratings_24_df = pd.read_csv(ratings_24_path)


In [47]:
# League names as spelled in the 2024 file (also used as the canonical spelling).
league_names_24 = [
    "La Liga",
    "Premier League",
    "Serie A",
    "Ligue 1",
    "Bundesliga",
]
# The same five leagues as spelled in the 2025 file.
league_names_25 = [
    "LALIGA EA SPORTS",
    "Premier League",
    "Serie A Enilive",
    "Ligue 1 McDonald's",
    "Bundesliga",
]

# Keep only 2025 players from those five leagues.
# NOTE: this filter uses the raw 2025 spellings, so re-running this cell after the
# League column has been renamed further down drops the renamed leagues; reload the
# CSVs before re-running.
ratings_25_df = ratings_25_df.loc[ratings_25_df['League'].isin(league_names_25)]

def transform_25_league_names(league_name):
    """Translate a 2025 league name to the spelling used in the 2024 data.

    Args:
        league_name: League name as it appears in the 2025 dataset.

    Returns:
        The 2024-style name for the three leagues whose 2025 name differs;
        any other value is returned unchanged.
    """
    renamed_leagues = dict(
        [
            ("LALIGA EA SPORTS", "La Liga"),
            ("Serie A Enilive", "Serie A"),
            ("Ligue 1 McDonald's", "Ligue 1"),
        ]
    )
    if league_name in renamed_leagues:
        return renamed_leagues[league_name]
    return league_name

# Rewrite the 2025 league names to the 2024 spelling.
# ratings_25_df was produced by a boolean-mask filter earlier in this cell, so writing
# a column into it in place can raise pandas' SettingWithCopyWarning; assign() returns
# a new frame with the League column replaced instead.
ratings_25_df = ratings_25_df.assign(
    League=ratings_25_df['League'].apply(transform_25_league_names)
)

# Keep only 2024 rows that belong to one of the five leagues AND to game version 24.0
# (rows with any other fifa_version value are discarded).
ratings_24_df = ratings_24_df[
    ratings_24_df['league_name'].isin(league_names_24)
    & (ratings_24_df["fifa_version"] == 24.0)
]

In [48]:
# Preview the first rows of the league- and version-filtered 2024 ratings.
ratings_24_df.head()

Unnamed: 0,player_id,player_url,fifa_version,fifa_update,update_as_of,short_name,long_name,player_positions,overall,potential,...,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb,gk
0,231747,/player/231747/kylian-mbappe/240002,24.0,2.0,2023-09-22,K. Mbappé,Kylian Mbappé Lottin,"ST, LW",91,94,...,63+3,63+3,63+3,68+3,63+3,54+3,54+3,54+3,63+3,18+3
1,239085,/player/239085/erling-haaland/240002,24.0,2.0,2023-09-22,E. Haaland,Erling Braut Haaland,ST,91,94,...,63+3,63+3,63+3,62+3,60+3,62+3,62+3,62+3,60+3,19+3
2,192985,/player/192985/kevin-de-bruyne/240002,24.0,2.0,2023-09-22,K. De Bruyne,Kevin De Bruyne,"CM, CAM",91,91,...,80+3,80+3,80+3,79+3,75+3,70+3,70+3,70+3,75+3,21+3
5,188545,/player/188545/robert-lewandowski/240002,24.0,2.0,2023-09-22,R. Lewandowski,Robert Lewandowski,ST,90,90,...,66+3,66+3,66+3,65+3,61+3,61+3,61+3,61+3,61+3,19+3
6,192119,/player/192119/thibaut-courtois/240002,24.0,2.0,2023-09-22,T. Courtois,Thibaut Nicolas Marc Courtois,GK,90,90,...,31+3,31+3,31+3,29+3,29+3,29+3,29+3,29+3,29+3,88+2


In [49]:
# Preview the first rows of the league-filtered 2025 ratings (League already renamed).
ratings_25_df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Rank,Name,OVR,PAC,SHO,PAS,DRI,DEF,...,Nation,League,Team,play style,url,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes
0,0,0,1,Kylian Mbappé,91,97,90,80,92,36,...,France,La Liga,Real Madrid,"Quick Step+, Acrobatic, Finesse Shot, Flair, R...",https://www.ea.com/games/ea-sports-fc/ratings/...,,,,,
1,1,1,2,Rodri,91,66,80,86,84,87,...,Spain,Premier League,Manchester City,"Tiki Taka+, Aerial, Bruiser, Long Ball Pass, P...",https://www.ea.com/games/ea-sports-fc/ratings/...,,,,,
2,2,2,4,Erling Haaland,91,88,92,70,81,45,...,Norway,Premier League,Manchester City,"Acrobatic+, Bruiser, Power Header, Power Shot,...",https://www.ea.com/games/ea-sports-fc/ratings/...,,,,,
3,3,3,5,Jude Bellingham,90,80,87,83,88,78,...,England,La Liga,Real Madrid,"Relentless+, Flair, Intercept, Slide Tackle, T...",https://www.ea.com/games/ea-sports-fc/ratings/...,,,,,
4,4,4,7,Vini Jr.,90,95,84,81,91,29,...,Brazil,La Liga,Real Madrid,"Quick Step+, Chip Shot, Finesse Shot, First To...",https://www.ea.com/games/ea-sports-fc/ratings/...,,,,,


In [50]:
# (rows, columns) of the league-filtered 2025 ratings.
ratings_25_df.shape

(2612, 58)

In [51]:
# Lookup table: 2024 column name -> the column name used for the same field in the
# 2025 data. Built from four themed groups and merged in this order.

# Who the player is and basic physical/profile attributes.
_identity_columns = {
    "short_name": "Name",
    "age": "Age",
    "nationality_name": "Nation",
    "club_name": "Team",
    "league_name": "League",
    "player_positions": "Position",
    "preferred_foot": "Preferred foot",
    "weak_foot": "Weak foot",
    "skill_moves": "Skill moves",
    "height_cm": "Height",
    "weight_kg": "Weight",
}

# Overall rating and the six headline ratings.
_headline_columns = {
    "overall": "OVR",
    "pace": "PAC",
    "shooting": "SHO",
    "passing": "PAS",
    "dribbling": "DRI",
    "defending": "DEF",
    "physic": "PHY",
}

# Fine-grained outfield attributes.
_detailed_columns = {
    "movement_acceleration": "Acceleration",
    "movement_sprint_speed": "Sprint Speed",
    "mentality_positioning": "Positioning",
    "attacking_finishing": "Finishing",
    "power_shot_power": "Shot Power",
    "power_long_shots": "Long Shots",
    "attacking_volleys": "Volleys",
    "mentality_penalties": "Penalties",
    "mentality_vision": "Vision",
    "attacking_crossing": "Crossing",
    "skill_fk_accuracy": "Free Kick Accuracy",
    "attacking_short_passing": "Short Passing",
    "skill_long_passing": "Long Passing",
    "skill_curve": "Curve",
    "skill_dribbling": "Dribbling",
    "movement_agility": "Agility",
    "movement_balance": "Balance",
    "movement_reactions": "Reactions",
    "skill_ball_control": "Ball Control",
    "mentality_composure": "Composure",
    "mentality_interceptions": "Interceptions",
    "attacking_heading_accuracy": "Heading Accuracy",
    "defending_marking_awareness": "Def Awareness",
    "defending_standing_tackle": "Standing Tackle",
    "defending_sliding_tackle": "Sliding Tackle",
    "power_jumping": "Jumping",
    "power_stamina": "Stamina",
    "power_strength": "Strength",
    "mentality_aggression": "Aggression",
}

# Goalkeeper-specific attributes.
_goalkeeper_columns = {
    "goalkeeping_diving": "GK Diving",
    "goalkeeping_handling": "GK Handling",
    "goalkeeping_kicking": "GK Kicking",
    "goalkeeping_positioning": "GK Positioning",
    "goalkeeping_reflexes": "GK Reflexes",
}

column_mapping = {
    **_identity_columns,
    **_headline_columns,
    **_detailed_columns,
    **_goalkeeper_columns,
}

## Merge 2024 and 2025 Datasets

Now we'll merge the datasets from both seasons, using `player_id` as the join key (for 2025 it is parsed from each player's ratings URL).


In [52]:
# Work on independent copies so the filtered source frames stay untouched
# by the column edits made during the merge preparation.
df24, df25 = ratings_24_df.copy(), ratings_25_df.copy()

In [53]:
# Number of 2024 rows available for the merge.
len(df24)

3467

In [54]:
# 2025: the player id is the run of digits that ends the ratings URL.
# URLs without such a trailing number end up as NaN (errors="coerce").
df25["player_id"] = pd.to_numeric(
    df25["url"].astype(str).str.extract(r"/(\d+)$", expand=False),
    errors="coerce",
)

# 2024: force the existing player_id column to numeric; non-numeric values become NaN.
df24["player_id"] = pd.to_numeric(df24["player_id"], errors="coerce")

In [55]:
# Apply column mapping to 2024 dataset
cols_to_map = {k: v for k, v in column_mapping.items() if k in df24.columns}
df24_mapped = df24.rename(columns=cols_to_map)

# Get stat columns (exclude Name and player_id)
stat_columns_24 = [col for col in df24_mapped.columns if col in column_mapping.values() and col != "Name"]
stat_columns_25 = [col for col in df25.columns if col in column_mapping.values() and col != "Name"]

print(f"Found {len(stat_columns_24)} stat columns in 2024, {len(stat_columns_25)} in 2025")


Found 51 stat columns in 2024, 51 in 2025


In [56]:
# Add season suffixes to all stat columns
rename_dict_24 = {col: f"{col}_24" for col in stat_columns_24}
rename_dict_25 = {col: f"{col}_25" for col in stat_columns_25}

df24_final = df24_mapped.rename(columns=rename_dict_24)
df25_final = df25.rename(columns=rename_dict_25)

# Prepare columns for merge
cols_to_keep_24 = ["player_id", "Name"] + [f"{col}_24" for col in stat_columns_24]
cols_to_keep_25 = ["player_id"] + [f"{col}_25" for col in stat_columns_25]

# Keep only existing columns
cols_to_keep_24 = [col for col in cols_to_keep_24 if col in df24_final.columns]
cols_to_keep_25 = [col for col in cols_to_keep_25 if col in df25_final.columns]


In [57]:
# Merge datasets on player_id
merged = df24_final[cols_to_keep_24].merge(df25_final[cols_to_keep_25], on="player_id", how="inner")

# Drop duplicates
merged = merged.drop_duplicates(subset=["player_id"])

print(f"Merged dataset: {len(merged)} players, {len(merged.columns)} columns")
merged.head()


Merged dataset: 1839 players, 104 columns


Unnamed: 0,player_id,Name,Position_24,OVR_24,Age_24,Height_24,Weight_24,Team_24,League_24,Nation_24,...,Weight_25,Age_25,Nation_25,League_25,Team_25,GK Diving_25,GK Handling_25,GK Kicking_25,GK Positioning_25,GK Reflexes_25
0,231747,K. Mbappé,"ST, LW",91,24,182,75,Paris Saint Germain,Ligue 1,France,...,75kg / 165lb,25,France,La Liga,Real Madrid,,,,,
1,239085,E. Haaland,ST,91,22,195,94,Manchester City,Premier League,Norway,...,94kg / 207lb,24,Norway,Premier League,Manchester City,,,,,
2,192985,K. De Bruyne,"CM, CAM",91,32,181,75,Manchester City,Premier League,Belgium,...,75kg / 165lb,33,Belgium,Premier League,Manchester City,,,,,
3,188545,R. Lewandowski,ST,90,34,185,81,FC Barcelona,La Liga,Poland,...,81kg / 179lb,36,Poland,La Liga,FC Barcelona,,,,,
4,192119,T. Courtois,GK,90,31,199,96,Real Madrid,La Liga,Belgium,...,96kg / 212lb,32,Belgium,La Liga,Real Madrid,85.0,89.0,76.0,88.0,90.0


In [58]:
# Write the merged table to ../data/processed as both CSV and Excel.
output_dir = Path("../data/processed")
output_dir.mkdir(parents=True, exist_ok=True)

# Both files share one base name; only the extension differs.
csv_path = output_dir / "merged_players_24_25_final.csv"
xlsx_path = csv_path.with_suffix(".xlsx")

# index=False: the positional row index is not written to either file.
for writer, target in ((merged.to_csv, csv_path), (merged.to_excel, xlsx_path)):
    writer(target, index=False)

print(f"Saved to:\n  - {csv_path}\n  - {xlsx_path}")


Saved to:
  - ..\data\processed\merged_players_24_25_final.csv
  - ..\data\processed\merged_players_24_25_final.xlsx
