# Combining All Leagues Into a Single Dataset

## Importing Libraries

In [1]:
import pandas as pd
import os

## Defining Directory and File Names

In [2]:
# Folder that holds the raw per-league CSV exports.
directory = "../data/raw"

# Raw CSV file name -> league label stored in the "League" column.
league_names = dict(
    [
        ("premier_league_players.csv", "Premier League"),
        ("serie_a_players.csv", "Serie A"),
        ("la_liga_players.csv", "La Liga"),
        ("bundesliga_players.csv", "Bundesliga"),
        ("ligue_1_players.csv", "Ligue 1"),
    ]
)

# Files are processed in the mapping's insertion order.
files = [*league_names]

# Collects one cleaned DataFrame per league file.
combined_data = list()

## Reading and Processing Each File

In [3]:
# Start from an empty list so that re-running this cell on its own does not
# append every league a second time.
combined_data = []

for file in files:
    path = os.path.join(directory, file)
    if not os.path.exists(path):
        # Loading stays best-effort, but report the gap instead of skipping silently.
        print(f"⚠️ Skipping missing file: {path}")
        continue

    df = pd.read_csv(path)
    # Tag every row with the league this file belongs to.
    df["League"] = league_names[file]
    # Retain only the first nationality if multiple exist ("A / B" -> "A");
    # non-string values pass through unchanged.
    df["Nationality"] = df["Nationality"].apply(
        lambda x: x.split(" / ")[0] if isinstance(x, str) else x
    )
    # Keep the shared columns and rename by label rather than by position, so
    # the rename cannot drift if the column selection is edited later.
    df = df[
        ["Name", "Age", "Position", "Nationality", "League", "Club Name", "Market Value"]
    ].rename(columns={"Club Name": "Club"})
    combined_data.append(df)

## Combining All Files into a Single DataFrame

In [4]:
# Destination of the merged dataset; also used in the success message so the
# reported location always matches where the file is actually written.
output_path = "../data/cleaned/all_leagues_combined.csv"

if combined_data:
    # Stack the per-league frames and renumber rows from 0.
    full_df = pd.concat(combined_data, ignore_index=True)
    # to_csv does not create missing parent folders, so ensure the target exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    full_df.to_csv(output_path, index=False)
    print(f"✅ Combined CSV saved as '{output_path}'")
else:
    # Nothing was loaded, so there is nothing to write.
    print("⚠️ No files found. Please check paths.")

✅ Combined CSV saved as 'data/all_leagues_combined.csv'
