# Data Preprocessing

This file contains the preprocessing for the player visualizations.

In [5]:
import pandas as pd

def preprocess_player_valuations(df):
    """Clean a raw player-valuation table and return it sorted by player and date.

    The steps are: parse ``date`` (unparseable values become NaT), cast
    ``player_id`` to int, drop rows without a valid date, sort by
    ``player_id`` and ``date``, forward-fill missing ``market_value_in_eur``
    within each player, and add ``market_value_in_millions``.

    Args:
        df: DataFrame with at least the columns ``player_id``, ``date`` and
            ``market_value_in_eur``.

    Returns:
        A new DataFrame; the input frame is not modified.

    Raises:
        ValueError: propagated from ``astype(int)`` when ``player_id`` holds
            values that cannot be converted to int (e.g. missing ids).
    """
    cleaned = df.copy()

    # Convert 'date' to datetime; invalid entries are coerced to NaT
    cleaned["date"] = pd.to_datetime(cleaned["date"], errors="coerce")

    cleaned["player_id"] = cleaned["player_id"].astype(int)

    # Remove missing dates
    cleaned = cleaned.dropna(subset=["date"])

    # Sort by player and date BEFORE filling: a forward fill copies the value
    # of the preceding row, so the rows must already be in chronological order
    # for a gap to inherit the player's previous valuation.
    cleaned = cleaned.sort_values(by=["player_id", "date"])

    # Fill missing market values using forward fill per player. Grouping keeps
    # one player's value from leaking into the next player's rows; a gap with
    # no earlier valuation for that player stays NaN.
    cleaned["market_value_in_eur"] = (
        cleaned.groupby("player_id")["market_value_in_eur"].ffill()
    )

    # Convert market value to millions
    cleaned["market_value_in_millions"] = cleaned["market_value_in_eur"] / 1_000_000

    return cleaned


# Run the file-based pipeline only when executed directly (a script run or a
# notebook cell, where __name__ is "__main__"); importing this code merely
# defines the function above.
if __name__ == "__main__":
    # Load dataset
    df = pd.read_csv("data/player_valuations.csv")  # Replace with your file

    df = preprocess_player_valuations(df)

    # Save processed data
    df.to_csv("processed_data/player_valuations.csv", index=False)