This is code to get data from 2023 and 2024 for my dataset

In [1]:
import pandas as pd
import numpy as np
import requests

To pull data from API football

In [12]:
API_KEY = "6d6c9af1c62a63412b4f45ad3354b521"
LEAGUE_ID = 39

HEADERS = {
    "x-apisports-key": API_KEY
}
url = "https://v3.football.api-sports.io/transfers"

def fetch_transfers(team_id=None, player_id=None):
    params = {}
    if team_id:
        params["team"] = team_id
    if player_id:
        params["player"] = player_id
    response = requests.get(url, headers=HEADERS, params=params)
    return response.json()

def normalize_transfers(response_json, club_name=None, year_filter=None):
    rows = []
    for item in response_json.get("response", []):
        player_info = item.get("player", {})
        for tr in item.get("transfers", []):
            transfer_date = tr.get("date")
            transfer_year = int(transfer_date.split("-")[0]) if transfer_date else None

            # Filter by year if requested
            if year_filter and transfer_year != year_filter:
                continue

            rows.append({
                "club_name": club_name,
                "player_name": player_info.get("name"),
                "age": player_info.get("age"),
                "nationality": player_info.get("nationality"),
                "from_team": tr.get("teams", {}).get("out", {}).get("name"),
                "to_team": tr.get("teams", {}).get("in", {}).get("name"),
                "type": tr.get("type"),
                "date": transfer_date,
                "fee": tr.get("type")  # API doesn’t always have fee field
            })
    return pd.DataFrame(rows)

In [18]:
teams_2023_24 = {
    "Arsenal FC": 19,
    "Aston Villa": 15,
    "AFC Bournemouth": 52,
    "Brentford": 236,
    "Brighton & Hove Albion": 78,
    "Burnley": 2,            # example ID
    "Chelsea FC": 18,
    "Crystal Palace FC": 51,
    "Everton FC": 13,
    "Fulham FC": 11,
    "Liverpool FC": 8,
    "Luton Town": 139,       # example ID
    "Manchester City": 9,
    "Manchester United": 14,
    "Newcastle United": 20,
    "Nottingham Forest": 63,
    "Sheffield United": 97,  # example ID
    "Tottenham Hotspur": 6,
    "West Ham United": 1,
    "Wolverhampton Wanderers": 29
}

teams_2022_23 = {
    "Arsenal FC": 19,
    "Aston Villa": 15,
    "AFC Bournemouth": 52,
    "Brentford": 236,
    "Brighton & Hove Albion": 78,
    "Chelsea FC": 18,
    "Crystal Palace FC": 51,
    "Everton FC": 13,
    "Fulham FC": 11,
    "Leeds United": 71,
    "Leicester City": 42,
    "Liverpool FC": 8,
    "Manchester City": 9,
    "Manchester United": 14,
    "Newcastle United": 20,
    "Nottingham Forest": 63,
    "Southampton FC": 65,
    "Tottenham Hotspur": 6,
    "West Ham United": 1,
    "Wolverhampton Wanderers": 29
}


In [19]:
all_transfers_2023 = pd.DataFrame()

for club, team_id in teams_2022_23.items():
    raw_data = fetch_transfers(team_id=team_id)
    df_2023 = normalize_transfers(raw_data, club_name=club, year_filter=2023)
    all_transfers_2023 = pd.concat([all_transfers_2023, df_2023], ignore_index=True)

In [32]:
all_transfers_2023.to_csv("all_transfers_2023.csv", index=False)

In [22]:
all_transfers_2024 = pd.DataFrame()

for club, team_id in teams_2023_24.items():
    raw_data = fetch_transfers(team_id=team_id)
    df_2024 = normalize_transfers(raw_data, club_name=club, year_filter=2024)
    all_transfers_2024 = pd.concat([all_transfers_2024, df_2024], ignore_index=True)


In [25]:
all_transfers_2024.head()

Unnamed: 0,club_name,player_name,age,nationality,from_team,to_team,type,date,fee
0,AFC Bournemouth,Roshaun Mathurin,,,Crystal Palace,Hartlepool,Loan,2024-09-04,Loan
1,AFC Bournemouth,S. Grehan,,,Carlisle,Crystal Palace,,2024-04-04,
2,AFC Bournemouth,S. Grehan,,,Crystal Palace,Carlisle,Loan,2024-01-04,Loan
3,AFC Bournemouth,O. Goodman,,,Crystal Palace,AFC Wimbledon,Loan,2024-07-16,Loan
4,AFC Bournemouth,J. Gordon,,,Cambridge United,Crystal Palace,,2024-01-09,


In [26]:
def aggregate_transfers(df, season_label):
    result = []
    for club in df['club_name'].unique():
        club_data = df[df['club_name'] == club]

        # Players In = from_team != club
        players_in = club_data[club_data['to_team'] == club].shape[0]
        # Players Out = from_team == club
        players_out = club_data[club_data['from_team'] == club].shape[0]

        # Total spent = sum of fees for "players_in" (numeric only)
        spent = pd.to_numeric(
            club_data.loc[club_data['to_team'] == club, 'fee'],
            errors='coerce'
        ).sum(min_count=1)

        # Total income = sum of fees for "players_out"
        income = pd.to_numeric(
            club_data.loc[club_data['from_team'] == club, 'fee'],
            errors='coerce'
        ).sum(min_count=1)

        # Net spend
        net_spend = spent - income

        # Average ages
        avg_age_in = club_data.loc[club_data['to_team'] == club, 'age'].mean()
        avg_age_out = club_data.loc[club_data['from_team'] == club, 'age'].mean()

        # Loans
        loan_in = club_data[(club_data['to_team'] == club) & (club_data['type'].str.contains("Loan", na=False))].shape[0]
        loan_out = club_data[(club_data['from_team'] == club) & (club_data['type'].str.contains("Loan", na=False))].shape[0]

        result.append({
            "club_name": club,
            "season": season_label,
            "players_in": players_in,
            "players_out": players_out,
            "total_spent": round(spent, 2) if pd.notna(spent) else 0,
            "total_income": round(income, 2) if pd.notna(income) else 0,
            "net_spend": round(net_spend, 2) if pd.notna(net_spend) else 0,
            "avg_age_in": round(avg_age_in, 8) if pd.notna(avg_age_in) else None,
            "avg_age_out": round(avg_age_out, 8) if pd.notna(avg_age_out) else None,
            "loan_in": loan_in,
            "loan_out": loan_out
        })

    return pd.DataFrame(result)

In [27]:
agg_2023 = aggregate_transfers(all_transfers_2023, "2023/2024")
agg_2024 = aggregate_transfers(all_transfers_2024, "2024/2025")

In [30]:
agg_2023

Unnamed: 0,club_name,season,players_in,players_out,total_spent,total_income,net_spend,avg_age_in,avg_age_out,loan_in,loan_out
0,AFC Bournemouth,2023/2024,0,0,0,0,0,,,0,0
1,Brentford,2023/2024,0,0,0,0,0,,,0,0
2,Brighton & Hove Albion,2023/2024,0,0,0,0,0,,,0,0
3,Crystal Palace FC,2023/2024,0,0,0,0,0,,,0,0
4,Leeds United,2023/2024,0,0,0,0,0,,,0,0


In [31]:
agg_2024

Unnamed: 0,club_name,season,players_in,players_out,total_spent,total_income,net_spend,avg_age_in,avg_age_out,loan_in,loan_out
0,AFC Bournemouth,2024/2025,0,0,0,0,0,,,0,0
1,Brentford,2024/2025,0,0,0,0,0,,,0,0
2,Brighton & Hove Albion,2024/2025,0,0,0,0,0,,,0,0
3,Crystal Palace FC,2024/2025,0,0,0,0,0,,,0,0
