## FPL Gaffer Data Collection Workflow

This notebook walks through the FPL Gaffer data collection workflow. It demonstrates the individual nodes that extract FPL data, user (manager) information, and search data.

### 🛠️ Tools and helpers

In [None]:
import httpx

In [None]:
class FPLOfficialAPI:
    """Thin synchronous wrapper around the official FPL HTTP API.

    Every request goes through one shared ``httpx.Client``. Call
    :meth:`close` when finished, or use the instance in a ``with`` block,
    so the client's connections are released.
    """

    def __init__(self, base_url="https://fantasy.premierleague.com/api"):
        """Store the API root URL and open the shared HTTP client.

        Args:
            base_url: Root URL that every endpoint path is appended to.
        """
        self.base_url = base_url
        self.client = httpx.Client()

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the HTTP client on leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def _get(self, endpoint):
        """GET ``endpoint`` relative to ``base_url`` and return the decoded JSON.

        Args:
            endpoint: Path starting with ``/`` (e.g. ``"/fixtures/"``).

        Returns:
            Whatever ``response.json()`` yields for the endpoint.

        Raises:
            Whatever ``raise_for_status()`` raises for an unsuccessful
            response (``httpx.HTTPStatusError`` per the httpx docs).
        """
        # Strip a trailing slash from the root so a base URL written as
        # ".../api/" does not produce ".../api//endpoint/".
        url = f"{self.base_url.rstrip('/')}{endpoint}"
        r = self.client.get(url)
        r.raise_for_status()
        return r.json()

    def get_bootstrap_data(self):
        """Fetch the ``/bootstrap-static/`` payload."""
        return self._get("/bootstrap-static/")

    def get_fixtures(self):
        """Fetch the ``/fixtures/`` payload."""
        return self._get("/fixtures/")

    def get_manager_data(self, manager_id: int):
        """Fetch the ``/entry/{manager_id}/`` payload for one manager."""
        return self._get(f"/entry/{manager_id}/")

    def get_gameweek_picks(self, manager_id: int, gw: int):
        """Fetch a manager's picks payload for gameweek ``gw``."""
        return self._get(f"/entry/{manager_id}/event/{gw}/picks/")

This tool handles all interactions with the FPL official API. The tool can be found in the `src/fpl_gaffer/tools/fpl_api.py` file.

In [None]:
def build_mappings(boostrap_data):
    """Build id-keyed lookup tables from a bootstrap payload.

    Args:
        boostrap_data: Mapping that may hold "elements", "teams" and
            "element_types" lists. A missing key is treated as an empty list.

    Returns:
        A ``(players, teams, positions)`` tuple of dicts keyed by id:
        players map to a dict with "name", "team_id", "position_id",
        "current_price" (``now_cost`` divided by 10) and "status"; teams map
        to their "name"; positions map to their "singular_name_short".
    """
    players_by_id = {}
    for element in boostrap_data.get("elements", []):
        element_id = element["id"]
        full_name = f"{element['first_name']} {element['second_name']}"
        players_by_id[element_id] = {
            "name": full_name,
            "team_id": element["team"],
            "position_id": element["element_type"],
            "current_price": element["now_cost"] / 10,
            "status": element["status"],
        }

    team_names_by_id = {}
    for club in boostrap_data.get("teams", []):
        club_id = club["id"]
        team_names_by_id[club_id] = club["name"]

    position_codes_by_id = {}
    for element_type in boostrap_data.get("element_types", []):
        type_id = element_type["id"]
        position_codes_by_id[type_id] = element_type["singular_name_short"]

    return players_by_id, team_names_by_id, position_codes_by_id

def map_player(player_id, players, teams, positions):
    """Resolve one player id into a readable record.

    Args:
        player_id: Key to look up in ``players``.
        players: Dict of player records holding "name", "team_id",
            "position_id", "current_price" and "status".
        teams: Dict mapping team id to team name.
        positions: Dict mapping position id to position label.

    Returns:
        A dict with "id", "name", "team", "position", "current_price" and
        "status", or an empty dict when ``player_id`` has no (non-empty)
        entry in ``players``. Unknown team or position ids fall back to
        "Unknown Team" / "Unknown Position".
    """
    record = players.get(player_id)
    if not record:
        return {}

    mapped = {"id": player_id}
    mapped["name"] = record["name"]
    mapped["team"] = teams.get(record["team_id"], "Unknown Team")
    mapped["position"] = positions.get(record["position_id"], "Unknown Position")
    mapped["current_price"] = record["current_price"]
    mapped["status"] = record["status"]
    return mapped

def map_squad(picks, players, teams, positions):
    """Resolve a list of squad picks into readable player records.

    Args:
        picks: Iterable of pick dicts holding "element" and "multiplier",
            and optionally "is_captain" / "is_vice_captain".
        players: Player lookup passed through to ``map_player``.
        teams: Team lookup passed through to ``map_player``.
        positions: Position lookup passed through to ``map_player``.

    Returns:
        One dict per pick, in input order: the ``map_player`` record extended
        with "multiplier", "is_captain" and "is_vice_captain" (the two flags
        default to ``False`` when absent from the pick).
    """

    def _describe(pick):
        # map_player hands back a fresh dict on every call, so it can be
        # extended in place.
        entry = map_player(pick["element"], players, teams, positions)
        entry["multiplier"] = pick["multiplier"]
        entry["is_captain"] = pick.get("is_captain", False)
        entry["is_vice_captain"] = pick.get("is_vice_captain", False)
        return entry

    return [_describe(pick) for pick in picks]

These are the mapping helpers for players, teams, and positions. The mapper utility can be found in the `src/fpl_gaffer/utils/fpl_mapper.py` file.

### FPL Data Collection

In [None]:
def get_gameweek_data(api: FPLOfficialAPI):
    """Summarise the next gameweek: its id, deadline and fixtures.

    Args:
        api: Client used to fetch the bootstrap and fixtures payloads.

    Returns:
        A dict with "gameweek" (id of the event flagged ``is_next``),
        "deadline" (that event's ``deadline_time``) and "fixtures" (a list of
        dicts with "id", "home_team", "away_team" and "kickoff_time" for
        fixtures whose "event" equals the gameweek id). When no event is
        flagged ``is_next``, "gameweek" and "deadline" are ``None`` and
        "fixtures" is empty.
    """
    bootstrap_data = api.get_bootstrap_data()
    _, teams, _ = build_mappings(bootstrap_data)

    next_gw = next(
        (gw for gw in bootstrap_data.get("events", []) if gw.get("is_next")),
        None,
    )
    if next_gw is None:
        # Nothing is flagged as the next gameweek, so there is no id to match
        # fixtures against; report that instead of failing on None.get().
        return {"gameweek": None, "deadline": None, "fixtures": []}

    next_gw_id = next_gw.get("id")
    next_gw_fixtures = [
        {
            "id": fixture.get("id"),
            "home_team": teams.get(fixture.get("team_h"), "Unknown"),
            "away_team": teams.get(fixture.get("team_a"), "Unknown"),
            "kickoff_time": fixture.get("kickoff_time"),
        }
        for fixture in api.get_fixtures()
        if fixture.get("event") == next_gw_id
    ]

    return {
        "gameweek": next_gw_id,
        "deadline": next_gw.get("deadline_time"),
        "fixtures": next_gw_fixtures,
    }

This is the core function of the FPL data extractor tool, found in `src/fpl_gaffer/tools/data_collector/fpl_data.py`.

In [22]:
# Demo cell: create an API client, fetch the next-gameweek summary through it
# (this issues live requests via the client), and print the resulting dict.
api = FPLOfficialAPI()
gw_data = get_gameweek_data(api)
print("Next gameweek data:", gw_data)

Next gameweek data: {'gameweek': 4, 'deadline': '2025-09-13T10:00:00Z', 'fixtures': [{'id': 31, 'home_team': 'Arsenal', 'away_team': "Nott'm Forest", 'kickoff_time': '2025-09-13T11:30:00Z'}, {'id': 32, 'home_team': 'Bournemouth', 'away_team': 'Brighton', 'kickoff_time': '2025-09-13T14:00:00Z'}, {'id': 35, 'home_team': 'Crystal Palace', 'away_team': 'Sunderland', 'kickoff_time': '2025-09-13T14:00:00Z'}, {'id': 36, 'home_team': 'Everton', 'away_team': 'Aston Villa', 'kickoff_time': '2025-09-13T14:00:00Z'}, {'id': 37, 'home_team': 'Fulham', 'away_team': 'Leeds', 'kickoff_time': '2025-09-13T14:00:00Z'}, {'id': 39, 'home_team': 'Newcastle', 'away_team': 'Wolves', 'kickoff_time': '2025-09-13T14:00:00Z'}, {'id': 40, 'home_team': 'West Ham', 'away_team': 'Spurs', 'kickoff_time': '2025-09-13T16:30:00Z'}, {'id': 33, 'home_team': 'Brentford', 'away_team': 'Chelsea', 'kickoff_time': '2025-09-13T19:00:00Z'}, {'id': 34, 'home_team': 'Burnley', 'away_team': 'Liverpool', 'kickoff_time': '2025-09-14T13

This tool shows important gameweek information like the next gameweek number, the deadline of the gameweek, and the fixtures for the gameweek.

### User Data Collection

In [18]:
def extract_user_data(api: FPLOfficialAPI, manager_id: int):
    """Collect a manager's profile and current-gameweek squad.

    Args:
        api: Client used to fetch the bootstrap, manager and picks payloads.
        manager_id: Id of the manager whose data is extracted.

    Returns:
        A dict with "manager_id", "team_name", "first_name", "last_name",
        "overall_rank", "total_points" and "squad_info". "squad_info" holds
        "starting_xi" and "bench" (lists of mapped players), "captain",
        "vice_captain", "active_chip" and the gameweek figures read from the
        picks payload's "entry_history". When no event is flagged
        ``is_current`` (or it has no id), picks are not requested and
        "squad_info" keeps its empty/zero defaults.
    """
    bootstrap_data = api.get_bootstrap_data()
    players, teams, positions = build_mappings(bootstrap_data)
    current_gw = next(
        (gw for gw in bootstrap_data.get("events", []) if gw.get("is_current")),
        None,
    )
    gw_id = current_gw.get("id") if current_gw else None

    manager_data = api.get_manager_data(manager_id)

    # Without a gameweek id the picks URL would contain the literal "None",
    # so skip the request and fall back to an empty payload.
    team_data = api.get_gameweek_picks(manager_id, gw_id) if gw_id is not None else {}
    gw_history = team_data.get("entry_history", {})

    picks = team_data.get("picks", [])

    squad_info = {
        "starting_xi": [],
        "bench": [],
        "captain": None,
        "vice_captain": None,
        "active_chip": team_data.get("active_chip"),
        "points": gw_history.get("points", 0),
        "total_points": gw_history.get("total_points", 0),
        "rank": gw_history.get("overall_rank", 0),
        # "value" and "bank" are scaled by 1/10, like now_cost in
        # build_mappings (presumably the API reports tenths; TODO confirm).
        "squad_value": gw_history.get("value", 0) / 10,
        "transfers": gw_history.get("event_transfers", 0),
        "transfers_cost": gw_history.get("event_transfers_cost", 0),
        "money_itb": gw_history.get("bank", 0) / 10,
    }

    for pick in picks:
        player_info = map_player(pick["element"], players, teams, positions)
        player_info.update({
            "position_in_team": pick["position"],
            "multiplier": pick["multiplier"],
        })

        # Captaincy flags are read leniently, matching map_squad, so a pick
        # that lacks them is treated as neither captain nor vice captain.
        if pick.get("is_captain", False):
            squad_info["captain"] = player_info
        if pick.get("is_vice_captain", False):
            squad_info["vice_captain"] = player_info

        # Positions 1-11 are the starting XI; anything above is the bench.
        if pick["position"] <= 11:
            squad_info["starting_xi"].append(player_info)
        else:
            squad_info["bench"].append(player_info)

    profile = {
        "manager_id": manager_data.get("id"),
        "team_name": manager_data.get("name"),
        "first_name": manager_data.get("player_first_name"),
        "last_name": manager_data.get("player_last_name"),
        "overall_rank": manager_data.get("summary_overall_rank"),
        "total_points": manager_data.get("summary_overall_points"),
        "squad_info": squad_info,
    }
    return profile


This is the user data extraction tool. The full code for the tool can be found in the `src/fpl_gaffer/tools/data_collector/user_data.py` file.

In [None]:
import pandas as pd

In [23]:
# Demo cell: extract the profile and squad for one manager and print it.
MANAGER_ID = 2723529  # id of the manager to look up
api = FPLOfficialAPI()
user_data = extract_user_data(api, MANAGER_ID)

print(f"Data for Manager with ID '{MANAGER_ID}':\n", user_data)

Data for Manager with ID '2723529':
 {'manager_id': 2723529, 'team_name': 'Ghost', 'first_name': 'Deju', 'last_name': '🔞', 'overall_rank': 5901781, 'total_points': 144, 'squad_info': {'starting_xi': [{'id': 220, 'name': 'Robert Lynch Sánchez', 'team': 'Chelsea', 'position': 'GKP', 'current_price': 5.0, 'status': 'a', 'position_in_team': 1, 'multiplier': 1}, {'id': 568, 'name': 'Pedro Porro Sauceda', 'team': 'Spurs', 'position': 'DEF', 'current_price': 5.6, 'status': 'a', 'position_in_team': 2, 'multiplier': 1}, {'id': 291, 'name': 'James Tarkowski', 'team': 'Everton', 'position': 'DEF', 'current_price': 5.5, 'status': 'a', 'position_in_team': 3, 'multiplier': 1}, {'id': 505, 'name': 'Nikola Milenković', 'team': "Nott'm Forest", 'position': 'DEF', 'current_price': 5.5, 'status': 'a', 'position_in_team': 4, 'multiplier': 1}, {'id': 224, 'name': 'Marc Cucurella Saseta', 'team': 'Chelsea', 'position': 'DEF', 'current_price': 6.1, 'status': 'a', 'position_in_team': 5, 'multiplier': 1}, {'id

In [24]:
# Tabulate the starting XI: one row per player, one column per mapped field.
starting_xi_df = pd.DataFrame(user_data["squad_info"]["starting_xi"])
# Bare expression so the notebook renders the frame as the cell output.
starting_xi_df

Unnamed: 0,id,name,team,position,current_price,status,position_in_team,multiplier
0,220,Robert Lynch Sánchez,Chelsea,GKP,5.0,a,1,1
1,568,Pedro Porro Sauceda,Spurs,DEF,5.6,a,2,1
2,291,James Tarkowski,Everton,DEF,5.5,a,3,1
3,505,Nikola Milenković,Nott'm Forest,DEF,5.5,a,4,1
4,224,Marc Cucurella Saseta,Chelsea,DEF,6.1,a,5,1
5,381,Mohamed Salah,Liverpool,MID,14.5,a,6,2
6,543,Simon Adingra,Sunderland,MID,5.5,a,7,1
7,450,Matheus Santos Carneiro da Cunha,Man Utd,MID,8.1,d,8,1
8,582,Mohammed Kudus,Spurs,MID,6.6,a,9,1
9,64,Ollie Watkins,Aston Villa,FWD,8.9,a,10,1


The tool extracts all the user information that FPL Gaffer needs: team name, manager name, rank, points, current squad, money in the bank (ITB), etc.