# Notebook Setup

In [2]:
import requests
import pandas as pd
import time
# Show every column when a DataFrame is displayed instead of eliding the middle ones.
pd.options.display.max_columns = None

# Gathering Data from the FPL API

In [58]:
# Download the FPL "bootstrap-static" payload and split the three lists used
# by the rest of the notebook into DataFrames.
url = "https://fantasy.premierleague.com/api/bootstrap-static/"
bootstrap_http = requests.get(url, timeout=30)  # bounded wait: a stalled connection cannot hang the kernel
# Stop here on a 4xx/5xx answer instead of failing later on a missing key.
bootstrap_http.raise_for_status()
response = bootstrap_http.json()

players_df = pd.DataFrame(response['elements'])         # All players
teams_df = pd.DataFrame(response['teams'])              # All teams
positions_df = pd.DataFrame(response['element_types'])  # Position info


In [59]:
# Download the fixture list and load it into a DataFrame (one row per record).
fixtures_http = requests.get("https://fantasy.premierleague.com/api/fixtures/", timeout=30)
# Fail fast on an HTTP error rather than building a frame from an error body.
fixtures_http.raise_for_status()
fixtures = fixtures_http.json()
fixtures_df = pd.DataFrame(fixtures)

In [60]:
# Translate the numeric team / position ids on each player into readable labels.
team_name_by_id = teams_df.set_index('id')['name']
position_by_type_id = positions_df.set_index('id')['singular_name']

players_df['team_name'] = players_df['team'].map(team_name_by_id)
players_df['position'] = players_df['element_type'].map(position_by_type_id)

In [61]:
def get_player_history(player_id, timeout=30):
    """Fetch one player's per-fixture history from the FPL element-summary endpoint.

    Parameters
    ----------
    player_id : int
        Element id of the player; interpolated into the request URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the payload's ``history`` list; an empty frame
        when that list is empty.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the payload has no ``history`` key.
    """
    url = f"https://fantasy.premierleague.com/api/element-summary/{player_id}/"
    resp = requests.get(url, timeout=timeout)
    # Surface HTTP errors here rather than as an opaque JSON/KeyError later.
    resp.raise_for_status()
    payload = resp.json()
    return pd.DataFrame(payload['history'])

# Example: fetch a single player's match history by element id.
# NOTE(review): the original comment says id 97 is Haaland — nothing in this
# notebook confirms that mapping; check it against players_df before relying on it.
example_player_history = get_player_history(97)

In [62]:
# Download the match history of every player in players_df, tag each frame
# with the id it belongs to, then stack everything into one long table.
# No pause is inserted between requests.
all_histories = [
    get_player_history(pid).assign(player_id=pid)
    for pid in players_df['id']
]

full_history_df = pd.concat(all_histories, ignore_index=True)

In [63]:
# Attach name, team and position to every history row (inner join on the player id).
# NOTE(review): team_name comes from the bootstrap snapshot, so a player who
# changed clubs presumably carries the same club on all of his rows — confirm.
player_meta_cols = ['id', 'first_name', 'second_name', 'team_name', 'position']
full_history_df = pd.merge(
    full_history_df,
    players_df[player_meta_cols],
    left_on='player_id',
    right_on='id',
)


In [64]:
# Inspect the merged per-match history (the last expression is rendered as a table).
full_history_df

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder
1,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,-790,2321,84,874,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder
2,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-279,2397,355,634,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder
3,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-747,1650,0,747,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder
4,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-174,1494,0,174,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27600,784,340,11,0,True,2025-04-26T14:00:00Z,3,0,34,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,-14,5498,509,523,784,784,Mateus,Mané,Wolves,Forward
27601,784,349,13,0,False,2025-05-02T19:00:00Z,1,0,35,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,-23,5327,323,346,784,784,Mateus,Mané,Wolves,Forward
27602,784,360,5,1,True,2025-05-10T14:00:00Z,0,2,36,False,1,0,0,0,0,0,0,0,0,0,0,0,3,0.0,0.3,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,-10,5431,361,371,784,784,Mateus,Mané,Wolves,Forward
27603,784,366,7,0,False,2025-05-20T19:00:00Z,4,2,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,113,5639,350,237,784,784,Mateus,Mané,Wolves,Forward


# Feature Engineering

In [69]:
# ROLLING POINT AVERAGES
# Order rows by player and then by gameweek so "previous row" is well defined.
full_history_df = full_history_df.sort_values(by=["player_id", "round"])

# Points from each player's previous row; shifting by one keeps the current
# gameweek's own score out of the averages computed for it.
prev_points = full_history_df.groupby('player_id')['total_points'].shift(1)

# Mean of the previous N rows per player (min_periods=1 allows partial windows;
# a player's first row has no previous value and stays NaN).
for n_games in (3, 5, 10):
    full_history_df[f'avg_points_last_{n_games}'] = (
        prev_points.groupby(full_history_df['player_id'])
        .rolling(window=n_games, min_periods=1)
        .mean()
        .reset_index(level=0, drop=True)  # drop the group level so values align on the row index
    )

In [71]:
# Inspect the frame after the rolling point averages were added.
# NOTE(review): the saved output below lists a `master_id` column that no cell
# above creates — it looks like the notebook was executed out of order; re-check
# with Restart Kernel & Run All.
full_history_df

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,master_id,avg_points_last_3,avg_points_last_5,avg_points_last_10
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_2,,,
1,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,-790,2321,84,874,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_11,0.0,0.0,0.0
2,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-279,2397,355,634,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_21,0.0,0.0,0.0
3,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-747,1650,0,747,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_39,0.0,0.0,0.0
4,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-174,1494,0,174,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_47,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1243,801,361,15,0,True,2025-05-18T15:30:00Z,1,0,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,28,149,42,14,801,801,Brayden,Clarke,Arsenal,Defender,801_361,0.0,0.0,0.0
1244,801,378,17,0,False,2025-05-25T15:00:00Z,1,2,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,74,247,95,21,801,801,Brayden,Clarke,Arsenal,Defender,801_378,0.0,0.0,0.0
15314,802,371,3,0,False,2025-05-25T15:00:00Z,2,0,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,0,0,0,0,802,802,Sammy,Braybrooke,Leicester,Midfielder,802_371,,,
11424,803,376,15,0,False,2025-05-25T15:00:00Z,0,1,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,0,0,0,0,803,803,Reece,Welch,Everton,Defender,803_376,,,


In [73]:
# Add readable home/away team names next to the numeric team ids.
team_names = teams_df.set_index('id')['name']
for side in ('h', 'a'):
    fixtures_df[f'team_{side}_name'] = fixtures_df[f'team_{side}'].map(team_names)

In [74]:
# Inspect the fixtures table with the home/away team names added.
fixtures_df

Unnamed: 0,code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,started,team_a,team_a_score,team_h,team_h_score,stats,team_h_difficulty,team_a_difficulty,pulse_id,team_h_name,team_a_name
0,2444470,1,True,True,1,2024-08-16T19:00:00Z,90,False,True,9,0,14,1,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",3,3,115827,Man Utd,Fulham
1,2444473,1,True,True,4,2024-08-17T11:30:00Z,90,False,True,12,2,10,0,"[{'identifier': 'goals_scored', 'a': [{'value'...",5,2,115830,Ipswich,Liverpool
2,2444471,1,True,True,2,2024-08-17T14:00:00Z,90,False,True,20,0,1,2,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",3,5,115828,Arsenal,Wolves
3,2444472,1,True,True,3,2024-08-17T14:00:00Z,90,False,True,5,3,8,0,"[{'identifier': 'goals_scored', 'a': [{'value'...",3,3,115829,Everton,Brighton
4,2444474,1,True,True,5,2024-08-17T14:00:00Z,90,False,True,17,0,15,1,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",1,4,115831,Newcastle,Southampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,2444845,38,True,True,376,2025-05-25T15:00:00Z,90,False,True,8,1,15,0,"[{'identifier': 'goals_scored', 'a': [{'value'...",3,4,116202,Newcastle,Everton
376,2444846,38,True,True,377,2025-05-25T15:00:00Z,90,False,True,6,1,16,0,"[{'identifier': 'goals_scored', 'a': [{'value'...",3,4,116203,Nott'm Forest,Chelsea
377,2444847,38,True,True,378,2025-05-25T15:00:00Z,90,False,True,1,2,17,1,"[{'identifier': 'goals_scored', 'a': [{'value'...",5,1,116204,Southampton,Arsenal
378,2444848,38,True,True,379,2025-05-25T15:00:00Z,90,False,True,5,4,18,1,"[{'identifier': 'goals_scored', 'a': [{'value'...",3,3,116205,Spurs,Brighton


In [91]:
# Build a lookup with one row per (team, gameweek) side of every fixture:
# the opponent, whether the team is at home, and the team's difficulty rating.
# The gameweek is stored as `next_round` so it can be joined against "round + 1".
next_fixture_lookup = []

for _, row in fixtures_df.iterrows():
    # Skip fixtures without a gameweek. pd.isna catches both None and the NaN
    # that pandas stores for a missing number; an `is None` test never matches NaN.
    if pd.isna(row['event']):
        continue
    gameweek = int(row['event'])  # the column is float when any value is missing
    # Emit the home side first, then the away side, for every fixture.
    for side, other, is_home in (('h', 'a', True), ('a', 'h', False)):
        next_fixture_lookup.append({
            'team_name': row[f'team_{side}_name'],
            'next_round': gameweek,
            'opponent_team': row[f'team_{other}_name'],
            'opponent_team_id': row[f'team_{other}'],  # opponent's team ID
            'was_home': is_home,
            'fdr': row[f'team_{side}_difficulty'],
        })

# Passing the columns explicitly keeps the schema stable even when no fixture qualified.
next_fixtures_df = pd.DataFrame(
    next_fixture_lookup,
    columns=['team_name', 'next_round', 'opponent_team', 'opponent_team_id', 'was_home', 'fdr'],
)


In [92]:
# Inspect the per-team fixture lookup (two rows per fixture: home side, away side).
next_fixtures_df

Unnamed: 0,team_name,next_round,opponent_team,opponent_team_id,was_home,fdr
0,Man Utd,1,Fulham,9,True,3
1,Fulham,1,Man Utd,14,False,3
2,Ipswich,1,Liverpool,12,True,5
3,Liverpool,1,Ipswich,10,False,2
4,Arsenal,1,Wolves,20,True,3
...,...,...,...,...,...,...
755,Arsenal,38,Southampton,17,False,1
756,Spurs,38,Brighton,5,True,3
757,Brighton,38,Spurs,18,False,3
758,Wolves,38,Brentford,4,True,3


In [None]:
# Attach each row's *next* gameweek fixture (opponent, home flag, difficulty)
# by joining on the player's team and round + 1. Rows with no match keep NaN.
# NOTE(review): next_fixtures_df also has `opponent_team` and `was_home`, so the
# overlapping columns come back with pandas' default _x / _y suffixes.
# NOTE(review): a team with two fixtures in the next gameweek matches twice, so
# the history row is duplicated — confirm this is intended.
full_history_with_fixture_df = (
    full_history_df
    .assign(next_round=full_history_df['round'] + 1)
    .merge(
        next_fixtures_df,
        how='left',
        left_on=['team_name', 'next_round'],
        right_on=['team_name', 'next_round'],
    )
    .drop(columns=["next_round"])  # join key only; not needed afterwards
)


In [135]:
# Inspect the history frame after the next-fixture merge.
# NOTE(review): the saved output shows `master_id` before the cell that creates
# it and no suffixed columns — it appears to come from a different run of the merge.
full_history_with_fixture_df

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,master_id,avg_points_last_3,avg_points_last_5,avg_points_last_10,fdr
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_2,,,,4.0
1,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,-790,2321,84,874,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_11,0.0,0.0,0.0,3.0
2,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-279,2397,355,634,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_21,0.0,0.0,0.0,3.0
3,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-747,1650,0,747,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_39,0.0,0.0,0.0,4.0
4,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-174,1494,0,174,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_47,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27600,801,361,15,0,True,2025-05-18T15:30:00Z,1,0,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,28,149,42,14,801,801,Brayden,Clarke,Arsenal,Defender,801_361,0.0,0.0,0.0,1.0
27601,801,378,17,0,False,2025-05-25T15:00:00Z,1,2,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,74,247,95,21,801,801,Brayden,Clarke,Arsenal,Defender,801_378,0.0,0.0,0.0,
27602,802,371,3,0,False,2025-05-25T15:00:00Z,2,0,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,0,0,0,0,802,802,Sammy,Braybrooke,Leicester,Midfielder,802_371,,,,
27603,803,376,15,0,False,2025-05-25T15:00:00Z,0,1,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,0,0,0,0,803,803,Reece,Welch,Everton,Defender,803_376,,,,


In [136]:
# Build a "<player_id>_<fixture>" key per row and list the rows whose key is
# not unique, then look at the duplicated rows for element 211.
player_part = full_history_with_fixture_df['player_id'].astype(str)
fixture_part = full_history_with_fixture_df['fixture'].astype(str)
full_history_with_fixture_df['master_id'] = player_part.str.cat(fixture_part, sep='_')

is_repeated = full_history_with_fixture_df.duplicated(subset=['master_id'], keep=False)
duplicates = full_history_with_fixture_df[is_repeated]
duplicates.loc[duplicates['element'] == 211]

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,master_id,avg_points_last_3,avg_points_last_5,avg_points_last_10,fdr


In [115]:
# Print the column index to check which columns the merge produced.
print(full_history_with_fixture_df.columns)


Index(['element', 'fixture', 'opponent_team', 'total_points', 'was_home',
       'kickoff_time', 'team_h_score', 'team_a_score', 'round', 'modified',
       'minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
       'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
       'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
       'threat', 'ict_index', 'starts', 'expected_goals', 'expected_assists',
       'expected_goal_involvements', 'expected_goals_conceded', 'mng_win',
       'mng_draw', 'mng_loss', 'mng_underdog_win', 'mng_underdog_draw',
       'mng_clean_sheets', 'mng_goals_scored', 'value', 'transfers_balance',
       'selected', 'transfers_in', 'transfers_out', 'player_id', 'id',
       'first_name', 'second_name', 'team_name', 'position', 'master_id',
       'avg_points_last_3', 'avg_points_last_5', 'avg_points_last_10',
       'next_round', 'opponent_team_fixture', 'opponent_team_id',
       'was_home_fixture', 'fdr'],
     

In [118]:
# Inspect the merged frame again.
# NOTE(review): the saved output contains `_x` / `_y` copies of `opponent_team_id`
# and `fdr`, all empty — presumably the merge was run more than once on the same
# frame; confirm with a clean Restart Kernel & Run All.
full_history_with_fixture_df

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,master_id,avg_points_last_3,avg_points_last_5,avg_points_last_10,next_round,opponent_team_fixture,opponent_team_id_x,was_home_fixture,fdr_x,opponent_team_id_y,fdr_y
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_2,,,,2,,,,,,
1,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,-790,2321,84,874,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_11,0.0,0.0,0.0,3,,,,,,
2,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-279,2397,355,634,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_21,0.0,0.0,0.0,4,,,,,,
3,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-747,1650,0,747,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_39,0.0,0.0,0.0,5,,,,,,
4,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-174,1494,0,174,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,1_47,0.0,0.0,0.0,6,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27600,801,361,15,0,True,2025-05-18T15:30:00Z,1,0,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,28,149,42,14,801,801,Brayden,Clarke,Arsenal,Defender,801_361,0.0,0.0,0.0,38,,,,,,
27601,801,378,17,0,False,2025-05-25T15:00:00Z,1,2,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,74,247,95,21,801,801,Brayden,Clarke,Arsenal,Defender,801_378,0.0,0.0,0.0,39,,,,,,
27602,802,371,3,0,False,2025-05-25T15:00:00Z,2,0,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,0,0,0,0,802,802,Sammy,Braybrooke,Leicester,Midfielder,802_371,,,,39,,,,,,
27603,803,376,15,0,False,2025-05-25T15:00:00Z,0,1,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,0,0,0,0,803,803,Reece,Welch,Everton,Defender,803_376,,,,39,,,,,,


In [40]:
# Give the _x / _y columns left by the next-fixture merge descriptive names.
# _x is the history side (current match), _y the fixture-lookup side (next match).
# Names that are absent from the frame are ignored by rename.
suffix_renames = {
    "opponent_team_x": "opponentteam_id",
    "was_home_x": "was_home",
    "opponent_team_y": "opponent_team_name",
    "was_home_y": "was_home_opponent",
}
full_history_with_fixture_df = full_history_with_fixture_df.rename(columns=suffix_renames)


In [41]:
# Inspect the frame after the suffixed merge columns were renamed.
full_history_with_fixture_df

Unnamed: 0,element,fixture,opponentteam_id,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,avg_points_last_3,avg_points_last_5,avg_points_last_10,opponent_team_name,was_home_opponent,fdr
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,,,,Aston Villa,False,4.0
1,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,-790,2321,84,874,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Brighton,True,3.0
2,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-279,2397,355,634,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Spurs,False,3.0
3,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-747,1650,0,747,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Man City,False,4.0
4,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-174,1494,0,174,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Leicester,True,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28043,801,361,15,0,True,2025-05-18T15:30:00Z,1,0,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,28,149,42,14,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,Southampton,False,1.0
28044,801,378,17,0,False,2025-05-25T15:00:00Z,1,2,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,74,247,95,21,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,,,
28045,802,371,3,0,False,2025-05-25T15:00:00Z,2,0,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,0,0,0,0,802,802,Sammy,Braybrooke,Leicester,Midfielder,,,,,,
28046,803,376,15,0,False,2025-05-25T15:00:00Z,0,1,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,0,0,0,0,803,803,Reece,Welch,Everton,Defender,,,,,,


In [42]:
# Order rows by player and gameweek before looking back in time.
full_history_with_fixture_df = full_history_with_fixture_df.sort_values(by=["player_id", "round"])

# Minutes from each player's previous row, so the current match never feeds
# its own feature (no leakage).
prev_minutes = full_history_with_fixture_df.groupby('player_id')['minutes'].shift(1)

# Average minutes over the previous 3 / 5 / 10 rows per player
# (partial windows allowed via min_periods=1).
for n_games in (3, 5, 10):
    full_history_with_fixture_df[f'avg_minutes_last_{n_games}'] = (
        prev_minutes.groupby(full_history_with_fixture_df['player_id'])
        .rolling(window=n_games, min_periods=1)
        .mean()
        .reset_index(level=0, drop=True)  # back to the original row index for alignment
    )


In [43]:
# Inspect the frame with the rolling minutes averages added.
full_history_with_fixture_df

Unnamed: 0,element,fixture,opponentteam_id,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,avg_points_last_3,avg_points_last_5,avg_points_last_10,opponent_team_name,was_home_opponent,fdr,avg_minutes_last_3,avg_minutes_last_5,avg_minutes_last_10
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,,,,Aston Villa,False,4.0,,,
1,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,-790,2321,84,874,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Brighton,True,3.0,0.0,0.0,0.0
2,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-279,2397,355,634,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Spurs,False,3.0,0.0,0.0,0.0
3,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-747,1650,0,747,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Man City,False,4.0,0.0,0.0,0.0
4,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,54,-174,1494,0,174,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Leicester,True,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28043,801,361,15,0,True,2025-05-18T15:30:00Z,1,0,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,28,149,42,14,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,Southampton,False,1.0,0.0,0.0,0.0
28044,801,378,17,0,False,2025-05-25T15:00:00Z,1,2,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,74,247,95,21,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,,,,0.0,0.0,0.0
28045,802,371,3,0,False,2025-05-25T15:00:00Z,2,0,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,0,0,0,0,802,802,Sammy,Braybrooke,Leicester,Midfielder,,,,,,,,,
28046,803,376,15,0,False,2025-05-25T15:00:00Z,0,1,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,0,0,0,0,803,803,Reece,Welch,Everton,Defender,,,,,,,,,


In [49]:
# Coerce the expected-goals / expected-assists columns to floats.
# The first number found in each value is kept; values without a number become NaN.
# The pattern accepts "1", "0.45", ".5" and "1e-05". The previous pattern required
# digits on both sides of a decimal point, so whole-number strings such as "1"
# were silently turned into NaN (and then into 0 in the sum below).
xg_number_pattern = r'((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'
for xg_col in ('expected_goals', 'expected_assists'):
    as_text = full_history_with_fixture_df[xg_col].astype(str)
    full_history_with_fixture_df[xg_col] = pd.to_numeric(
        as_text.str.extract(xg_number_pattern, expand=False),
        errors='coerce',
    )

# Expected goal involvement = xG + xA, treating unparseable values as 0.
# NOTE(review): the frame already has an `expected_goal_involvements` column —
# presumably the same quantity; confirm before keeping both.
full_history_with_fixture_df["xgi"] = (
    full_history_with_fixture_df["expected_goals"].fillna(0)
    + full_history_with_fixture_df["expected_assists"].fillna(0)
)


In [50]:
# xGI from each player's previous row, so the current match is excluded (no leakage).
# The frame is not re-sorted here; the shift follows the current row order.
prev_xgi = full_history_with_fixture_df.groupby('player_id')['xgi'].shift(1)

# Average xGI over the previous 3 / 5 / 10 rows per player
# (partial windows allowed via min_periods=1).
for n_games in (3, 5, 10):
    full_history_with_fixture_df[f'xgi_avg_last_{n_games}'] = (
        prev_xgi.groupby(full_history_with_fixture_df['player_id'])
        .rolling(window=n_games, min_periods=1)
        .mean()
        .reset_index(level=0, drop=True)  # drop the group level so values align on the row index
    )


In [52]:
# Inspect the frame with `xgi` and its rolling averages added.
full_history_with_fixture_df

Unnamed: 0,element,fixture,opponentteam_id,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,avg_points_last_3,avg_points_last_5,avg_points_last_10,opponent_team_name,was_home_opponent,fdr,avg_minutes_last_3,avg_minutes_last_5,avg_minutes_last_10,xgi,xgi_avg_last_3,xgi_avg_last_5,xgi_avg_last_10
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,,,,Aston Villa,False,4.0,,,,0.0,,,
1,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,55,-790,2321,84,874,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Brighton,True,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,54,-279,2397,355,634,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Spurs,False,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,54,-747,1650,0,747,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Man City,False,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,54,-174,1494,0,174,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Leicester,True,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28043,801,361,15,0,True,2025-05-18T15:30:00Z,1,0,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,40,28,149,42,14,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,Southampton,False,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28044,801,378,17,0,False,2025-05-25T15:00:00Z,1,2,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,40,74,247,95,21,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28045,802,371,3,0,False,2025-05-25T15:00:00Z,2,0,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,45,0,0,0,0,802,802,Sammy,Braybrooke,Leicester,Midfielder,,,,,,,,,,0.0,,,
28046,803,376,15,0,False,2025-05-25T15:00:00Z,0,1,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,40,0,0,0,0,803,803,Reece,Welch,Everton,Defender,,,,,,,,,,0.0,,,


In [57]:
# Candidate model inputs, grouped by origin. `position` is still the raw label here.
# NOTE(review): `starts`, `value`, `selected` and the transfer counts are same-row
# values while the rolling columns look one row back — confirm against the
# prediction target that none of them leak.
features = (
    # Rolling form over each player's previous 3 / 5 / 10 rows
    ['avg_points_last_3', 'avg_points_last_5', 'avg_points_last_10']
    + ['avg_minutes_last_3', 'avg_minutes_last_5', 'avg_minutes_last_10']
    + ['xgi_avg_last_3', 'xgi_avg_last_5', 'xgi_avg_last_10']
    # Fixture context and price
    + ['fdr', 'starts', 'value', 'was_home']
    # Ownership and transfer activity
    + ['transfers_in', 'transfers_out', 'selected']
    # Manager-specific stats
    + ['mng_clean_sheets', 'mng_goals_scored']
    # Categorical label (encoded in a later step)
    + ['position']
)


In [58]:
from sklearn.preprocessing import LabelEncoder

# Encode the categorical `position` column as integer labels so it can be fed
# to the model as a numeric feature.
le = LabelEncoder()
full_history_with_fixture_df['position_encoded'] = le.fit_transform(full_history_with_fixture_df['position'])

# Swap the raw column for its encoded counterpart in the feature list.
# The membership checks make this cell safe to re-run: an unconditional
# `features.remove('position')` raises ValueError on a second execution, and an
# unconditional append would list 'position_encoded' twice.
if 'position' in features:
    features.remove('position')
if 'position_encoded' not in features:
    features.append('position_encoded')

In [59]:
# Replace missing values in the model input columns with 0; only the columns
# listed in `features` are touched.
feature_values = full_history_with_fixture_df[features]
full_history_with_fixture_df[features] = feature_values.fillna(0)

# Saving data

In [62]:
# Persist the prepared frame to "fpl_data.pkl" in the current working directory
# so the modelling cells below can reload it without re-running the steps above.
full_history_with_fixture_df.to_pickle("fpl_data.pkl")

In [44]:
# Reload the frame written by the "Saving data" cell. Pickle files can execute
# arbitrary code when loaded, so only read files produced by this notebook.
fpl_data = pd.read_pickle("fpl_data.pkl")

In [46]:
# Model input columns, in a fixed order:
#   1. rolling averages (points, minutes, xGI) over the last 3 / 5 / 10 fixtures,
#   2. fixture and player context columns,
#   3. the integer-encoded position.
features = [
    f'{stat}_last_{window}'
    for stat in ('avg_points', 'avg_minutes', 'xgi_avg')
    for window in (3, 5, 10)
] + [
    'fdr', 'starts', 'value', 'was_home',
    'transfers_in', 'transfers_out', 'selected',
    'mng_clean_sheets', 'mng_goals_scored',
    'position_encoded',
]


In [45]:
# Display the frame loaded from "fpl_data.pkl" (the rendered output shows the
# first and last rows with the middle elided).
fpl_data

Unnamed: 0,element,fixture,opponentteam_id,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,avg_points_last_3,avg_points_last_5,avg_points_last_10,opponent_team_name,was_home_opponent,fdr,avg_minutes_last_3,avg_minutes_last_5,avg_minutes_last_10,xgi,xgi_avg_last_3,xgi_avg_last_5,xgi_avg_last_10,position_encoded
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Aston Villa,False,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
1,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,55,-790,2321,84,874,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Brighton,True,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
2,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,54,-279,2397,355,634,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Spurs,False,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,54,-747,1650,0,747,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Man City,False,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
4,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,54,-174,1494,0,174,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Leicester,True,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28043,801,361,15,0,True,2025-05-18T15:30:00Z,1,0,37,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,40,28,149,42,14,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,Southampton,False,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
28044,801,378,17,0,False,2025-05-25T15:00:00Z,1,2,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,40,74,247,95,21,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
28045,802,371,3,0,False,2025-05-25T15:00:00Z,2,0,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,45,0,0,0,0,802,802,Sammy,Braybrooke,Leicester,Midfielder,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
28046,803,376,15,0,False,2025-05-25T15:00:00Z,0,1,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.00,0.00,0,0,0,0,0,0,0,40,0,0,0,0,803,803,Reece,Welch,Everton,Defender,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [47]:
# Order rows by gameweek, then player, then kickoff time, so that a player's
# fixtures inside one gameweek are in chronological order.
fpl_data = fpl_data.sort_values(by=['round', 'player_id', 'kickoff_time'])

# Derive the helper columns in one pass:
#   fixture_order  - 1-based position of the fixture within the player's gameweek
#   round_adjusted - gameweek as a float, with +0.1 per extra fixture in that
#                    gameweek (double gameweeks become e.g. 5.0 and 5.1)
#   master_id      - "<player_id>_<fixture>" key for a player/fixture pair
fpl_data = fpl_data.assign(
    fixture_order=lambda frame: frame.groupby(['player_id', 'round']).cumcount() + 1,
    round_adjusted=lambda frame: frame['round'].astype(float) + (frame['fixture_order'] - 1) * 0.1,
    master_id=lambda frame: frame['player_id'].astype(str) + '_' + frame['fixture'].astype(str),
)


# Applying model

In [48]:
# Time-based hold-out: rows at the latest `round_adjusted` value form the test
# set, and every strictly earlier row is used for training.
round_adj = fpl_data['round_adjusted']
max_round_adj = round_adj.max()

train_data = fpl_data[round_adj.lt(max_round_adj)]
test_data = fpl_data[round_adj.eq(max_round_adj)]

# NOTE(review): `starts`, `mng_clean_sheets` and `mng_goals_scored` are read from
# the same row as the target `total_points`; if they describe the fixture being
# predicted they leak the outcome into the inputs - confirm before trusting scores.
X_train, y_train = train_data[features], train_data['total_points']
X_test, y_test = test_data[features], test_data['total_points']

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")


Training samples: 27244
Testing samples: 804


In [49]:
from xgboost import XGBRegressor

# Hyperparameters gathered in one mapping so they can be read and tuned at a glance.
xgb_params = {
    'n_estimators': 300,
    'max_depth': 5,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'random_state': 42,  # fixed seed for repeatable fits
    'n_jobs': -1,
}

# Build the regressor and fit it on the training split.
xgb_model = XGBRegressor(**xgb_params)
xgb_model.fit(X_train, y_train)

print("Model training complete!")


Model training complete!


# Saving Predictions

In [50]:
import joblib

# Serialize the fitted model to "xgb_fpl_model.joblib" in the working directory.
# `joblib.dump` is the last expression, so its return value is shown as the cell output.
joblib.dump(xgb_model, 'xgb_fpl_model.joblib')

# To restore the model in a later session:
# xgb_model = joblib.load('xgb_fpl_model.joblib')


['xgb_fpl_model.joblib']

In [51]:
# Reload the saved model from disk, rebinding `xgb_model`.
# Relies on `joblib` having been imported by the cell that saved the model.
xgb_model = joblib.load('xgb_fpl_model.joblib')

In [52]:
# Score every row of `fpl_data` with the fitted model and store the result as
# `next_gw_pred`. This covers the rows the model was trained on as well as the
# held-out ones, so most of these values are in-sample predictions.
# NOTE(review): the model was fit on each row's own `total_points`; confirm that
# "next gameweek" is the intended reading of this column.
predicted_points = xgb_model.predict(fpl_data[features])
fpl_data['next_gw_pred'] = predicted_points

In [53]:
# Work on an independent copy so later changes to the predictions frame do not
# modify `fpl_data`.
fpl_data_pred = fpl_data.copy()

In [137]:
# Display the predictions frame (the rendered output shows the first and last
# rows with the middle elided).
fpl_data_pred

Unnamed: 0,element,fixture,opponentteam_id,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,modified,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,mng_win,mng_draw,mng_loss,mng_underdog_win,mng_underdog_draw,mng_clean_sheets,mng_goals_scored,value,transfers_balance,selected,transfers_in,transfers_out,player_id,id,first_name,second_name,team_name,position,avg_points_last_3,avg_points_last_5,avg_points_last_10,opponent_team_name,was_home_opponent,fdr,avg_minutes_last_3,avg_minutes_last_5,avg_minutes_last_10,xgi,xgi_avg_last_3,xgi_avg_last_5,xgi_avg_last_10,position_encoded,fixture_order,round_adjusted,master_id,next_gw_pred
0,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,55,0,2923,0,0,1,1,Fábio,Ferreira Vieira,Arsenal,Midfielder,0.0,0.0,0.0,Aston Villa,False,4.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,4,1,1.0,1_2,0.365899
39,2,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,5,0,0,0,0,0,0,0,1,0,0,0,1,0.2,0.8,0.0,0.1,0,0.00,0.00,0.00,0.15,0,0,0,0,0,0,0,70,0,199810,0,0,2,2,Gabriel,Fernando de Jesus,Arsenal,Forward,0.0,0.0,0.0,Aston Villa,False,4.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,1,1,1.0,2_2,0.595338
78,3,2,20,6,True,2024-08-17T14:00:00Z,2,0,1,False,90,0,0,1,0,0,0,0,0,0,0,0,22,13.8,1.4,8.0,2.3,1,0.12,0.02,0.14,0.47,0,0,0,0,0,0,0,60,0,1167503,0,0,3,3,Gabriel,dos Santos Magalhães,Arsenal,Defender,0.0,0.0,0.0,Aston Villa,False,4.0,0.0,0.0,0.0,0.14,0.0,0.0,0.0,0,1,1.0,3_2,4.734218
117,4,2,20,12,True,2024-08-17T14:00:00Z,2,0,1,False,90,1,1,1,0,0,0,0,0,0,0,3,48,54.8,24.1,46.0,12.5,1,0.45,0.04,0.49,0.47,0,0,0,0,0,0,0,80,0,1087445,0,0,4,4,Kai,Havertz,Arsenal,Forward,0.0,0.0,0.0,Aston Villa,False,4.0,0.0,0.0,0.0,0.49,0.0,0.0,0.0,1,1,1.0,4_2,5.633408
156,5,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,0,7300,0,0,5,5,Karl,Hein,Arsenal,Goalkeeper,0.0,0.0,0.0,Aston Villa,False,4.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,2,1,1.0,5_2,0.071210
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28040,800,373,19,0,True,2025-05-25T15:00:00Z,1,3,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,75,181,112,37,800,800,Tom,Taylor,Ipswich,Midfielder,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,4,1,38.0,800_373,0.045555
28044,801,378,17,0,False,2025-05-25T15:00:00Z,1,2,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,74,247,95,21,801,801,Brayden,Clarke,Arsenal,Defender,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0,1,38.0,801_378,0.003857
28045,802,371,3,0,False,2025-05-25T15:00:00Z,2,0,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,45,0,0,0,0,802,802,Sammy,Braybrooke,Leicester,Midfielder,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,4,1,38.0,802_371,0.100530
28046,803,376,15,0,False,2025-05-25T15:00:00Z,0,1,38,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,40,0,0,0,0,803,803,Reece,Welch,Everton,Defender,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0,1,38.0,803_376,0.080942


In [142]:
# Columns that identify one player/gameweek/opponent combination.
dedup_keys = ['player_id', 'round', 'opponent_team_name']

# Rank rows from highest to lowest prediction so that, within each key group,
# the first row encountered is the one with the largest `next_gw_pred`.
ranked_predictions = fpl_data_pred.sort_values('next_gw_pred', ascending=False)

# Keep only that top row per key group and renumber the index from 0.
fpl_data_pred_filtered = ranked_predictions.drop_duplicates(subset=dedup_keys, keep='first')
fpl_data_pred_filtered = fpl_data_pred_filtered.reset_index(drop=True)


In [143]:
# Persist the de-duplicated predictions to "fpl_data_pred.pkl" in the working directory.
fpl_data_pred_filtered.to_pickle("fpl_data_pred.pkl")

In [144]:
# Reload the saved predictions. This rebinds `fpl_data_pred`: the file was written
# from `fpl_data_pred_filtered`, so from here on the name refers to the
# de-duplicated frame rather than the unfiltered copy created earlier.
fpl_data_pred = pd.read_pickle("fpl_data_pred.pkl")

# New Season Predictions



In [None]:
# TODO: placeholder for scoring a new season - not executed.
# `new_season_data` is not created in the cells above; it would need the same
# columns as `features` before this can run.
# X_new = new_season_data[features]
# new_season_data['next_gw_pred'] = xgb_model.predict(X_new)