# Behavior Diffusion Modelling: Understanding Ownership Diffusion in Fantasy Sports using Neural Networks’ Prediction

Fantasy sports represent an emerging entertainment opportunity that better involves the fans in the respective sports. Through the aspect of the purchase and sale of players in the fantasy leagues, the behavior diffusion in virtual entertainment can be studied and predicted. In this project, the diffusion of the ownership of players in the Fantasy Premier League (FPL), will be explored with predictability evaluated through social influence (in terms of respective club popularity), market movements (in terms of player pricings in the fantasy leagues), player performance (in terms of player performance metrics) and game metrics (in terms of game difficulty and team strength).

# Data Collection, Data Cleaning and Feature Engineering

In [10]:
# Packages
import requests
import pandas as pd
import numpy as np
from tqdm import tqdm
import time
from bs4 import BeautifulSoup
import html5lib
import re

## Data Collection and Data Cleaning

### FPL Data

In [2]:
# GW Data Collection Function 
def get_gw_data(gw):
    url = f"https://fantasy.premierleague.com/api/event/{gw}/live/"
    data = requests.get(url).json()
    
    rows = []
    for p in data['elements']:
        row = p['stats']
        row['id'] = p['id']
        row['gw'] = gw
        rows.append(row)
        
    return pd.DataFrame(rows)

In [3]:
# Collecting Data for GWs 1 to 14
gw_list = range(1, 14)
gw_data = pd.concat([get_gw_data(gw) for gw in tqdm(gw_list)], ignore_index=True)
gw_data.head()

100%|██████████| 13/13 [00:08<00:00,  1.49it/s]


Unnamed: 0,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,...,defensive_contribution,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,total_points,in_dreamteam,id,gw
0,90,0,0,1,0,0,0,0,1,0,...,0,1,0.0,0.0,0.0,1.52,10,True,1,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0.0,0.0,0.0,0.0,0,False,2,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0.0,0.0,0.0,0.0,0,False,3,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0.0,0.0,0.0,0.0,0,False,4,1
4,90,0,0,1,0,0,0,0,0,0,...,7,1,0.0,0.0,0.0,1.52,6,False,5,1


In [4]:
# Checking Columns
gw_data.columns

Index(['minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
       'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
       'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
       'threat', 'ict_index', 'clearances_blocks_interceptions', 'recoveries',
       'tackles', 'defensive_contribution', 'starts', 'expected_goals',
       'expected_assists', 'expected_goal_involvements',
       'expected_goals_conceded', 'total_points', 'in_dreamteam', 'id', 'gw'],
      dtype='object')

In [5]:
# Player Data Collection Function
def get_bootstrap():
    url = "https://fantasy.premierleague.com/api/bootstrap-static/"
    data = requests.get(url).json()
    
    players = pd.DataFrame(data['elements'])
    teams = pd.DataFrame(data['teams'])
    events = pd.DataFrame(data['events'])
    
    return players, teams, events


In [6]:
# Collection Data on all players
players_df, teams_df, events_df = get_bootstrap()
players_df.head()

Unnamed: 0,can_transact,can_select,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,...,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,defensive_contribution_per_90
0,True,True,,,154561,0,0,4,-4,1,...,1,127,9,41,2,8,2,1.0,0.57,0.0
1,True,True,,,109745,0,0,-3,3,0,...,37,495,73,567,78,258,34,0.0,0.0,0.0
2,True,False,0.0,0.0,463748,0,0,0,0,0,...,56,460,62,534,67,333,47,0.0,0.0,0.0
3,True,True,,,551221,0,0,-1,1,0,...,87,426,44,502,50,337,48,0.0,0.0,0.0
4,True,True,0.0,0.0,226597,0,0,3,-3,3,...,2,368,127,3,2,23,8,1.0,0.64,9.09


In [115]:
# Checking Columns
print(players_df.columns.tolist())

['can_transact', 'can_select', 'chance_of_playing_next_round', 'chance_of_playing_this_round', 'code', 'cost_change_event', 'cost_change_event_fall', 'cost_change_start', 'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next', 'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam', 'news', 'news_added', 'now_cost', 'photo', 'points_per_game', 'removed', 'second_name', 'selected_by_percent', 'special', 'squad_number', 'status', 'team', 'team_code', 'total_points', 'transfers_in', 'transfers_in_event', 'transfers_out', 'transfers_out_event', 'value_form', 'value_season', 'web_name', 'region', 'team_join_date', 'birth_date', 'has_temporary_code', 'opta_code', 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat', 'ict_index', 'clearances_blocks_interceptions', 'recoveries', 'tackles', 'defensive_contr

In [116]:
# Getting Player Metadata and Merging with GW data
players_meta = players_df[['id', 'web_name', 'team', 'now_cost', 'element_type', 'selected_by_percent', 'transfers_in', 'transfers_out']]
fpl_data = gw_data.merge(players_meta, on='id', how='left')
fpl_data.head()

Unnamed: 0,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,...,in_dreamteam,id,gw,web_name,team,now_cost,element_type,selected_by_percent,transfers_in,transfers_out
0,90,0,0,1,0,0,0,0,1,0,...,True,1,1,Raya,1,59,1,33.4,2625498,1062733
1,0,0,0,0,0,0,0,0,0,0,...,False,2,1,Arrizabalaga,1,42,1,0.4,9038,64652
2,0,0,0,0,0,0,0,0,0,0,...,False,3,1,Hein,1,40,1,0.2,5545,42922
3,0,0,0,0,0,0,0,0,0,0,...,False,4,1,Setford,1,39,1,0.2,18794,19145
4,90,0,0,1,0,0,0,0,0,0,...,False,5,1,Gabriel,1,63,2,17.1,3878350,4261998


### Social Media Data for Club Fanbase (from CIES Football Observatory)

In [13]:
# Importing Raw Data from CIES Football Observatory
with open("CIES_Raw_ENG.txt", "r", encoding="utf-8") as f:
    raw = f.read()
raw

'"\n233.6 M\n\nManchester United (ENG)\n44.8 M64.4 M85.1 M29.3 M10.0 M\n+7.3 M\n+3.4 %\n179.5 M\n\nManchester City (ENG)\n28.9 M56.1 M54.9 M31.2 M8.5 M\n+12.7 M\n+8.0 %\n166.7 M\n\nLiverpool (ENG)\n31.6 M47.0 M51.7 M25.0 M11.4 M\n+10.9 M\n+7.5 %\n152.9 M\n\nChelsea (ENG)\n29.2 M42.6 M56.5 M18.7 M5.8 M\n+5.3 M\n+3.8 %\n114.1 M\n\nArsenal (ENG)\n23.0 M30.9 M45.8 M9.7 M4.7 M\n+7.0 M\n+6.9 %\n108.1 M\n\nTottenham Hotspur (ENG)\n9.7 M17.4 M36.4 M40.6 M4.0 M\n+7.7 M\n+7.9 %\n24.5 M\n\nLeicester City (ENG)\n2.7 M7.9 M9.5 M3.7 M0.7 M\n+1.1 M\n+5.0 %\n19.1 M\n\nWest Ham United (ENG)\n2.8 M4.5 M4.4 M6.7 M0.7 M\n+0.9 M\n+4.9 %\n18.8 M\n\nAston Villa (ENG)\n2.6 M4.4 M6.7 M4.4 M0.7 M\n+2.1 M\n+13.2 %\n17.2 M\n\nNewcastle United (ENG)\n2.9 M3.2 M4.8 M5.5 M0.8 M\n+3.0 M\n+22.7 %\n14.5 M\n\nEverton (ENG)\n3.1 M3.2 M4.7 M2.6 M0.9 M\n+0.4 M\n+3.1 %\n12.0 M\n\nWolverhampton Wanderers (ENG)\n1.5 M3.0 M3.5 M3.3 M0.8 M\n+0.6 M\n+6.1 %\n9.7 M\n\nBrighton & Hove Albion (ENG)\n1.2 M2.2 M2.2 M3.4 M0.7 M\n+0.6 M

In [14]:
# Spliting raw data into block
blocks = re.split(r"\n\s*\n", raw)
blocks = blocks[1:]   
blocks

['Manchester United (ENG)\n44.8 M64.4 M85.1 M29.3 M10.0 M\n+7.3 M\n+3.4 %\n179.5 M',
 'Manchester City (ENG)\n28.9 M56.1 M54.9 M31.2 M8.5 M\n+12.7 M\n+8.0 %\n166.7 M',
 'Liverpool (ENG)\n31.6 M47.0 M51.7 M25.0 M11.4 M\n+10.9 M\n+7.5 %\n152.9 M',
 'Chelsea (ENG)\n29.2 M42.6 M56.5 M18.7 M5.8 M\n+5.3 M\n+3.8 %\n114.1 M',
 'Arsenal (ENG)\n23.0 M30.9 M45.8 M9.7 M4.7 M\n+7.0 M\n+6.9 %\n108.1 M',
 'Tottenham Hotspur (ENG)\n9.7 M17.4 M36.4 M40.6 M4.0 M\n+7.7 M\n+7.9 %\n24.5 M',
 'Leicester City (ENG)\n2.7 M7.9 M9.5 M3.7 M0.7 M\n+1.1 M\n+5.0 %\n19.1 M',
 'West Ham United (ENG)\n2.8 M4.5 M4.4 M6.7 M0.7 M\n+0.9 M\n+4.9 %\n18.8 M',
 'Aston Villa (ENG)\n2.6 M4.4 M6.7 M4.4 M0.7 M\n+2.1 M\n+13.2 %\n17.2 M',
 'Newcastle United (ENG)\n2.9 M3.2 M4.8 M5.5 M0.8 M\n+3.0 M\n+22.7 %\n14.5 M',
 'Everton (ENG)\n3.1 M3.2 M4.7 M2.6 M0.9 M\n+0.4 M\n+3.1 %\n12.0 M',
 'Wolverhampton Wanderers (ENG)\n1.5 M3.0 M3.5 M3.3 M0.8 M\n+0.6 M\n+6.1 %\n9.7 M',
 'Brighton & Hove Albion (ENG)\n1.2 M2.2 M2.2 M3.4 M0.7 M\n+0.6 M\

In [23]:
# Blocks Cleaning Functions through parsing
def extract(pattern, text):
    m = re.search(pattern, text)
    return float(m.group()) if m else np.nan

def parse_block(block):
    lines = [line.strip() for line in block.split("\n") if line.strip()]

    # team names
    club_match = re.search(r"(.*)(?= \(ENG\))", lines[0])
    club = club_match.group(1) if club_match else None

    # 5 social media sites
    nums = re.findall(r"\d+\.\d+", lines[1])
    while len(nums) < 5:
        nums.append(np.nan)

    X, Instagram, Facebook, TikTok, YouTube = [float(x) for x in nums[:5]]

    change_m   = extract(r"\d+\.\d+", lines[2]) if len(lines) > 2 else np.nan
    change_pct = extract(r"\d+\.\d+", lines[3]) if len(lines) > 3 else np.nan
    total_m    = extract(r"\d+\.\d+", lines[4]) if len(lines) > 4 else np.nan

    return {
        "Club": club,
        "X": X,
        "Instagram": Instagram,
        "Facebook": Facebook,
        "TikTok": TikTok,
        "YouTube": YouTube,
        "Change_M": change_m,
        "Change_Pct": change_pct,
        "Total_M": total_m
    }


In [24]:
# Cleaning the blocks
eng_df = pd.DataFrame([parse_block(b) for b in blocks])
eng_df

Unnamed: 0,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,Total_M
0,Manchester United,44.8,64.4,85.1,29.3,10.0,7.3,3.4,179.5
1,Manchester City,28.9,56.1,54.9,31.2,8.5,12.7,8.0,166.7
2,Liverpool,31.6,47.0,51.7,25.0,11.4,10.9,7.5,152.9
3,Chelsea,29.2,42.6,56.5,18.7,5.8,5.3,3.8,114.1
4,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,108.1
5,Tottenham Hotspur,9.7,17.4,36.4,40.6,4.0,7.7,7.9,24.5
6,Leicester City,2.7,7.9,9.5,3.7,0.7,1.1,5.0,19.1
7,West Ham United,2.8,4.5,4.4,6.7,0.7,0.9,4.9,18.8
8,Aston Villa,2.6,4.4,6.7,4.4,0.7,2.1,13.2,17.2
9,Newcastle United,2.9,3.2,4.8,5.5,0.8,3.0,22.7,14.5


In [28]:
# Filtering for teams in the present 25/26 season
epl_teams = [
    "Arsenal", "Aston Villa", "Bournemouth", "Brentford",
    "Chelsea", "Crystal Palace", "Everton", "Fulham",
    "Liverpool", "Sunderland", "Manchester City",
    "Manchester United", "Newcastle United", "Nottingham Forest",
    "Leeds United", "Tottenham Hotspur", "West Ham United",
    "Wolverhampton Wanderers", "Brighton & Hove Albion", "Burnley"
]

# Present teams in data
present = eng_df[eng_df["Club"].isin(epl_teams)]
present


Unnamed: 0,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,Total_M
0,Manchester United,44.8,64.4,85.1,29.3,10.0,7.3,3.4,179.5
1,Manchester City,28.9,56.1,54.9,31.2,8.5,12.7,8.0,166.7
2,Liverpool,31.6,47.0,51.7,25.0,11.4,10.9,7.5,152.9
3,Chelsea,29.2,42.6,56.5,18.7,5.8,5.3,3.8,114.1
4,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,108.1
5,Tottenham Hotspur,9.7,17.4,36.4,40.6,4.0,7.7,7.9,24.5
7,West Ham United,2.8,4.5,4.4,6.7,0.7,0.9,4.9,18.8
8,Aston Villa,2.6,4.4,6.7,4.4,0.7,2.1,13.2,17.2
9,Newcastle United,2.9,3.2,4.8,5.5,0.8,3.0,22.7,14.5
10,Everton,3.1,3.2,4.7,2.6,0.9,0.4,3.1,12.0


In [29]:
# Assigning values for the Misisng teams from data (using lowest follwing across all social media sites as the assumed maximum for the teams)
missing_teams = sorted(set(epl_teams) - set(present["Club"]))
numeric_cols = ["X", "Instagram", "Facebook", "TikTok", "YouTube",
                "Change_M", "Change_Pct", "Total_M"]
mins = eng_df[numeric_cols].min().to_dict()
missing_rows = pd.DataFrame([
    dict({"Club": team}, **mins) for team in missing_teams
])
missing_rows

Unnamed: 0,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,Total_M
0,Bournemouth,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
1,Brentford,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
2,Burnley,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
3,Fulham,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
4,Nottingham Forest,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
5,Sunderland,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4


In [36]:
# Merging the Present and Missing Teams and Removing Total_M variable
fanbase_epl = pd.concat([present, missing_rows], ignore_index=True)
fanbase_epl = fanbase_epl.sort_values("Club")
fanbase_epl = fanbase_epl.drop(columns=["Total_M"])
fanbase_epl

Unnamed: 0,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct
4,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9
7,Aston Villa,2.6,4.4,6.7,4.4,0.7,2.1,13.2
14,Bournemouth,1.1,1.2,1.6,1.0,0.3,0.1,2.0
15,Brentford,1.1,1.2,1.6,1.0,0.3,0.1,2.0
11,Brighton & Hove Albion,1.2,2.2,2.2,3.4,0.7,0.6,7.3
16,Burnley,1.1,1.2,1.6,1.0,0.3,0.1,2.0
3,Chelsea,29.2,42.6,56.5,18.7,5.8,5.3,3.8
12,Crystal Palace,1.5,2.2,2.2,1.4,0.3,1.0,15.0
9,Everton,3.1,3.2,4.7,2.6,0.9,0.4,3.1
17,Fulham,1.1,1.2,1.6,1.0,0.3,0.1,2.0


### Merging FPL API Data and Social Media Data

In [69]:
# Cleaning team names in fanbase data to align with FPL Data
team_name_map = {
    "Arsenal": "Arsenal",
    "Aston Villa": "Aston Villa",
    "Bournemouth": "Bournemouth",
    "Brentford": "Brentford",
    "Brighton & Hove Albion": "Brighton",
    "Burnley": "Burnley",
    "Chelsea": "Chelsea",
    "Crystal Palace": "Crystal Palace",
    "Everton": "Everton",
    "Fulham": "Fulham",
    "Leeds United": "Leeds",
    "Liverpool": "Liverpool",
    "Manchester City": "Man City",
    "Manchester United": "Man Utd",
    "Newcastle United": "Newcastle",
    "Nottingham Forest": "Nott'm Forest",
    "Sunderland": "Sunderland",
    "Tottenham Hotspur": "Spurs",
    "West Ham United": "West Ham",
    "Wolverhampton Wanderers": "Wolves"
}

fanbase_epl["team_clean"] = fanbase_epl["Club"].map(team_name_map)
fanbase_epl

Unnamed: 0,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,team_clean
4,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
7,Aston Villa,2.6,4.4,6.7,4.4,0.7,2.1,13.2,Aston Villa
14,Bournemouth,1.1,1.2,1.6,1.0,0.3,0.1,2.0,Bournemouth
15,Brentford,1.1,1.2,1.6,1.0,0.3,0.1,2.0,Brentford
11,Brighton & Hove Albion,1.2,2.2,2.2,3.4,0.7,0.6,7.3,Brighton
16,Burnley,1.1,1.2,1.6,1.0,0.3,0.1,2.0,Burnley
3,Chelsea,29.2,42.6,56.5,18.7,5.8,5.3,3.8,Chelsea
12,Crystal Palace,1.5,2.2,2.2,1.4,0.3,1.0,15.0,Crystal Palace
9,Everton,3.1,3.2,4.7,2.6,0.9,0.4,3.1,Everton
17,Fulham,1.1,1.2,1.6,1.0,0.3,0.1,2.0,Fulham


In [70]:
# Getting team lists
url = "https://fantasy.premierleague.com/api/bootstrap-static/"
bootstrap = requests.get(url).json()
fpl_metadata = pd.DataFrame(bootstrap["teams"])
fpl_metadata

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,1,ARS,...,,False,0,1300,1375,1340,1400,1260,1350,1
1,7,0,,2,0,Aston Villa,0,0,3,AVL,...,,False,0,1145,1185,1150,1170,1140,1200,2
2,90,0,,3,0,Burnley,0,0,19,BUR,...,,False,0,1055,1095,1010,1090,1100,1100,43
3,91,0,,4,0,Bournemouth,0,0,14,BOU,...,,False,0,1150,1220,1100,1240,1200,1200,127
4,94,0,,5,0,Brentford,0,0,13,BRE,...,,False,0,1135,1175,1100,1110,1170,1240,130
5,36,0,,6,0,Brighton,0,0,7,BHA,...,,False,0,1150,1160,1090,1110,1210,1210,131
6,8,0,,7,0,Chelsea,0,0,4,CHE,...,,False,0,1180,1190,1140,1160,1220,1220,4
7,31,0,,8,0,Crystal Palace,0,0,5,CRY,...,,False,0,1165,1205,1150,1150,1180,1260,6
8,11,0,,9,0,Everton,0,0,10,EVE,...,,False,0,1100,1130,1110,1130,1090,1130,7
9,54,0,,10,0,Fulham,0,0,15,FUL,...,,False,0,1090,1130,1080,1140,1100,1120,34


In [117]:
# Getting team names for the FPL data
team_lookup = fpl_metadata[['id', 'name']].rename(columns={'id':'team', 'name':'team_name'})
fpl_data = fpl_data.merge(team_lookup, on="team", how="left")
fpl_data.head()

Unnamed: 0,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,...,id,gw,web_name,team,now_cost,element_type,selected_by_percent,transfers_in,transfers_out,team_name
0,90,0,0,1,0,0,0,0,1,0,...,1,1,Raya,1,59,1,33.4,2625498,1062733,Arsenal
1,0,0,0,0,0,0,0,0,0,0,...,2,1,Arrizabalaga,1,42,1,0.4,9038,64652,Arsenal
2,0,0,0,0,0,0,0,0,0,0,...,3,1,Hein,1,40,1,0.2,5545,42922,Arsenal
3,0,0,0,0,0,0,0,0,0,0,...,4,1,Setford,1,39,1,0.2,18794,19145,Arsenal
4,90,0,0,1,0,0,0,0,0,0,...,5,1,Gabriel,1,63,2,17.1,3878350,4261998,Arsenal


In [118]:
# Merging fanbase and FPL data
fpl_fanbase_merged = fpl_data.merge(
    fanbase_epl,
    left_on="team_name",
    right_on="team_clean",
    how="left"
)
fpl_fanbase_merged.head()

Unnamed: 0,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,...,team_name,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,team_clean
0,90,0,0,1,0,0,0,0,1,0,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
1,0,0,0,0,0,0,0,0,0,0,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
2,0,0,0,0,0,0,0,0,0,0,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
3,0,0,0,0,0,0,0,0,0,0,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
4,90,0,0,1,0,0,0,0,0,0,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal


### Getting Historical FPL Data for 24/25 season

In [77]:
# Getting Historical Data on player statistics
url_hist_stats = "https://raw.githubusercontent.com/FPLYogi/FPL-Data/main/data/2024-2025/playerstats/playerstats.csv"
fpl_hist_data = pd.read_csv(url_hist_stats)
fpl_hist_data

Unnamed: 0,id,status,chance_of_playing_next_round,chance_of_playing_this_round,now_cost,now_cost_rank,now_cost_rank_type,cost_change_event,cost_change_event_fall,cost_change_start,...,threat_rank,threat_rank_type,ict_index,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,direct_freekicks_order,penalties_order,gw,set_piece_threat
0,5,u,0.0,0.0,4.0,645,55,0,0,0,...,512,31,0.0,566,63,,,,23,
1,6,a,100.0,100.0,5.6,100,10,0,0,1,...,130,14,66.0,136,35,,,,23,
2,7,a,100.0,100.0,4.7,323,209,0,0,-3,...,421,221,10.4,384,187,,,,23,
3,8,a,,,4.8,314,40,0,0,-2,...,707,249,6.3,415,139,,,,23,
4,9,a,100.0,100.0,6.7,38,19,0,0,-3,...,26,14,124.7,37,25,,,,23,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27652,545,u,0.0,0.0,5.0,220,47,0,0,0,...,777,79,0.0,785,79,,,,38,
27653,546,u,0.0,0.0,4.5,443,71,0,0,0,...,714,64,0.0,734,64,,,,38,
27654,547,i,0.0,0.0,4.4,554,335,0,0,-1,...,657,258,0.0,685,267,,,,38,
27655,548,a,100.0,100.0,5.3,147,88,0,0,-2,...,164,104,64.0,254,131,,,,38,


In [79]:
# Getting Player Metadata
url_hist_players = "https://raw.githubusercontent.com/FPLYogi/FPL-Data/refs/heads/main/data/2024-2025/players/players.csv"
players_hist_data = pd.read_csv(url_hist_players)
players_hist_data


Unnamed: 0,player_code,player_id,first_name,second_name,web_name,team_id,position
0,438098,1,Fábio,Ferreira Vieira,Fábio Vieira,3,Midfielder
1,100051017,735,Mikel,Arteta,Arteta,3,Unknown
2,616059,756,Jack,Porter,Porter,3,Goalkeeper
3,538182,790,Jimi,Gower,Gower,3,Midfielder
4,514307,793,Jack,Henry-Francis,Jack Henry-Francis,3,Midfielder
...,...,...,...,...,...,...,...
799,516939,720,Emmanuel,Agbadou,Agbadou,39,Defender
800,100040854,751,Vítor Manuel,de Oliveira Lopes Pereira,Pereira,39,Unknown
801,531170,769,Nasser,Djiga,Djiga,39,Defender
802,433312,770,Marshall,Munetsi,Munetsi,39,Midfielder


In [80]:
# Getting Teams Metadata for team names
url_hist_teams = "https://raw.githubusercontent.com/FPLYogi/FPL-Data/refs/heads/main/data/2024-2025/teams/teams.csv"
teams_hist_data = pd.read_csv(url_hist_teams)
teams_hist_data


Unnamed: 0,code,id,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id,elo
0,3,1,Arsenal,ARS,5,1350,1350,1390,1400,1310,1300,1,1991
1,7,2,Aston Villa,AVL,3,1145,1240,1130,1180,1160,1300,2,1870
2,91,3,Bournemouth,BOU,3,1170,1200,1120,1180,1220,1220,127,1806
3,94,4,Brentford,BRE,3,1130,1180,1100,1100,1160,1260,130,1809
4,36,5,Brighton,BHA,3,1140,1165,1090,1140,1190,1190,131,1825
5,8,6,Chelsea,CHE,3,1155,1235,1120,1170,1190,1300,4,1892
6,31,7,Crystal Palace,CRY,3,1150,1155,1140,1140,1160,1170,6,1833
7,11,8,Everton,EVE,3,1120,1145,1160,1160,1080,1130,7,1792
8,54,9,Fulham,FUL,3,1155,1155,1150,1150,1160,1160,34,1779
9,40,10,Ipswich,IPS,2,1065,1065,1040,1040,1090,1090,8,1589


In [86]:
# Merging players and teams data
players_teams_hist_merged = players_hist_data[["player_id", "team_id"]].merge(
    teams_hist_data[["code","id", "name"]],
    left_on="team_id",
    right_on="code",
    how="left"
)
players_teams_hist_merged

Unnamed: 0,player_id,team_id,code,id,name
0,1,3,3,1,Arsenal
1,735,3,3,1,Arsenal
2,756,3,3,1,Arsenal
3,790,3,3,1,Arsenal
4,793,3,3,1,Arsenal
...,...,...,...,...,...
799,720,39,39,20,Wolves
800,751,39,39,20,Wolves
801,769,39,39,20,Wolves
802,770,39,39,20,Wolves


In [88]:
# Adding the team name to the player stats data
fpl_hist_merged = fpl_hist_data.merge(
    players_teams_hist_merged,
    left_on="id",
    right_on="player_id",
    how="left"
)
fpl_hist_merged

Unnamed: 0,id_x,status,chance_of_playing_next_round,chance_of_playing_this_round,now_cost,now_cost_rank,now_cost_rank_type,cost_change_event,cost_change_event_fall,cost_change_start,...,corners_and_indirect_freekicks_order,direct_freekicks_order,penalties_order,gw,set_piece_threat,player_id,team_id,code,id_y,name
0,5,u,0.0,0.0,4.0,645,55,0,0,0,...,,,,23,,5,3,3,1,Arsenal
1,6,a,100.0,100.0,5.6,100,10,0,0,1,...,,,,23,,6,3,3,1,Arsenal
2,7,a,100.0,100.0,4.7,323,209,0,0,-3,...,,,,23,,7,3,3,1,Arsenal
3,8,a,,,4.8,314,40,0,0,-2,...,,,,23,,8,3,3,1,Arsenal
4,9,a,100.0,100.0,6.7,38,19,0,0,-3,...,,,,23,,9,3,3,1,Arsenal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27652,545,u,0.0,0.0,5.0,220,47,0,0,0,...,,,,38,,545,39,39,20,Wolves
27653,546,u,0.0,0.0,4.5,443,71,0,0,0,...,,,,38,,546,39,39,20,Wolves
27654,547,i,0.0,0.0,4.4,554,335,0,0,-1,...,,,,38,,547,39,39,20,Wolves
27655,548,a,100.0,100.0,5.3,147,88,0,0,-2,...,,,,38,,548,39,39,20,Wolves


In [90]:
# Adjusting Social Media Data for Historical Data
epl_hist_teams = teams_hist_data["name"].tolist()
print(epl_hist_teams)

# Cleaning eng_df for current team names
eng_df["team_clean"] = eng_df["Club"].map(team_name_map)


# Present teams in data
present_in_hist = eng_df[eng_df["team_clean"].isin(epl_hist_teams)]
present_in_hist

['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Ipswich', 'Leicester', 'Liverpool', 'Man City', 'Man Utd', 'Newcastle', "Nott'm Forest", 'Southampton', 'Spurs', 'West Ham', 'Wolves']


Unnamed: 0,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,Total_M,team_clean
0,Manchester United,44.8,64.4,85.1,29.3,10.0,7.3,3.4,179.5,Man Utd
1,Manchester City,28.9,56.1,54.9,31.2,8.5,12.7,8.0,166.7,Man City
2,Liverpool,31.6,47.0,51.7,25.0,11.4,10.9,7.5,152.9,Liverpool
3,Chelsea,29.2,42.6,56.5,18.7,5.8,5.3,3.8,114.1,Chelsea
4,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,108.1,Arsenal
5,Tottenham Hotspur,9.7,17.4,36.4,40.6,4.0,7.7,7.9,24.5,Spurs
7,West Ham United,2.8,4.5,4.4,6.7,0.7,0.9,4.9,18.8,West Ham
8,Aston Villa,2.6,4.4,6.7,4.4,0.7,2.1,13.2,17.2,Aston Villa
9,Newcastle United,2.9,3.2,4.8,5.5,0.8,3.0,22.7,14.5,Newcastle
10,Everton,3.1,3.2,4.7,2.6,0.9,0.4,3.1,12.0,Everton


In [93]:
# Assigning values for the Misisng teams for historical data (using lowest follwing across all social media sites as the assumed maximum for the teams)
missing_in_hist = sorted(set(epl_hist_teams) - set(present_in_hist["team_clean"]))
numeric_cols = ["X", "Instagram", "Facebook", "TikTok", "YouTube",
                "Change_M", "Change_Pct", "Total_M"]
mins = eng_df[numeric_cols].min().to_dict()
missing_rows_in_hist = pd.DataFrame([
    dict({"team_clean": team}, **mins) for team in missing_in_hist
])
missing_rows_in_hist

Unnamed: 0,team_clean,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,Total_M
0,Bournemouth,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
1,Brentford,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
2,Fulham,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
3,Ipswich,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
4,Leicester,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
5,Nott'm Forest,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4
6,Southampton,1.1,1.2,1.6,1.0,0.3,0.1,2.0,6.4


In [94]:
# Merging the Present and Missing Teams and Removing Total_M variable
fanbase_hist_epl = pd.concat([present_in_hist, missing_rows_in_hist], ignore_index=True)
fanbase_hist_epl = fanbase_hist_epl.sort_values("Club")
fanbase_hist_epl = fanbase_hist_epl.drop(columns=["Total_M"])
fanbase_hist_epl

Unnamed: 0,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,team_clean
4,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
7,Aston Villa,2.6,4.4,6.7,4.4,0.7,2.1,13.2,Aston Villa
11,Brighton & Hove Albion,1.2,2.2,2.2,3.4,0.7,0.6,7.3,Brighton
3,Chelsea,29.2,42.6,56.5,18.7,5.8,5.3,3.8,Chelsea
12,Crystal Palace,1.5,2.2,2.2,1.4,0.3,1.0,15.0,Crystal Palace
9,Everton,3.1,3.2,4.7,2.6,0.9,0.4,3.1,Everton
2,Liverpool,31.6,47.0,51.7,25.0,11.4,10.9,7.5,Liverpool
1,Manchester City,28.9,56.1,54.9,31.2,8.5,12.7,8.0,Man City
0,Manchester United,44.8,64.4,85.1,29.3,10.0,7.3,3.4,Man Utd
8,Newcastle United,2.9,3.2,4.8,5.5,0.8,3.0,22.7,Newcastle


In [95]:
# Merging fanbase and FPL Historical data
fpl_hist_fanbase_merged = fpl_hist_merged.merge(
    fanbase_hist_epl,
    left_on="name",
    right_on="team_clean",
    how="left"
)
fpl_hist_fanbase_merged.head()

Unnamed: 0,id_x,status,chance_of_playing_next_round,chance_of_playing_this_round,now_cost,now_cost_rank,now_cost_rank_type,cost_change_event,cost_change_event_fall,cost_change_start,...,name,Club,X,Instagram,Facebook,TikTok,YouTube,Change_M,Change_Pct,team_clean
0,5,u,0.0,0.0,4.0,645,55,0,0,0,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
1,6,a,100.0,100.0,5.6,100,10,0,0,1,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
2,7,a,100.0,100.0,4.7,323,209,0,0,-3,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
3,8,a,,,4.8,314,40,0,0,-2,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal
4,9,a,100.0,100.0,6.7,38,19,0,0,-3,...,Arsenal,Arsenal,23.0,30.9,45.8,9.7,4.7,7.0,6.9,Arsenal


## Feature Engineering 

### Historical Data

In [96]:
# Checking and correcting data types for fanbase data
fan_cols = ["X", "Instagram", "Facebook", "TikTok", "YouTube", "Change_M", "Change_Pct"]

for col in fan_cols:
    fpl_hist_fanbase_merged[col] = pd.to_numeric(fpl_hist_fanbase_merged[col], errors="coerce")

In [97]:
# Computing total and log for fanbase size
fpl_hist_fanbase_merged["fanbase_total"] = (
    fpl_hist_fanbase_merged["X"] + fpl_hist_fanbase_merged["Instagram"] + fpl_hist_fanbase_merged["Facebook"] +
    fpl_hist_fanbase_merged["TikTok"] + fpl_hist_fanbase_merged["YouTube"]
)

fpl_hist_fanbase_merged["fanbase_log"] = np.log1p(fpl_hist_fanbase_merged["fanbase_total"])


In [98]:
# Computing Team Influence
# Normalized fanbase
fpl_hist_fanbase_merged["fanbase_norm"] = (
    fpl_hist_fanbase_merged["fanbase_total"] - fpl_hist_fanbase_merged["fanbase_total"].min()
) / (
    fpl_hist_fanbase_merged["fanbase_total"].max() - fpl_hist_fanbase_merged["fanbase_total"].min()
)

# Fanbase Growth Adjusted Influence
fpl_hist_fanbase_merged["fanbase_growth_weighted"] = fpl_hist_fanbase_merged["fanbase_norm"] * (1 + fpl_hist_fanbase_merged["Change_Pct"] / 100)


In [174]:
# Ownership
# Checking and correctin ownership data type
fpl_hist_fanbase_merged["ownership"] = pd.to_numeric(fpl_hist_fanbase_merged["selected_by_percent"], errors="coerce")
fpl_hist_fanbase_merged['ownership'].replace(0, 0.1, inplace=True)

# Week-over-Week Ownership Growth
fpl_hist_fanbase_merged = fpl_hist_fanbase_merged.sort_values(["id_x", "gw"])
fpl_hist_fanbase_merged["ownership_prev"] = fpl_hist_fanbase_merged.groupby("id_x")["ownership"].shift(1)
fpl_hist_fanbase_merged["ownership_growth"] = fpl_hist_fanbase_merged["ownership"] - fpl_hist_fanbase_merged["ownership_prev"]


In [108]:
# Ownership
# Boom and Bursts 
fpl_hist_fanbase_merged["boom_flag"] = (fpl_hist_fanbase_merged["ownership_growth_pct"] > 15).astype(int)
fpl_hist_fanbase_merged["bust_flag"] = (fpl_hist_fanbase_merged["ownership_growth_pct"] < -15).astype(int)

In [119]:
# Difussion Metrics
# Player-level Net Transfers
fpl_hist_fanbase_merged["transfer_net"] = fpl_hist_fanbase_merged["transfers_in"] - fpl_hist_fanbase_merged["transfers_out"]
fpl_hist_fanbase_merged["transfer_pressure"] = fpl_hist_fanbase_merged["transfers_in"] / (fpl_hist_fanbase_merged["transfers_out"] + 1)

In [120]:
# Team-level Transfers
fpl_hist_fanbase_merged["team_transfer_pressure"] = (
    fpl_hist_fanbase_merged.groupby(["name", "gw"])["transfer_net"].transform("mean")
)

In [122]:
# Momentum Metrics
fpl_hist_fanbase_merged = fpl_hist_fanbase_merged.sort_values(["player_id", "gw"])

for metric in ["total_points", "expected_goals", "expected_goal_involvements"]:
    fpl_hist_fanbase_merged[f"{metric}_prev"] = fpl_hist_fanbase_merged.groupby("player_id")[metric].shift(1)
    fpl_hist_fanbase_merged[f"{metric}_diff"] = fpl_hist_fanbase_merged[metric] - fpl_hist_fanbase_merged[f"{metric}_prev"]
    fpl_hist_fanbase_merged[f"{metric}_momentum"] = fpl_hist_fanbase_merged.groupby("player_id")[metric].rolling(3).mean().reset_index(0, drop=True)


In [123]:
# Team Momentum Metrics
# Team average points per GW
fpl_hist_fanbase_merged["team_points_avg"] = (
    fpl_hist_fanbase_merged.groupby(["name", "gw"])["total_points"].transform("mean")
)

# 3-match Team form 
fpl_hist_fanbase_merged["team_form_3"] = (
    fpl_hist_fanbase_merged.groupby("name")["team_points_avg"].rolling(3).mean().reset_index(0,drop=True)
)


In [124]:
# Interaction Features
fpl_hist_fanbase_merged["fanbase_x_points"] = fpl_hist_fanbase_merged["fanbase_norm"] * fpl_hist_fanbase_merged["total_points"]
fpl_hist_fanbase_merged["fanbase_x_ownership"] = fpl_hist_fanbase_merged["fanbase_norm"] * fpl_hist_fanbase_merged["ownership"]
fpl_hist_fanbase_merged["fanbase_x_transfer_pressure"] = fpl_hist_fanbase_merged["fanbase_log"] * fpl_hist_fanbase_merged["transfer_pressure"]

fpl_hist_fanbase_merged["influence_score"] = (
    fpl_hist_fanbase_merged["fanbase_norm"] * 
    fpl_hist_fanbase_merged["ownership"] * 
    (1 + fpl_hist_fanbase_merged["Change_Pct"] / 100)
)

In [160]:
## Getting Target Variable
fpl_hist_fanbase_merged["ownership_next"] = fpl_hist_fanbase_merged.groupby("player_id")["ownership"].shift(-1)
fpl_hist_fanbase_merged["diffusion_target"] = fpl_hist_fanbase_merged["ownership_next"] - fpl_hist_fanbase_merged["ownership"]
fpl_hist_fanbase_merged["diffusion_class"] = (fpl_hist_fanbase_merged["diffusion_target"] > -0.0005).astype(int)

### Current Season Data

In [163]:
# Checking and correcting data types for fanbase data
fan_cols = ["X", "Instagram", "Facebook", "TikTok", "YouTube", "Change_M", "Change_Pct"]

for col in fan_cols:
    fpl_fanbase_merged[col] = pd.to_numeric(fpl_fanbase_merged[col], errors="coerce")

In [164]:
# Computing total and log for fanbase size
fpl_fanbase_merged["fanbase_total"] = (
    fpl_fanbase_merged["X"] + fpl_fanbase_merged["Instagram"] + fpl_fanbase_merged["Facebook"] +
    fpl_fanbase_merged["TikTok"] + fpl_fanbase_merged["YouTube"]
)

fpl_fanbase_merged["fanbase_log"] = np.log1p(fpl_fanbase_merged["fanbase_total"])


In [165]:
# Computing Team Influence
# Normalized fanbase
fpl_fanbase_merged["fanbase_norm"] = (
    fpl_fanbase_merged["fanbase_total"] - fpl_fanbase_merged["fanbase_total"].min()
) / (
    fpl_fanbase_merged["fanbase_total"].max() - fpl_fanbase_merged["fanbase_total"].min()
)

# Fanbase Growth Adjusted Influence
fpl_fanbase_merged["fanbase_growth_weighted"] = fpl_fanbase_merged["fanbase_norm"] * (1 + fpl_fanbase_merged["Change_Pct"] / 100)


In [167]:
fpl_fanbase_merged.columns.tolist()

['minutes',
 'goals_scored',
 'assists',
 'clean_sheets',
 'goals_conceded',
 'own_goals',
 'penalties_saved',
 'penalties_missed',
 'yellow_cards',
 'red_cards',
 'saves',
 'bonus',
 'bps',
 'influence',
 'creativity',
 'threat',
 'ict_index',
 'clearances_blocks_interceptions',
 'recoveries',
 'tackles',
 'defensive_contribution',
 'starts',
 'expected_goals',
 'expected_assists',
 'expected_goal_involvements',
 'expected_goals_conceded',
 'total_points',
 'in_dreamteam',
 'id',
 'gw',
 'web_name',
 'team',
 'now_cost',
 'element_type',
 'selected_by_percent',
 'transfers_in',
 'transfers_out',
 'team_name',
 'Club',
 'X',
 'Instagram',
 'Facebook',
 'TikTok',
 'YouTube',
 'Change_M',
 'Change_Pct',
 'team_clean',
 'fanbase_total',
 'fanbase_log',
 'fanbase_norm',
 'fanbase_growth_weighted']

In [178]:
# Ownership
# Checking and correctin ownership data type
fpl_fanbase_merged["ownership"] = pd.to_numeric(fpl_fanbase_merged["selected_by_percent"], errors="coerce")
fpl_fanbase_merged['ownership'].replace(0, 0.1, inplace=True)

# Week-over-Week Ownership Growth
fpl_fanbase_merged["ownership_prev"] = ((fpl_fanbase_merged["ownership"]*3334097 - fpl_fanbase_merged["transfers_in"] + fpl_fanbase_merged["transfers_out"])/3334097)*100
fpl_fanbase_merged["ownership_growth"] = fpl_fanbase_merged["ownership"] - fpl_fanbase_merged["ownership_prev"]

In [183]:
# Ownership
# Boom and Bursts 
fpl_fanbase_merged["boom_flag"] = (fpl_fanbase_merged["ownership_growth"] > 15).astype(int)
fpl_fanbase_merged["bust_flag"] = (fpl_fanbase_merged["ownership_growth"] < -15).astype(int)

In [184]:
# Difussion Metrics
# Player-level Net Transfers
fpl_fanbase_merged["transfer_net"] = fpl_fanbase_merged["transfers_in"] - fpl_fanbase_merged["transfers_out"]
fpl_fanbase_merged["transfer_pressure"] = fpl_fanbase_merged["transfers_in"] / (fpl_fanbase_merged["transfers_out"] + 1)

In [186]:
# Team-level Transfers
fpl_fanbase_merged["team_transfer_pressure"] = (
    fpl_fanbase_merged.groupby(["team_name", "gw"])["transfer_net"].transform("mean")
)

In [193]:
fpl_fanbase_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9573 entries, 0 to 9572
Data columns (total 74 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   minutes                              9573 non-null   int64  
 1   goals_scored                         9573 non-null   int64  
 2   assists                              9573 non-null   int64  
 3   clean_sheets                         9573 non-null   int64  
 4   goals_conceded                       9573 non-null   int64  
 5   own_goals                            9573 non-null   int64  
 6   penalties_saved                      9573 non-null   int64  
 7   penalties_missed                     9573 non-null   int64  
 8   yellow_cards                         9573 non-null   int64  
 9   red_cards                            9573 non-null   int64  
 10  saves                                9573 non-null   int64  
 11  bonus                         

In [190]:
# Momentum Metrics
# Setting relevant varibales to numeric
momentum_cols = ["total_points", "expected_goals", "expected_goal_involvements"]

for col in momentum_cols:
    fpl_fanbase_merged[col] = pd.to_numeric(fpl_fanbase_merged[col], errors="coerce")

fpl_fanbase_merged = fpl_fanbase_merged.sort_values(["id", "gw"])

for metric in ["total_points", "expected_goals", "expected_goal_involvements"]:
    fpl_fanbase_merged[f"{metric}_prev"] = fpl_fanbase_merged.groupby("id")[metric].shift(1)
    fpl_fanbase_merged[f"{metric}_diff"] = fpl_fanbase_merged[metric] - fpl_fanbase_merged[f"{metric}_prev"]
    fpl_fanbase_merged[f"{metric}_momentum"] = fpl_fanbase_merged.groupby("id")[metric].rolling(3).mean().reset_index(0, drop=True)

In [191]:
# Team Momentum Metrics
# Team average points per GW
fpl_fanbase_merged["team_points_avg"] = (
    fpl_fanbase_merged.groupby(["team_name", "gw"])["total_points"].transform("mean")
)

# 3-match Team form 
fpl_fanbase_merged["team_form_3"] = (
    fpl_fanbase_merged.groupby("team_name")["team_points_avg"].rolling(3).mean().reset_index(0,drop=True)
)

In [192]:
# Interaction Features
fpl_fanbase_merged["fanbase_x_points"] = fpl_fanbase_merged["fanbase_norm"] * fpl_fanbase_merged["total_points"]
fpl_fanbase_merged["fanbase_x_ownership"] = fpl_fanbase_merged["fanbase_norm"] * fpl_fanbase_merged["ownership"]
fpl_fanbase_merged["fanbase_x_transfer_pressure"] = fpl_fanbase_merged["fanbase_log"] * fpl_fanbase_merged["transfer_pressure"]

fpl_fanbase_merged["influence_score"] = (
    fpl_fanbase_merged["fanbase_norm"] * 
    fpl_fanbase_merged["ownership"] * 
    (1 + fpl_fanbase_merged["Change_Pct"] / 100)
)

## Saving Data

In [194]:
# Historical Data
fpl_hist_fanbase_merged.to_csv("fpl_hist_fanbase_merged.csv", index=False)
# Current Season Data
fpl_fanbase_merged.to_csv("fpl_fanbase_merged.csv", index=False)