# FPL Team Optimization using Linear Programming

This notebook uses PuLP to select the optimal 15-player FPL squad based on predicted points.

**Constraints:**
- Budget: £100 million
- Squad composition: 2 GK, 5 DEF, 5 MID, 3 FWD
- Maximum 3 players per team

**Objective:**
- Maximize total predicted points

In [21]:
# Import libraries
import pandas as pd
import numpy as np
from pulp import LpProblem, LpMaximize, LpVariable, lpSum, LpStatus, value
from pulp import PULP_CBC_CMD
import sqlite3
import pickle
import os

# setting explicit database path
# this is a relative path, so it is resolved against the current working
# directory of the kernel when the connection is opened
DB_PATH = '../fpl_data.db'

In [22]:
# connecting to the database at DB_PATH
# sqlite3.connect() creates a new, empty database file when the path does not
# exist, which would only show up later as a confusing "no such table" error,
# so fail fast with a clear message instead
if not os.path.exists(DB_PATH):
    raise FileNotFoundError(f"Database file not found: {DB_PATH}")
conn = sqlite3.connect(DB_PATH)

# checking that the tables exist by listing every table in the schema
tables_query = "SELECT name FROM sqlite_master WHERE type='table';"
tables_df = pd.read_sql_query(tables_query, conn)
print("\nTables in database:")
print(tables_df['name'].tolist())
print()


Tables in database:
['players', 'gameweek_data', 'sqlite_sequence', 'features']



In [23]:
# read the whole players table into a DataFrame and print a short summary
players_df = pd.read_sql_query("SELECT * FROM players", conn)
player_count = players_df.shape[0]
column_names = list(players_df.columns)
print(f"Total players in database {player_count}")
print(f"\nColumns {column_names}")
print("\nSample data")
# last expression of the cell: rich display of the first five rows
players_df.head()

Total players in database 817

Columns ['player_id', 'name', 'team', 'position', 'price']

Sample data


Unnamed: 0,player_id,name,team,position,price
0,1,Raya,Arsenal,GK,5.9
1,2,Arrizabalaga,Arsenal,GK,4.1
2,3,Hein,Arsenal,GK,4.0
3,4,Setford,Arsenal,GK,3.9
4,5,Gabriel,Arsenal,DEF,7.1


In [24]:
# deserialising the trained Random Forest model from its pickle file
# NOTE: unpickling can execute arbitrary code, so only load a model file
# that comes from a trusted source
with open('../fpl_predictor_model.pkl', mode='rb') as model_file:
    rf_model = pickle.load(model_file)

print("Model loaded successfully")

Model loaded successfully


In [25]:
# look for the features table among the table names listed earlier;
# when it is missing, features_df falls back to an empty DataFrame
features_table = 'features' in set(tables_df['name'])

if not features_table:
    print("No features table found")
    # nothing to load - continue with an empty frame
    features_df = pd.DataFrame()
else:
    print("Features table found")
    # feature rows for the most recent gameweek in the table only
    query = """
    SELECT 
        player_id,
        rolling_avg_points,
        opponent_difficulty,
        minutes,
        is_home,
        price,
        pos_GK,
        pos_DEF,
        pos_MID,
        pos_FWD,
        clean_sheets_rolling_avg,
        gameweek
    FROM features
    WHERE gameweek = (SELECT MAX(gameweek) FROM features)
    """
    features_df = pd.read_sql_query(query, conn)

# summary of what was loaded; 'N/A' is shown when there are no rows
has_rows = len(features_df) > 0
loaded_gameweek = features_df['gameweek'].iloc[0] if has_rows else 'N/A'
feature_names = [col for col in features_df.columns if col not in ('player_id', 'gameweek')]

print(f"\nFeatures loaded for gameweek: {loaded_gameweek}")
print(f"Number of players with features: {len(features_df)}")
print(f"\nFeatures included: {feature_names}")
features_df.head()

Features table found

Features loaded for gameweek: 24
Number of players with features: 811

Features included: ['rolling_avg_points', 'opponent_difficulty', 'minutes', 'is_home', 'price', 'pos_GK', 'pos_DEF', 'pos_MID', 'pos_FWD', 'clean_sheets_rolling_avg']


Unnamed: 0,player_id,rolling_avg_points,opponent_difficulty,minutes,is_home,price,pos_GK,pos_DEF,pos_MID,pos_FWD,clean_sheets_rolling_avg,gameweek
0,1,3.2,6.1,90,0,5.9,1,0,0,0,0.4,24
1,2,0.0,6.1,0,0,4.1,1,0,0,0,0.0,24
2,3,0.0,6.1,0,0,4.0,1,0,0,0,0.0,24
3,4,0.0,6.1,0,0,3.9,1,0,0,0,0.0,24
4,5,7.8,6.1,90,0,7.1,0,1,0,0,0.6,24


In [26]:
# columns passed to the model, in exactly this order
# (presumably the same columns/order the model was trained on - verify)
feature_columns = [
    'rolling_avg_points', 'opponent_difficulty', 'minutes', 'is_home', 'price',
    'pos_GK', 'pos_DEF', 'pos_MID', 'pos_FWD',
    'clean_sheets_rolling_avg',
]

# model input: just the feature columns, one row per player
X_features = features_df.loc[:, feature_columns]

print(f"Features used for prediction: {len(feature_columns)}")
print(f"Feature names: {feature_columns}")

# one predicted value per row of X_features
predictions = rf_model.predict(X_features)

# store the predictions next to the features they were computed from
features_df['predicted_points'] = predictions

print(f"\nPredictions generated for {len(predictions)} players")
print("\nPrediction statistics:")
for label, stat in (
    ("Mean", predictions.mean()),
    ("Max", predictions.max()),
    ("Min", predictions.min()),
):
    print(f"{label}: {stat:.2f} points")

Features used for prediction: 10
Feature names: ['rolling_avg_points', 'opponent_difficulty', 'minutes', 'is_home', 'price', 'pos_GK', 'pos_DEF', 'pos_MID', 'pos_FWD', 'clean_sheets_rolling_avg']

Predictions generated for 811 players

Prediction statistics:
Mean: 1.13 points
Max: 5.77 points
Min: 0.00 points


In [27]:
# combining predictions with player info
# validate='one_to_one' makes pandas raise a MergeError if either side repeats
# a player_id; a repeated row would otherwise put the same player into the
# optimization table more than once
optimization_df = players_df.merge(
    features_df[['player_id', 'predicted_points']],
    on='player_id',
    how='inner',
    validate='one_to_one',
)

# the inner join drops players that have no feature row - report how many,
# so the exclusion is visible instead of silent
players_without_features = len(players_df) - len(optimization_df)
if players_without_features:
    print(f"{players_without_features} players had no features and were excluded")

print("\nTop 10 predicted players")
print(optimization_df.nlargest(10, 'predicted_points')[['name', 'team', 'position', 'price', 'predicted_points']])


Top 10 predicted players
            name           team position  price  predicted_points
514  Gibbs-White  Nott'm Forest      MID    7.3          5.770581
20          Rice        Arsenal      MID    7.5          5.602167
81       Semenyo       Man City      MID    7.8          5.288036
234       Palmer        Chelsea      MID   10.4          5.144049
381        Wirtz      Liverpool      MID    8.3          5.122415
46        Rogers    Aston Villa      MID    7.6          5.103797
660      Ekitiké      Liverpool      FWD    8.9          5.083041
380      M.Salah      Liverpool      MID   14.0          5.018350
429      Haaland       Man City      FWD   14.9          5.009574
96     Evanilson    Bournemouth      FWD    7.1          4.928142


In [28]:
# Inputs for the optimization problem

# numeric code for each position label (GK=1, DEF=2, MID=3, FWD=4)
position_map = dict(zip(('GK', 'DEF', 'MID', 'FWD'), range(1, 5)))
# note: despite its name, position_name holds the numeric code from position_map
optimization_df['position_name'] = optimization_df['position'].map(position_map)

# number of players the squad must contain in each position:
# 2 goalkeepers, 5 defenders, 5 midfielders, 3 forwards
position_requirements = dict(GK=2, DEF=5, MID=5, FWD=3)

# total budget of £100 million, expressed in £m to match the price column
# (prices look like 5.9, 7.1, ...)
budget = 100

# no more than 3 players may come from any single team
max_players_per_team = 3

In [29]:
# set up a maximisation problem for the squad selection
prob = LpProblem("FPL_Team_Selection", LpMaximize)

# player_vars maps player_id -> binary decision variable
# (1 = player is in the squad, 0 = player is left out)
player_vars = {
    player_id: LpVariable(f"player_{player_id}", cat='Binary')
    for player_id in optimization_df['player_id']
}

In [30]:
# objective: total predicted points of the selected players (to be maximised)
points_terms = [
    player_vars[player_id] * predicted
    for player_id, predicted in zip(optimization_df['player_id'],
                                    optimization_df['predicted_points'])
]
prob += lpSum(points_terms), "Total_Predicted_Points"

In [31]:
# fpl constraints
# budget: the summed price of the selected players must not exceed the budget
price_terms = [
    player_vars[player_id] * price
    for player_id, price in zip(optimization_df['player_id'],
                                optimization_df['price'])
]
prob += lpSum(price_terms) <= budget, "Total_Budget"

In [32]:
# squad size constraint: exactly 15 decision variables may be switched on
every_player_var = list(player_vars.values())
prob += lpSum(every_player_var) == 15, "Total_Players"


In [33]:
# position constraints: one equality per position, using the counts in
# position_requirements (2 goalkeepers, 5 defenders, 5 midfielders, 3 forwards)
for pos, req in position_requirements.items():
    in_position = optimization_df['position'] == pos
    position_vars = [
        player_vars[player_id]
        for player_id in optimization_df.loc[in_position, 'player_id']
    ]
    prob += lpSum(position_vars) == req, f"Total_{pos}"

In [34]:
# team constraint: at most max_players_per_team selected players per team
teams = optimization_df['team'].unique()
for team in teams:
    team_players = optimization_df.loc[optimization_df['team'] == team]
    team_vars = [player_vars[player_id] for player_id in team_players['player_id']]
    prob += lpSum(team_vars) <= max_players_per_team, f"Team_{team}_Limit"

In [35]:
# solving the optimization problem
# generating 3 optimal teams and storing them in all_squads; after each solve
# an exclusion constraint is added to prob so the next solve returns a
# different squad
all_squads = []
num_squads = 3

# every later squad must replace at least this many players of each earlier squad
min_player_changes = 3
# squad size implied by the position requirements (2 + 5 + 5 + 3)
squad_size = sum(position_requirements.values())

# columns copied into each stored squad ('price' is renamed to 'cost' below)
squad_columns = ['player_id', 'name', 'team', 'position', 'price', 'predicted_points']

# this cell adds Exclude_Squad_* constraints to prob; drop any left over from
# a previous run of the cell so that re-running it starts from the base problem
for stale_name in [name for name in prob.constraints if name.startswith("Exclude_Squad_")]:
    del prob.constraints[stale_name]

for squad_num in range(1, num_squads + 1):
    # using pulp to solve the problem silently without output
    prob.solve(PULP_CBC_CMD(msg=0))

    # compare against the named status instead of the bare status code 1
    if LpStatus[prob.status] != 'Optimal':
        print(f"No optimal solution found for squad #{squad_num}")
        break

    # solver output is floating point, so a chosen binary variable is not
    # guaranteed to come back as exactly 1; thresholding at 0.5 avoids
    # dropping a selected player through an exact == 1 comparison
    is_selected = [
        (value(player_vars[player_id]) or 0) > 0.5
        for player_id in optimization_df['player_id']
    ]

    # extracting the selected players for the optimal team
    selected_df = (
        optimization_df.loc[is_selected, squad_columns]
        .rename(columns={'price': 'cost'})
        .reset_index(drop=True)
    )
    selected_player_ids = selected_df['player_id'].tolist()
    total_points = selected_df['predicted_points'].sum()
    total_cost = selected_df['cost'].sum()

    # storing the squad (copies, so later changes cannot alter stored results)
    all_squads.append({
        'squad_num': squad_num,
        'dataframe': selected_df.copy(),
        'player_ids': selected_player_ids.copy(),
        'total_points': total_points,
        'total_cost': total_cost
    })

    print(f"Squad {squad_num} found")
    print(f"Total predicted points: {total_points:.2f}")
    print(f"Total cost: £{total_cost:.1f}m")

    # adding constraint to exclude this exact squad for the next solve:
    # at most (squad_size - min_player_changes) of these players may be kept,
    # i.e. the next squad differs from this one by at least min_player_changes
    if squad_num < num_squads:
        max_shared_players = squad_size - min_player_changes
        prob += (
            lpSum([player_vars[player_id] for player_id in selected_player_ids]) <= max_shared_players,
            f"Exclude_Squad_{squad_num}",
        )
        print(f"next squad needs to atleast have {min_player_changes} different players from the previous squad")


Squad 1 found
Total predicted points: 73.93
Total cost: £98.7m
next squad needs to atleast have 3 different players from the previous squad
Squad 2 found
Total predicted points: 73.64
Total cost: £99.9m
next squad needs to atleast have 3 different players from the previous squad
Squad 3 found
Total predicted points: 73.63
Total cost: £99.4m


In [36]:
# squad comparison summary: one pre-formatted row of headline numbers per squad,
# collected into a DataFrame purely for aligned printing
comparison_data = [
    {
        'Squad': f"Squad #{squad['squad_num']}",
        'Total Points': f"{squad['total_points']:.2f}",
        'Total Cost': f"£{squad['total_cost']:.1f}m",
        'Budget Left': f"£{budget - squad['total_cost']:.1f}m",
        'Avg Points/Player': f"{squad['total_points'] / 15:.2f}",
    }
    for squad in all_squads
]

comparison_df = pd.DataFrame(comparison_data)
print(comparison_df.to_string(index=False))

   Squad Total Points Total Cost Budget Left Avg Points/Player
Squad #1        73.93     £98.7m       £1.3m              4.93
Squad #2        73.64     £99.9m       £0.1m              4.91
Squad #3        73.63     £99.4m       £0.6m              4.91


In [37]:
# detailed report: one section per stored squad
heavy_rule = '=' * 70
light_rule = '─' * 70

for squad in all_squads:
    squad_num, selected_df = squad['squad_num'], squad['dataframe']
    total_points, total_cost = squad['total_points'], squad['total_cost']
    budget_left = budget - total_cost

    # section header with the squad totals
    print(f"\n{heavy_rule}")
    print(f"Squad {squad_num}")
    print(heavy_rule)
    print(f"\nPredicted Points: {total_points:.2f}")
    print(f"Cost: £{total_cost:.1f}m")
    print(f"Budget Left: £{budget_left:.1f}m")

    # all players of the squad, grouped by position label (alphabetical)
    # and, within a position, from highest to lowest predicted points
    print(f"\n{light_rule}")
    print("Squad Players:")
    print(f"{light_rule}\n")
    ordered_players = selected_df.sort_values(
        by=['position', 'predicted_points'],
        ascending=[True, False],
    )
    print(ordered_players.to_string(index=False))


Squad 1

Predicted Points: 73.93
Cost: £98.7m
Budget Left: £1.3m

──────────────────────────────────────────────────────────────────────
Squad Players:
──────────────────────────────────────────────────────────────────────

 player_id        name           team position  cost  predicted_points
       256       Muñoz Crystal Palace      DEF   5.8          4.894869
       694     Mukiele     Sunderland      DEF   4.6          4.886221
        72      Senesi    Bournemouth      DEF   4.8          4.823961
       371      Kerkez      Liverpool      DEF   5.6          4.766603
       226    Chalobah        Chelsea      DEF   5.7          4.759797
       661     Ekitiké      Liverpool      FWD   8.9          5.083041
        97   Evanilson    Bournemouth      FWD   7.1          4.928142
       136      Thiago      Brentford      FWD   7.1          4.834153
       670       Roefs     Sunderland       GK   5.0          4.058253
       287    Pickford        Everton       GK   5.6          4.0