# FPL Team Optimization using Linear Programming

This notebook uses PuLP to select the optimal 15-player FPL squad based on predicted points.

**Constraints:**
- Budget: £100 million
- Squad composition: 2 GK, 5 DEF, 5 MID, 3 FWD
- Maximum 3 players per team

**Objective:**
- Maximize total predicted points

In [36]:
# Import libraries
import pandas as pd
import numpy as np
from pulp import LpProblem, LpMaximize, LpVariable, lpSum, LpStatus, value
import sqlite3
import pickle
import os

# Path to the SQLite database file; it is relative, so it resolves against the
# notebook's current working directory
DB_PATH = '../fpl_data.db'

In [None]:
# Connect to the SQLite database at DB_PATH.
# sqlite3.connect() silently creates a new, empty database file when the path
# does not exist, which would only surface later as a confusing "no such table"
# error. Fail fast with a clear message instead.
if not os.path.isfile(DB_PATH):
    raise FileNotFoundError(
        f"Database file not found at '{DB_PATH}' "
        f"(resolved to '{os.path.abspath(DB_PATH)}')"
    )
conn = sqlite3.connect(DB_PATH)

# List the tables present so later cells can check what is available
tables_query = "SELECT name FROM sqlite_master WHERE type='table';"
tables_df = pd.read_sql_query(tables_query, conn)
print("\nTables in database:")
print(tables_df['name'].tolist())
print()


Tables in database:
['players', 'gameweek_data', 'sqlite_sequence', 'features']



In [35]:
# Read the whole players table into a DataFrame and summarise it
players_query = "SELECT * FROM players"
players_df = pd.read_sql_query(players_query, conn)

player_count = len(players_df)
player_columns = players_df.columns.tolist()
print(f"Total players in database {player_count}")
print(f"\nColumns {player_columns}")
print(f"\nSample data")
players_df.head()

Total players in database 817

Columns ['player_id', 'name', 'team', 'position', 'price']

Sample data


Unnamed: 0,player_id,name,team,position,price
0,1,Raya,Arsenal,GK,5.9
1,2,Arrizabalaga,Arsenal,GK,4.1
2,3,Hein,Arsenal,GK,4.0
3,4,Setford,Arsenal,GK,3.9
4,5,Gabriel,Arsenal,DEF,7.0


In [40]:
# Restore the trained Random Forest model from its pickle file.
# NOTE: pickle.load can execute arbitrary code, so only load model files
# that come from a trusted source.
model_path = '../fpl_predictor_model.pkl'
with open(model_path, 'rb') as model_file:
    rf_model = pickle.load(model_file)

print("Model loaded successfully")

Model loaded successfully


In [None]:
# Check whether the database has a 'features' table; when it does, load the
# feature rows for the most recent gameweek, otherwise fall back to an empty
# DataFrame so the summary below still prints.
features_table = 'features' in tables_df['name'].tolist()

if not features_table:
    print("No features table found")
    # nothing to load, so keep an empty placeholder
    features_df = pd.DataFrame()
else:
    print("Features table found")
    # one row per player for the latest gameweek stored in the table
    query = """
    SELECT 
        player_id,
        rolling_avg_points,
        opponent_difficulty,
        minutes,
        is_home,
        price,
        pos_GK,
        pos_DEF,
        pos_MID,
        pos_FWD,
        clean_sheets_rolling_avg,
        gameweek
    FROM features
    WHERE gameweek = (SELECT MAX(gameweek) FROM features)
    """
    features_df = pd.read_sql_query(query, conn)

# Summarise what was loaded; 'N/A' is shown when there are no rows
if len(features_df) == 0:
    loaded_gameweek = 'N/A'
else:
    loaded_gameweek = features_df['gameweek'].iloc[0]
included_features = [col for col in features_df.columns if col not in ('player_id', 'gameweek')]

print(f"\nFeatures loaded for gameweek: {loaded_gameweek}")
print(f"Number of players with features: {len(features_df)}")
print(f"\nFeatures included: {included_features}")
features_df.head()

Features table found

Features loaded for gameweek: 22
Number of players with features: 799

Features included: ['rolling_avg_points', 'opponent_difficulty', 'minutes', 'is_home', 'price', 'pos_GK', 'pos_DEF', 'pos_MID', 'pos_FWD', 'clean_sheets_rolling_avg']


Unnamed: 0,player_id,rolling_avg_points,opponent_difficulty,minutes,is_home,price,pos_GK,pos_DEF,pos_MID,pos_FWD,clean_sheets_rolling_avg,gameweek
0,1,3.6,8.0,90,0,5.9,1,0,0,0,0.4,22
1,2,0.0,8.0,0,0,4.1,1,0,0,0,0.0,22
2,3,0.0,8.0,0,0,4.0,1,0,0,0,0.0,22
3,4,0.0,8.0,0,0,3.9,1,0,0,0,0.0,22
4,5,6.2,8.0,90,0,7.0,0,1,0,0,0.4,22


In [42]:
# Model input columns, selected from the features loaded for the latest gameweek
feature_columns = [
    'rolling_avg_points', 'opponent_difficulty', 'minutes', 'is_home', 'price',
    'pos_GK', 'pos_DEF', 'pos_MID', 'pos_FWD',
    'clean_sheets_rolling_avg',
]

X_features = features_df.loc[:, feature_columns]

n_features = len(feature_columns)
print(f"Features used for prediction: {n_features}")
print(f"Feature names: {feature_columns}")

# Score every feature row with the loaded model and store the result as a
# new 'predicted_points' column on features_df
predictions = rf_model.predict(X_features)
features_df['predicted_points'] = predictions

# Quick sanity summary of the predicted values
n_predictions = len(predictions)
mean_points = predictions.mean()
max_points = predictions.max()
min_points = predictions.min()

print(f"\nPredictions generated for {n_predictions} players")
print(f"\nPrediction statistics:")
print(f"Mean: {mean_points:.2f} points")
print(f"Max: {max_points:.2f} points")
print(f"Min: {min_points:.2f} points")

Features used for prediction: 10
Feature names: ['rolling_avg_points', 'opponent_difficulty', 'minutes', 'is_home', 'price', 'pos_GK', 'pos_DEF', 'pos_MID', 'pos_FWD', 'clean_sheets_rolling_avg']

Predictions generated for 799 players

Prediction statistics:
Mean: 1.17 points
Max: 6.33 points
Min: 0.00 points


In [48]:
# Attach each player's predicted points to their basic info; the inner join
# keeps only players that appear in both tables
predicted_points_by_player = features_df[['player_id', 'predicted_points']]
optimization_df = pd.merge(
    players_df,
    predicted_points_by_player,
    on='player_id',
    how='inner',
)

# Show the ten highest-scoring predictions
summary_columns = ['name', 'team', 'position', 'price', 'predicted_points']
top_predicted = optimization_df.nlargest(10, 'predicted_points')
print(f"\nTop 10 predicted players")
print(top_predicted[summary_columns])


Top 10 predicted players
          name       team position  price  predicted_points
381      Wirtz  Liverpool      MID    8.3          6.329361
4      Gabriel    Arsenal      DEF    7.0          5.578194
20        Rice    Arsenal      MID    7.5          5.467835
235       Neto    Chelsea      MID    7.1          5.379315
487   Bruno G.  Newcastle      MID    7.1          5.318770
225   Chalobah    Chelsea      DEF    5.7          5.287850
236       Enzo    Chelsea      MID    6.8          5.273405
223  Cucurella    Chelsea      DEF    6.0          5.266258
224      James    Chelsea      DEF    5.7          5.265240
234     Palmer    Chelsea      MID   10.4          5.205930


In [None]:
# Optimization problem
# Defines the inputs and limits used for squad selection: a position code
# column, per-position squad counts, the budget and the per-team cap.
# position mapping fpl uses 1,2,3,4 to define player positions
position_map = {'GK': 1, 'DEF': 2, 'MID': 3, 'FWD': 4}
# NOTE(review): despite its name, 'position_name' holds the numeric code from
# position_map (1-4), not the position label; any position value missing from
# the map becomes NaN.
optimization_df['position_name'] = optimization_df['position'].map(position_map)

# position requirements in fpl
# must have 2 goalkeepers, 5 defenders, 5 midfielders, 3 forwards
position_requirements = {
    'GK': 2,
    'DEF': 5,
    'MID': 5,
    'FWD': 3
}

# budget constraint of £100 million, in tenths of million
# NOTE(review): the players table sample shows prices in £ millions (e.g. 5.9),
# not tenths of a million — confirm the budget constraint scales price
# accordingly (e.g. price * 10), otherwise this limit can never bind.
budget = 1000 

# max of 3 players from a single team
max_players_per_team = 3