## Fantasy Premier League Player Optimization



#### Import all the necessary packages

In [1]:
import pulp
import pandas as pd
import numpy as np

#### Read the cleaned version of the data

In [2]:
# Load the pre-processed player table; later cells read its `name`, `cost`,
# `points`, `position_*` columns and treat the remaining columns as teams.
df = pd.read_csv(filepath_or_buffer='data/players_cleaned.csv')

#### Splitting our data

In [3]:
# Columns that describe the player itself; every other column of `df` is
# treated as a team indicator column.
non_team_columns = ['name', 'cost', 'points', 'position_DEF', 'position_GKP', 'position_FWD', 'position_MID']

player_names = df['name']
player_cost = df['cost']
player_points = df['points']
player_pos = df[['position_DEF', 'position_GKP', 'position_FWD', 'position_MID']]
teams = set(df.columns) - set(non_team_columns)
# Pick the team columns in their CSV order. Iterating the set directly yields
# an order that can differ between kernel restarts (string hashing), which
# would reorder the team constraints from run to run.
player_team = df[[column for column in df.columns if column in teams]]

#### Check that we have correctly split the positions and teams

In [4]:
# The split is expected to leave exactly 20 team columns and 4 position columns.
assert player_team.shape[1] == 20
assert player_pos.shape[1] == 4

#### Add the objective function 
##### Objective function: $\max \; p * x$
##### Where $p$ is the points vector and $x$ is the binary vector representing the choice of each player

In [5]:
def AddObjectiveFn(problem, players):
    """Set the objective of ``problem`` to the total points of the chosen players.

    Args:
        problem: PuLP problem that receives the objective.
        players: sequence of decision variables; ``players[i]`` is weighted by
            ``player_points[i]``.
    """
    total_points = pulp.lpSum(
        players[idx] * player_points[idx] for idx in range(len(players))
    )
    problem += total_points, 'Objective'

#### Add the position constraints
##### Position constraint: $position * x = val$
##### For all ($position$, $val$) in the *number_of* dictionary below

In [6]:
def AddPositionConstraint(problem, players, number_of=None):
    """Require an exact number of selected players for every position.

    One equality constraint named ``'Constraint_' + <column>`` is added for
    each column of ``player_pos``.

    Args:
        problem: PuLP problem that receives the constraints.
        players: sequence of decision variables; ``players[i]`` is weighted by
            row ``i`` of each ``player_pos`` column.
        number_of: optional mapping from ``player_pos`` column name to the
            required count. Defaults to 2 GKP, 3 FWD, 5 MID and 5 DEF.

    Raises:
        KeyError: if ``number_of`` has no entry for a ``player_pos`` column.
    """
    if number_of is None:
        number_of = {'position_GKP': 2, 'position_FWD': 3, 'position_MID': 5, 'position_DEF': 5}

    for pos in player_pos.columns:
        # Look the column up once instead of once per player.
        plays_here = player_pos[pos]
        # lpSum always yields a PuLP expression, so the comparison below is a
        # constraint object even when `players` is empty (a plain `0 == k`
        # would be a Python bool).
        selected = pulp.lpSum(players[i] * plays_here[i] for i in range(len(players)))
        problem += selected == number_of[pos], 'Constraint_' + pos

#### Add the team constraint
##### Team constraint: $team * x \leq 3$
##### For all one-hot-encoded teams

In [7]:
def AddTeamConstraint(problem, players, max_per_team=3):
    """Cap the number of selected players coming from any single team.

    One ``<=`` constraint named ``'Constraint_' + <column>`` is added for each
    column of ``player_team``.

    Args:
        problem: PuLP problem that receives the constraints.
        players: sequence of decision variables; ``players[i]`` is weighted by
            row ``i`` of each ``player_team`` column.
        max_per_team: largest number of players allowed from one team
            (defaults to 3).
    """
    for team in player_team.columns:
        # Look the column up once instead of once per player.
        plays_for_team = player_team[team]
        # lpSum keeps the left-hand side a PuLP expression even when `players`
        # is empty, so the comparison is always a constraint object.
        selected = pulp.lpSum(players[i] * plays_for_team[i] for i in range(len(players)))
        problem += selected <= max_per_team, 'Constraint_' + team

#### Add the cost constraint
##### Cost constraint: $c * x \leq 1000$
##### Where $c$ is the vector representing the cost of each player

In [8]:
def AddCostConstraint(problem, players, budget=1000):
    """Limit the total cost of the selected players.

    Adds a single ``<=`` constraint named ``'Constraint_cost'``.

    Args:
        problem: PuLP problem that receives the constraint.
        players: sequence of decision variables; ``players[i]`` is weighted by
            ``player_cost[i]``.
        budget: maximum total cost, expressed in the same units as the
            ``cost`` column (defaults to 1000).
    """
    # lpSum keeps the left-hand side a PuLP expression even when `players` is
    # empty, so the comparison is always a constraint object.
    total_cost = pulp.lpSum(players[i] * player_cost[i] for i in range(len(players)))
    problem += total_cost <= budget, 'Constraint_cost'

In [9]:
def CreateProblem(problem, players):
    """Populate ``problem`` with the objective and all squad constraints.

    Args:
        problem: PuLP problem to fill in.
        players: sequence of decision variables, one per player.
    """
    build_steps = (
        AddObjectiveFn,
        AddPositionConstraint,
        AddTeamConstraint,
        AddCostConstraint,
    )
    # Objective first, then the position, team and cost constraints.
    for add_part in build_steps:
        add_part(problem, players)

### Solving the problem
We will solve the same problem with each solver listed below and compare the results to check that they all select the same squad.

In [10]:
# Solvers to run. Only PuLP's CBC command-line solver is enabled; to compare
# against another solver, add it to the list, e.g.:
#   pulp.GLPK_CMD(), pulp.CPLEX_CMD(), pulp.GUROBI_CMD(), pulp.SCIP_CMD()
algorithms = [
    pulp.PULP_CBC_CMD(),
]

#### Create the vector of binary variables to be optimized

In [11]:
# Build a separate list of binary decision variables for every solver, so each
# problem is solved with its own variables.
# NOTE(review): variables are named after the players, so this presumably
# relies on player names being unique - confirm against the data.
binary_vectors = []
for _solver in algorithms:
    player_binary_choice = []
    for name in player_names:
        player_binary_choice.append(pulp.LpVariable(name, cat='Binary'))
    assert len(player_binary_choice) == len(player_names)
    binary_vectors.append(player_binary_choice)

#### Create the maximization problem using the PuLP library

In [12]:
# One empty maximisation problem per solver; objective and constraints are
# attached later by CreateProblem.
problems = []
for _solver in algorithms:
    FPL_problem = pulp.LpProblem(name="Maximize_Fantasy_Points", sense=pulp.LpMaximize)
    problems += [FPL_problem]

In [13]:
# Load the un-encoded player table; its `team` and `position` columns are used
# to label the selected squad when results are printed.
original_df = pd.read_csv(filepath_or_buffer='data/players.csv')

In [14]:
# Build and solve the problem once per solver, then print the selected squad.
for i in range(len(algorithms)):
    CreateProblem(problems[i], binary_vectors[i])
    status = problems[i].solve(algorithms[i])
    if status != pulp.LpStatusOptimal:
        # Without an optimal status the variable values below are not a proven
        # optimum, so make that visible next to the printed squad.
        print('WARNING: solver finished with status', pulp.LpStatus[status])

    data = {'position': [], 'player': [], 'points': [], 'cost': [], 'team': []}

    # Iterate over this solver's own variables instead of a loop variable
    # leaked from the cell that created them.
    for j, choice in enumerate(binary_vectors[i]):
        chosen = pulp.value(choice)
        # Solvers report floats: treat anything above 0.5 as "selected" so a
        # value such as 0.9999999 is not silently dropped, and skip variables
        # that were never assigned a value.
        if chosen is not None and chosen > 0.5:
            data['player'].append(player_names[j])
            data['cost'].append(player_cost[j])
            data['points'].append(player_points[j])
            # NOTE(review): assumes rows of data/players.csv are in the same
            # order as data/players_cleaned.csv - confirm.
            data['team'].append(original_df['team'][j])
            data['position'].append(original_df['position'][j])

    # Use a dedicated name so the input frame `df` is not overwritten and the
    # earlier cells stay re-runnable.
    squad_df = pd.DataFrame(data)
    squad_df['position'] = pd.Categorical(squad_df['position'], categories=['GKP', 'DEF', 'MID', 'FWD'], ordered=True)
    squad_df = squad_df.sort_values(by='position')
    print(algorithms[i])
    print(squad_df)

<pulp.apis.coin_api.PULP_CBC_CMD object at 0x0000018F35CB8100>
   position                     player  points  cost            team
5       GKP               Mark Flekken     119    45       Brentford
10      GKP            Jordan Pickford     153    50         Everton
1       DEF             William Saliba     164    60         Arsenal
2       DEF             Benjamin White     182    65         Arsenal
8       DEF           Joachim Andersen     121    45  Crystal Palace
9       DEF         Jarrad Branthwaite     124    50         Everton
14      DEF                Pedro Porro     136    55           Spurs
0       MID                Declan Rice     165    65         Arsenal
7       MID                Cole Palmer     244   105         Chelsea
11      MID                 Phil Foden     230    95        Man City
12      MID  Rodrigo 'Rodri' Hernandez     159    65        Man City
13      MID             Anthony Gordon     183    75       Newcastle
3       FWD              Ollie Watkins  