# Squad Optimization

### Load packages

In [1]:
library(dplyr)
library(ROI)
library(ROI.plugin.glpk)
library(ompr)
library(ompr.roi)
library(Matrix)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

ROI: R Optimization Infrastructure
Registered solver plugins: nlminb, cplex, glpk, lpsolve.
Default solver: auto.


### Load the dataset

In [2]:
# Read gw20.csv (path relative to the working directory) into a data frame
df <- read.csv(file = "gw20.csv")

In [3]:
# Number of rows and number of columns in the dataset
c(nrow(df), ncol(df))

In [17]:
# List the name of every column in the dataset
colnames(df)

In [5]:
# Preview the first six rows of the dataset
head(df, n = 6L)

name,club,pos,status,news,chance_of_playing_this_round,chance_of_playing_next_round,now_cost,total_points,event_points,⋯,saves,bonus,bps,influence,creativity,threat,ict_index,ea_index,ep_this,ep_next
David Ospina,Arsenal,Goalkeeper,active,,100,100,4.7,0,0,,0,0,0,0.0,0.0,0,0.0,56,0.5,1.5
Petr Cech,Arsenal,Goalkeeper,active,,100,100,5.4,65,2,,54,0,334,377.0,0.1,0,37.9,409,4.8,5.8
Laurent Koscielny,Arsenal,Defender,active,,100,100,6.3,78,1,,0,11,447,507.0,13.2,117,64.0,493,4.8,5.8
Per Mertesacker,Arsenal,Defender,major injury,Knee injury - Unknown return date,0,0,4.8,0,0,,0,0,0,0.0,0.0,0,0.0,265,0.0,0.0
Gabriel Armando de Abreu,Arsenal,Defender,active,,100,100,4.8,19,1,,0,0,124,122.0,15.1,26,16.3,244,3.8,4.8
Héctor Bellerín,Arsenal,Defender,major injury,Ankle injury - Expected back 21 Jan,0,0,6.5,70,0,,0,10,324,240.2,263.4,219,72.0,541,4.5,0.0


### Data Transformation

In [7]:
# One 0/1 indicator column per playing position, derived from `pos`.
# Comparing against the position label gives a logical vector; as.numeric()
# turns TRUE/FALSE into 1/0 (and keeps NA as NA).
df$Goalkeeper <- as.numeric(df$pos == "Goalkeeper")
df$Defender <- as.numeric(df$pos == "Defender")
df$Midfielder <- as.numeric(df$pos == "Midfielder")
df$Forward <- as.numeric(df$pos == "Forward")

In [8]:
# Build a sparse 0/1 indicator matrix for `club`.
# fac2sparse() yields one row per club level and one column per player, so the
# result is transposed to get players in rows and clubs in columns.
team_constraint <- df$club %>%
  fac2sparse() %>%
  t()

# Mathematical Modelling

### Create the optimization model

In [9]:
# Start from an empty mixed-integer programming model
model <- ompr::MIPModel()

### Iteration Indices

In [6]:
# Number of rows in df; upper bound of the index i used in the model
n <- nrow(df)

### Define Decision Variables

In [10]:
# x[i] is a binary decision variable for row i of df (i = 1..n)
model <- add_variable(model, x[i], i = 1:n, type = "binary")

### Define the Objective Function

In [11]:
# Objective: maximise the summed total_points of the selected rows
model <- set_objective(
  model,
  sum_expr(df$total_points[i] * x[i], i = 1:n),
  sense = "max"
)

Other possible objective metrics

    Metric                   Column Name
    ------------------------------------
    Total Points             total_points
    Points per Game          points_per_game
    Points per Game (last 3) points_per_game_last3
    Form                     form
    Value                    value
    Bonus Points             bonus             
    Dream Team Count         dreamteam_count

You can also form hybrid metrics based on columns present in this dataframe.

### Create the constraints

In [12]:
# Upper limit on the summed now_cost of the selected players
max_cost <- 105.8

# Required number of selected players per position
num_gk <- 2
num_def <- 5
num_mid <- 5
num_fwd <- 3

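Other possible formations (GK-DEF-MID-FWD). Note that these describe an 11-player starting line-up, whereas the values above select a full 15-player squad.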

    1-5-2-3
    1-5-3-2
    1-3-5-2
    1-4-5-1
    1-5-4-1
    1-4-3-3
    1-3-4-3
    1-4-4-2
    1-2-5-3 (All-Out-Attack chip)

In [13]:
# Constraint 1: Formation
# The number of selected players in each position must equal the requested
# count. The position columns are 0/1 indicators, so each sum counts the
# selected players of that position.
model <- model %>%
  add_constraint(sum_expr(df$Goalkeeper[i] * x[i], i = 1:n) == num_gk) %>%
  add_constraint(sum_expr(df$Defender[i] * x[i], i = 1:n) == num_def) %>%
  add_constraint(sum_expr(df$Midfielder[i] * x[i], i = 1:n) == num_mid) %>%
  add_constraint(sum_expr(df$Forward[i] * x[i], i = 1:n) == num_fwd)

# Constraint 2: Total Squad Value
# The summed cost of the selected players may not exceed max_cost.
model <- model %>%
  add_constraint(sum_expr(df$now_cost[i] * x[i], i = 1:n) <= max_cost)

# Constraint 3: Max 3 players from a club
# team_constraint has one column per club, so the number of clubs is read from
# the matrix instead of being hard-coded. The quantifier `j` adds one
# constraint per club in a single call.
max_per_club <- 3
n_clubs <- ncol(team_constraint)
model <- model %>%
  add_constraint(
    sum_expr(team_constraint[i, j] * x[i], i = 1:n) <= max_per_club,
    j = seq_len(n_clubs)
  )

The above step takes a couple of minutes to execute. Please be patient.

### Solve the model

In [14]:
# Solve with GLPK. The raw solver result is kept in its own variable so that
# its status can be inspected before the solution is extracted.
solve_result <- solve_model(model, with_ROI(solver = "glpk"))

# Fail loudly when the solver did not return a usable solution, instead of
# carrying an empty or meaningless selection into the cells below.
solve_status <- solver_status(solve_result)
if (!solve_status %in% c("optimal", "success")) {
  stop(
    "Solver did not return an optimal solution (status: ", solve_status, ")",
    call. = FALSE
  )
}

# `model` is deliberately overwritten with the solution data frame because the
# following cells read the selected row indices from model$i.
# A 0.5 threshold is used for the binary variables so that tiny numerical
# round-off in the solver output cannot select or drop a player.
model <- solve_result %>%
  get_solution(x[i]) %>%
  filter(value > 0.5)

### View the solution

In [16]:
# Show the selected rows of df, ordered by position
arrange(df[model$i, ], pos)

name,club,pos,status,news,chance_of_playing_this_round,chance_of_playing_next_round,now_cost,total_points,event_points,⋯,creativity,threat,ict_index,ea_index,ep_this,ep_next,Goalkeeper,Defender,Midfielder,Forward
Charlie Daniels,Bournemouth,Defender,active,,100,100,5.2,88,11,,315.6,202,95.9,491,5.3,6.8,0,1,0,0
Kyle Walker,Spurs,Defender,active,,100,100,6.1,86,6,,389.8,138,95.0,461,2.8,3.3,0,1,0,0
Gareth McAuley,West Brom,Defender,active,,100,100,4.9,72,8,,26.8,228,77.9,339,4.3,2.3,0,1,0,0
César Azpilicueta,Chelsea,Defender,active,,100,100,6.6,97,1,,204.5,63,68.5,470,5.0,6.5,0,1,0,0
Gary Cahill,Chelsea,Defender,active,,100,100,6.4,96,0,,41.4,194,64.8,283,5.5,7.0,0,1,0,0
Romelu Lukaku,Everton,Forward,active,,100,100,9.5,110,11,,394.9,787,175.4,754,5.8,4.8,0,0,0,1
Zlatan Ibrahimovic,Man Utd,Forward,minor injury,Illness - 75% chance of playing,75,75,11.6,123,8,,548.8,1249,239.1,0,11.0,7.1,0,0,0,1
Jermain Defoe,Sunderland,Forward,active,,100,100,7.8,113,13,,160.4,749,134.4,595,5.3,6.3,0,0,0,1
Tom Heaton,Burnley,Goalkeeper,active,,100,100,4.9,80,1,,10.1,0,59.0,0,2.3,3.3,1,0,0,0
Thibaut Courtois,Chelsea,Goalkeeper,active,,100,100,5.9,85,1,,0.0,0,26.5,230,3.8,5.3,1,0,0,0


### Optimized Squad Value

In [18]:
# Total now_cost of the selected players
sum(df$now_cost[model$i])