In [1]:
# Before running the notebook, install the libraries used for data analysis and optimization.
# To install them, run the commands below one at a time:

# pip install pandas
# pip install numpy 
# pip install pulp

In [2]:
# import libraries
import pandas as pd

pd.options.mode.chained_assignment = None
import numpy as np
from pulp import *

In [3]:
# Load the two input CSV files: the candidate lineups and the player pool.
df_lineup = pd.read_csv('lineups_fd_nfl_classic_11-21-2021_100pm.csv')

# Player pool clean-up, applied in this order:
#   1. a missing 'Max Usage' is filled with 1,
#   2. players whose 'Max Usage' is 0 are removed from the pool,
#   3. 'Max Usage' is multiplied by 1000 to increase its sensitivity in the
#      optimization problem.
df_players = (
    pd.read_csv('NFL_2021-11-21_FD_11_21-100PM-12-games-Main.csv')
    .assign(**{'Max Usage': lambda players: players['Max Usage'].fillna(1)})
    .loc[lambda players: players['Max Usage'] != 0]
    .assign(**{'Max Usage': lambda players: players['Max Usage'] * 1000})
)

In [4]:
# Enter the number of lineups you want to generate.
# The optimizer is constrained to select exactly this many lineups, and the
# usage scaling in later cells divides by this value.
N_LINEUPS_TO_SELECT = 5

In [5]:
# prepare data for optimization

# Index the player pool by DFS ID so players can be looked up with the IDs
# stored in the lineups. Passing the Series (rather than the column name) keeps
# 'DFS ID' available as a regular column as well; later cells use it both ways.
df_players = df_players.set_index(df_players['DFS ID'])
# Row number of each player, appended as the last column.
df_players['index'] = np.arange(df_players.shape[0])

# Fail early, with a readable message, when a lineup references a player that
# is not in the (filtered) player pool. The projection lookup below would raise
# a KeyError for such IDs anyway, just a far less helpful one.
unknown_ids = set(np.ravel(df_lineup.to_numpy())) - set(df_players.index)
if unknown_ids:
    raise KeyError(
        'Lineups reference players missing from the player pool: '
        + ', '.join(sorted(map(str, unknown_ids))[:10])
    )

# Projected score of every slot of every lineup (same shape as df_lineup),
# plus a 'Total' column holding the projected score of the whole lineup.
projection = df_players['My Proj']
lineup_score = pd.DataFrame(
    {slot: projection.loc[df_lineup[slot]].to_numpy() for slot in df_lineup.columns},
    index=df_lineup.index,
).astype(float)
lineup_score['Total'] = lineup_score.sum(axis=1)

# Lineup totals keyed by row position (0 .. nlineup-1): the decision variables
# created later are indexed by range(nlineup), so the keys must be positional
# rather than whatever labels the frame's index happens to carry.
score_dict = dict(enumerate(lineup_score['Total']))
maxUsage_dict = dict(zip(df_players['DFS ID'], df_players['Max Usage']))
nlineup = df_lineup.shape[0]

In [6]:
# change pandas dataframes to matrices(for faster operation)
# Later cells address `pp` by column position: column 0 is compared against the
# lineup entries, column 3 is added to the objective and column 4 is used as
# row numbers.
# NOTE(review): this maps to 'DFS ID', 'Max Usage' and 'index' only if the
# players CSV has exactly the columns DFS ID, Name, My Proj, Max Usage in that
# order - confirm against the input file.
pp = df_players.to_numpy()
ll = df_lineup.to_numpy()

In [7]:
# create matrix that checks if a player exists in a line
# lst[j, i] is 1 when the player in row i of `pp` appears in lineup row j of
# `ll`, and 0 otherwise.
#
# Rows of `pp` are addressed by their position directly instead of reading row
# numbers back from a hard-coded column, and each lineup row is filled with one
# vectorised membership test rather than one Python-level check per
# (player, lineup) pair.
player_ids = pp[:, 0]
lst = np.zeros((ll.shape[0], pp.shape[0]), dtype=int)
for j, lineup in enumerate(ll):
    lst[j] = np.isin(player_ids, lineup)

In [8]:
# Individual player usage, calculated before it is plugged into the optimizer:
# every appearance of a player in a lineup is worth
# 100000 / (slots per lineup * N_LINEUPS_TO_SELECT).
#
# The 0/1 membership is re-derived from the non-zero entries first, so that
# re-running this cell (for example after changing N_LINEUPS_TO_SELECT) rescales
# from scratch instead of compounding the previous scaling.
membership = (lst != 0).astype(int)
lst = membership * 100000 * (1 / (ll.shape[1] * N_LINEUPS_TO_SELECT))


In [9]:
# Container for the optimization model (objective and constraints are added in
# the following cells); the objective is to be maximized.
prob = LpProblem(name='Maximization_of_score_of_players', sense=LpMaximize)

In [10]:
# Decision variables: one integer variable bounded to [0, 1] per candidate
# lineup, keyed by the lineup's row position.
# Note: the name `vars` shadows the Python builtin; it is kept because the
# following cells refer to it.
n_element = [*range(nlineup)]
vars = LpVariable.dicts('Line', n_element, lowBound=0, upBound=1, cat=LpInteger)

In [11]:
# First component of objective function (limit max usage):
#   sum over players of (Max Usage of the player - scaled usage of the player
#   in the selected lineups)
# The expression is built with lpSum from a numeric constant instead of being
# seeded with an empty string, which only worked because PuLP happens to treat
# '' as an empty iterable.
# NOTE(review): summed over all players, the usage part contributes the same
# amount for every lineup that has the same number of pool players, so with a
# fixed number of selected lineups this component appears to be a constant and
# not to limit any individual player's usage. Confirm whether per-player
# constraints (usage <= Max Usage) were intended instead.
usage_terms = [
    float(lst[i, j]) * vars[i]
    for j in range(lst.shape[1])
    for i in range(lst.shape[0])
    if lst[i, j] != 0  # zero coefficients add nothing to the expression
]
max_usage_total = float(sum(pp[j, 3] for j in range(lst.shape[1])))
prob1 = max_usage_total - lpSum(usage_terms)

# Second component of objective function (maximize score of lineup):
# projected total score of every selected lineup.
prob2 = lpSum(score_dict[i] * vars[i] for i in n_element)

# final objective function compiled
prob += (prob1 + prob2, "Total score of lineups")


In [12]:
# Constraint: the number of selected lineups must equal N_LINEUPS_TO_SELECT.
lineup_count = lpSum(vars[i] for i in n_element)
prob += (lineup_count == N_LINEUPS_TO_SELECT, "No of lineup")

In [13]:
# The problem is solved using PuLP's choice of Solver
prob.solve()

# Stop here when no optimal solution was found (e.g. more lineups requested
# than are available): variable values would then be meaningless.
if LpStatus[prob.status] != 'Optimal':
    raise RuntimeError(
        f"Lineup selection did not reach an optimal solution (status: {LpStatus[prob.status]})"
    )

# prepare the solution for final output
# sol_ind collects the row positions of the selected lineups, parsed from the
# variable names ('Line_<position>').
sol_ind = []
for v in prob.variables():
    # Solvers report values as floats, so a selected lineup can come back as
    # e.g. 0.9999999; varValue is None for a variable the solver did not set.
    if v.varValue is not None and round(v.varValue) == 1:
        sol_ind.append(int(v.name.rsplit('_', 1)[-1]))

In [14]:
# Display the lineups chosen by the optimizer, renumbered from 0.
selected_lineups = df_lineup.iloc[sol_ind]
selected_lineups.reset_index(drop=True)

Unnamed: 0,QB,RB,RB.1,WR,WR.1,WR.2,TE,FLEX,DEF
0,67063-57439,67063-93107,67063-60930,67063-85701,67063-57986,67063-53681,67063-33076,67063-55552,67063-12550
1,67063-57439,67063-93107,67063-55552,67063-45889,67063-79979,67063-53681,67063-33076,67063-52897,67063-12550
2,67063-57439,67063-14211,67063-55552,67063-85845,67063-72846,67063-53681,67063-33076,67063-93107,67063-12553
3,67063-57439,67063-93539,67063-93107,67063-45889,67063-53681,67063-69956,67063-33076,67063-55552,67063-12552
4,67063-57439,67063-55552,67063-93107,67063-85701,67063-53681,67063-69956,67063-25011,67063-33076,67063-12545


In [15]:
# selected players summary
# `sol` holds the selected lineups (one row per lineup), `sol1` the distinct
# player ids appearing in them.
sol = df_lineup.iloc[sol_ind].to_numpy()
sol1 = np.unique(sol)

# Usage of a player = number of selected lineups containing the player, times
# the share of a single slot: 1 / (slots per lineup * N_LINEUPS_TO_SELECT).
slot_share = 1 / (sol.shape[1] * N_LINEUPS_TO_SELECT)
maxU = {
    player_id: (sol == player_id).any(axis=1).sum() * slot_share
    for player_id in sol1
}

# Work on an explicit copy so the player pool itself is never modified.
selected_players = df_players[df_players['DFS ID'].isin(sol1)].copy()
selected_players['Usage (selected lineup)'] = selected_players['DFS ID'].map(maxU)

selected_players = selected_players.drop(columns='index')
# 'Max Usage' was multiplied by 1000 when the data was loaded; dividing by
# 100000 turns it into a fraction for percentage formatting.
# NOTE(review): this assumes the CSV stores 'Max Usage' as a percentage figure
# (e.g. 15.9 for 15.9%) - confirm against the input file.
selected_players['Max Usage'] = selected_players['Max Usage'] / 100000
# Format column by column with Series.map; DataFrame.applymap is deprecated in
# recent pandas releases.
for column in ['Max Usage', 'Usage (selected lineup)']:
    selected_players[column] = selected_players[column].map("{0:.2%}".format)
selected_players.reset_index(drop=True)

Unnamed: 0,DFS ID,Name,My Proj,Max Usage,Usage (selected lineup)
0,67063-57439,Patrick Mahomes,25.75,15.90%,11.11%
1,67063-93107,AJ Dillon,19.52,25.40%,11.11%
2,67063-53681,Tyreek Hill,18.67,23.70%,11.11%
3,67063-45889,Davante Adams,17.91,20.40%,4.44%
4,67063-55552,Jeff Wilson Jr.,17.34,27.70%,11.11%
5,67063-85701,Ja'Marr Chase,16.61,21.30%,4.44%
6,67063-72846,A.J. Brown,15.68,19.00%,2.22%
7,67063-33076,Travis Kelce,14.95,15.70%,11.11%
8,67063-14211,Mark Ingram II,14.45,13.90%,2.22%
9,67063-60930,Myles Gaskin,14.08,14.40%,2.22%
