In [1]:
import warnings

import numpy as np
import pandas as pd
import pulp

In [2]:
# Load the FanDuel player list, round projections to two decimals and keep
# only the columns the optimiser needs.
fd = (
    pd.read_csv('FanDuel-NBA-2023 ET-01 ET-06 ET-85655-players-list.csv')
    .assign(FPPG=lambda raw: raw['FPPG'].round(2))
    .loc[:, ['Nickname', 'Position', 'FPPG', 'Salary', 'Game', 'Team', 'Injury Indicator']]
)

fd.head()

Unnamed: 0,Nickname,Position,FPPG,Salary,Game,Team,Injury Indicator
0,Giannis Antetokounmpo,PF/SF,56.47,12000,CHA@MIL,MIL,GTD
1,Joel Embiid,C,56.86,11500,CHI@PHI,PHI,O
2,Nikola Jokic,C,54.97,11300,CLE@DEN,DEN,
3,LeBron James,SF/PF,50.48,11200,ATL@LAL,LAL,GTD
4,Anthony Davis,PF/C,54.3,10900,ATL@LAL,LAL,O


## First Attempt
Add a 0/1 flag column for every position a player is eligible for.

In [3]:
POSITIONS = ['PG', 'SG', 'SF', 'PF', 'C']

# One 0/1 column per position: 1 when the position code appears in the
# player's 'Position' string (so 'SG/PG' flags both SG and PG).
fd = fd.assign(**{
    position: fd['Position'].str.contains(position).astype(int)
    for position in POSITIONS
})

In [4]:
def defineProblem(df):
    """Build the first-attempt roster LP with one binary variable per player.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'Nickname', 'Team', 'FPPG', 'Salary' and the 0/1
        position flags 'PG', 'SG', 'SF', 'PF', 'C'.

    Returns
    -------
    pulp.LpProblem
        Unsolved maximisation problem named 'Roster'.

    Notes
    -----
    A player flagged for several positions contributes to every one of those
    position constraints at once, which is why they are lower bounds here.
    """
    fppg = df['FPPG']
    limits = df[['PG', 'SG', 'SF', 'PF', 'C', 'Salary']]

    roster_lp = pulp.LpProblem('Roster', pulp.LpMaximize)

    # object array holding one 0/1 integer decision variable per row of df
    picks = np.zeros_like(df['Nickname'])
    for slot, nickname in enumerate(df['Nickname']):
        picks[slot] = pulp.LpVariable(
            nickname, lowBound=0, upBound=1, cat=pulp.LpInteger)

    # objective: total projected fantasy points of the chosen players
    roster_lp += pulp.lpSum(picks * fppg)

    # positional minimums
    minimums = {'PG': 2, 'SG': 2, 'SF': 2, 'PF': 2, 'C': 1}
    for position, minimum in minimums.items():
        roster_lp += (
            pulp.lpSum(picks * limits[position]) >= minimum,
            f'{position} Constraint',
        )

    # roster size and salary cap
    roster_lp += pulp.lpSum(picks) == 9, 'Number of Players'
    roster_lp += pulp.lpSum(picks * limits['Salary']) <= 60000, 'Salary Constraint'

    # maximum single team constraints
    for team in df['Team'].unique():
        on_team = df['Team'] == team
        roster_lp += pulp.lpSum(picks * on_team) <= 4, f'{team} Constraint'

    return roster_lp

In [5]:
def solveProblem(df):
    """Solve the first-attempt roster LP and return each player's variable value.

    Parameters
    ----------
    df : pandas.DataFrame
        Player frame accepted by ``defineProblem``.

    Returns
    -------
    pandas.Series
        Aligned with ``df``; the solved value of each player's variable
        (1 when the player is on the roster).

    Warns
    -----
    RuntimeWarning
        When the solver does not report an optimal solution, in which case
        the returned values should not be treated as a roster.
    """
    # define and solve problem
    problem = defineProblem(df)
    problem.solve()

    # Surface infeasible/unbounded/unsolved runs instead of returning their
    # variable values as if they were a roster.
    if problem.status != pulp.LpStatusOptimal:
        warnings.warn(
            f'Solver finished with status {pulp.LpStatus[problem.status]!r}; '
            'the returned values are not an optimal roster.',
            RuntimeWarning,
            stacklevel=2,
        )

    vars_dict = problem.variablesDict()

    # PuLP replaces each of the characters "-+[] ->/" with "_" when it names a
    # variable, so apply the same mapping to find a player's variable.
    to_pulp_name = str.maketrans('-+[] ->/', '________')

    # helper function to get values from FD name
    def getValue(x):
        return vars_dict[x.translate(to_pulp_name)].varValue

    return df['Nickname'].apply(getValue)

In [6]:
# Candidate pool for the first attempt: non-negative projection and no
# injury designation.
non_negative_fppg = fd['FPPG'].ge(0)
no_injury_flag = fd['Injury Indicator'].isna()
first = fd[non_negative_fppg & no_injury_flag]

first_solution = solveProblem(first)

In [7]:
first_roster = first.loc[first_solution.eq(1)]

# The row labels are listed by hand to force positional order in the output.
# The next solution gets this ordering as a side benefit.
roster_order = [87, 218, 69, 79, 8, 56, 82, 150, 11]
roster_columns = ['Nickname', 'Position', 'FPPG', 'Salary', 'Team', 'PG', 'SG', 'SF', 'PF', 'C']
first_roster.loc[roster_order, roster_columns]

Unnamed: 0,Nickname,Position,FPPG,Salary,Team,PG,SG,SF,PF,C
87,Kyle Lowry,PG,29.94,6000,MIA,1,0,0,0,0
218,Jordan Goodwin,PG,19.42,3900,WAS,1,0,0,0,0
69,Terry Rozier,SG/PG,33.3,6500,CHA,1,1,0,0,0
79,Anfernee Simons,SG/PG,32.96,6200,POR,1,1,0,0,0
8,Shai Gilgeous-Alexander,SG/PG,49.99,10000,OKC,1,1,0,0,0
56,OG Anunoby,SF/SG,35.37,6900,TOR,0,1,1,0,0
82,Mikal Bridges,SG/SF,30.27,6100,PHO,0,1,1,0,0
150,Royce O'Neale,SF/PF,24.41,4600,BKN,0,0,1,1,0
11,Pascal Siakam,C/PF,47.48,9600,TOR,0,0,0,1,1


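PuLP's optimal roster is an invalid FanDuel roster. Because a multi-position player counts toward every position he is flagged for, the `>=` constraints can be satisfied without actually filling each roster slot. Siakam and O'Neale are the only PF-eligible players chosen, but Siakam is also the only C-eligible player, so he must be used as the C. That leaves a single PF, and an extra guard was chosen instead.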

## Solution
Create one row per player per eligible position. This produces a one-hot layout where a player is chosen for both their score and a specific position. Constraints must be added to ensure each player appears at most once in a roster.

In [8]:
# Walk the FD list once and record each player's eligible positions as
# Nickname : (primary,) or Nickname : (primary, secondary)

player_dict = {}

for nickname, positions in zip(fd['Nickname'], fd['Position']):
    # split on the first '/' only; `slash` is empty when there is no separator
    primary, slash, secondary = positions.partition('/')

    if slash:
        # multi position players
        player_dict[nickname] = (primary, secondary)
    else:
        # single position players
        player_dict[nickname] = (primary,)

In [9]:
# create dataframe from ground up with LP Position and Name
cols = [
    'Nickname', 'Position', 'FPPG', 'Salary', 'Game', 'Team', 'Injury Indicator'
]

# {nickname: {column: value}} taken from the first fd row of each nickname.
# Building this lookup once avoids re-filtering fd for every position.
player_rows = (
    fd.drop_duplicates(subset='Nickname')
    .set_index('Nickname', drop=False)[cols]
    .to_dict('index')
)

# One record per (player, eligible position); the LP name is position, space,
# nickname. Collecting the records and constructing the frame once avoids
# growing it row by row and lets pandas infer numeric dtypes for FPPG and
# Salary instead of leaving every column as object.
records = [
    {**player_rows[player], 'LP Position': pos, 'LP Name': pos + ' ' + player}
    for player, positions in player_dict.items()
    for pos in positions
]

base = pd.DataFrame(records, columns=cols + ['LP Position', 'LP Name'])

# create one-hot position columns
for pos in POSITIONS:
    base[pos] = base['LP Position'].str.contains(pos).astype(int)

In [10]:
# preview the first five (player, position) rows
base.head(n=5)

Unnamed: 0,Nickname,Position,FPPG,Salary,Game,Team,Injury Indicator,LP Position,LP Name,PG,SG,SF,PF,C
0,Giannis Antetokounmpo,PF/SF,56.47,12000,CHA@MIL,MIL,GTD,PF,PF Giannis Antetokounmpo,0,0,0,1,0
1,Giannis Antetokounmpo,PF/SF,56.47,12000,CHA@MIL,MIL,GTD,SF,SF Giannis Antetokounmpo,0,0,1,0,0
2,Joel Embiid,C,56.86,11500,CHI@PHI,PHI,O,C,C Joel Embiid,0,0,0,0,1
3,Nikola Jokic,C,54.97,11300,CLE@DEN,DEN,,C,C Nikola Jokic,0,0,0,0,1
4,LeBron James,SF/PF,50.48,11200,ATL@LAL,LAL,GTD,SF,SF LeBron James,0,0,1,0,0


Modify defineProblem()

LP variables are now created from the `LP Name` column. Position constraints are straight equalities. An additional constraint is created for each player with multiple positions so that the player is chosen at most once.

In [11]:
def solved_defineProblem(df):
    """Build the roster LP over a one-row-per-(player, position) frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'LP Name', 'Nickname', 'Team', 'FPPG', 'Salary' and
        the 0/1 position columns 'PG', 'SG', 'SF', 'PF', 'C'.

    Returns
    -------
    pulp.LpProblem
        Unsolved maximisation problem named 'Roster' with one binary variable
        per row of ``df``.
    """
    points = df['FPPG']
    consts = df[['PG', 'SG', 'SF', 'PF', 'C', 'Salary']]

    # initialize problem
    problem = pulp.LpProblem('Roster', pulp.LpMaximize)

    # initialize player variables: one 0/1 integer variable per row
    players = np.zeros_like(df['LP Name'])

    for i, p in enumerate(df['LP Name']):
        players[i] = pulp.LpVariable(
            p, lowBound=0, upBound=1, cat=pulp.LpInteger)

    # objective function
    problem += pulp.lpSum(players * points)

    # positional constraints: every slot is filled exactly
    for pos, required in (('PG', 2), ('SG', 2), ('SF', 2), ('PF', 2), ('C', 1)):
        problem += pulp.lpSum(players * consts.loc[:, pos]) == required, f'{pos} Constraint'

    # roster size and salary cap
    problem += pulp.lpSum(players) == 9, 'Number of Players'
    problem += pulp.lpSum(players * consts.loc[:, 'Salary']) <= 60000, 'Salary Constraint'

    # maximum single team constraints
    for team in df['Team'].unique():
        problem += pulp.lpSum(players * (df['Team'] == team)) <= 4, f'{team} Constraint'

    # player uniqueness constraints
    # Count rows in df itself rather than in a notebook-level frame, so the
    # function depends only on its argument and adds no constraint for players
    # that are absent from df. Iterating the index keeps the order in which
    # the constraints are added deterministic.
    value_counts = df['Nickname'].value_counts()
    dupe_players = value_counts[value_counts > 1].index

    for player in dupe_players:
        problem += pulp.lpSum(players * (df['Nickname'] == player)) <= 1, f'{player} Constraint'

    return problem

Modify solveProblem()

All that is needed is to change Nickname to LP Name.

In [12]:
def solved_solveProblem(df):
    """Solve the per-position roster LP and return each row's variable value.

    Parameters
    ----------
    df : pandas.DataFrame
        One-row-per-(player, position) frame accepted by
        ``solved_defineProblem``.

    Returns
    -------
    pandas.Series
        Aligned with ``df``; the solved value of each row's variable
        (1 when that player is rostered at that position).

    Warns
    -----
    RuntimeWarning
        When the solver does not report an optimal solution, in which case
        the returned values should not be treated as a roster.
    """
    # define and solve problem
    problem = solved_defineProblem(df)
    problem.solve()

    # Surface infeasible/unbounded/unsolved runs instead of returning their
    # variable values as if they were a roster.
    if problem.status != pulp.LpStatusOptimal:
        warnings.warn(
            f'Solver finished with status {pulp.LpStatus[problem.status]!r}; '
            'the returned values are not an optimal roster.',
            RuntimeWarning,
            stacklevel=2,
        )

    vars_dict = problem.variablesDict()

    # PuLP replaces each of the characters "-+[] ->/" with "_" when it names a
    # variable, so apply the same mapping to find a row's variable.
    to_pulp_name = str.maketrans('-+[] ->/', '________')

    # helper function to get values from LP name
    def getValue(x):
        return vars_dict[x.translate(to_pulp_name)].varValue

    return df['LP Name'].apply(getValue)

In [13]:
# Candidate pool for the corrected model: positive projection and no injury
# designation.
positive_fppg = base['FPPG'] > 0
no_injury_flag = base['Injury Indicator'].isna()
solved = base[positive_fppg & no_injury_flag]

solved_solution = solved_solveProblem(solved)

In [14]:
# chosen rows; sorting the position flags in descending order lists PG first and C last
solved.loc[solved_solution == 1].sort_values(by=POSITIONS, ascending=False)

Unnamed: 0,Nickname,Position,FPPG,Salary,Game,Team,Injury Indicator,LP Position,LP Name,PG,SG,SF,PF,C
119,Terry Rozier,SG/PG,33.3,6500,CHA@MIL,CHA,,PG,PG Terry Rozier,1,0,0,0,0
145,Kyle Lowry,PG,29.94,6000,MIA@PHO,MIA,,PG,PG Kyle Lowry,1,0,0,0,0
14,Shai Gilgeous-Alexander,SG/PG,49.99,10000,WAS@OKC,OKC,,SG,SG Shai Gilgeous-Alexander,0,1,0,0,0
133,Anfernee Simons,SG/PG,32.96,6200,POR@IND,POR,,SG,SG Anfernee Simons,0,1,0,0,0
95,OG Anunoby,SF/SG,35.37,6900,NY@TOR,TOR,,SF,SF OG Anunoby,0,0,1,0,0
138,Mikal Bridges,SG/SF,30.27,6100,MIA@PHO,PHO,,SF,SF Mikal Bridges,0,0,1,0,0
20,Pascal Siakam,C/PF,47.48,9600,NY@TOR,TOR,,PF,PF Pascal Siakam,0,0,0,1,0
253,Royce O'Neale,SF/PF,24.41,4600,BKN@NO,BKN,,PF,PF Royce O'Neale,0,0,0,1,0
311,Nick Richards,C,18.95,4100,CHA@MIL,CHA,,C,C Nick Richards,0,0,0,0,1


# Solved!
Each position has exactly as many players as required. LP Position and LP Name show which position a multi-position player was chosen for. Additionally, ordering the players is now as simple as sorting on the one-hot position columns, since each row belongs to exactly one position.

In [15]:
# solution for code: same ordered roster, trimmed to the reporting columns
report_columns = ['Nickname', 'Position', 'FPPG', 'Salary', 'Team', 'LP Position', 'PG', 'SG', 'SF', 'PF', 'C']
solved.loc[solved_solution == 1].sort_values(by=POSITIONS, ascending=False)[report_columns]

Unnamed: 0,Nickname,Position,FPPG,Salary,Team,LP Position,PG,SG,SF,PF,C
119,Terry Rozier,SG/PG,33.3,6500,CHA,PG,1,0,0,0,0
145,Kyle Lowry,PG,29.94,6000,MIA,PG,1,0,0,0,0
14,Shai Gilgeous-Alexander,SG/PG,49.99,10000,OKC,SG,0,1,0,0,0
133,Anfernee Simons,SG/PG,32.96,6200,POR,SG,0,1,0,0,0
95,OG Anunoby,SF/SG,35.37,6900,TOR,SF,0,0,1,0,0
138,Mikal Bridges,SG/SF,30.27,6100,PHO,SF,0,0,1,0,0
20,Pascal Siakam,C/PF,47.48,9600,TOR,PF,0,0,0,1,0
253,Royce O'Neale,SF/PF,24.41,4600,BKN,PF,0,0,0,1,0
311,Nick Richards,C,18.95,4100,CHA,C,0,0,0,0,1
