Removed Fantasy Pros projections.

Added stud constraint. A stud is a player with FanDuel points per game (FPPG) >= 40. Each roster requires at least *3* studs.

Keep game-time-decision (GTD) players. In each simulation they have a 50% chance of not playing.

In [1]:
import pandas as pd
import numpy as np
import random, pulp, os
from datetime import datetime

In [2]:
players_file = 'FanDuel-NBA-2024 ET-02 ET-22 ET-99441-players-list'
full_players_file = 'Lib/Players Lists/NBA/' + players_file + '.csv'

# The file name is expected to look like
#   '<prefix>-<YYYY> ET-<MM> ET-<DD> ET-<game id>-players-list'
# Parse on the separators rather than on fixed character offsets, so a game ID
# that is not exactly five digits (or a longer prefix) is still read correctly.
ind = players_file.find('-players')
name_parts = players_file[:ind].split(' ET-') if ind >= 0 else []
if len(name_parts) != 4:
    raise ValueError(f'Unexpected players file name: {players_file!r}')

# extract game ID from player file name
game_id = name_parts[3]

# extract date from player file name (YYYY-MM-DD); the year is the last
# dash-separated token before the first ' ET-'
date = name_parts[0].rsplit('-', 1)[-1] + '-' + name_parts[1] + '-' + name_parts[2]

# NOTE(review): these projection paths are built but not read by any cell visible
# in this notebook - confirm before removing.
projections_file = date + ' NBA Fantasy Pros Projections ' + game_id
full_projections_file = 'Lib/Projections/NBA/' + projections_file + '.xlsx'

## Load FanDuel Players List

In [3]:
# columns carried forward from the raw FanDuel players list
keep_cols = ['Id', 'Nickname', 'Position', 'FPPG', 'Salary', 'Game', 'Team', 'Injury Indicator']

# read the export, round FPPG to two decimals and keep only the columns above
ply = pd.read_csv(full_players_file)
ply = ply.assign(FPPG = ply['FPPG'].round(2))[keep_cols]

ply.head(3)

Unnamed: 0,Id,Nickname,Position,FPPG,Salary,Game,Team,Injury Indicator
0,99441-84669,Luka Doncic,PG,60.54,12600,PHO@DAL,DAL,GTD
1,99441-55062,Nikola Jokic,C,56.81,12100,WAS@DEN,DEN,
2,99441-84680,Shai Gilgeous-Alexander,PG,54.43,11400,LAC@OKC,OKC,


## Merge and Filter

In [4]:
# work on a copy of the players list and flag "studs":
# Stud is 1 when FPPG is at least 40, otherwise 0
dat = ply.assign(Stud = ply['FPPG'].ge(40).mul(1))

print(dat.shape)
dat.head(3)

(342, 9)


Unnamed: 0,Id,Nickname,Position,FPPG,Salary,Game,Team,Injury Indicator,Stud
0,99441-84669,Luka Doncic,PG,60.54,12600,PHO@DAL,DAL,GTD,1
1,99441-55062,Nikola Jokic,C,56.81,12100,WAS@DEN,DEN,,1
2,99441-84680,Shai Gilgeous-Alexander,PG,54.43,11400,LAC@OKC,OKC,,1


In [5]:
# filter down to relevant players; each step reports how many rows it removes

# injuries: rows whose indicator is exactly 'O'
injury_mask = dat['Injury Indicator'] == 'O'
print(f'Dropping {injury_mask.sum()} players due to injuries.')
dat = dat.loc[~injury_mask]

# projected points: missing FPPG or FPPG below 10
proj_mask = dat['FPPG'].isna() | dat['FPPG'].lt(10)
print(f'Dropping {proj_mask.sum()} players due to low projections.')
dat = dat.loc[~proj_mask]

# salary mask: salary below 4500
sal_mask = dat['Salary'].lt(4500)
print(f'Dropping {sal_mask.sum()} players due to low salary.')
dat = dat.loc[~sal_mask]

dat.shape

Dropping 26 players due to injuries.
Dropping 108 players due to low projections.
Dropping 62 players due to low salary.


(146, 9)

## Download Actuals from Number Fire

In [6]:
# FanDuel nickname -> numberFire URL suffix overrides
url_mapping = pd.read_csv('Utils/NBA Helpers/NumberFire NBA URL Mappings.csv')

# left merge keeps every player; unmapped players get a missing Suffix
# (reuses the frame loaded above instead of reading the CSV a second time)
dat = dat.merge(
    url_mapping,
    how = 'left', left_on = 'Nickname', right_on = 'FanDuel Nickname'
)

# drop the mapping's own name columns and restore the original 'Nickname' label
dat = (
    dat
    .drop(columns = ['Nickname_y', 'FanDuel Nickname'])
    .rename(columns = {'Nickname_x' : 'Nickname'})
)

# default suffix for unmapped players: the nickname with spaces replaced by hyphens.
# Assign the result back: an in-place fillna on `dat['Suffix']` acts on a temporary
# object and does not update `dat` under pandas Copy-on-Write.
dat['Suffix'] = dat['Suffix'].fillna(dat['Nickname'].str.replace(' ', '-'))

In [7]:
# columns expected in the scraped tables; only used to build an empty frame
# when nothing could be downloaded
nf_columns = ['Date', 'OPP', 'MIN', 'PTS', 'FGM-A', '3PM-A', 'FTM-A', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'Salary', 'FP', 'Value']

# per-player frames are collected here and concatenated once after the loop
frames = []

start = datetime.now()
for nickname, suffix in zip(dat['Nickname'], dat['Suffix']):
    try:
        act = pd.read_html('https://www.numberfire.com/nba/players/daily-fantasy/' + suffix)

        # tables 2 and 3 of the page are joined side by side
        # (presumably the two halves of the player's game log - TODO confirm)
        temp = pd.concat([act[2], act[3]], axis = 1)
        temp['Nickname'] = nickname
        frames.append(temp)
    except Exception:
        # best effort: report the player that could not be scraped and move on.
        # `Exception` (not a bare except) lets Ctrl-C still interrupt the download.
        print(nickname, suffix)

nf = pd.concat(frames) if frames else pd.DataFrame(columns = nf_columns + ['Nickname'])

stop = datetime.now()
print(f'Total time: {stop - start}')
nf.shape

Mo Bamba Mo-Bamba
Total time: 0:02:44.394628


(6714, 17)

In [12]:
# get standard deviation and actuals from last N games
N = 3

# first N scraped rows per player
# (assumed to be the most recent games - TODO confirm the row order of the source tables)
recent_games = nf.groupby('Nickname').head(N)

# per-player game count, mean and std of fantasy points, and mean minutes
nf_summary = recent_games.groupby('Nickname').agg(
    FP_count = ('FP', 'count'),
    FP_mean = ('FP', 'mean'),
    FP_std = ('FP', 'std'),
    Minutes = ('MIN', 'mean'),
)

# compute standard error: std / sqrt(number of games)
nf_summary['FP_SE'] = nf_summary['FP_std'].div(np.sqrt(nf_summary['FP_count']))

# round all
nf_summary = nf_summary.round(2)

## Regression and Simulate

In [13]:
base = dat.merge(
    nf_summary[['FP_mean', 'FP_SE', 'Minutes']],
    how = 'left', left_on = 'Nickname', right_index = True
)

# drop players without actuals; likely due to URL mapping
na_mask = base['FP_SE'].isna()
print(f'Dropping {sum(na_mask)} players due to missing FP_SE')
print([name for name in base.loc[na_mask, 'Nickname']])
# explicit copy so the column assignments below never write to a slice of the merged frame
base = base[~na_mask].copy()

# add vars for regression: 0/1 flag for each position named in the player's Position string
for p in ['PG', 'SG', 'SF', 'PF', 'C']:
    base[p] = base['Position'].str.contains(p) * 1

x_vars = ['FPPG', 'Salary', 'FP_mean', 'FP_SE', 'Minutes', 'PG', 'SG', 'SF', 'PF', 'C']
# earlier set of weights, kept for reference:
# x_weights = [.2551, .00496, -.00775, .17621, .29893, 1.15761, 2.0473, 2.47937, -1.06842, 5.41944]
x_weights = [.9249, -.0052, 1.5633, 1.1063, -1.7343, 3.1635, -5.5410, 6.21822, -4.4943, 3.3408]

# fitted points = weighted sum of the regression inputs (no intercept term)
base['FP_fitted'] = (base[x_vars] * x_weights).sum(axis = 'columns').round(2)


# simulate games
# model: ~ N(FP_fitted, FP_SE)
N_sims = 20

# set to an int to make the simulated scores (and therefore the rosters) reproducible;
# None draws fresh entropy on every run
SIM_SEED = None
rng = np.random.default_rng(SIM_SEED)

# any non-missing injury indicator is treated as a game-time decision
has_injury_flag = base['Injury Indicator'].notna().to_numpy()

for i in range(N_sims):
    norm = np.round(
        # normally distributed, centered at the fitted points, with standard error estimated from actuals
        rng.normal(base['FP_fitted'], base['FP_SE']), 1
    )

    # 50/50 chance a flagged player plays; unflagged players always play
    injury_factor = np.where(
        has_injury_flag, rng.uniform(size = len(base)) < .5, True
    ).astype(int)

    # set sim value (0 when the player is simulated as not playing)
    base[f'sim{i}'] = norm * injury_factor

# the position flags were only needed as regression inputs
base = base.drop(columns = ['PG', 'SG', 'SF', 'PF', 'C'])
base.head(3)

Dropping 1 players due to missing FP_SE
['Mo Bamba']


Unnamed: 0,Id,Nickname,Position,FPPG,Salary,Game,Team,Injury Indicator,Stud,Suffix,...,sim10,sim11,sim12,sim13,sim14,sim15,sim16,sim17,sim18,sim19
0,99441-84669,Luka Doncic,PG,60.54,12600,PHO@DAL,DAL,GTD,1,Luka-Doncic,...,28.3,25.8,27.7,42.4,0.0,0.0,35.2,0.0,22.2,0.0
1,99441-55062,Nikola Jokic,C,56.81,12100,WAS@DEN,DEN,,1,Nikola-Jokic,...,16.0,10.8,11.0,16.4,8.8,17.2,19.7,6.4,15.0,12.4
2,99441-84680,Shai Gilgeous-Alexander,PG,54.43,11400,LAC@OKC,OKC,,1,Shai-Gilgeous-Alexander,...,11.1,36.2,13.8,25.2,20.2,19.9,25.4,21.7,27.7,25.2


## Optimize Rosters

In [14]:
# Map every player's Nickname to a tuple of the positions in their Position string:
#   'PG'    -> ('PG',)
#   'SG/SF' -> ('SG', 'SF')
# Only the first '/' is used as the separator, so the tuple has at most two entries.
player_dict = {
    nickname: tuple(positions.split('/', 1))
    for nickname, positions in zip(base['Nickname'], base['Position'])
}

In [15]:
# create dataframe from ground up for Linear Program:
# one row per (player, eligible position), so a multi-position player
# appears once for each position in player_dict

cols = ['Id', 'Nickname', 'Position', 'Salary', 'Game', 'Team', 'Injury Indicator', 'Minutes', 'Stud']

cols = cols + [f'sim{i}' for i in range(N_sims)]

# collect the rows first and build the frame once, instead of growing it row by row
records = []

for player in player_dict:
    # the player's attributes are identical for every position, so look them up once
    temp = base[base['Nickname'] == player][cols].iloc[0].to_dict()

    # each position
    for pos in player_dict[player]:
        # LP labels
        records.append({**temp, 'LP Position': pos, 'LP Name': pos + ' ' + player})

sim = pd.DataFrame(records, columns = cols + ['LP Position', 'LP Name'])

# create one-hot position columns
POSITIONS = ['PG', 'SG', 'SF', 'PF', 'C']

for pos in POSITIONS:
    # 'LP Position' holds a single position, so compare for equality
    sim[pos] = (sim['LP Position'] == pos).astype(int)

In [16]:
# define linear program problem
def defineProblem(df, point_col, n_studs, max_per_team = 4):
    """Build the roster-selection integer program for one points column.

    Parameters
    ----------
    df : DataFrame
        One row per (player, position) candidate. Reads the columns
        'LP Name', 'Nickname', 'Team', 'Salary', 'Stud', the 0/1 position
        columns 'PG', 'SG', 'SF', 'PF', 'C', and `point_col`.
    point_col : str
        Column whose values are maximised.
    n_studs : int
        Minimum number of selected rows with Stud == 1.
    max_per_team : int, default 4
        Maximum number of selected rows from any single team. The previous
        code reused `n_studs` for this limit, tying two unrelated rules
        together. NOTE(review): 4 is believed to be FanDuel's per-team cap;
        confirm against the contest rules.

    Returns
    -------
    pulp.LpProblem
        The unsolved maximisation problem, with one 0/1 variable per row of `df`.
    """
    points = df[point_col]
    consts = df[['PG', 'SG', 'SF', 'PF', 'C', 'Salary', 'Stud']]

    # initialize problem
    problem = pulp.LpProblem('Roster', pulp.LpMaximize)

    # one 0/1 decision variable per row, held in an object array so the
    # element-wise products with DataFrame columns below build one LP term per row
    players = np.empty(len(df), dtype = object)

    for i, p in enumerate(df['LP Name']):
        players[i] = pulp.LpVariable(
            p, lowBound = 0, upBound = 1, cat = pulp.LpInteger
        )

    # objective function
    problem += pulp.lpSum(players * points)

    # position quotas
    for pos, quota in [('PG', 2), ('SG', 2), ('SF', 2), ('PF', 2), ('C', 1)]:
        problem += pulp.lpSum(players * consts.loc[:, pos]) == quota, f'{pos} Constraint'

    # The quotas above already add up to 9 when every row has exactly one
    # position flag set; the explicit roster size is kept as a safeguard.
    problem += pulp.lpSum(players) == 9, 'Number of Players'
    problem += pulp.lpSum(players * consts.loc[:, 'Salary']) <= 60000, 'Salary Constraint'
    problem += pulp.lpSum(players * consts.loc[:, 'Stud']) >= n_studs, 'Stud Constraint'

    # maximum single team constraints
    for team in df['Team'].unique():
        problem += pulp.lpSum(players * (df['Team'] == team)) <= max_per_team, f'{team} Constraint'

    # player uniqueness constraints: a player listed under several positions
    # may fill at most one slot
    value_counts = df['Nickname'].value_counts()
    dupe_players = set(value_counts[value_counts > 1].index)

    for player in dupe_players:
        problem += pulp.lpSum(players * (df['Nickname'] == player)) <= 1, f'{player} Constraint'

    return problem


def solveProblem(df, point_col, n_studs):
    """Solve the roster problem for one points column.

    Builds the problem with `defineProblem`, solves it, prints a notice when
    the solver status is not optimal, and returns the value of each row's
    decision variable.

    Parameters
    ----------
    df : DataFrame
        Candidate rows; 'LP Name' must match the names the variables were created with.
    point_col : str
        Column of points to maximise.
    n_studs : int
        Minimum number of studs, forwarded to `defineProblem`.

    Returns
    -------
    Series
        Indexed like `df`; 1.0 for selected rows and 0.0 otherwise
        (missing where the solver produced no value).
    """
    # define and solve problem
    problem = defineProblem(df, point_col, n_studs = n_studs)
    solution = problem.solve()

    # status 1 is PuLP's "optimal"
    if solution != 1:
        print(f'Non-optimal solution in columns {point_col}.')

    vars_dict = problem.variablesDict()

    # PuLP replaces every character it treats as illegal in a variable name with '_'.
    # This table mirrors PuLP's LpElement.illegal_chars (NOTE(review): confirm for the
    # installed PuLP version); translating only ' ' and '-' raises KeyError for the rest.
    pulp_name = str.maketrans(dict.fromkeys('-+[] >/', '_'))

    # helper function to get values from FD name
    def getValue(x):
        # convert FD name to PuLP name
        value = vars_dict[x.translate(pulp_name)].varValue
        if value is None:
            return value
        # solvers can return 0.9999999 for an integer variable; callers test `== 1`
        return float(round(value))

    return df['LP Name'].apply(getValue)


def runSimulation(df, prefix = 'sim', n_sims = 1, n_studs = 2):
    """Solve one roster per simulated points column and attach the results.

    For k in 0 .. n_sims - 1 the column f'{prefix}{k}' of `df` is passed to
    `solveProblem`, and its result is stored as column f'roster{k}'.
    `df` itself is left untouched; a new frame with the added columns is returned.
    """
    # every problem is solved against the original frame
    roster_cols = {
        f'roster{k}': solveProblem(df, f'{prefix}{k}', n_studs = n_studs)
        for k in range(n_sims)
    }

    return df.assign(**roster_cols)
        

In [17]:
# optimize simulations: solve one roster problem per simulated column
# (sim0 .. sim{N_sims - 1}) and store each result as a `roster{i}` column;
# every roster must contain at least 3 studs
sim = runSimulation(sim, n_sims = N_sims, n_studs = 3)

## Generate Roster Export

In [18]:
# preview one optimized roster: rows selected for simulation `ind`,
# ordered PG, SG, SF, PF, C via the one-hot position columns
ind = 0
preview_cols = ['Id', 'Nickname', 'Position', 'Salary', 'Injury Indicator', 'Minutes', f'sim{ind}'] + POSITIONS
selected = sim[f'roster{ind}'] == 1
sim.loc[selected].sort_values(POSITIONS, ascending = False)[preview_cols]

Unnamed: 0,Id,Nickname,Position,Salary,Injury Indicator,Minutes,sim0,PG,SG,SF,PF,C
164,99441-18393,T.J. McConnell,PG,5500,,16.33,3.8,1,0,0,0,0
226,99441-145335,Payton Pritchard,PG,4500,,20.82,3.3,1,0,0,0,0
13,99441-58460,Devin Booker,SG,9500,,27.33,22.5,0,1,0,0,0
219,99441-110337,Talen Horton-Tucker,SG/SF,4600,,12.5,10.3,0,1,0,0,0
152,99441-157847,Trey Murphy,SF,5600,,28.45,10.8,0,0,1,0,0
205,99441-18338,Royce O'Neale,SF/PF,4800,,21.5,4.0,0,0,1,0,0
15,99441-157822,Scottie Barnes,PF/SF,9200,,33.78,25.9,0,0,0,1,0
78,99441-145539,Deni Avdija,SF/PF,7400,,37.71,22.2,0,0,0,1,0
25,99441-157908,Alperen Sengun,C,8700,,30.24,21.2,0,0,0,0,1


In [19]:
# slot labels of the exported roster, in the order the rows are sorted below
ROSTER_SLOTS = ['PG', 'PG', 'SG', 'SG', 'SF', 'SF', 'PF', 'PF', 'C']

col = 'Id'

# roster label -> list of player Ids in slot order
rosters = {}

for i in range(N_sims):
    # filter to specific roster, ordered PG, SG, SF, PF, C
    temp = sim[sim[f'roster{i}'] == 1].sort_values(POSITIONS, ascending = False)

    # a non-optimal solve can leave a roster without exactly one player per slot;
    # skip it with a message instead of failing on a length mismatch
    if len(temp) != len(ROSTER_SLOTS):
        print(f'Skipping roster{i}: expected {len(ROSTER_SLOTS)} players, found {len(temp)}.')
        continue

    rosters[f'roster{i}'] = temp[col].tolist()

# one row per roster, one column per slot (0 .. 8)
export = pd.DataFrame(
    list(rosters.values()), index = list(rosters.keys()), columns = range(len(ROSTER_SLOTS))
)

# order rosters by their Ids from the last slot to the first, so similar rosters sit together
export = export.sort_values([8, 7, 6, 5, 4, 3, 2, 1, 0])

export.columns = ROSTER_SLOTS

export

Unnamed: 0,PG,PG.1,SG,SG.1,SF,SF.1,PF,PF.1,C
roster15,99441-18393,99441-145335,99441-58460,99441-110337,99441-157822,99441-18338,99441-80808,99441-145339,99441-110325
roster10,99441-18393,99441-145335,99441-14498,99441-58460,99441-84702,99441-110337,99441-157822,99441-18338,99441-110325
roster17,99441-145337,99441-145335,99441-15636,99441-110337,99441-157822,99441-84694,99441-80808,99441-18338,99441-110325
roster13,99441-66113,99441-145335,99441-84702,99441-110337,99441-157822,99441-18338,99441-171770,99441-22541,99441-110325
roster9,99441-14498,99441-145335,99441-84667,99441-110337,99441-157822,99441-18338,99441-171770,99441-81621,99441-110325
roster2,99441-18393,99441-145335,99441-58460,99441-84702,99441-80808,99441-18338,99441-171770,99441-97263,99441-110325
roster12,99441-145327,99441-145335,99441-14498,99441-15636,99441-157822,99441-18338,99441-80808,99441-97263,99441-110325
roster16,99441-84680,99441-18393,99441-15636,99441-110337,99441-157822,99441-18338,99441-80808,99441-145339,99441-145322
roster6,99441-84667,99441-145335,99441-58460,99441-188412,99441-84702,99441-110337,99441-80808,99441-157822,99441-157839
roster7,99441-18393,99441-145335,99441-14498,99441-84667,99441-80808,99441-110337,99441-157822,99441-18338,99441-157839


In [20]:
# write the rosters to CSV; index = False leaves out the roster{i} row labels,
# so only the position columns are written
export.to_csv('NBA export.csv', index = False)