Added stud constraint. Stud is FanDuel points >= 40. Requires at least *3* players.

Keep GTD players. Simulate with 50% chance of not playing.

In [1]:
import pandas as pd
import numpy as np
import random, pulp, os
from datetime import datetime

from NBA import *
%load_ext autoreload
%autoreload 2

## Load FanDuel Players List

In [2]:
players_file = 'FanDuel-NBA-2024 ET-04 ET-19 ET-101897-players-list'
full_players_file = 'Lib/Players Lists/NBA/' + players_file + '.csv'

ply = load_fanduel_playerslist(full_players_file)

## Merge and Filter

In [3]:
dat = ply.copy()

dat['Stud'] = (dat['FPPG'] >= 40) * 1

dat = filter_players(dat, salary_cutoff=4500, verbose=True)

Total players to start: 73
Total players filtered out: 47
Total players remaining: 26

Breakdown by category. Possible duplicates across categories.
Dropping 9 players due to injuries.
Dropping 30 players due to low projections.
Dropping 47 players due to low salary.


## Download Actuals from Number Fire

In [4]:
url_mapping = pd.read_csv('Utils/NBA Helpers/NumberFire NBA URL Mappings.csv')

dat = dat.merge(
    pd.read_csv('Utils/NBA Helpers/NumberFire NBA URL Mappings.csv'),
    how = 'left', left_on = 'Nickname', right_on = 'FanDuel Nickname'
)

dat.drop(columns = ['Nickname_y', 'FanDuel Nickname'], inplace = True)
dat.rename(columns = {'Nickname_x' : 'Nickname'}, inplace = True)


dat['Suffix'].fillna(dat['Nickname'].str.replace(' ', '-'), inplace = True)

In [5]:
nf = pd.DataFrame(columns = ['Date', 'OPP', 'MIN', 'PTS', 'FGM-A', '3PM-A', 'FTM-A', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'Salary', 'FP', 'Value'])

start = datetime.now()
for nickname in dat['Nickname']:
    suffix = dat.loc[dat['Nickname'] == nickname, 'Suffix'].iloc[0]
    try:
        act = pd.read_html('https://www.numberfire.com/nba/players/daily-fantasy/' + suffix)
        
        temp = pd.concat([act[2], act[3]], axis = 1)
        temp['Nickname'] = nickname
        nf = pd.concat([nf, temp])
    except:
        print(nickname, suffix)

stop = datetime.now()
print(f'Total time: {stop - start}')
nf.shape

DeMar DeRozan DeMar-DeRozan
Bam Adebayo Bam-Adebayo
Nikola Vucevic Nikola-Vucevic
Tyler Herro Tyler-Herro
Coby White Coby-White
Terry Rozier Terry-Rozier
Ayo Dosunmu Ayo-Dosunmu
Javonte Green Javonte-Green
Andre Drummond Andre-Drummond
Alex Caruso Alex-Caruso
Jaime Jaquez Jaime-Jaquez-Jr
Haywood Highsmith Haywood-Highsmith
Caleb Martin Caleb-Martin
Delon Wright Delon-Wright
Kevin Love Kevin-Love
Total time: 0:00:10.269702


(789, 17)

In [6]:
# get standard deviation and actuals from last N games
N = 3
nf_summary = nf.groupby('Nickname').head(N).groupby('Nickname').agg({'FP':['count', 'mean', 'std'], 'MIN':['mean']})
nf_summary.columns = ['FP_count', 'FP_mean', 'FP_std', 'Minutes']

# compute standard error
nf_summary['FP_SE'] = nf_summary['FP_std'] / np.sqrt(nf_summary['FP_count'])

# round all
nf_summary = np.round(nf_summary, 2)

## Regression and Simulate

In [7]:
base = dat.merge(
    nf_summary[['FP_mean', 'FP_SE', 'Minutes']],
    how = 'left', left_on = 'Nickname', right_index = True
)

# drop players without actuals; likely due to URL mapping
na_mask = base['FP_SE'].isna()
print(f'Dropping {sum(na_mask)} players due to missing FP_SE')
print([name for name in base.loc[na_mask, 'Nickname']])
base = base[~na_mask]

# add vars for regression
for p in ['PG', 'SG', 'SF', 'PF', 'C']:
    base[p] = base['Position'].str.contains(p) * 1
    
x_vars = ['FPPG', 'Salary', 'FP_mean', 'FP_SE', 'Minutes', 'PG', 'SG', 'SF', 'PF', 'C']
# x_weights = [.2551, .00496, -.00775, .17621, .29893, 1.15761, 2.0473, 2.47937, -1.06842, 5.41944]
x_weights = [.9249, -.0052, 1.5633, 1.1063, -1.7343, 3.1635, -5.5410, 6.21822, -4.4943, 3.3408]

base['FP_fitted'] = round((base[x_vars] * x_weights).sum(axis = 'columns'), 2)


# simulate games
# model: ~ N(FPTS, FP_SE)
N_sims = 20

for i in range(N_sims):
    norm = np.round(
        # normally distributed, centered at recent actuals, wtih standard error estimated from actuals
        np.random.normal(base['FP_fitted'], base['FP_SE']), 1
    )
    
    injury_factor = base['Injury Indicator'].apply(
        # 50/50 change GTD will play
        lambda x: 1 if x != x else (np.random.uniform() < .5)*1
    )
    
    # set sim value
    base[f'sim{i}'] = norm * injury_factor

base.drop(columns = ['PG', 'SG', 'SF', 'PF', 'C'], inplace = True)
base.head(3)

Dropping 15 players due to missing FP_SE
['DeMar DeRozan', 'Bam Adebayo', 'Nikola Vucevic', 'Tyler Herro', 'Coby White', 'Terry Rozier', 'Ayo Dosunmu', 'Javonte Green', 'Andre Drummond', 'Alex Caruso', 'Jaime Jaquez', 'Haywood Highsmith', 'Caleb Martin', 'Delon Wright', 'Kevin Love']


Unnamed: 0,Id,Nickname,Position,FPPG,Salary,Game,Team,Injury Indicator,Stud,Suffix,...,sim10,sim11,sim12,sim13,sim14,sim15,sim16,sim17,sim18,sim19
0,101897-59358,Domantas Sabonis,C/PF,49.27,10400,SAC@NO,SAC,,1,Domantas-Sabonis,...,16.5,13.2,12.1,12.0,7.5,2.8,9.1,1.1,2.5,15.8
1,101897-80810,De'Aaron Fox,PG,45.24,9700,SAC@NO,SAC,,1,de-aaron-fox,...,7.1,4.1,5.8,4.8,2.0,3.4,9.3,6.0,4.4,2.9
4,101897-19067,CJ McCollum,PG/SG,34.94,8400,SAC@NO,NO,,0,C-J-McCollum,...,-25.6,-19.5,-7.9,-7.3,-3.0,-11.2,-16.7,-12.8,-17.5,-10.9


## Optimize Rosters

In [8]:
# create dataframe from ground up for Linear Program
cols = ['Id', 'Nickname', 'Position', 'Salary', 'Game', 'Team', 'Injury Indicator', 'Minutes', 'Stud']

cols = cols + [f'sim{i}' for i in range(N_sims)]

sim = runSimulation(create_lp_df(base[cols]), n_sims=N_sims, n_studs=3)

Non-optimal solution in columns sim0.
Non-optimal solution in columns sim1.
Non-optimal solution in columns sim2.
Non-optimal solution in columns sim3.
Non-optimal solution in columns sim4.
Non-optimal solution in columns sim5.
Non-optimal solution in columns sim6.
Non-optimal solution in columns sim7.
Non-optimal solution in columns sim8.
Non-optimal solution in columns sim9.
Non-optimal solution in columns sim10.
Non-optimal solution in columns sim11.
Non-optimal solution in columns sim12.
Non-optimal solution in columns sim13.
Non-optimal solution in columns sim14.
Non-optimal solution in columns sim15.
Non-optimal solution in columns sim16.
Non-optimal solution in columns sim17.
Non-optimal solution in columns sim18.
Non-optimal solution in columns sim19.


## Generate Roster Export

In [9]:
ind = 0
POSITIONS = ['PG', 'SG', 'SF', 'PF', 'C']
sim[sim[f'roster{ind}'] == 1].sort_values(POSITIONS, ascending = False)[
    ['Id', 'Nickname', 'Position', 'Salary', 'Injury Indicator', 'Minutes', f'sim{ind}'] + POSITIONS
]

Unnamed: 0,Id,Nickname,Position,Salary,Injury Indicator,Minutes,sim0,PG,SG,SF,PF,C
3,101897-19067,CJ McCollum,PG/SG,8400,,33.73,-12.9,1,1,0,0,0
16,101897-171669,Keon Ellis,SG/PG,5600,,31.91,-21.6,1,1,0,0,0
17,101897-171669,Keon Ellis,SG/PG,5600,,31.91,-21.6,1,1,0,0,0
2,101897-80810,De'Aaron Fox,PG,9700,,35.03,5.8,1,0,0,0,0
7,101897-171787,Keegan Murray,SF/PF,6800,,36.17,-9.7,0,0,1,1,0
8,101897-171787,Keegan Murray,SF/PF,6800,,36.17,-9.7,0,0,1,1,0
1,101897-59358,Domantas Sabonis,C/PF,10400,,35.44,5.5,0,0,0,1,1
15,101897-14503,Jonas Valanciunas,C,5700,,14.62,-3.5,0,0,0,0,1


In [10]:
export = pd.DataFrame()

col = 'Id'

for i in range(N_sims):
    # filter to specific roster
    temp = sim[sim[f'roster{i}'] == 1].sort_values(POSITIONS, ascending = False)
    
    # append to export df
    export[f'roster{i}'] = temp[col].values
    
export = export.T.sort_values([8, 7, 6, 5, 4, 3, 2, 1, 0])

export.columns = ['PG', 'PG', 'SG', 'SG', 'SF', 'SF', 'PF', 'PF', 'C']

ValueError: Length of values (7) does not match length of index (8)

In [None]:
# export.to_csv('NBA export.csv', index = False)