In [4]:
import pandas as pd
from pulp import *
import time
import re
import warnings
import os
from selenium import webdriver


In [2]:
warnings.filterwarnings('ignore')
#pd.set_option('display.max_columns', 500)

In [46]:

def formula_generator(data, variable, row, opt_lineup):
    '''This function determines what gets optimized'''
    return data.loc[row, opt_lineup] * variable #(round(data.loc[row, 'Ceiling'], 3) + round(data.loc[row, 'Projection']) + round(data.loc[row, 'Floor'], 3)) / 3 * variable


def fanduel_maximizer(data, i, opt_lineup):
    prob = pulp.LpProblem('FanduelSelections', LpMaximize)
    #print(data.loc[0,'Projection'])
    decision_variables = []
    total_projections = ''
    salary_variables = []
    pg_variables = []
    sg_variables = []
    sf_variables = []
    pf_variables = []
    c_variables = []

    for row in range(len(data)):
        variable = str('x' + str(row))
        variable = pulp.LpVariable(str(variable), lowBound=0, upBound=1, cat='Integer')
        decision_variables.append(variable)
        #print(data.loc[row, 'Ceiling'])
        ### TODO Change this line below to change what gets optimized
        formula = formula_generator(data, variable, row, opt_lineup)
        total_projections += formula
        salary_variables.append(int(data.loc[row, 'Salary']) * variable)
        if data.loc[row, 'Position'] == 'PG':
            pg_variables.append(variable)
            data.loc[row, 'Position #'] = 1
        elif data.loc[row, 'Position'] == 'SG':
            sg_variables.append(variable)
            data.loc[row, 'Position #'] = 2
        elif data.loc[row, 'Position'] == 'SF':
            sf_variables.append(variable)
            data.loc[row, 'Position #'] = 3
        elif data.loc[row, 'Position'] == 'PF':
            pf_variables.append(variable)
            data.loc[row, 'Position #'] = 4
        elif data.loc[row, 'Position'] == 'C':
            c_variables.append(variable)
            data.loc[row, 'Position #'] = 5
    #posit_map = {'PG':1,'SG':2,'SF':3,'PF':4,'C':5}
    #data['Position #'] = data['Position'].map(posit_map)


    #print(pg_variables)

    prob += total_projections
    prob += lpSum(decision_variables) == 9
    prob += lpSum(pg_variables) == 2
    prob += lpSum(sg_variables) == 2
    prob += lpSum(sf_variables) == 2
    prob += lpSum(pf_variables) == 2
    prob += lpSum(c_variables) == 1

    prob += lpSum(salary_variables) <= 60000


    #prob += lpSum()
    #print(prob)
    prob.writeLP('FanduelSelections.lp')

    optimization_result = prob.solve()

    assert optimization_result == pulp.LpStatusOptimal

    variable_name = []
    variable_value = []
    for v in prob.variables():
        variable_name.append(v.name)
        variable_value.append(v.varValue)

    df = pd.DataFrame({'variable' : variable_name, 'value': variable_value})
    for rownum, row in df.iterrows():
        value = re.findall(r'(\d+)', row['variable'])   # this removes the x from the variable name e.g. x10 -> 10
        df.loc[rownum, 'variable'] = int(value[0])

    df = df.sort_values(by='variable')

    for rownum, row in data.iterrows():
        for results_rownum, results_row in df.iterrows():
            if rownum == results_row['variable']:
                data.loc[rownum, 'Play?'] = results_row['value']
    selected_players = data[data['Play?'] == 1].sort_values(by='Position #')
    #selected_players = selected_players.loc[:, 'Player Name':'Projection']
     #selected_players = [i.replace(".","") for i in selected_players]

    #print(selected_players)
    #print('Total Salary: {}'.format(sum(selected_players['Salary'])))
    #print('Total Projection: {}'.format((sum(selected_players[opt_lineup]))))#+sum(selected_players['Projection'])+sum(selected_players['Floor'])) / 3))
    #print('Actual Points: {}'.format(sum(selected_players['Actual FD PTS'])))
    end_time = time.time() - start_time
    #print(data['Date'].unique().values[0])
    print('Elapsed Time: {}'.format(round(end_time,3)) + ' seconds')

    return selected_players




In [40]:
def dates_scraper(st_yr, end_yr):
    total_dates = pd.Series()
    for curr_yr in range(st_yr, end_yr+1):
        url = 'https://en.wikipedia.org/wiki/20{}-{}_NBA_season'.format(curr_yr, curr_yr+1)
        driver.get(url)
        yr_finder = driver.find_element_by_xpath('/html/body/div[3]/div[3]/div[4]/div/table[1]/tbody/tr[4]/td')
        reg_season = yr_finder.text.split('\n')[0]
        range_reg_season = reg_season.split(' – ')
        start_date = range_reg_season[0]
        end_date = range_reg_season[1]
        #print(reg_season)
        daterange = pd.Series(pd.date_range(start_date, end_date))
        total_dates = total_dates.append(daterange)
    return total_dates.reset_index(drop=True)

In [5]:
# Download chrome webdriver here: https://chromedriver.storage.googleapis.com/index.html?path=79.0.3945.36/

driver = webdriver.Chrome('/Users/jordanlevy/Downloads/chromedriver') # change to your path for chromedriver
script_dir = '/Users/jordanlevy/Documents/GitHub/group078' # change to your script directory path

In [7]:
total_dates = dates_scraper(16, 18)

In [20]:
total_df

Unnamed: 0,Name,Position,Salary,Projected FD PTS,Actual FD PTS,Found?,Date
0,Damian Lillard,PG,8900,37.423734,54.8,0,2016-10-25
1,LaMarcus Aldridge,PF,7400,34.438556,47.3,0,2016-10-25
2,Jonathon Simmons,SF,3500,20.286056,29.3,0,2016-10-25
3,Kyle Anderson,SF,3500,16.733275,9.8,0,2016-10-25
4,Dewayne Dedmon,C,3900,12.876110,16.6,0,2016-10-25
...,...,...,...,...,...,...,...
165,Marvin Bagley III,PF,6900,35.304776,32.8,1,2019-04-10
166,Rodions Kurucs,PF,3500,24.117352,33.3,1,2019-04-10
167,Jonas Jerebko,PF,4000,15.861176,16.1,1,2019-04-10
168,Kemba Walker,PG,9800,48.827059,56.9,1,2019-04-10


In [44]:
%%time
total_df = pd.DataFrame()
for date in total_dates:
    d = str(date)[:10]
    print(d)
    try:
        date_df = pd.read_csv('data/model_predictions/{}.csv'.format(d))
        date_df['Date'] = d
        total_df = total_df.append(date_df)
    except:
        continue
total_df = total_df[['Date', 'Name', 'Position', 'Salary', 'Projected FD PTS', 'Actual FD PTS', 'Found?']]

2016-10-25
2016-10-26
2016-10-27
2016-10-28
2016-10-29
2016-10-30
2016-10-31
2016-11-01
2016-11-02
2016-11-03
2016-11-04
2016-11-05
2016-11-06
2016-11-07
2016-11-08
2016-11-09
2016-11-10
2016-11-11
2016-11-12
2016-11-13
2016-11-14
2016-11-15
2016-11-16
2016-11-17
2016-11-18
2016-11-19
2016-11-20
2016-11-21
2016-11-22
2016-11-23
2016-11-24
2016-11-25
2016-11-26
2016-11-27
2016-11-28
2016-11-29
2016-11-30
2016-12-01
2016-12-02
2016-12-03
2016-12-04
2016-12-05
2016-12-06
2016-12-07
2016-12-08
2016-12-09
2016-12-10
2016-12-11
2016-12-12
2016-12-13
2016-12-14
2016-12-15
2016-12-16
2016-12-17
2016-12-18
2016-12-19
2016-12-20
2016-12-21
2016-12-22
2016-12-23
2016-12-24
2016-12-25
2016-12-26
2016-12-27
2016-12-28
2016-12-29
2016-12-30
2016-12-31
2017-01-01
2017-01-02
2017-01-03
2017-01-04
2017-01-05
2017-01-06
2017-01-07
2017-01-08
2017-01-09
2017-01-10
2017-01-11
2017-01-12
2017-01-13
2017-01-14
2017-01-15
2017-01-16
2017-01-17
2017-01-18
2017-01-19
2017-01-20
2017-01-21
2017-01-22
2017-01-23

In [23]:
total_df.to_csv('data/total_proj.csv', index=False)

In [None]:
%%time
if __name__ == "__main__":
    start_time = time.time()
    total_data = pd.read_csv('data/total_proj.csv')

    start_date = '2018-10-16'
    end_date = '2019-04-10'
    date_range = pd.Series(pd.date_range(start_date, end_date)).astype(str).to_list()#.apply(lambda x: x.split('-')).to_list()  # Creates a list of datetime objects
    #types_of_lineups = ['Projected']
    lineup_name = 'Projected FD PTS'
    for date in date_range:
        #print(date+" Lineups")

        
        print(date+" "+'Projected FD PTS'+" Lineups")

        player_data = total_data[total_data['Date'] == date].reset_index(drop=True)
        #print(player_data)

        try:
            first_lineup = fanduel_maximizer(player_data, 1, lineup_name).reset_index(drop=True)
        except TypeError:
            continue
        data_path = os.path.join('data','Hist_Data',date)
        try:
            os.makedirs(data_path)
        except OSError:
            print ("Creation of the directory %s failed" % data_path)
        #else:
            #print ("Successfully created the directory %s " % path)
        first_lineup.to_csv(os.path.join(data_path, 'lineup1.csv'), index=False)
        for i in range(len(first_lineup)):
            print('Lineup {}'.format(i+2))
            updated_player_data = player_data[(player_data['Name'] != first_lineup.loc[i,'Name'])].reset_index(drop=True)
            players = fanduel_maximizer(updated_player_data, i+2, lineup_name)
            #print(players)
            #lineup = players['Name'].tolist()
            players.to_csv(os.path.join(data_path, 'lineup{}.csv'.format(i+2)), index=False)


            #print(first_lineup)
            #time.sleep(10)


2018-10-16 Projected FD PTS Lineups
Elapsed Time: 0.195 seconds
Creation of the directory data/Hist_Data/2018-10-16 failed
Lineup 2
Elapsed Time: 0.463 seconds
Lineup 3
Elapsed Time: 0.587 seconds
Lineup 4
Elapsed Time: 0.904 seconds
Lineup 5
Elapsed Time: 1.111 seconds
Lineup 6
Elapsed Time: 1.339 seconds
Lineup 7
Elapsed Time: 1.49 seconds
Lineup 8
Elapsed Time: 1.674 seconds
Lineup 9
Elapsed Time: 1.881 seconds
Lineup 10
Elapsed Time: 2.029 seconds
2018-10-17 Projected FD PTS Lineups
Elapsed Time: 3.947 seconds
Creation of the directory data/Hist_Data/2018-10-17 failed
Lineup 2
Elapsed Time: 5.797 seconds
Lineup 3
Elapsed Time: 7.554 seconds
Lineup 4
Elapsed Time: 9.319 seconds
Lineup 5
Elapsed Time: 11.237 seconds
Lineup 6
