In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

**Local modules:**

In [None]:
import constants
import constraints

import datagolf_api as dapi

In [None]:
# Preview the first rows of the projections object exposed by the local
# datagolf_api module (presumably a DataFrame -- confirm in datagolf_api).
#dapi.pres['projections']
dapi.proj.head()

**Optimizer: (bottom)**

In [None]:
import itertools
from itertools import combinations

from functools import cache
from tqdm.notebook import tqdm

from pandarallel import pandarallel
pandarallel.initialize(use_memory_fs=True)

In [None]:
# Apply the pandas settings helper defined in the local `constants` module.
constants.pandas_settings()

In [None]:
def edit_fanduel():
    """Clean the FanDuel contest CSV and cache it as a pickle.

    Reads ``../data/contest-files/<constants.tournament>.csv`` (only the
    columns listed in ``constants.keep_cols``), lower-cases the headers,
    renames ``nickname`` to ``name``, drops the ``injury indicator`` column
    and any row with a missing value, then casts ``name`` to str, ``fppg``
    to float and every other column to int.

    The result is written to ``../data/pickle-buffer/fanduel-data.pkl``.

    Returns:
        None
    """
    contest_path = f'../data/contest-files/{constants.tournament}.csv'
    raw = pd.read_csv(contest_path, usecols=constants.keep_cols)
    raw.columns = raw.columns.str.lower()

    # An injury/salary row filter used to be applied here; it is disabled.
    trimmed = raw.rename(columns={'nickname': 'name'})
    trimmed = trimmed.drop(columns='injury indicator')
    trimmed = trimmed.dropna().reset_index(drop=True)

    # Any column without an explicit entry is cast to int.
    special_types = {'name': 'str', 'fppg': 'float'}
    target_types = {column: special_types.get(column, 'int') for column in trimmed.columns}
    typed = trimmed.astype(target_types)

    typed.to_pickle('../data/pickle-buffer/fanduel-data.pkl')

    return None


def load_fanduel():
    """Regenerate the FanDuel pickle with edit_fanduel() and return it.

    Returns:
        The object read back from ``../data/pickle-buffer/fanduel-data.pkl``.
    """
    edit_fanduel()
    fanduel_frame = pd.read_pickle('../data/pickle-buffer/fanduel-data.pkl')
    return fanduel_frame

In [None]:
# Strokes-gained categories, in the order they are merged downstream.
#   url_id    -> numeric id of the pgatour.com stat page; the page URL is
#                https://www.pgatour.com/stats/stat.0<url_id>.html
#   shortened -> short tag used as a column prefix and in pickle file names
strokes_gained_components = {
    'tee':          {'url_id': 2567, 'shortened': 'ott'},
    'approach':     {'url_id': 2568, 'shortened': 'app'},
    'around':       {'url_id': 2569, 'shortened': 'arg'},
    'green':        {'url_id': 2564, 'shortened': 'putt'},
    'tee-to-green': {'url_id': 2674, 'shortened': 'ttg'},
}

# Scraped column header -> internal column name.  The leading space in the
# new names is deliberate: strokes_gained_per() later replaces every space
# with "<shortened>-", turning e.g. ' sg' into 'ott-sg'.
new_col_names = dict([
    ('player name', 'name'),
    ('rank this week', ' cur-rank'),
    ('rank last week', ' prev-rank'),
    ('average', ' sg'),
    ('rounds', ' num-rounds'),
    ('measured rounds', ' num-measured'),
])

# Columns kept when strokes_gained_per() is called with abbreviate=True.
abbrev_col_names = ['name', ' sg']

In [None]:
def strokes_gained_per(golf_shot, abbreviate=True):
    """Scrape one strokes-gained stat page from pgatour.com and pickle it.

    Args:
        golf_shot: Key of ``strokes_gained_components`` (case-insensitive),
            e.g. ``'tee'`` or ``'approach'``.
        abbreviate: When True, keep only the columns listed in
            ``abbrev_col_names`` (before they are prefixed).

    Returns:
        None. For a known ``golf_shot`` the table is written to
        ``../data/pickle-buffer/<shortened>-sg.pkl``; for an unknown one
        nothing is fetched or written.
    """
    # Normalise once and use the same key everywhere.  Previously the
    # membership test was case-insensitive but the later lookups used the raw
    # argument, so e.g. 'Tee' scraped the page and then raised KeyError.
    shot_key = golf_shot.lower()
    info = strokes_gained_components.get(shot_key)
    if info is None:
        return None

    prefix = info['shortened']
    url = f'https://www.pgatour.com/stats/stat.0{ info["url_id"] }.html'

    # The stats table is the second table found on the page.
    ret = pd.read_html(url)[1].reset_index(drop=True)

    # regex=False: these are literal substitutions, independent of the
    # pandas-version-specific default for `regex`.
    ret.columns = (ret.columns
                   .str.lower()
                   .str.replace('total sg:', ' sg', regex=False)
                   .str.replace('\xa0', ' ', regex=False)
                  )

    ret = ret.rename(new_col_names, axis=1)
    if abbreviate:
        ret = ret.loc[:, abbrev_col_names]

    # Every space becomes "<shortened>-" (e.g. ' sg' -> 'ott-sg'); the
    # renamed columns carry a leading space for exactly this purpose.
    ret.columns = ret.columns.str.replace(' ', f'{prefix}-', regex=False)

    ret.to_pickle(f'../data/pickle-buffer/{prefix}-sg.pkl')

    return None
        
def load_strokes_gained_per(golf_shot):
    """Refresh and return the strokes-gained table for one shot category.

    Args:
        golf_shot: Key of ``strokes_gained_components`` (case-insensitive).

    Returns:
        The object read back from
        ``../data/pickle-buffer/<shortened>-sg.pkl``.

    Raises:
        KeyError: If ``golf_shot`` is not a known category.
    """
    # Normalise once so mixed-case input resolves to the same entry for both
    # the scrape and the pickle path.
    shot_key = golf_shot.lower()
    if shot_key not in strokes_gained_components:
        raise KeyError(golf_shot)

    # Re-scrapes the stat page and rewrites the pickle on every call.
    strokes_gained_per(shot_key)

    return pd.read_pickle(f'../data/pickle-buffer/{strokes_gained_components[shot_key]["shortened"]}-sg.pkl')

In [None]:
def aggregate_strokes_gained():
    """Merge the per-category strokes-gained tables into one pickled frame.

    Loads one table per key of ``strokes_gained_components`` (each load
    re-scrapes its page), starts from the ``'tee'`` table and merges the
    tables for the remaining keys into it in dictionary order using
    ``DataFrame.merge`` with its default arguments.  The combined frame is
    written to ``../data/pickle-buffer/strokes-gained.pkl``.

    Returns:
        None
    """
    shot_names = list(strokes_gained_components.keys())

    # One frame per shot category.
    per_shot = {}
    for shot in shot_names:
        per_shot[shot] = load_strokes_gained_per(shot)

    # 'tee' is the base; every key after the first is merged onto it.
    combined = per_shot['tee']
    for shot in shot_names[1:]:
        combined = combined.merge(per_shot[shot])

    combined = combined.reset_index(drop=True)
    combined.to_pickle('../data/pickle-buffer/strokes-gained.pkl')

    return None

def load_strokes_gained():
    """Rebuild the combined strokes-gained pickle and return its contents.

    Returns:
        The object read back from
        ``../data/pickle-buffer/strokes-gained.pkl``.
    """
    aggregate_strokes_gained()
    combined = pd.read_pickle('../data/pickle-buffer/strokes-gained.pkl')
    return combined

In [None]:
# Sanity check: number of rows in the cleaned FanDuel data
# (this call re-reads the contest CSV and rewrites the pickle).
len(load_fanduel())

In [None]:
# Sanity check: number of rows in the merged strokes-gained table
# (this call re-scrapes every stat page listed in strokes_gained_components).
len(load_strokes_gained().index)

In [None]:
def combine_pga_fanduel():
    """Attach the configured strokes-gained stats to the FanDuel player list.

    For each focus stat (``constants.focus_stat``, then
    ``constants.focus_stat_2`` and ``constants.focus_stat_3`` when set) two
    columns are added to the FanDuel frame:

    * ``<stat>``          -- the player's value from the strokes-gained table,
      or 0.0 when the player's name is not in that table;
    * ``<stat>-per-10k``  -- ``10000 * <stat> / salary``.

    The frame is sorted by ``constants.focus_stat`` (descending), rows with
    missing values are dropped, and the result is written to
    ``../data/pickle-buffer/<constants.tournament>.pkl``.

    Returns:
        None
    """
    fd = load_fanduel()

    # Load the strokes-gained table exactly once: every call to
    # load_strokes_gained() re-scrapes all stat pages, and the previous
    # version called it twice while only using one of the results.
    sg = load_strokes_gained()

    focus_stats = [constants.focus_stat]
    if constants.focus_stat_2 is not None:
        focus_stats.append(constants.focus_stat_2)
        # The third stat is only considered when the second one is set.
        if constants.focus_stat_3 is not None:
            focus_stats.append(constants.focus_stat_3)

    sg_lookup = sg.set_index('name')

    for sg_col in focus_stats:
        # A plain dict lookup is much cheaper than a per-row .loc and always
        # yields a scalar, even if a name appears more than once.
        stat_by_name = sg_lookup[sg_col].to_dict()
        fd[sg_col] = fd['name'].map(lambda player: stat_by_name.get(player, 0.0))
        fd[f'{sg_col}-per-10k'] = np.array(10000 * fd[sg_col] / fd['salary'])

    fd = (fd
          .sort_values(by=[constants.focus_stat], ascending=False)
          .dropna()
         )

    fd.to_pickle(f'../data/pickle-buffer/{constants.tournament}.pkl')

    return None

In [None]:
def add_constraints():
    """Filter the combined player table down to optimizer candidates.

    Rebuilds the combined table via combine_pga_fanduel(), keeps players
    whose salary is at least ``constraints.min_salary`` (when that is not
    None), then keeps only names returned by ``dapi.players_who_made_cut()``.
    The result is written to ``../data/pickle-buffer/optimizer-data.pkl``.

    Returns:
        None
    """
    combine_pga_fanduel()
    candidates = pd.read_pickle(f'../data/pickle-buffer/{constants.tournament}.pkl')

    # Optional salary floor.
    salary_floor = constraints.min_salary
    if salary_floor is not None:
        meets_floor = candidates['salary'] >= salary_floor
        candidates = candidates[meets_floor].reset_index(drop=True)

    # Restrict to the names reported by the datagolf helper.
    made_cut = candidates['name'].isin(dapi.players_who_made_cut())
    candidates = candidates[made_cut].reset_index(drop=True)

    candidates.to_pickle('../data/pickle-buffer/optimizer-data.pkl')

    return None

In [None]:
def prepare_input():
    """Build the final optimizer input table and return the player names.

    Runs add_constraints(), adds a ``proj-pts`` column by applying
    ``dapi.proj_pts`` to each name, prints the first rows, divides ``salary``
    by 100, moves ``name`` into the index and writes the frame to
    ``../data/pickle-buffer/optimizer-data-clean.pkl``.

    Returns:
        list: Player names in table order, captured before ``name`` becomes
        the index.
    """
    add_constraints()
    pool = pd.read_pickle('../data/pickle-buffer/optimizer-data.pkl')
    player_names = pool['name'].values.tolist()

    pool['proj-pts'] = pool['name'].apply(dapi.proj_pts)
    print(pool.head())

    # Salaries are stored in hundreds from here on.
    pool['salary'] = pool['salary'] / 100
    pool = pool.set_index('name')

    pool.to_pickle('../data/pickle-buffer/optimizer-data-clean.pkl')

    return player_names
    

In [None]:
# Run the whole data pipeline and keep its results as module-level globals:
# `pnames` (list of player names) is read by create_lineups(), and `data`
# (the cleaned, name-indexed frame) is read by the cached helpers below.
pnames = prepare_input()
data = pd.read_pickle(f'../data/pickle-buffer/optimizer-data-clean.pkl')

@cache
def get_value(name, column):
    """Return the entry of the global ``data`` frame at row ``name``, column ``column``.

    Results are memoised, so later changes to ``data`` are not picked up for
    (name, column) pairs that were already looked up.
    """
    cell = data.loc[name, column]
    return cell

@cache
def sum_values(names, column):
    """Add up ``column`` over the given players (memoised).

    Args:
        names: Hashable iterable of player names (e.g. a tuple) -- it is used
            as a cache key.
        column: Column of the global ``data`` frame to total.
    """
    total = 0
    for player in names:
        total += get_value(player, column)
    return total

@cache
def is_valid_lineup(lineup):
    """Tell whether ``lineup`` is usable (memoised).

    A lineup is valid when its total ``salary`` is contained in
    ``constraints.cost_range`` and it consists of exactly six distinct
    players.

    Args:
        lineup: Hashable sequence of player names (e.g. a tuple).
    """
    # Salary is checked first, as before, so the lookup side effects match.
    if sum_values(lineup, 'salary') not in constraints.cost_range:
        return False
    return len(set(lineup)) == 6

@cache
def lineup_analysis(lineup):
    """Return the per-column totals for a lineup (memoised).

    Duplicated names are collapsed first; the result has one entry per column
    in ``constraints.cols_to_sum``, in that order.

    Args:
        lineup: Hashable sequence of player names (e.g. a tuple).
    """
    unique_players = tuple(set(lineup))
    totals = []
    for column in constraints.cols_to_sum:
        totals.append(sum_values(unique_players, column))
    return tuple(totals)

def lineup_analysis_wrapper(lineup):
    """Row-wise adapter around lineup_analysis() for ``DataFrame.apply``.

    Args:
        lineup: An object with ``to_numpy()`` holding the player names of one
            lineup (a DataFrame row when used with ``apply(axis=1)``).

    Returns:
        tuple: The column totals from lineup_analysis() when the lineup is
        valid, otherwise a tuple of ``0.0`` with one entry per column in
        ``constraints.cols_to_sum``.
    """
    # Convert once to a hashable, de-duplicated tuple for the cached helpers.
    players = tuple(set(lineup.to_numpy()))
    if is_valid_lineup(players):
        return lineup_analysis(players)
    return (0.0,) * len(constraints.cols_to_sum)

def create_lineup_2_slices(slate_dict):
    """Build every valid six-player lineup from two slates of three-player groups.

    Each candidate takes one group from the first slate and one from the
    second ("2 things of three"); the names within each group are sorted and
    the two groups are concatenated.  Candidates are kept when
    is_valid_lineup() accepts them.

    Args:
        slate_dict: Mapping whose values are the slates, each a collection of
            three-player groups.  Only the first two values contribute to a
            lineup.

    Returns:
        tuple: The valid lineups, each a 6-tuple of player names.
    """
    slates = list(slate_dict.values())

    # Work out the number of combinations up front so the progress bar still
    # has a total without materialising the whole Cartesian product in memory
    # (which is what wrapping the product in a list used to do).
    try:
        total = 1
        for slate in slates:
            total *= len(slate)
    except TypeError:
        # Unsized slate: fall back to a progress bar without a total.
        total = None

    ret_list = list()

    for half_slates in tqdm(itertools.product(*slates), total=total):

        g1, g2, g3 = sorted(half_slates[0])
        g4, g5, g6 = sorted(half_slates[1])

        lu = (g1, g2, g3, g4, g5, g6)
        if is_valid_lineup(lu):
            ret_list.append(lu)

    return tuple(ret_list)

def create_lineup_3_slices(slate_dict):
    """Placeholder for the three-slice ("3 things of two") lineup builder.

    Not implemented yet: ``slate_dict`` is ignored and None is returned.
    """
    return None

# Trying to get better about only passing tuples or other completely immutable for default and for cache
def create_lineups():
    """Enumerate, score and persist all valid six-player lineups.

    The global ``pnames`` list is cut into ``constraints.slices`` contiguous
    chunks (the last chunk takes any remainder).  Every
    ``6 // slices``-player combination within a chunk forms that chunk's
    slate, and the slice-specific builder combines one group per slate into
    full lineups.  Each lineup is then scored over
    ``constraints.cols_to_sum``, given ``total-sg`` and ``avg-value``
    columns, sorted by ``proj-pts`` (descending), de-duplicated and written
    to ``../data/lineups-created/<constants.tournament>.pkl``.

    Returns:
        None

    Raises:
        ValueError: If ``constraints.slices`` is not 2 or 3.
        NotImplementedError: If the selected builder produced no result
            (currently the case for 3 slices).
    """
    # Not necessary but makes reading easier
    num_players = 6  # (n)
    num_slices = constraints.slices

    # Map to the builder *functions*.  Calling them while building the dict
    # ran both builders on every invocation, which crashed for any slice
    # count other than 2.
    builders = {2: create_lineup_2_slices, 3: create_lineup_3_slices}
    if num_slices not in builders:
        raise ValueError(f'constraints.slices must be one of {sorted(builders)}, got {num_slices!r}')
    num_slices = int(num_slices)

    step = len(pnames) // num_slices
    r = num_players // num_slices  # (nCr)

    # Chunk boundaries: [0, step, 2*step, ..., len(pnames)].
    bounds = [i * step for i in range(num_slices)] + [len(pnames)]
    slates = {
        f'slate{i + 1}': tuple(itertools.combinations(pnames[bounds[i]:bounds[i + 1]], r))
        for i in range(num_slices)
    }

    lineups = builders[num_slices](slates)
    if lineups is None:
        raise NotImplementedError(f'lineup creation with {num_slices} slices is not implemented')

    ret = pd.DataFrame(lineups, columns=['g1','g2','g3','g4','g5','g6'])
    # Serial alternative: ret.apply(lineup_analysis_wrapper, axis=1, result_type='expand')
    ret[constraints.cols_to_sum] = ret.parallel_apply( lineup_analysis_wrapper, axis=1, result_type='expand' )

    ret['total-sg'] = ret.loc[:, constraints.sg_cols].sum(axis=1)
    ret['avg-value'] = ret.loc[:, constraints.val_cols].mean(axis=1)

    ret = (ret
           .sort_values(by='proj-pts', ascending=False)
           .drop_duplicates()
           .reset_index(drop=True)
          )

    ret.to_pickle(f'../data/lineups-created/{constants.tournament}.pkl')

    print('Done...')

    return None

def output_lineups(top_num=100):
    """Return the first ``top_num`` rows of the saved lineups.

    Reads ``../data/lineups-created/<constants.tournament>.pkl`` and keeps
    the stored row order.
    """
    saved = pd.read_pickle(f'../data/lineups-created/{constants.tournament}.pkl')
    return saved.head(top_num)
    

In [None]:
def output_lineups_by(sort_by=('proj-pts',), top_num=100):
    """Return the top saved lineups ordered by the given column(s).

    Args:
        sort_by: A column name or a sequence of column names.  Rows are
            sorted in descending order by the first column, with later
            columns breaking ties.  An empty sequence keeps the stored order.
        top_num: Number of rows to return (default 100).

    Returns:
        The first ``top_num`` rows of
        ``../data/lineups-created/<constants.tournament>.pkl`` after sorting.
    """
    # A bare string used to be indexed with [0], which sorted by its first
    # character and raised KeyError; treat it as a single column name.
    if isinstance(sort_by, str):
        sort_by = (sort_by,)
    sort_cols = list(sort_by)

    lineups = pd.read_pickle(f'../data/lineups-created/{constants.tournament}.pkl')
    if sort_cols:
        lineups = lineups.sort_values(by=sort_cols, ascending=False)

    return lineups.head(top_num)

In [None]:
def create_func():
    """Return the saved lineups via output_lineups_by() with its defaults.

    Regenerating the lineups first (create_lineups(), gated on
    ``constants.create``) is currently disabled; this only reads what is
    already on disk.
    """
    # if constants.create:
    #     create_lineups()
    top_lineups = output_lineups_by()
    return top_lineups

In [None]:
# Notebook entry point: display the top saved lineups for the current tournament.
create_func()