In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

**Local:**

In [None]:
import constants
import constraints

from datagolf import datagolf
from fileman import fileman
from picklejar import PickleJar

**Optimizer: (bottom)**

In [None]:
import itertools
from itertools import combinations

from functools import cache
from tqdm.notebook import tqdm

from pandarallel import pandarallel
# Initialise pandarallel; DataFrame.parallel_apply is used later when scoring lineups.
# NOTE(review): use_memory_fs=True presumably forces the in-memory file-system transfer
# path and may fail on platforms where it is unavailable — confirm against pandarallel docs.
pandarallel.initialize(use_memory_fs=True)

In [None]:
# Apply the project-level pandas settings defined in the local `constants` module.
# NOTE(review): presumably display options only — check constants.pandas_settings to confirm.
constants.pandas_settings()

In [None]:
def edit_fanduel():
    """Clean the FanDuel contest export and hand it to PickleJar as 'fanduel'.

    Reads ``../data/contest-files/<constants.tournament>.csv`` restricted to
    ``constants.keep_cols``, lower-cases the headers, renames ``nickname`` to
    ``name``, removes rows whose injury indicator equals ``'O'``, drops the
    indicator column and every row with a missing value, then casts ``name``
    to str, ``fppg`` to float and all remaining columns to int.

    Returns:
        None. The cleaned frame is passed to ``PickleJar.prepare``.
    """
    contest_path = f'../data/contest-files/{constants.tournament}.csv'
    raw = pd.read_csv(contest_path, usecols=constants.keep_cols)
    raw.columns = raw.columns.str.lower()

    # The mask is built on the raw frame; .loc aligns it by index after the rename.
    not_flagged = raw['injury indicator'] != 'O'
    cleaned = raw.rename({'nickname': 'name'}, axis=1).loc[not_flagged]
    cleaned = cleaned.drop('injury indicator', axis=1).dropna().reset_index(drop=True)

    # Any column without an explicit entry here is cast to int.
    explicit_types = {'name': 'str', 'fppg': 'float'}
    for column in cleaned.columns:
        target_type = explicit_types.get(column, 'int')
        cleaned[column] = cleaned[column].astype(target_type)

    PickleJar.prepare(cleaned, 'fanduel')
    return None
                

In [None]:
# URL template for a PGA Tour stat page; the placeholder receives a stat's 'url-id'.
base_url = 'https://www.pgatour.com/stats/stat.0{}.html'

# component name -> {'url-id': id substituted into base_url, 'sg-id': column prefix}
sg_components = {
    component: {'url-id': url_id, 'sg-id': sg_id}
    for component, url_id, sg_id in (
        ('tee', 2567, 'ott'),
        ('approach', 2568, 'app'),
        ('around', 2569, 'arg'),
        ('green', 2564, 'putt'),
        ('tee-to-green', 2674, 'ttg'),
    )
}

# The leading space on every stat column is deliberate: sg_per replaces each space in
# the column names with '<sg-id>-', so ' sg' becomes e.g. 'ott-sg' while 'name'
# (no space) is left untouched.
new_col_names = dict([
    ('player name', 'name'),
    ('rank this week', ' cur-rank'),
    ('rank last week', ' prev-rank'),
    ('average', ' sg'),
    ('rounds', ' num-rounds'),
    ('measured rounds', ' num-measured'),
])

# Columns kept by sg_per when abbreviate=True.
ab_cols = ('name', ' sg')

In [None]:
def sg_per(component, abbreviate=True):
    """Scrape one strokes-gained table from pgatour.com and hand it to PickleJar.

    Args:
        component: Key of ``sg_components``; matched case-insensitively.
        abbreviate: When True, keep only the columns listed in ``ab_cols``.

    Returns:
        None. An unknown component is ignored silently. Otherwise the frame is
        passed to ``PickleJar.prepare`` under 'strokes' with ``fparam`` set to
        the lower-cased component name.
    """
    # Normalise once and use the same key everywhere. Previously the guard used the
    # lower-cased name but the prefix lookup and the pickle name used the raw argument,
    # so e.g. 'Tee' passed the guard and then raised KeyError.
    key = component.lower()
    info = sg_components.get(key)
    if info is None:
        return None

    # The second table parsed from the stat page is the one used.
    ret = pd.read_html(base_url.format(info['url-id']))[1].reset_index(drop=True)

    ret.columns = (ret.columns
                   .str.lower()
                   .str.replace('total sg:', ' sg')
                   .str.replace('\xa0', ' '))
    ret = ret.rename(new_col_names, axis=1)

    if abbreviate:
        ret = ret.loc[:, ab_cols]

    # Every space in a column name becomes the component prefix (' sg' -> 'ott-sg');
    # 'name' contains no space and is unchanged.
    ret.columns = ret.columns.str.replace(' ', f'{info["sg-id"]}-')

    PickleJar.prepare(ret, 'strokes', fparam=key)

    return None
        

In [None]:
def agg_sg():
    """Refresh every per-component strokes-gained table and merge them into one.

    Runs ``sg_per`` for each key of ``sg_components``, reloads the resulting
    frames through ``PickleJar.load('strokes', fparam=...)``, then merges them
    one after another starting from the 'tee' frame (``DataFrame.merge`` with
    its default arguments). The merged frame is passed to ``PickleJar.prepare``
    under 'strokes'.

    Returns:
        None.
    """
    # Scrape everything first, then load everything.
    for component in sg_components:
        sg_per(component)

    frames = {}
    for component in sg_components:
        frames[component] = PickleJar.load('strokes', fparam=component)

    # 'tee' seeds the merge; every key after the first one in sg_components is folded in.
    merged = frames['tee']
    remaining = list(sg_components.keys())[1:]
    for key in remaining:
        merged = merged.merge(frames[key])

    PickleJar.prepare(merged.reset_index(drop=True), 'strokes')
    return None

In [None]:
def combine_pga_fanduel():
    """Attach the configured strokes-gained stats to the FanDuel player table.

    Refreshes both inputs (``edit_fanduel`` and ``agg_sg``), then for each focus
    stat named in ``constants`` adds two columns to the FanDuel frame: the
    player's value from the strokes table (0.0 when the name is not in that
    table) and ``'<stat>-per-10k'`` = 10000 * stat / salary. Rows with any
    missing value are dropped and the result is passed to ``PickleJar.prepare``
    under 'combined'.

    Returns:
        None.
    """
    edit_fanduel()
    fd = PickleJar.load('fanduel')

    agg_sg()
    # Not referenced again in this function; lookups go through sg_lookup below.
    strokes = PickleJar.load('strokes')

    # focus_stat_3 is only consulted when focus_stat_2 is set.
    focus_stats = (constants.focus_stat,)
    if constants.focus_stat_2 is not None:
        focus_stats += (constants.focus_stat_2,)
        if constants.focus_stat_3 is not None:
            focus_stats += (constants.focus_stat_3,)

    sg_lookup = PickleJar.load_set_idx('strokes', idx='name')
    for stat in focus_stats:

        def stat_for(player, _stat=stat):
            # Players missing from the strokes table get 0.0 for this stat.
            if player in sg_lookup.index:
                return sg_lookup.loc[player, _stat]
            return 0.0

        fd[stat] = fd['name'].apply(stat_for)
        per_10k = 10000 * fd[stat] / fd['salary']
        fd[f'{stat}-per-10k'] = np.array(per_10k)

    combo = fd.dropna().reset_index(drop=True)
    PickleJar.prepare(combo, 'combined')

    return None

In [None]:
def add_constraints():
    """Apply the optional minimum-salary filter to the combined player table.

    Rebuilds the combined table via ``combine_pga_fanduel``; when
    ``constraints.min_salary`` is set, keeps only rows whose salary is at least
    that value. The result is passed to ``PickleJar.prepare`` under 'optimizer'
    with ``fparam='raw'``.

    Returns:
        None.
    """
    combine_pga_fanduel()
    pool = PickleJar.load('combined')

    salary_floor = constraints.min_salary
    if salary_floor is not None:
        print(f'Excluding players less than ${constraints.min_salary}...')
        meets_floor = pool['salary'] >= salary_floor
        pool = pool.loc[meets_floor].reset_index(drop=True)

    PickleJar.prepare(pool, 'optimizer', fparam='raw')
    return None

In [None]:
def prepare_input():
    """Build the final optimizer input table and hand it to PickleJar as 'optimizer'.

    Runs ``add_constraints``, loads the resulting 'optimizer'/'raw' table, adds a
    'proj-pts' column by applying ``datagolf.proj_pts`` to each player name, and
    divides the salary column by 100.

    Returns:
        None.
    """
    add_constraints()
    ret = PickleJar.load('optimizer', fparam='raw')

    ret['proj-pts'] = ret['name'].apply(datagolf.proj_pts)
    # Course-fit adjustment is currently disabled:
    #ret['cfit-adj'] = ret['name'].apply(dapi.proj_skd)
    #ret['cfit-pts'] = ret['proj-pts']+(ret['proj-pts']*ret['cfit-adj'])

    # Explicit assignment instead of in-place `/=`: the column may hold ints and
    # the result is float. Lineup salary totals are later compared against
    # constraints.cost_range on this rescaled value.
    ret['salary'] = ret['salary'] / 100

    PickleJar.prepare(ret, 'optimizer')
    return None
    

```python
from numba import vectorize

@vectorize(['float32(float32, float32)'], target='cuda')
def Multi(a, b):
    return a*b
```

In [None]:
# Rebuild the optimizer input (runs the whole preparation chain), then load it back
# indexed by player name. `data` and `pnames` are module-level state read by the
# cached helpers and by create_lineups below.
prepare_input()
data = PickleJar.load_set_idx('optimizer', idx='name')
# Player names in table order; create_lineups slices this list into partitions.
pnames = data.index.values.tolist()

@cache
def get_value(name, column):
    """Look up one cell of the module-level ``data`` table.

    Args:
        name: Row label (player name).
        column: Column label.

    Returns:
        ``data.loc[name, column]``. Results are memoized per (name, column), so
        both arguments must be hashable.
    """
    cell = data.loc[name, column]
    return cell

@cache
def sum_values(names, column):
    """Add up ``column`` over the given players.

    Args:
        names: Hashable iterable of player names (a tuple, since the call is memoized).
        column: Column of the ``data`` table to total.

    Returns:
        The sum of ``get_value(name, column)`` over ``names``, in iteration order.
    """
    return sum(get_value(player, column) for player in names)

@cache
def is_valid_lineup(lineup):
    """Tell whether a lineup satisfies the salary and uniqueness rules.

    Args:
        lineup: Hashable tuple of player names.

    Returns:
        True when the summed 'salary' is contained in ``constraints.cost_range``
        and the lineup holds exactly 6 distinct names; False otherwise.
    """
    # The salary check runs first; the uniqueness check only runs when it passes.
    if sum_values(lineup, 'salary') not in constraints.cost_range:
        return False
    return len(set(lineup)) == 6

@cache
def lineup_analysis(lineup):
    """Total every column in ``constraints.cols_to_sum`` for a lineup.

    Args:
        lineup: Hashable tuple of player names; duplicates are collapsed before summing.

    Returns:
        A tuple with one total per entry of ``constraints.cols_to_sum``, in that order.
    """
    unique_players = tuple(set(lineup))
    return tuple(sum_values(unique_players, column) for column in constraints.cols_to_sum)

def lineup_analysis_wrapper(lineup):
    """Row-wise adapter around ``lineup_analysis`` for DataFrame apply.

    Args:
        lineup: A row exposing ``to_numpy()`` whose values are the player names.

    Returns:
        The ``lineup_analysis`` totals when the de-duplicated lineup passes
        ``is_valid_lineup``; otherwise a tuple of 0.0 with one entry per column
        in ``constraints.cols_to_sum``.
    """
    players = tuple(set(lineup.to_numpy()))
    if is_valid_lineup(players):
        return lineup_analysis(players)
    return (0.0,) * len(constraints.cols_to_sum)

def create_lineup_2_slices(slate_dict):
    """Combine two slates of 3-player groups into valid 6-player lineups.

    Args:
        slate_dict: Mapping whose values are the two slates, in order; each
            slate is an iterable of 3-player groups.

    Returns:
        A tuple of ``(g1, ..., g6)`` name tuples that pass ``is_valid_lineup``.
        Names are sorted within each 3-player half.
    """
    slates = list(slate_dict.values())

    # Give tqdm its total arithmetically so the Cartesian product can be streamed;
    # building the full product list just to get a length can exhaust memory.
    try:
        total = 1
        for slate in slates:
            total *= len(slate)
    except TypeError:
        # A slate without len() (e.g. a generator): progress bar runs without a total.
        total = None

    ret_list = list()

    for half_slates in tqdm(itertools.product(*slates), total=total):
        g1, g2, g3 = sorted(half_slates[0])
        g4, g5, g6 = sorted(half_slates[1])

        lu = (g1, g2, g3, g4, g5, g6)
        if is_valid_lineup(lu):
            ret_list.append(lu)

    return tuple(ret_list)

def create_lineup_3_slices(slate_dict):
    """Combine three slates of 2-player groups into valid 6-player lineups.

    Args:
        slate_dict: Mapping whose values are the three slates, in order; each
            slate is an iterable of 2-player groups.

    Returns:
        A tuple of ``(g1, ..., g6)`` name tuples that pass ``is_valid_lineup``.
        Names are sorted within each 2-player third.
    """
    slates = list(slate_dict.values())

    # Give tqdm its total arithmetically so the Cartesian product can be streamed;
    # building the full product list just to get a length can exhaust memory.
    try:
        total = 1
        for slate in slates:
            total *= len(slate)
    except TypeError:
        # A slate without len() (e.g. a generator): progress bar runs without a total.
        total = None

    ret_list = list()

    for third_slates in tqdm(itertools.product(*slates), total=total):
        g1, g2 = sorted(third_slates[0])
        g3, g4 = sorted(third_slates[1])
        g5, g6 = sorted(third_slates[2])

        lu = (g1, g2, g3, g4, g5, g6)
        if is_valid_lineup(lu):
            ret_list.append(lu)

    return tuple(ret_list)

# Trying to get better about only passing tuples or other completely immutable for default and for cache
def create_lineups():
    """Enumerate valid lineups, total their stats and hand them to PickleJar as 'created'.

    ``pnames`` is cut, in order, into ``constraints.slices`` consecutive
    partitions (the last one absorbs any remainder). Every lineup takes the same
    number of players from each partition, so players from one partition are
    never combined with more than ``6 / slices`` of their own partition. The
    surviving lineups get one summed column per entry of
    ``constraints.cols_to_sum``, are sorted by 'proj-pts' descending and
    de-duplicated.

    Returns:
        None.

    Raises:
        ValueError: If ``constraints.slices`` is not 2 or 3 (the only partition
            counts the lineup builders support).
    """
    num_players = 6  # (n) players per lineup
    num_slices = constraints.slices

    # Fail early with a clear message; previously any other value fell through to
    # the 3-slice builder with an empty dict and died with an IndexError.
    if num_slices not in (2, 3):
        raise ValueError(f'constraints.slices must be 2 or 3, got {num_slices!r}')
    num_slices = int(num_slices)

    # Integer arithmetic for sizes (was float multiplication by num_slices**-1).
    step = len(pnames) // num_slices  # players per partition
    r = num_players // num_slices     # (nCr) players drawn from each partition

    # All r-combinations inside each partition; partitions do not intersect.
    slates = dict()
    for i in range(num_slices):
        start = i * step
        stop = (i + 1) * step if i < num_slices - 1 else None  # last slice takes the rest
        slates[f'slate{i + 1}'] = tuple(map(tuple, itertools.combinations(pnames[start:stop], r)))

    lineups = create_lineup_2_slices(slates) if num_slices == 2 else create_lineup_3_slices(slates)
    ret = pd.DataFrame(lineups, columns=['g1', 'g2', 'g3', 'g4', 'g5', 'g6'])

    # Total each configured column per lineup, in parallel via pandarallel.
    ret[constraints.cols_to_sum] = ret.parallel_apply(lineup_analysis_wrapper, axis=1, result_type='expand')
    ret = (ret
           .sort_values(by='proj-pts', ascending=False)
           .drop_duplicates()
           .reset_index(drop=True)
          )

    PickleJar.prepare(ret, 'created')
    print('Done...')

    return None

def output_lineups(top_num=100):
    """Return the first ``top_num`` rows of the stored 'created' lineup table.

    Args:
        top_num: Number of rows to return from the top of the table.

    Returns:
        The result of ``.head(top_num)`` on the loaded table.
    """
    created = PickleJar.load('created')
    return created.head(top_num)
    

In [None]:
def output_lineups_by(sort_by=('proj-pts',), top_num=100):
    """Return the top stored lineups ordered by the given column(s), descending.

    Args:
        sort_by: A column name, or a sequence of column names in priority order.
            All listed columns take part in the sort (previously only the first
            element was used, and a bare string was indexed to its first
            character).
        top_num: Number of rows to return; defaults to the former fixed 100.

    Returns:
        The first ``top_num`` rows of the 'created' table after sorting.

    Raises:
        ValueError: If ``sort_by`` is empty.
    """
    keys = [sort_by] if isinstance(sort_by, str) else list(sort_by)
    if not keys:
        raise ValueError('sort_by must name at least one column')
    return (PickleJar.load('created')
            .sort_values(by=keys, ascending=False)
            .head(top_num))

In [None]:
def create_func():
    """Optionally rebuild the lineup table, then return the top lineups.

    When ``constants.create`` is truthy, announces the run and calls
    ``create_lineups``; in every case the result of ``output_lineups_by()`` is
    returned.
    """
    rebuild_requested = constants.create
    if rebuild_requested:
        banner = f'Creating lineups...\n{constraints.slices} partitions of names\n'
        print(banner)
        create_lineups()

    top_lineups = output_lineups_by()
    return top_lineups

In [None]:
# Entry point of the notebook: rebuilds lineups when constants.create is set and
# leaves the returned table as the cell's displayed output.
create_func()