In [1]:
%matplotlib inline
import player as data_pl
import team as data_te
import game as data_ga
import utils as anal_ut
import multiprocessing as mp
import numpy as np
import scipy
import scipy.stats as scst
import datetime
from numpy import linalg
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (10.0, 8.0)
import matplotlib.colors as colors
import pandas as pd
import time
from sklearn import cluster
from sklearn import linear_model
from collections import Counter
import math
import statsmodels.api as sm
import urllib2
import re
import os
import unicodedata
import pulp_lineup_optimization as lopt
from scipy import special
import cvxopt
import dfs_utils as dfsutils
DFS_SITES = dfsutils.DFS_SITES

In [2]:
# Lookup table indexed by player id (used further down as pid2pos[pid]),
# stored on disk as the text of a Python expression.
# NOTE(review): eval() executes whatever pid2pos.txt contains, so this must
# only ever be pointed at a trusted file. ast.literal_eval may be a safer
# drop-in if the file holds nothing but plain literals -- confirm first.
with open('pid2pos.txt', 'r') as pid2pos_file:
    pid2pos = eval(pid2pos_file.read())

# Game log table. GAME_DATE is stored as 'YYYY-MM-DD' text in the CSV; it is
# converted to datetime.date objects so it can be compared against dates,
# and the rows are then ordered oldest-first.
global_stats = pd.read_csv('global_stats.csv', index_col=0)
global_stats['GAME_DATE'] = [
    datetime.datetime.strptime(date_text, '%Y-%m-%d').date()
    for date_text in global_stats['GAME_DATE']
]
global_stats = global_stats.sort('GAME_DATE', ascending=True)

In [7]:
# Backfill per-player projections for every date from 2015-10-27 up to (but
# not including) the current date, writing one CSV per date to
# MultiSiteProjections/nba_<date>.csv.
#
# Read for each date:
#   * the player list, from dfsutils.get_player_list or, if that fails, from
#     PlayerLists/pl_fd_nba_MM_DD_YYYY.csv (dates with neither are reported
#     as 'Missing' and skipped)
#   * Parameters/player_parameters_<date>.csv
#   * AdjustmentRegressions/opp_vs_pos_<date>
# plus the rows of global_stats dated strictly before that date.
#
# print calls use a single pre-formatted argument so they emit the same text
# as the equivalent Python 2 print statements.
today = datetime.date(2015, 10, 27)
whole_start_time = time.time()

# Earliest game date included in the sample used for floor/ceiling minutes.
MINUTES_RANGE_START = datetime.date(2015, 7, 7)
# SEASON_ID value whose games feed the HIST_* spread statistics.
HIST_SEASON_ID = 22015
# A game counts toward Bust*/Boom* when its FD value is <= / >= this
# multiple of the player's FD_Salary.
BUST_FD_PER_DOLLAR = .0035
BOOM_FD_PER_DOLLAR = .005

# Manual adjustments; the same (currently empty) tables apply to every date.
# TODO: Value plays
overrides = {
    # LAST_COMMA_FIRST : MIN
}

increases = {
    # LCF : (plus, times)
}

# global_stats is not modified inside the loop, so it is ordered once here
# instead of on every iteration.
global_stats = global_stats.sort('GAME_DATE', ascending=True)

while today < datetime.datetime.now().date():
    start_time = time.time()

    # ---- Player list -----------------------------------------------------
    # `except Exception` (rather than a bare except) keeps the fallback
    # behaviour but lets KeyboardInterrupt / SystemExit stop the backfill.
    try:
        pl = dfsutils.get_player_list(today)
    except Exception:
        try:
            pl = pd.read_csv('PlayerLists/pl_fd_nba_%02d_%02d_%4d.csv' % (today.month, today.day, today.year))
        except Exception:
            print('Missing %s' % today)
            today += datetime.timedelta(days=1)
            continue
        # These columns are dropped only on the CSV fallback path.
        del pl['Id']
        del pl['Unnamed: 12']
        del pl['Unnamed: 13']
    pl = pl.drop_duplicates()
    pl.Game = [dfsutils.fix_game_abbrev(x) for x in pl.Game]
    pl.Team = [dfsutils.fix_team_abbrev[x] if x in dfsutils.fix_team_abbrev else x for x in pl.Team]
    pl.Opponent = [dfsutils.fix_team_abbrev[x] if x in dfsutils.fix_team_abbrev else x for x in pl.Opponent]
    pl['LAST_COMMA_FIRST'] = ['%s, %s' % (l, f) for l, f in zip(pl['Last Name'], pl['First Name'])]
    pl['LAST_COMMA_FIRST'] = [x if x not in dfsutils.COMMA_NAME_CORRECTIONS else dfsutils.COMMA_NAME_CORRECTIONS[x] for x in pl.LAST_COMMA_FIRST]
    pl['Player_ID'] = [dfsutils.get_pid(x) for x in pl.LAST_COMMA_FIRST]
    pl['FD_Salary'] = [float(x) for x in pl.Salary]
    pl['FD_Position'] = pl['Position']
    pl = pl.set_index('Player_ID')
    # Keep a single row per Player_ID.
    pl = pl.groupby(pl.index).first()

    # ---- Per-date model inputs -------------------------------------------
    filename = 'Parameters/player_parameters_%s.csv' % today
    player_params = pd.read_csv(filename, index_col=False)
    player_params = player_params.set_index('PERSON_ID')

    # NOTE(review): eval() executes the file's contents; only use with
    # trusted files. The text replacement makes 'array(...)' in the file
    # resolve to np.array.
    with open('AdjustmentRegressions/opp_vs_pos_%s' % today, 'r') as reader:
        tvp_adj = eval(reader.read().replace('array', 'np.array'))

    # Only games played before the date being projected are used.
    tmp = global_stats[global_stats.GAME_DATE < today]

    # ---- Baseline projection for each listed player ----------------------
    projs = pd.DataFrame()
    for _, row in pl.iterrows():
        if np.isnan(row.name):
            print('%s  has nan pid' % row.LAST_COMMA_FIRST)
            continue
        pid = int(row.name)

        # Estimate Playing Time
        tmpsub = tmp[tmp.Player_ID == pid]
        if len(tmpsub) == 0:
            print('%s has no data' % row.LAST_COMMA_FIRST)
            continue

        # Expected minutes: average of the median and the mean of the last
        # seven games. tmpsub is non-empty here, so this slice is too.
        recent_min = tmpsub.MIN.iloc[-7:]
        mins = .5*(recent_min.median() + recent_min.mean())

        # Floor / ceiling minutes: the values at positions ngames//10 and
        # (9*ngames+1)//10 of the ascending minutes since
        # MINUTES_RANGE_START. With fewer than two games the upper position
        # is out of range, so both bounds fall back to offsets around `mins`.
        season_min = np.sort(tmpsub[tmpsub.GAME_DATE >= MINUTES_RANGE_START].MIN.values)
        ngames = len(season_min)
        if ngames > 1:
            floor_mins = season_min[ngames // 10]
            ceil_mins = season_min[(9*ngames + 1) // 10]
        else:
            floor_mins = np.maximum(mins - 10, 0)
            ceil_mins = mins + 5

        params = player_params.loc[pid].copy()
        rates = dfsutils.params2rates(pd.DataFrame(params).T)

        # Any KeyError raised in this section skips the player. The message
        # names pid2pos, but the error may equally come from tvp_adj or from
        # a missing site key.
        try:
            adj = tvp_adj[pid2pos[pid]][row.Opponent]
            proj = dfsutils.rates_mins2projs(rates, mins, adj)
            unadj = dfsutils.rates_mins2projs(rates, mins)
            for site in dfsutils.DFS_SITES:
                proj['UNADJ_%s' % site] = unadj[site]
            floor_proj = dfsutils.rates_mins2projs(rates, floor_mins, adj)
            ceil_proj = dfsutils.rates_mins2projs(rates, ceil_mins, adj)
            proj['FLOOR_MIN'] = floor_mins
            proj['CEIL_MIN'] = ceil_mins
            for site in dfsutils.DFS_SITES:
                proj['FLOOR_%s' % site] = floor_proj[site]
                proj['CEIL_%s' % site] = ceil_proj[site]

            # Spread statistics over the HIST_SEASON_ID games.
            tmpsubseason = tmpsub[tmpsub.SEASON_ID == HIST_SEASON_ID]
            proj['HIST_FD_Skew'] = tmpsubseason.FD.skew()
            proj['HIST_FD_Kurt'] = tmpsubseason.FD.kurt()
            minstd = np.std(tmpsubseason.MIN, ddof=1)
            if np.isnan(minstd):
                # Default used when the sample std is undefined.
                minstd = 40.
            proj['HIST_MIN_std'] = minstd
            proj['HIST_FD_std'] = np.std(tmpsubseason.FD, ddof=1)

            # Share of the last 5/10/20 games at or below the bust line and
            # at or above the boom line, both scaled by this row's salary.
            salary = row.FD_Salary
            for window in (5, 10, 20):
                proj['Bust%d' % window] = np.mean(tmpsub.FD.iloc[-window:] <= BUST_FD_PER_DOLLAR*salary)
            for window in (5, 10, 20):
                proj['Boom%d' % window] = np.mean(tmpsub.FD.iloc[-window:] >= BOOM_FD_PER_DOLLAR*salary)
        except KeyError:
            print('%s  pid not in pid2pos' % row.LAST_COMMA_FIRST)
            continue

        # Reshape to a one-row frame indexed by Player_ID, PROJ_-prefixed.
        proj = pd.DataFrame(proj, columns=[pid]).T
        proj.columns = ['PROJ_%s' % x for x in proj.columns]
        proj['Player_ID'] = pid
        proj = proj.set_index('Player_ID')
        projs = projs.append(proj)

    # ---- Manual minute overrides -----------------------------------------
    # player_params and tmp from above are still current for this date, so
    # they are reused rather than reloaded.
    projs2 = projs.copy()
    for lcf, mins in overrides.items():
        pid = dfsutils.get_pid(lcf)
        if np.isnan(pid):
            print('%s  has nan pid' % lcf)
            continue
        pid = int(pid)

        try:
            row = pl.loc[pid]
        except KeyError:
            print('%s not found' % lcf)
            continue

        params = player_params.loc[pid].copy()
        rates = dfsutils.params2rates(pd.DataFrame(params).T)

        try:
            proj = dfsutils.rates_mins2projs(rates, mins, tvp_adj[pid2pos[pid]][row.Opponent])
            unadj = dfsutils.rates_mins2projs(rates, mins)
            for site in dfsutils.DFS_SITES:
                proj['UNADJ_%s' % site] = unadj[site]
        except KeyError:
            print('%s  pid not in pid2pos' % row.LAST_COMMA_FIRST)
            continue
        proj = pd.DataFrame(proj, columns=[pid]).T
        proj.columns = ['PROJ_%s' % x for x in proj.columns]
        proj['Player_ID'] = pid
        proj = proj.set_index('Player_ID')

        # .loc assignment writes in place even when pid has no row yet;
        # set_value returned a new object in that case, so the override was
        # silently discarded.
        for col in proj.columns:
            projs2.loc[pid, col] = proj[col].iloc[0]

    PROJS = pl.merge(projs2, left_index=True, right_index=True)

    # ---- Manual linear bumps: new = plus + times * old -------------------
    for name, s in increases.items():
        f, r = s
        pid = dfsutils.get_pid(name)
        # Same skip-and-report handling as the overrides loop.
        if np.isnan(pid) or int(pid) not in PROJS.index:
            print('%s not found' % name)
            continue
        pid = int(pid)
        for site in dfsutils.DFS_SITES:
            col = 'PROJ_%s' % site
            PROJS.loc[pid, col] = f + r*PROJS.loc[pid, col]

    # ---- Derived columns and output --------------------------------------
    for site in dfsutils.DFS_SITES:
        PROJS['ADJ_%s' % site] = (PROJS['PROJ_%s' % site] - PROJS['PROJ_UNADJ_%s' % site])
    PROJS['Injury Indicator'] = PROJS['Injury Indicator'].fillna('')
    PROJS['Injury Details'] = PROJS['Injury Details'].fillna('')
    PROJS['Status'] = [' - '.join([x, y]) if x != '' else '-' for x, y in zip(PROJS['Injury Indicator'], PROJS['Injury Details'])]

    PROJS.to_csv('MultiSiteProjections/nba_%s.csv' % today)

    # Date, seconds spent on this date, seconds since the backfill started
    # (the total was previously subtracted the wrong way round and came out
    # negative).
    print('%s %s %s' % (today, time.time() - start_time, time.time() - whole_start_time))
    today += datetime.timedelta(days=1)

NameError: name 'fix_team_abbrev' is not defined