In [234]:
%matplotlib inline
import pandas as pd
import urllib2
import bs4
import re
import datetime
import unidecode
import numpy as np
import time
import itertools
import matplotlib.pyplot as plt
from scipy import stats
plt.rcParams['figure.figsize'] = (12, 16)

### TODO:
- url to player's page is 'http://www.hockey-reference.com/players/%s/%s.html' % (pid[0], pid)
- url to player's gamelogs is 'http://www.hockey-reference.com/players/%s/%s/gamelog/%d/' % (pid[0], pid, year) where year is the 2nd year of the season (2015-2016 => year=2016)

In [2]:
def get_player_list():
    BASEURL = 'http://www.hockey-reference.com'
    URL = '%s/players' % BASEURL
    dic = dict(LEAGUE=[], NAME=[], PID=[], START_YEAR=[], END_YEAR=[], POS=[], HEIGHT=[], WEIGHT=[], BIRTHDAY=[])
    for i in range(97, 97+26):
        letter = chr(i)
        url = '%s/%s/' % (URL, letter)
        try:
            page = urllib2.urlopen(url)
        except:
            print letter, 'page not found'
            continue
        html = page.read()
        soup = bs4.BeautifulSoup(html, "lxml")
        
        rows = soup.findAll('tr')[1:]
        for row in rows:
            lg = row['class'][0]
            items = row.findAll('td')
            a = items[0].find('a')
            name = unidecode.unidecode(a.text)
            href = a['href']
            try:
                pid = re.search('/players/%s/(.+)\.html' % letter, href).group(1)
            except:
                continue
            
            try:
                yr0 = int(items[1].text)
            except ValueError:
                yr0 = None
            
            try:
                yr1 = int(items[2].text)
            except ValueError:
                yr1 = None
            
            pos = items[3].text
            hgt = items[4].text
            
            try:
                wgt = int(items[5].text)
            except ValueError:
                wgt = None
            
            try:
                bd = datetime.datetime.strptime(items[6].text, '%B %d, %Y').date()
            except:
                bd = None
            
            dic['LEAGUE'].append(lg)
            dic['NAME'].append(name)
            dic['PID'].append(pid)
            dic['START_YEAR'].append(yr0)
            dic['END_YEAR'].append(yr1)
            dic['POS'].append(pos)
            dic['HEIGHT'].append(hgt)
            dic['WEIGHT'].append(wgt)
            dic['BIRTHDAY'].append(bd)
    return pd.DataFrame(dic)

# pl = get_player_list()

In [142]:
pl.to_csv('/Users/andingo/Desktop/NHL/data/playerlist.csv')

In [136]:
pl = pd.read_csv('/Users/andingo/Desktop/NHL/data/playerlist.csv', index_col=0)

In [318]:
def get_gamelog_url(pid, season):
    return 'http://www.hockey-reference.com/players/%s/%s/gamelog/%d/' % (pid[0], pid, season)

def get_player_gamelogs(pid, season):
    url = get_gamelog_url(pid, season)
    page = urllib2.urlopen(url)
    html = page.read()
    soup = bs4.BeautifulSoup(html, 'lxml')
    
    cols = [x.text for x in soup.findAll('thead')[0].findAll('tr')[1].findAll('th')]
    cols[1] = 'GM_NUM'
    if cols[5] == '':
        cols[5] = 'H/A'
    if cols[7] == '':
        cols[7] = 'W/L'
    FLAG = True
    FLAG2 = True
    for k, col in enumerate(cols):
        if FLAG2:
            if col == 'G':
                cols[k] = 'GM_NUM'
                FLAG2 = False
        if FLAG:
            cols[k] = 'GOAL_%s' % col if col in ('EV','PP','SH','GW') else col
        else:
            cols[k] = 'AST_%s' % col if col in ('EV','PP','SH','GW') else col
        if col == 'GW':
            FLAG = False
    
    output = []
    for row in soup.findAll('tbody')[0].findAll('tr'):
        if row['class'] != ['']:
            continue
        
        items = [x.text for x in row.findAll('td')]
        items = map(lambda x: 0 if x == '' else x, items)
        for i in range(len(items)):
            try:
                if '.' in items[i]:
                    items[i] = float(items[i])
                else:
                    items[i] = int(items[i])
            except:
                try:
                    items[i] = datetime.datetime.strptime(items[i], '%Y-%m-%d').date()
                except:
                    try:
                        m, s = items[i].split(':')
                        items[i] = int(m) + int(s)/60.
                    except:
                        pass
            
        output.append(items)
    
    output = pd.DataFrame(output, columns=cols)
    output['H/A'][output['H/A']==0] = 'v'
    output['PID'] = pid
    output['GID'] = ['%s@%s%s' % (t,o,d) if ha=='@' else '%s@%s%s' % (o,t,d) for i,d,t,ha,o in output[['Date','Tm','H/A','Opp']].itertuples()]
    output['SEASON'] = season
    return output

# gl = get_player_gamelogs(pl.PID.iloc[0], 1999)

In [327]:
global_stats = pd.read_csv('/Users/andingo/Desktop/NHL/data/global_stats.csv')
goalie_stats = pd.read_csv('/Users/andingo/Desktop/NHL/data/goalie_stats.csv')
try:
    del global_stats['Unnamed: 0']
    del goalie_stats['Unnamed: 0']
except:
    pass
global_stats.Date = [datetime.datetime.strptime(x, '%Y-%m-%d').date() for x in global_stats.Date]
goalie_stats.Date = [datetime.datetime.strptime(x, '%Y-%m-%d').date() for x in goalie_stats.Date]

In [None]:
start_time = time.time()
global_stats = pd.DataFrame()
goalie_stats = pd.DataFrame()
pids = pl[(pl.END_YEAR==2016) & (pl.LEAGUE=='nhl')].PID
tmp = pl.set_index('PID')
# last_date = np.max(global_stats.Date)
for k, pid in enumerate(pids):
#     if k < 468:
#         continue
    print pid, '%d / %d' % (k, len(pids))
    try:
        gl = get_player_gamelogs(pid, 2016)
#         gl = gl[gl.Date > last_date].copy()
        if tmp.loc[pid].POS != 'G':
            global_stats = global_stats.append(gl, ignore_index=True)
        else:
            goalie_stats = goalie_stats.append(gl, ignore_index=True)
    except Exception, e:
        print pid, 'error in 2016', get_gamelog_url(pid, 2016), e.message
print 'Time Elapsed:', time.time() - start_time

global_stats = global_stats.drop_duplicates()
goalie_stats = goalie_stats.drop_duplicates()

abdelju01 0 / 887
agozzan01 1 / 887
allenja01 2 / 887
alzneka01 3 / 887
anderfr01 4 / 887
andercr01 5 / 887
anderjo05 6 / 887
anderjo03 7 / 887
andrean01 8 / 887
andrisv01 9 / 887
angelmi01 10 / 887
anisiar01 11 / 887
arcobma01 12 / 887
armiajo01 13 / 887
arvidvi01 14 / 887
athanan01 15 / 887
atkinca01 16 / 887
bachmri01 17 / 887
backeda01 18 / 887
backlmi01 19 / 887
backsni02 20 / 887
baertsv01 21 / 887
bailejo01 22 / 887
barbema01 23 / 887
barkoal01 24 / 887
barrity01 25 / 887
bartkma01 26 / 887
bartlvi01 27 / 887
bassco01 28 / 887
baunky01 29 / 887
beaglja01 30 / 887
beaucfr01 31 / 887
beaulna01 32 / 887
beckta01 33 / 887
belesma01 34 / 887
bellepi01 35 / 887
bennja01 36 / 887
bennjo01 37 / 887
bennebe01 38 / 887
bennesa01 39 / 887
benoian01 40 / 887
bergepa01 41 / 887
berglpa01 42 / 887
bernijo01 43 / 887
bernist01 44 / 887
berrare01 45 / 887
bertsch02 46 / 887
berubje02 47 / 887
bickebr01 48 / 887
biegaal01 49 / 887
biekske01 50 / 887
bigrach01 51 / 887
binnijo01 52 / 887
bishobe0

In [329]:
global_stats.to_csv('/Users/andingo/Desktop/NHL/data/global_stats.csv')
goalie_stats.to_csv('/Users/andingo/Desktop/NHL/data/goalie_stats.csv')

In [330]:
def fantasy_points(GL):
    gl = GL.copy()
    if 'GA' in gl.keys():
        try:
            gl['DK'] = 3*(gl['W/L']=='W') - gl.GA + .2*gl.SV + gl.SO
        except KeyError:
            gl['DK'] = 3*gl.W - gl.GA + .2*gl.SV + gl.SO
        gl['FD'] = gl.DK
    else:
        gl['DK'] = 3*gl.G + 2*gl.A + .5*gl.S + .5*gl.BLK + gl.GOAL_SH + gl.AST_SH + (1.5*(gl.G>=3) if 'HT' not in gl.keys() else 1.5*gl.HT)
        gl['FD'] = 3*gl.G + 2*gl.A + .5*gl.GOAL_PP + .5*gl.AST_PP + .4*gl.S + gl['+/-'] + .25*gl.PIM
    return gl

In [333]:
global_stats = fantasy_points(global_stats)
goalie_stats = fantasy_points(goalie_stats)

In [335]:
global_stats[global_stats.Date == datetime.date(2016, 2, 10)]

Unnamed: 0,+/-,A,AST_EV,AST_PP,AST_SH,Age,BLK,DEC,Date,FO%,...,SEASON,SHFT,SO,SV,SV%,TOI,Tm,W/L,DK,FD
73112,0,0,0,0,0,28-350,1,,2016-02-10,0.0,...,2016,26,,,,21.166667,DET,W,,
73113,,,,,,34-265,,L,2016-02-10,,...,2016,,0,26,0.929,57.866667,OTT,L,3.2,3.2
73114,1,1,1,0,0,21-188,0,,2016-02-10,16.7,...,2016,10,,,,6.750000,DET,W,,
73115,0,0,0,0,0,23-128,0,,2016-02-10,0.0,...,2016,20,,,,15.533333,VAN,W,,
73116,1,0,0,0,0,27-251,0,,2016-02-10,0.0,...,2016,24,,,,17.083333,VAN,W,,
73117,1,0,0,0,0,27-312,0,,2016-02-10,0.0,...,2016,23,,,,15.933333,VAN,W,,
73118,-1,0,0,0,0,26-056,0,,2016-02-10,0.0,...,2016,29,,,,18.033333,ARI,L,,
73119,0,0,0,0,0,26-213,0,,2016-02-10,0.0,...,2016,14,,,,11.266667,OTT,L,,
73120,0,0,0,0,0,39-213,5,,2016-02-10,0.0,...,2016,23,,,,17.633333,NYR,W,,
73121,1,0,0,0,0,28-141,0,,2016-02-10,68.8,...,2016,23,,,,17.333333,NYR,W,,


## Projections

In [30]:
def get_pid(name, pl, **kwargs):
    I = pl.NAME == name
    if not any(I):
        raise Exception('%s not found' % name)
    for col, st in kwargs.iteritems():
        I = I & (pl[col] == st)
    if not any(I):
        raise Exception('%s not found with specifications %s' % (name, kwargs))
    return pl[I].PID.iloc[0]

In [113]:
NAME_CORRECTIONS = {
    'Alexander Ovechkin' : 'Alex Ovechkin',
    'Johnny Gaudreau' : 'John Gaudreau',
    'Jon Quick' : 'Jonathan Quick',
    "Matthew O'Connor" : "Matt O'Connor",
    'Cameron Talbot' : 'Cam Talbot',
    'Jacob Muzzin' : 'Jake Muzzin',
    'Samuel Bennett' : 'Sam Bennett',
    'Michael Ferland' : 'Micheal Ferland',
    'Pierre Parenteau' : 'P.A. Parenteau',
    'Viatcheslav Voynov' : 'Slava Voynov',
    'Alex Burmistrov' : 'Alexander Burmistrov',
    'Trevor Van Riemsdyk' : 'Trevor van Riemsdyk',
    'Dylan Demelo' : 'Dylan DeMelo',
    'Tobias Enstrom' : 'Toby Enstrom',
    'Chris Vande Velde' : 'Chris VandeVelde',
    'Joe Morrow' : 'Joseph Morrow',
    'Andew Miller' : 'Andrew Miller',
    'Phil Varone' : 'Philip Varone',
    'Richard Clune' : 'Rich Clune',
    'Max Talbot' : 'Maxime Talbot',
    'Yevgeni Medvedev' : 'Evgeny Medvedev',
}

In [180]:
TEAM_CORRECTION = dict(itertools.izip(
    ['ANH', 'ARI', 'BOS', 'BUF', 'CAR', 'CLS', 'CGY', 'CHI', 'COL',
   'DAL', 'DET', 'EDM', 'FLA', 'LA', 'MIN', 'MTL', 'NJD', 'NSH',
   'NYI', 'NYR', 'OTT', 'PHI', 'PIT', 'SJ', 'STL', 'TBL', 'TOR',
   'VAN', 'WPG', 'WAS'],
    ['ANA', 'ARI', 'BOS', 'BUF', 'CAR', 'CBJ', 'CGY', 'CHI', 'COL',
   'DAL', 'DET', 'EDM', 'FLA', 'LAK', 'MIN', 'MTL', 'NJD', 'NSH',
   'NYI', 'NYR', 'OTT', 'PHI', 'PIT', 'SJS', 'STL', 'TBL', 'TOR',
   'VAN', 'WPG', 'WSH']
))

In [181]:
fdpl = pd.read_csv('/Users/andingo/Desktop/NHL/PlayerLists/pl_fd_nhl_2016-02-11.csv')
fdpl['NAME'] = ['%s %s' % (f,l) for f,l in itertools.izip(fdpl['First Name'], fdpl['Last Name'])]
fdpl['PID'] = [get_pid(NAME_CORRECTIONS[x], pl) if x in NAME_CORRECTIONS else get_pid(x, pl) for x in fdpl.NAME]
fdpl.Team = map(lambda x: TEAM_CORRECTION[x], fdpl.Team)
fdpl.Opponent = map(lambda x: TEAM_CORRECTION[x], fdpl.Opponent)
del fdpl['Unnamed: 12']
del fdpl['Unnamed: 13']
fdpl
# for x in fdpl.NAME:
#     try:
#         get_pid(NAME_CORRECTIONS[x], pl) if x in NAME_CORRECTIONS else get_pid(x, pl)
#     except Exception, e:
#         print e.message

Unnamed: 0,Id,Position,First Name,Last Name,FPPG,Played,Salary,Game,Team,Opponent,Injury Indicator,Injury Details,NAME,PID
0,14675-8273,G,Corey,Crawford,5.834783,46,10200,DAL@CHI,CHI,DAL,,,Corey Crawford,crawfco01
1,14675-9416,G,Braden,Holtby,5.627907,43,9900,WAS@MIN,WSH,MIN,,,Braden Holtby,holtbbr01
2,14675-8718,LW,Alexander,Ovechkin,5.158000,50,9500,WAS@MIN,WSH,MIN,,,Alexander Ovechkin,ovechal01
3,14675-8732,G,Semyon,Varlamov,4.583784,37,9300,COL@OTT,COL,OTT,,,Semyon Varlamov,varlasi01
4,14675-8986,LW,Jamie,Benn,4.706481,54,9100,DAL@CHI,DAL,CHI,,,Jamie Benn,bennja01
5,14675-8494,C,John,Tavares,3.561224,49,8900,LA@NYI,NYI,LAK,,,John Tavares,tavarjo01
6,14675-9302,G,Michal,Neuvirth,5.476190,21,8700,BUF@PHI,PHI,BUF,,,Michal Neuvirth,neuvimi01
7,14675-8633,G,Thomas,Greiss,5.554545,22,8600,LA@NYI,NYI,LAK,,,Thomas Greiss,greisth01
8,14675-54514,LW,Johnny,Gaudreau,3.729412,51,8500,CGY@SJ,CGY,SJS,DTD,Suspension,Johnny Gaudreau,gaudrjo01
9,14675-8262,RW,Patrick,Kane,5.328070,57,8500,DAL@CHI,CHI,DAL,,,Patrick Kane,kanepa01


In [227]:
FIELD_PROJS = pd.DataFrame()
GOALIE_PROJS = pd.DataFrame()
for pid in fdpl.PID:
    I = global_stats.PID == pid
    if any(I):
        sub = global_stats[I]
        mins = sub.TOI.sum()
        if sub.S.sum() > 0:
            sperc = float(sub.G.sum()) / sub.S.sum()
        else:
            sperc = 0.01
        proj = sub.copy()[['A','+/-','PIM','GOAL_PP','AST_PP','GOAL_SH','AST_SH','S','BLK']].sum() / mins
        proj['G'] = proj.S * sperc
        proj['PID'] = pid
        FIELD_PROJS = FIELD_PROJS.append(proj, ignore_index=True)
    else:
        sub = goalie_stats[goalie_stats.PID==pid]
        mins = sub.TOI.sum()
        proj = pd.Series()
        if (sub.GA + sub.SV).sum() > 0:
            proj['SV%'] = float(sub.SV.sum()) / (sub.GA + sub.SV).sum()
        else:
            proj['SV%'] = .85
        proj['PID'] = pid
        GOALIE_PROJS = GOALIE_PROJS.append(proj, ignore_index=True)
GOALIE_PROJS = GOALIE_PROJS.set_index('PID')
FIELD_PROJS = FIELD_PROJS.set_index('PID')

## Idiosyncratic Adjustment

In [213]:
adj = {}
for team in np.unique(fdpl.Team):
    sub = global_stats[global_stats.Opp==team].copy()
    adj[team] = {}
    for st in ('G','A','+/-','PIM','GOAL_PP','GOAL_SH','AST_PP','AST_SH','BLK','S'):
        act = sub[st]
        prj = np.array(sub.TOI) * FIELD_PROJS[st].loc[sub.PID.tolist()]
        I = (~np.isnan(act.tolist())) & (~np.isnan(prj.tolist()))
        b = np.array(act[I]).dot(prj[I].tolist()) / np.array(prj[I].tolist()).dot(prj[I].tolist())
        adj[team][st] = b

## Make Adjustments

In [229]:
FIELD_PROJS

Unnamed: 0_level_0,+/-,A,AST_PP,AST_SH,BLK,G,GOAL_PP,GOAL_SH,PIM,S
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ovechal01,0.014291,0.014100,0.003287,0.000000,0.016996,0.028808,0.009493,0.000000,0.028640,0.233893
bennja01,0.006667,0.026553,0.005263,0.001789,0.031691,0.018354,0.005316,0.001704,0.027351,0.154347
tavarjo01,0.001023,0.023887,0.009001,0.000215,0.021431,0.020105,0.006933,0.000000,0.019933,0.151746
gaudrjo01,0.006149,0.029617,0.009671,0.000000,0.014196,0.019291,0.004842,0.000000,0.011158,0.119376
kanepa01,0.012598,0.039270,0.011872,0.000000,0.013699,0.026172,0.007516,0.000000,0.014273,0.161631
toewsjo01,0.016139,0.023762,0.004702,0.002033,0.015825,0.019370,0.003349,0.004106,0.028431,0.121906
paveljo01,0.014432,0.021519,0.007533,0.000000,0.049966,0.022772,0.006496,0.000000,0.012658,0.149233
getzlry01,0.011901,0.037404,0.008664,0.000000,0.041207,0.014808,0.002315,0.000000,0.049257,0.130269
marchbr03,0.007121,0.013461,0.001143,0.000484,0.026425,0.022001,0.003927,0.001662,0.081995,0.150868
giroucl01,-0.002519,0.030072,0.013583,0.000356,0.014502,0.016645,0.006823,0.000235,0.024386,0.178437


In [233]:
ADJ_PROJS = FIELD_PROJS.copy()
for pid in ADJ_PROJS.index:
    for st in ('G','A','+/-','PIM','GOAL_PP','GOAL_SH','AST_PP','AST_SH','BLK','S'):
        ADJ_PROJS.set_value(pid, st, ADJ_PROJS[st].loc[pid] * adj[fdpl.set_index('PID').loc[pid].Opponent][st])
ADJ_PROJS

Unnamed: 0_level_0,+/-,A,AST_PP,AST_SH,BLK,G,GOAL_PP,GOAL_SH,PIM,S
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ovechal01,0.017031,0.012632,0.002403,0.000000,0.016059,0.026365,0.006500,0.000000,0.025454,0.225994
bennja01,0.007116,0.022574,0.004189,0.002178,0.033745,0.014553,0.003496,0.001976,0.018679,0.152915
tavarjo01,0.001400,0.021799,0.008334,0.000123,0.022332,0.018965,0.007561,0.000000,0.014759,0.140599
gaudrjo01,0.004871,0.031577,0.010903,0.000000,0.014510,0.020244,0.005218,0.000000,0.012697,0.114372
kanepa01,0.011861,0.045525,0.012484,0.000000,0.016621,0.027761,0.006754,0.000000,0.014328,0.162682
toewsjo01,0.015194,0.027547,0.004944,0.003696,0.019201,0.020546,0.003009,0.009048,0.028540,0.122698
paveljo01,0.018760,0.020853,0.006389,0.000000,0.054403,0.022599,0.004224,0.000000,0.008489,0.148951
getzlry01,0.014539,0.044186,0.010274,0.000000,0.034858,0.018005,0.002640,0.000000,0.056386,0.143658
marchbr03,0.006953,0.013252,0.001434,0.000257,0.029465,0.021675,0.004834,0.001213,0.090501,0.146454
giroucl01,-0.001585,0.032266,0.013700,0.000349,0.011792,0.018565,0.006731,0.000152,0.021218,0.204383


## Project TOI and Finalize Projections

In [236]:
TOI_OVERRIDES = {
    
}

for pid in ADJ_PROJS.index:
    try:
        toi = TOI_OVERRIDES[pid]
    except KeyError:
        toi = global_stats[(global_stats.PID==pid) & (global_stats.SEASON==2016)].TOI
    ADJ_PROJS.set_value(pid, 'TOI', 1)
    ADJ_PROJS.loc[pid] *= .5*toi.mean() + .5*toi.median()
    ADJ_PROJS.set_value(pid, 'HT', 1. - stats.poisson.cdf(2, ADJ_PROJS.G.loc[pid]))

ADJ_PROJS

Unnamed: 0_level_0,+/-,A,AST_PP,AST_SH,BLK,G,GOAL_PP,GOAL_SH,PIM,S,TOI,HT
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ovechal01,0.347047,0.257407,0.048972,0.000000,0.327237,0.537243,0.132454,0.000000,0.518671,4.605052,20.376833,1.736965e-02
bennja01,0.142474,0.451958,0.083858,0.043612,0.675599,0.291356,0.069999,0.039552,0.373961,3.061488,20.020833,3.318343e-03
tavarjo01,0.027953,0.435259,0.166392,0.002461,0.445888,0.378661,0.150957,0.000000,0.294682,2.807276,19.966497,6.830521e-03
gaudrjo01,0.098164,0.636380,0.219728,0.000000,0.292423,0.407971,0.105159,0.000000,0.255892,2.304958,20.153105,8.360631e-03
kanepa01,0.244670,0.939128,0.257521,0.000000,0.342878,0.572670,0.139328,0.000000,0.295563,3.355927,20.628801,2.050152e-02
toewsjo01,0.298772,0.541694,0.097226,0.072677,0.377568,0.404017,0.059178,0.177929,0.561225,2.412772,19.664286,8.143480e-03
paveljo01,0.379363,0.421690,0.129199,0.000000,1.100134,0.457001,0.085407,0.000000,0.171656,3.012074,20.221955,1.133675e-02
getzlry01,0.275542,0.837431,0.194717,0.000000,0.660642,0.341244,0.050030,0.000000,1.068644,2.722646,18.952257,5.138765e-03
marchbr03,0.129885,0.247531,0.026790,0.004807,0.550380,0.404872,0.090300,0.022655,1.690479,2.735635,18.679167,8.190141e-03
giroucl01,-0.033004,0.671941,0.285306,0.007266,0.245566,0.386621,0.140180,0.003159,0.441867,4.256301,20.825160,7.227989e-03


In [252]:
STARTERS = [
    'Jhonas Enroth',
    'Thomas Greiss',
    'Robin Lehner',
    'Steve Mason',
    'Frederik Andersen',
    'Joonas Korpisalo',
    'Semyon Varlamov',
    'Andrew Hammond',
    'Braden Holtby',
    'Devan Dubnyk',
    'Tuukka Rask',
    'Connor hellebuyck',
    'Kari Lehtonen',
    'Corey Crawford',
    'Jonathan Bernier',
    'Cam Talbot',
    'Karri Ramo',
    'Alex Stalock',
]

for pid in GOALIE_PROJS.index:
#     try:
    toi = 60 if fdpl.set_index('PID').loc[pid].NAME in STARTERS else 0
#     except KeyError:
#         toi = goalie_stats[(goalie_stats.PID==pid) & (goalie_stats.SEASON==2016)].TOI
    GOALIE_PROJS.set_value(pid, 'TOI', toi)

GOALIE_PROJS

Unnamed: 0_level_0,SV%,TOI,GA,SV,SO,W,DK,FD
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
crawfco01,0.907902,60,3.764162,37.107222,0.023187,0.220,4.340469,4.340469
holtbbr01,0.913943,60,2.959972,31.435638,0.051820,0.130,3.768976,3.768976
varlasi01,0.931225,60,2.939006,39.794424,0.052918,0.500,6.572797,6.572797
neuvimi01,0.924772,0,3.796630,46.671477,0.022446,0.355,6.625112,6.625112
greisth01,0.904234,60,3.005961,28.382575,0.049491,0.500,4.220045,4.220045
rasktu01,0.905195,60,3.618040,34.544752,0.026835,0.820,5.777746,5.777746
anderfr01,0.910523,60,3.984884,40.550389,0.018595,0.190,4.713788,4.713788
quickjo01,0.917689,0,3.138552,34.991929,0.043346,0.500,5.403180,5.403180
johnsch02,0.937412,0,2.328930,34.881809,0.097400,0.645,6.679831,6.679831
jonesma02,0.908941,0,3.291636,32.856679,0.037193,0.500,4.816892,4.816892


In [242]:
for game in np.unique(fdpl.Game):
    a, h = game.split('@')
    print '"%s"'%TEAM_CORRECTION[a],':,'
    print '"%s"'%TEAM_CORRECTION[h],':,'

"ANA" :,
"CBJ" :,
"BOS" :,
"WPG" :,
"BUF" :,
"PHI" :,
"CGY" :,
"SJS" :,
"COL" :,
"OTT" :,
"DAL" :,
"CHI" :,
"LAK" :,
"NYI" :,
"TOR" :,
"EDM" :,
"WSH" :,
"MIN" :,


In [253]:
WIN_ODDS = {
"ANA" : .19,
"CBJ" : .81,
"BOS" : .82,
"WPG" : .18,
"BUF" : .645,
"PHI" : .355,
"CGY" : .5,
"SJS" : .5,
"COL" : .5,
"OTT" : .5,
"DAL" : .78,
"CHI" : .22,
"LAK" : .5,
"NYI" : .5,
"TOR" : .625,
"EDM" : .375,
"WSH" : .13,
"MIN" : .87,
}

In [256]:
for pid in GOALIE_PROJS.index:
    opp = fdpl.set_index('PID').loc[pid].Opponent
    tm = fdpl.set_index('PID').loc[pid].Team
    sub = fdpl[fdpl.Opponent==opp]
    tmp = ADJ_PROJS.loc[sub.PID.tolist()]
    GOALIE_PROJS.set_value(pid, 'GA', tmp.G.sum())
    GOALIE_PROJS.set_value(pid, 'SV', tmp.S.sum()-tmp.G.sum())
    GOALIE_PROJS.set_value(pid, 'SO', stats.poisson.pmf(0, GOALIE_PROJS.loc[pid].GA))
    GOALIE_PROJS.set_value(pid, 'W', WIN_ODDS[tm])
    GOALIE_PROJS.set_value(pid, 'SV%', GOALIE_PROJS.loc[pid].SV / (GOALIE_PROJS.loc[pid].GA + GOALIE_PROJS.loc[pid].SV))
    GOALIE_PROJS.loc[pid] *= GOALIE_PROJS.loc[pid].TOI / 60.
GOALIE_PROJS

Unnamed: 0_level_0,SV%,TOI,GA,SV,SO,W,DK,FD
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
crawfco01,0.907902,60,3.764162,37.107222,0.023187,0.220,4.340469,4.340469
holtbbr01,0.913943,60,2.959972,31.435638,0.051820,0.130,3.768976,3.768976
varlasi01,0.931225,60,2.939006,39.794424,0.052918,0.500,6.572797,6.572797
neuvimi01,0.000000,0,0.000000,0.000000,0.000000,0.000,0.000000,0.000000
greisth01,0.904234,60,3.005961,28.382575,0.049491,0.500,4.220045,4.220045
rasktu01,0.905195,60,3.618040,34.544752,0.026835,0.820,5.777746,5.777746
anderfr01,0.910523,60,3.984884,40.550389,0.018595,0.190,4.713788,4.713788
quickjo01,0.000000,0,0.000000,0.000000,0.000000,0.000,0.000000,0.000000
johnsch02,0.000000,0,0.000000,0.000000,0.000000,0.000,0.000000,0.000000
jonesma02,0.000000,0,0.000000,0.000000,0.000000,0.000,0.000000,0.000000


In [257]:
GOALIE_PROJS = fantasy_points(GOALIE_PROJS)
GOALIE_PROJS

Unnamed: 0_level_0,SV%,TOI,GA,SV,SO,W,DK,FD
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
crawfco01,0.907902,60,3.764162,37.107222,0.023187,0.220,4.340469,4.340469
holtbbr01,0.913943,60,2.959972,31.435638,0.051820,0.130,3.768976,3.768976
varlasi01,0.931225,60,2.939006,39.794424,0.052918,0.500,6.572797,6.572797
neuvimi01,0.000000,0,0.000000,0.000000,0.000000,0.000,0.000000,0.000000
greisth01,0.904234,60,3.005961,28.382575,0.049491,0.500,4.220045,4.220045
rasktu01,0.905195,60,3.618040,34.544752,0.026835,0.820,5.777746,5.777746
anderfr01,0.910523,60,3.984884,40.550389,0.018595,0.190,4.713788,4.713788
quickjo01,0.000000,0,0.000000,0.000000,0.000000,0.000,0.000000,0.000000
johnsch02,0.000000,0,0.000000,0.000000,0.000000,0.000,0.000000,0.000000
jonesma02,0.000000,0,0.000000,0.000000,0.000000,0.000,0.000000,0.000000


In [258]:
FIELDER_PROJS = fantasy_points(ADJ_PROJS)
FIELDER_PROJS

Unnamed: 0_level_0,+/-,A,AST_PP,AST_SH,BLK,G,GOAL_PP,GOAL_SH,PIM,S,TOI,HT,DK,FD
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
ovechal01,0.347047,0.257407,0.048972,0.000000,0.327237,0.537243,0.132454,0.000000,0.518671,4.605052,20.376833,1.736965e-02,4.618743,4.535993
bennja01,0.142474,0.451958,0.083858,0.043612,0.675599,0.291356,0.069999,0.039552,0.373961,3.061488,20.020833,3.318343e-03,3.734668,3.315470
tavarjo01,0.027953,0.435259,0.166392,0.002461,0.445888,0.378661,0.150957,0.000000,0.294682,2.807276,19.966497,6.830521e-03,3.645789,3.389709
gaudrjo01,0.098164,0.636380,0.219728,0.000000,0.292423,0.407971,0.105159,0.000000,0.255892,2.304958,20.153105,8.360631e-03,3.807904,3.743237
kanepa01,0.244670,0.939128,0.257521,0.000000,0.342878,0.572670,0.139328,0.000000,0.295563,3.355927,20.628801,2.050152e-02,5.476420,5.455621
toewsjo01,0.298772,0.541694,0.097226,0.072677,0.377568,0.404017,0.059178,0.177929,0.561225,2.412772,19.664286,8.143480e-03,3.953430,3.777828
paveljo01,0.379363,0.421690,0.129199,0.000000,1.100134,0.457001,0.085407,0.000000,0.171656,3.012074,20.221955,1.133675e-02,4.287491,3.948791
getzlry01,0.275542,0.837431,0.194717,0.000000,0.660642,0.341244,0.050030,0.000000,1.068644,2.722646,18.952257,5.138765e-03,4.397946,4.452728
marchbr03,0.129885,0.247531,0.026790,0.004807,0.550380,0.404872,0.090300,0.022655,1.690479,2.735635,18.679167,8.190141e-03,3.392433,3.414981
giroucl01,-0.033004,0.671941,0.285306,0.007266,0.245566,0.386621,0.140180,0.003159,0.441867,4.256301,20.825160,7.227989e-03,4.775944,4.496470


In [292]:
PROJS = FIELDER_PROJS.copy()[['FD','DK']]
PROJS = PROJS.append(GOALIE_PROJS.copy()[['FD','DK']])
PROJS['Pos'] = map(lambda x: fdpl.set_index('PID').loc[x].Position, PROJS.index)
PROJS['Salary'] = map(lambda x: fdpl.set_index('PID').loc[x].Salary, PROJS.index)
PROJS['Team'] = map(lambda x: fdpl.set_index('PID').loc[x].Team, PROJS.index)
PROJS['Game'] = map(lambda x: fdpl.set_index('PID').loc[x].Game, PROJS.index)
PROJS['Name'] = map(lambda x: fdpl.set_index('PID').loc[x].NAME, PROJS.index)
PROJS['Status'] = map(lambda x: fdpl.set_index('PID').loc[x]['Injury Indicator'], PROJS.index)
PROJS

Unnamed: 0_level_0,FD,DK,Pos,Salary,Team,Game,Name,Status
PID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ovechal01,4.535993,4.618743,LW,9500,WSH,WAS@MIN,Alexander Ovechkin,
bennja01,3.315470,3.734668,LW,9100,DAL,DAL@CHI,Jamie Benn,
tavarjo01,3.389709,3.645789,C,8900,NYI,LA@NYI,John Tavares,
gaudrjo01,3.743237,3.807904,LW,8500,CGY,CGY@SJ,Johnny Gaudreau,DTD
kanepa01,5.455621,5.476420,RW,8500,CHI,DAL@CHI,Patrick Kane,
toewsjo01,3.777828,3.953430,C,8200,CHI,DAL@CHI,Jonathan Toews,
paveljo01,3.948791,4.287491,RW,8200,SJS,CGY@SJ,Joe Pavelski,
getzlry01,4.452728,4.397946,C,8000,ANA,ANH@CLS,Ryan Getzlaf,
marchbr03,3.414981,3.392433,LW,7800,BOS,BOS@WPG,Brad Marchand,
giroucl01,4.496470,4.775944,C,7800,PHI,BUF@PHI,Claude Giroux,


## Create Optimizer for Each Site

In [279]:
import pulp_lineup_optimization as lopt
def get_optimizer(PROJS, site):
    return eval('get_optimizer_%s(PROJS)' % site)

def get_optimizer_FD(PROJS):
    salary = 55000.
    nslots = 9
    
    PROJS['Proj'] = PROJS.FD
    posnums = {'LW': 1, 'RW': 2, 'C': 3, 'D': 4, 'G': 5}
    PROJS['PosNum'] = map(lambda x: posnums[x], PROJS.Pos)
    
    lo = lopt.LineupOptimizer(PROJS, nslots, salary)
    lo.addPositionConstraint('LW', 'eq', 2)
    lo.addPositionConstraint('RW', 'eq', 2)
    lo.addPositionConstraint('C', 'eq', 2)
    lo.addPositionConstraint('D', 'eq', 2)
    lo.addPositionConstraint('G', 'eq', 1)
    
    games = np.unique(PROJS.Game)
    # At least players from 3 teams
    for k, team in enumerate(np.unique(PROJS.Team)):
        for team2 in np.unique(PROJS.Team)[k+1:]:
            lo.addTeamLimitConstraint('%s/%s' % (team,team2), 'le', 8)
        # Max 4 players from a single team
        lo.addTeamLimitConstraint(team, 'le', 4)

    return lo

def get_optimizer_DK(PROJS):
    salary = 50000.
    nslots = 9
    
    PROJS['Proj'] = PROJS.DK
    posnums = {'W': 1, 'C': 2, 'D': 3, 'G': 4}
    PROJS['PosNum'] = map(lambda x: posnums[x], PROJS.Pos)
    
    lo = lopt.LineupOptimizer(PROJS, nslots, salary)
    lo.addPositionConstraint('W/C/D/G', 'eq', nslots)
    lo.addPositionConstraint('W', 'ge', 3)
    lo.addPositionConstraint('C', 'ge', 2)
    lo.addPositionConstraint('D', 'ge', 2)
    lo.addPositionConstraint('G', 'ge', 1)
    
    # At least skates from 3 teams
    for k, team in enumerate(np.unique(PROJS.Team)):
        for team2 in np.unique(PROJS.Team)[k+1:]:
            lo.addCustomConstraint(lambda x: ((x.Team==team) | (x.Team==team2))&(x.Pos!='G'), 'le', 8)
    
    return lo

In [297]:
I = ~np.isnan(PROJS.FD)
# %time lo = get_optimizer(PROJS[I].copy(), 'FD')

OMITS = [
    'Artemi Panarin',
]

for name in OMITS:
    lo.addPlayerConstraint(get_pid(name, pl), 'eq', 0)

%time status, lu = lo.solve()
print status
print 'Proj: %.2f' % lu.Proj.sum()
print 'Salary: %.2f' % lu.Salary.sum()
print lu[['Name','Proj','Salary','Pos','Team','Game','Status']]

CPU times: user 372 ms, sys: 83.9 ms, total: 456 ms
Wall time: 1.56 s
Optimal
Proj: 38.97
Salary: 54700.00
                          Name      Proj  Salary Pos Team     Game Status
PID                                                                      
laddan01           Andrew Ladd  3.462235    5800  LW  WPG  BOS@WPG    NaN
perrema01    Mathieu Perreault  3.153158    4600  LW  WPG  BOS@WPG    NaN
kanepa01          Patrick Kane  5.455621    8500  RW  CHI  DAL@CHI    NaN
perryco01          Corey Perry  4.759579    7300  RW  ANA  ANH@CLS    NaN
mcdavco01       Connor McDavid  4.340869    6900   C  EDM  TOR@EDM    NaN
littlbr01         Bryan Little  3.443816    5400   C  WPG  BOS@WPG    NaN
gostish01  Shayne Gostisbehere  3.705330    5100   D  PHI  BUF@PHI    NaN
theodsh01        Shea Theodore  2.924505    3700   D  ANA  ANH@CLS    NaN
dubnyde01         Devan Dubnyk  7.728169    7400   G  MIN  WAS@MIN    NaN


In [298]:
print status
print 'Proj: %.2f' % lu.Proj.sum()
print 'Salary: %.2f' % lu.Salary.sum()
print lu[['Name','Proj','Salary','Pos','Team','Game','Status']]

Optimal
Proj: 38.97
Salary: 54700.00
                          Name      Proj  Salary Pos Team     Game Status
PID                                                                      
laddan01           Andrew Ladd  3.462235    5800  LW  WPG  BOS@WPG    NaN
perrema01    Mathieu Perreault  3.153158    4600  LW  WPG  BOS@WPG    NaN
kanepa01          Patrick Kane  5.455621    8500  RW  CHI  DAL@CHI    NaN
perryco01          Corey Perry  4.759579    7300  RW  ANA  ANH@CLS    NaN
mcdavco01       Connor McDavid  4.340869    6900   C  EDM  TOR@EDM    NaN
littlbr01         Bryan Little  3.443816    5400   C  WPG  BOS@WPG    NaN
gostish01  Shayne Gostisbehere  3.705330    5100   D  PHI  BUF@PHI    NaN
theodsh01        Shea Theodore  2.924505    3700   D  ANA  ANH@CLS    NaN
dubnyde01         Devan Dubnyk  7.728169    7400   G  MIN  WAS@MIN    NaN
