In [1]:
import urllib

import lxml.html
import pandas as pd
import plotly
import plotly.offline
import plotly.graph_objs as go
import requests

plotly.offline.init_notebook_mode(connected=True)

# get the game results

In [2]:
# ESPN league whose scoreboard pages are scraped throughout this notebook
leagueid = 209006
# scoreboard endpoint. NOTE(review): scoreboard() below hard-codes this same
# URL instead of reading this variable, so changing it here has no effect
scoreboardurl = 'http://games.espn.com/ffl/scoreboard'

In [3]:
# one shared HTTP session, reused by every scoreboard() request
session = requests.Session()

class FflError(Exception):
    """Raised by scoreboard() when ESPN answers with a 302 redirect."""
    pass


def scoreboard(leagueid, seasonid, matchupperiodid):
    """Fetch one week's ESPN scoreboard page and return it as a parsed tree.

    Args:
        leagueid: ESPN league id, sent as the ``leagueId`` query parameter.
        seasonid: season, sent as the ``seasonId`` query parameter.
        matchupperiodid: week, sent as the ``matchupPeriodId`` query parameter.

    Returns:
        The root element produced by ``lxml.html.fromstring`` for the page.

    Raises:
        FflError: ESPN answered with a 302 redirect (treated as "end of
            season" by the callers in this notebook).
        requests.HTTPError: ESPN answered with a 4xx / 5xx status.
        requests.Timeout: no answer within the timeout.
    """
    resp = session.get(
        url='http://games.espn.com/ffl/scoreboard',
        params={
            'leagueId': leagueid,
            'seasonId': seasonid,
            'matchupPeriodId': matchupperiodid
        },
        # the redirect itself is the signal we look for, so don't follow it
        allow_redirects=False,
        # without a timeout a stalled connection would hang the notebook forever
        timeout=30,
    )

    if resp.status_code == 302:
        raise FflError("end of season")

    # an error page contains no matchup tables; parsing it would look exactly
    # like an empty week and silently truncate the scraped history
    resp.raise_for_status()

    return lxml.html.fromstring(resp.text)

In [4]:
sbelem = scoreboard(leagueid, 2016, 2)

In [5]:
sbelem.find('.//td[@class="winning score"]').attrib

{'class': 'winning score', 'title': '118.51', 'width': '18%'}

In [6]:
def parse_scoreboard(sbelem):
    """Placeholder: locate the matchup cells of a scoreboard page.

    Looks up the ``scoreboardMatchups`` element, selects its ``td`` cells with
    ``width="49%"`` and walks over them without doing anything yet. Always
    returns ``None``.
    """
    container = sbelem.get_element_by_id('scoreboardMatchups')
    cells = container.xpath('.//td[@width="49%"]')
    for _cell in cells:
        continue

In [7]:
# every matchup on the page is a <table class="ptsBased matchup">
matchups = sbelem.xpath('.//table[@class="ptsBased matchup"]')
print(len(matchups))
# keep the first matchup around to develop the parsing code against
matchup = matchups[0]
matchup

6


<Element table at 0x7fabc0c6d368>

In [8]:
tr0, tr1 = matchup.xpath('.//tr')[:2]

In [9]:
def parse_team(teamelem):
    """Extract one team's details from its row of a matchup table.

    Args:
        teamelem: element exposing ``find`` and ``xpath`` (a ``<tr>`` of a
            matchup table in this notebook).

    Returns:
        A dict with keys
        ``name``   - ``title`` attribute of the first ``<a>``,
        ``id``     - ``teamId`` query parameter of that link's ``href`` (int),
        ``abbrev`` - text of the first ``<span>`` minus its first and last
                     character (the surrounding brackets),
        ``owner``  - text of the ``<span class="owners">``,
        ``score``  - first text node of the ``<td>`` whose class contains
                     ``score`` (float).
        If anything goes wrong the exception is printed and ``{}`` is returned.
    """
    # import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.parse` has been loaded
    from urllib.parse import parse_qs, urlparse

    try:
        a = teamelem.find('.//a')
        return {
            'name': a.attrib['title'],
            'id': int(
                parse_qs(
                    urlparse(a.attrib['href']).query
                )['teamId'][0]
            ),
            # strip the surrounding "(" and ")"
            'abbrev': teamelem.find('.//span').text[1: -1],
            'owner': teamelem.find('.//span[@class="owners"]').text,
            'score': float(teamelem.xpath('.//td[contains(@class, "score")]/text()')[0]),
        }
    except Exception as e:
        # best effort: report the problem and hand back an empty record
        print(e)
        return {}

In [10]:
team0, team1 = [parse_team(_) for _ in matchup.xpath('.//tr')[:2]]
team0

{'abbrev': 'FDKK',
 'id': 1,
 'name': 'Frog Disguised Killer Kittens (Dylan Thomson)',
 'owner': 'Dylan Thomson',
 'score': 118.4}

In [11]:
team1

{'abbrev': 'CFB',
 'id': 6,
 'name': 'Chicken Fried Blumpkins (Brad Nicolai)',
 'owner': 'Brad Nicolai',
 'score': 118.5}

In [12]:
def parse_matchup_results(matchupelem):
    """Yield two result rows for one matchup, one from each team's viewpoint.

    The first two ``<tr>`` rows of ``matchupelem`` are parsed with
    ``parse_team``. Each yielded dict holds the team's ``id`` and ``score``
    plus the ``opponent`` id and ``opponent_score``.
    """
    first, second = map(parse_team, matchupelem.xpath('.//tr')[:2])
    for (team, foe) in ((first, second), (second, first)):
        yield {
            'id': team['id'],
            'score': team['score'],
            'opponent': foe['id'],
            'opponent_score': foe['score'],
        }

In [13]:
# flatten every matchup on the page into one frame: two rows per matchup,
# one from each team's point of view
results = pd.DataFrame([
    result
    for matchup in matchups
    for result in parse_matchup_results(matchup)
])
results

Unnamed: 0,id,opponent,opponent_score,score
0,1,6,118.5,118.4
1,6,1,118.4,118.5
2,2,9,106.0,81.9
3,9,2,81.9,106.0
4,5,12,108.5,71.1
5,12,5,71.1,108.5
6,4,11,100.5,79.9
7,11,4,79.9,100.5
8,8,7,138.9,122.5
9,7,8,122.5,138.9


In [14]:
def week_results(leagueid, seasonid, matchupperiodid):
    """Scrape one week's scoreboard into a DataFrame of per-team results.

    Every ``<table class="ptsBased matchup">`` on the page contributes the two
    rows produced by ``parse_matchup_results``.
    """
    page = scoreboard(leagueid, seasonid, matchupperiodid)
    rows = []
    for table in page.xpath('.//table[@class="ptsBased matchup"]'):
        rows.extend(parse_matchup_results(table))
    return pd.DataFrame(rows)

In [15]:
week_results(leagueid, 2016, 1)

Unnamed: 0,id,opponent,opponent_score,score
0,2,6,88.5,120.6
1,6,2,120.6,88.5
2,1,5,105.8,138.5
3,5,1,138.5,105.8
4,9,12,106.9,106.9
5,12,9,106.9,106.9
6,8,11,123.6,108.8
7,11,8,108.8,123.6
8,4,3,132.8,163.3
9,3,4,163.3,132.8


In [16]:
def season_results(leagueid, seasonid):
    """Collect the results of every week of one season.

    Weeks are fetched with ``week_results`` starting at matchup period 1. The
    walk stops at the first week that either raises ``FflError`` or comes back
    empty.

    Args:
        leagueid: ESPN league id.
        seasonid: season to scrape.

    Returns:
        A DataFrame of all weekly results with an added ``matchupperiodid``
        column and a fresh 0..n-1 index; an empty DataFrame when no week
        produced any rows.

    Raises:
        ValueError: a 20th week was still returning data (runaway guard).
    """
    weekly = []
    matchupperiodid = 1

    while True:
        try:
            results = week_results(leagueid, seasonid, matchupperiodid)
        except FflError:
            break

        if results.empty:
            break

        weekly.append(results.assign(matchupperiodid=matchupperiodid))

        # safety net against looping forever if the stop conditions never fire
        if matchupperiodid == 20:
            raise ValueError('wtf')

        matchupperiodid += 1

    if not weekly:
        # pd.concat refuses an empty list
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; a single concat is also
    # linear instead of re-copying the accumulated frame every week
    return pd.concat(weekly, ignore_index=True)

In [17]:
season = season_results(leagueid, 2017)

In [18]:
season.head()

Unnamed: 0,id,opponent,opponent_score,score,matchupperiodid
0,5,9,85.6,68.5,1
1,9,5,68.5,85.6,1
2,2,12,102.9,133.3,1
3,12,2,133.3,102.9,1
4,6,1,84.3,69.5,1


In [19]:
def league_score_history(leagueid, seasonid=2017):
    """Collect results for ``seasonid`` and every earlier season with data.

    Seasons are fetched with ``season_results`` walking backwards one year at
    a time, stopping at the first season that yields no rows.

    Args:
        leagueid: ESPN league id.
        seasonid: most recent season to include.

    Returns:
        A DataFrame of all results with an added ``seasonid`` column and a
        fresh 0..n-1 index (newest season first); an empty DataFrame when the
        starting season itself has no rows.
    """
    seasons = []

    while True:
        season = season_results(leagueid, seasonid)

        if season.empty:
            break

        seasons.append(season.assign(seasonid=seasonid))
        seasonid -= 1

    if not seasons:
        # pd.concat refuses an empty list
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(seasons, ignore_index=True)

In [20]:
lh = league_score_history(leagueid)

In [21]:
lh.head()

Unnamed: 0,id,opponent,opponent_score,score,matchupperiodid,seasonid
0,5,9,85.6,68.5,1,2017
1,9,5,68.5,85.6,1,2017
2,2,12,102.9,133.3,1,2017
3,12,2,133.3,102.9,1,2017
4,6,1,84.3,69.5,1,2017


# get team info

Team metadata to join onto the results and use as plot legends.

In [22]:
def parse_matchup_teams(matchupelem):
    """Yield the ``parse_team`` record of each of the first two ``<tr>`` rows."""
    rows = matchupelem.xpath('.//tr')[:2]
    yield from map(parse_team, rows)

        
def team_info(leagueid, seasonid):
    """Return the teams of a season, read off the week-1 scoreboard.

    Args:
        leagueid: ESPN league id.
        seasonid: season to look up.

    Returns:
        A DataFrame with one row per team (the ``parse_team`` fields minus
        ``score``) sorted by ``id``; an empty DataFrame when the scoreboard
        yields no team data.

    Raises:
        FflError: propagated from ``scoreboard``.
    """
    sbelem = scoreboard(leagueid, seasonid, 1)
    matchups = sbelem.xpath('.//table[@class="ptsBased matchup"]')
    results = pd.DataFrame([
        result
        for matchup in matchups
        for result in parse_matchup_teams(matchup)
    ])

    if results.empty:
        # no columns to drop or sort by: dropping 'score' would raise KeyError
        # before a caller ever gets to test `.empty`
        return results

    return results.drop(['score'], axis=1).sort_values(by='id')


def team_info_history(leagueid, seasonid=2017):
    """Collect team info for ``seasonid`` and every earlier season with data.

    Seasons are fetched with ``team_info`` walking backwards one year at a
    time, stopping at the first season that raises ``FflError`` or returns no
    rows.

    Args:
        leagueid: ESPN league id.
        seasonid: most recent season to include.

    Returns:
        A DataFrame of all teams with an added ``seasonid`` column and a fresh
        0..n-1 index (newest season first); an empty DataFrame when nothing
        was found.
    """
    frames = []

    while True:
        try:
            teams = team_info(leagueid, seasonid)
        except FflError:
            break

        if teams.empty:
            break

        frames.append(teams.assign(seasonid=seasonid))
        seasonid -= 1

    if not frames:
        # pd.concat refuses an empty list
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(frames, ignore_index=True)

In [23]:
teams = team_info_history(leagueid)
teams.head()

Unnamed: 0,abbrev,id,name,owner,seasonid
0,SFF,1,SuFu Fury (Dylan Thomson),Dylan Thomson,2017
1,ZLM,2,Zach Lives Matter (Ron Zach Lamberty),Ron Zach Lamberty,2017
2,ASS,3,Great White Sharts! (Jeffrey Miller),Jeffrey Miller,2017
3,BJ,4,Blackjack And Hookers (Collin Solberg),Collin Solberg,2017
4,JNZ,5,Just Noise (Ben Koch),Ben Koch,2017


# Elo scoring

In [24]:
import sys

In [25]:
# NOTE(review): hard-coded absolute path to a local checkout that provides the
# `elo` module imported in the next cell; adjust it for your machine
sys.path.insert(0, '/home/zlamberty/code/elo')

In [26]:
import elo

In [27]:
# Elo calculator used by the single-season cells below.
# NOTE(review): the parameter meanings live in the local `elo` module, which is
# not shown here - presumably k is the update factor, ptscale / base shape the
# expected-score curve and hfa is a home-field advantage; confirm
myelo = elo.Elo(k=20, ptscale=400, hfa=0, base=10)

let's start with just the current season

In [28]:
teamsnow = teams[teams.seasonid == 2017]
teamsnow

Unnamed: 0,abbrev,id,name,owner,seasonid
0,SFF,1,SuFu Fury (Dylan Thomson),Dylan Thomson,2017
1,ZLM,2,Zach Lives Matter (Ron Zach Lamberty),Ron Zach Lamberty,2017
2,ASS,3,Great White Sharts! (Jeffrey Miller),Jeffrey Miller,2017
3,BJ,4,Blackjack And Hookers (Collin Solberg),Collin Solberg,2017
4,JNZ,5,Just Noise (Ben Koch),Ben Koch,2017
5,CFB,6,Chicken Fried Blumpkins (Brad Nicolai),Brad Nicolai,2017
6,VEU,7,Very European Uppercuts (Jake Hillesheim),Jake Hillesheim,2017
7,FURY,8,Doof Warriors (nick igoe),nick igoe,2017
8,LMKS,9,DA Lil' Mookies (Dana Kinsella),Dana Kinsella,2017
9,ZEKE,11,Zeke the Pummeler (Michael Lubke),Michael Lubke,2017


In [29]:
# first pass at the Elo table: one row per current team, everyone at 1500
# NOTE(review): the week column is called 'matchupperiod' here, while `season`
# and the rebuilt table a few cells further down use 'matchupperiodid'
dfelo = pd.DataFrame({'id': teamsnow.id})
dfelo.loc[:, 'matchupperiod'] = 1
dfelo.loc[:, 'elo'] = 1500
dfelo

Unnamed: 0,id,matchupperiod,elo
0,1,1,1500
1,2,1,1500
2,3,1,1500
3,4,1,1500
4,5,1,1500
5,6,1,1500
6,7,1,1500
7,8,1,1500
8,9,1,1500
9,11,1,1500


In [30]:
season.head()

Unnamed: 0,id,opponent,opponent_score,score,matchupperiodid
0,5,9,85.6,68.5,1
1,9,5,68.5,85.6,1
2,2,12,102.9,133.3,1
3,12,2,133.3,102.9,1
4,6,1,84.3,69.5,1


In [31]:
import numpy as np

In [32]:
# Seed the Elo table: every current team enters week 1 rated 1500.
dfelo = pd.DataFrame({'id': teamsnow.id})
dfelo.loc[:, 'matchupperiodid'] = 1
dfelo.loc[:, 'elo'] = 1500

matchupperiodid = 1

# Walk the season week by week. Each pass reads the ratings teams carried into
# the week and appends the ratings they carry into the following week.
while True:
    df = season[season.matchupperiodid == matchupperiodid].copy()
    if df.empty:
        # no games recorded for this week: we have run out of season.
        # (An explicit test instead of `assert` + blanket `except`, which hid
        # genuine errors and would never stop under `python -O`.)
        break

    # win / loss / tie from this row's team's point of view: 1 / 0 / 0.5
    df.loc[:, 'wlt'] = np.where(
        df.score > df.opponent_score,
        1,
        np.where(df.score < df.opponent_score, 0, 0.5)
    )

    # attach the current rating of the team, then of its opponent
    thiselo = dfelo[dfelo.matchupperiodid == matchupperiodid]
    dfwelo = df.merge(thiselo, on=['id', 'matchupperiodid'])
    dfwelo = dfwelo.merge(
        thiselo,
        left_on=['opponent', 'matchupperiodid'],
        right_on=['id', 'matchupperiodid'],
        suffixes=('', '_opp')
    )

    # the updated rating is the one the team brings into *next* week
    dfwelo.loc[:, 'matchupperiodid'] = matchupperiodid + 1

    dfwelo.loc[:, 'es'] = myelo.expected_score(
        r0=dfwelo.elo,
        r1=dfwelo.elo_opp
    )

    dfwelo.loc[:, 'elo'] = myelo.update_score(
        r=dfwelo.elo,
        realizedScore=dfwelo.wlt,
        expectedScore=dfwelo.es
    )

    # DataFrame.append was removed in pandas 2.0
    dfelo = pd.concat([dfelo, dfwelo[['id', 'matchupperiodid', 'elo']]])

    matchupperiodid += 1

dfwelo.head()




Unnamed: 0,id,opponent,opponent_score,score,matchupperiodid,wlt,elo,id_opp,elo_opp,es
0,13,7,105.2,56.5,16,0.0,1501.194261,7,1545.942193,0.448698
1,7,13,56.5,105.2,16,1.0,1554.916159,13,1510.168227,0.551302
2,12,1,107.8,89.1,16,0.0,1513.099951,1,1558.028714,0.448433
3,1,12,89.1,107.8,16,1.0,1566.997379,12,1522.068616,0.551567
4,4,11,95.3,102.6,16,1.0,1504.778396,11,1518.84223,0.464405


join the team name in...

In [33]:
# attach each team's abbreviation so it can label the plot traces
# (merge defaults to an inner join, so ids missing from teamsnow are dropped).
# NOTE: this overwrites dfelo, so re-running the cell without rebuilding dfelo
# first merges 'abbrev' in a second time
dfelo = dfelo.merge(teamsnow[['id', 'abbrev']], on=['id'])
dfelo.head()

Unnamed: 0,id,matchupperiodid,elo,abbrev
0,1,1,1500.0,SFF
1,1,2,1510.0,SFF
2,1,3,1499.424989,SFF
3,1,4,1509.424989,SFF
4,1,5,1518.882974,SFF


In [34]:
# one lines+markers trace per team: week on x, Elo rating on y
data = [
    go.Scatter(x=grp.matchupperiodid, y=grp.elo, mode='lines+markers', name=abbrev)
    for (abbrev, grp) in dfelo.groupby('abbrev')
]

plotly.offline.iplot(data)

## a general function

In [35]:
def elo_history(scoredf, idcol='id', seasoncol='seasonid',
                matchupcol='matchupperiodid', reversioncoef=0.75,
                elomean=1500):
    """Compute week-by-week Elo ratings for every team across seasons.

    Args:
        scoredf: one row per (team, week). Must hold the columns named by
            ``idcol``, ``seasoncol`` and ``matchupcol`` plus ``opponent``,
            ``score`` and ``opponent_score``. The frame is not modified.
        idcol: name of the team id column.
        seasoncol: name of the season column.
        matchupcol: name of the week (matchup period) column.
        reversioncoef: share of a team's distance from ``elomean`` that is
            kept when a new season starts (1 = carry over unchanged,
            0 = reset everyone to ``elomean``).
        elomean: rating given to teams with no history; also the value
            ratings regress towards between seasons.

    Returns:
        A DataFrame with columns ``idcol``, ``matchupcol``, ``seasoncol`` and
        ``elo``. A row for week *w* is the rating the team carries *into* week
        *w*; the result of a season's last week is stored under the week
        number after it. Teams with no game in a week keep their rating.
        An empty DataFrame is returned for empty input.

    Any error raised while processing a week now propagates; it used to be
    printed and the rest of that season silently skipped.
    """
    # we need an elo object to calculate elo rankings week to week
    # NOTE(review): expected_score / update_score come from the local `elo`
    # module; they are assumed to work element-wise on pandas Series - confirm
    myelo = elo.Elo(k=20, ptscale=400, hfa=0, base=10)

    # sorting makes our groupby life easier (and gives us our own copy)
    scoredf = scoredf.sort_values(by=[seasoncol, matchupcol, idcol])

    # win / lose / tie value from the row's team's point of view: 1 / 0 / 0.5
    scoredf.loc[:, 'wlt'] = np.where(
        scoredf.score > scoredf.opponent_score,
        1,
        np.where(scoredf.score < scoredf.opponent_score, 0, 0.5)
    )

    dfelo = pd.DataFrame()
    for (season, scoredfnow) in scoredf.groupby(seasoncol):
        # opening ratings for this season, filed under its first week
        dfelonow = pd.DataFrame({idcol: scoredfnow[idcol].unique()})
        dfelonow.loc[:, seasoncol] = season
        dfelonow.loc[:, matchupcol] = scoredfnow[matchupcol].min()

        if dfelo.empty:
            # very first season: everybody starts at the mean
            dfelonow.loc[:, 'elo'] = elomean
            dfelo = dfelonow.reset_index(drop=True)
        else:
            # mean reversion! first, we need the elo at the end of the prev season
            eloprev = dfelo[dfelo[seasoncol] < season]
            eloprev = eloprev[eloprev[seasoncol] == eloprev[seasoncol].max()]
            eloprev = eloprev[eloprev[matchupcol] == eloprev[matchupcol].max()]

            # now, regress that to the mean by our reversion factor and join that
            # in for each idcol value. this is a complicated way of writing
            #     new = elomean + reversioncoef * (old - elomean)
            # (assign() builds a new frame instead of writing into a slice)
            eloprev = eloprev.assign(
                elo=elomean + reversioncoef * (eloprev.elo - elomean)
            )

            # teams without a previous-season rating start at the mean
            dfelonow = dfelonow.merge(
                eloprev[[idcol, 'elo']],
                how='left',
                on=[idcol]
            ).fillna(elomean)

            # DataFrame.append was removed in pandas 2.0
            dfelo = pd.concat([dfelo, dfelonow]).reset_index(drop=True)

        # iterate through weeks *in this year*. I would love to do groupby but
        # i need to know what the next matchup is every time and in case there's
        # a skip in the matchups...
        matchups = scoredfnow[matchupcol].unique()
        for (i, matchup) in enumerate(matchups):
            # pull up the scores and elo rankings for this (season, matchup)
            dfweek = scoredfnow[scoredfnow[matchupcol] == matchup].copy()

            thiselo = dfelo[
                (dfelo[seasoncol] == season)
                & (dfelo[matchupcol] == matchup)
            ]

            # join the current elo values for both teams with the scores.
            # how='right' keeps rated teams that did not play this week
            dfwelo = dfweek.merge(thiselo, how='right', on=[idcol, matchupcol])
            dfwelo = dfwelo.merge(
                thiselo,
                how='left',
                left_on=['opponent', matchupcol],
                right_on=[idcol, matchupcol],
                suffixes=('', '_opp')
            )

            # iterate the matchup number (we're about to calculate
            # *next week's* elo ranking); after the last week just count on
            if i + 1 < len(matchups):
                nextmatchup = matchups[i + 1]
            else:
                nextmatchup = matchup + 1
            dfwelo[matchupcol] = nextmatchup
            dfwelo[seasoncol] = season

            # magic time
            dfwelo.loc[:, 'es'] = myelo.expected_score(
                r0=dfwelo.elo,
                r1=dfwelo.elo_opp
            )
            # teams without a game have no result (NaN): keep their old rating
            dfwelo.loc[:, 'elo'] = myelo.update_score(
                r=dfwelo.elo,
                realizedScore=dfwelo.wlt,
                expectedScore=dfwelo.es
            ).fillna(dfwelo.elo)

            dfelo = pd.concat(
                [dfelo, dfwelo[[idcol, matchupcol, seasoncol, 'elo']]]
            )

    return dfelo

In [36]:
dfelo = elo_history(lh)

In [37]:
# add team name in
# NOTE(review): teamsnow only holds the 2017 teams and merge defaults to an
# inner join, so ids that are absent from 2017 are dropped here and every
# earlier season is labelled with the team's 2017 abbreviation
dfelo = dfelo.merge(teamsnow[['id', 'abbrev']], on=['id'])

In [38]:
dfelo.head()

Unnamed: 0,elo,id,matchupperiodid,seasonid,abbrev
0,1500.0,1,1,2005,SFF
1,1510.0,1,2,2005,SFF
2,1510.0,1,3,2005,SFF
3,1499.712256,1,4,2005,SFF
4,1489.44885,1,5,2005,SFF


In [39]:
# create an x value that is a combo of seasonid and matchupperiodid:
# dftmp is dfelo plus each season's first ('min') and last ('max') matchup period
dftmp = dfelo.merge(
    dfelo.groupby('seasonid')['matchupperiodid'].agg(['min', 'max']).reset_index(),
    how='left',
    on='seasonid',
    suffixes=('', '_agg')
)
# won't get to 1 -- feature, not bug! need to leave the whole number for
# the start of the following season
# NOTE(review): the arithmetic pairs dfelo and dftmp rows by index label, so it
# relies on both frames carrying matching indexes - confirm
dfelo.loc[:, 'x'] = dfelo['seasonid'] + (dftmp['matchupperiodid'] - dftmp['min']) / dftmp['max']

In [40]:
# one lines+markers trace per team over the whole league history:
# x is the fractional season position computed above, y the Elo rating
data = [
    go.Scatter(
        x=g.x,
        y=g.elo,
        mode='lines+markers',
        name=team,
    )
    for (team, g) in dfelo.groupby('abbrev')
]

plotly.offline.iplot(data)

Elo ranking distributions over time:

In [41]:
dfelo.head()

Unnamed: 0,elo,id,matchupperiodid,seasonid,abbrev,x
0,1500.0,1,1,2005,SFF,2005.0
1,1510.0,1,2,2005,SFF,2005.066667
2,1510.0,1,3,2005,SFF,2005.133333
3,1499.712256,1,4,2005,SFF,2005.2
4,1489.44885,1,5,2005,SFF,2005.266667


In [44]:
dfelo.groupby(['id', 'abbrev']).elo.agg(['min', 'max', 'mean', 'std']).sort_values(by='mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max,mean,std
id,abbrev,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
5,JNZ,1392.3142,1531.93445,1459.143919,29.919621
6,CFB,1413.080103,1529.408691,1472.899121,23.875029
9,LMKS,1400.237819,1585.259382,1493.101354,38.956537
8,FURY,1402.153064,1572.951475,1493.540466,30.878313
3,ASS,1425.449582,1587.686444,1499.809419,34.76775
13,NoNa,1466.986314,1535.450825,1501.19851,17.035737
4,BJ,1437.154355,1566.60428,1509.393924,25.667455
1,SFF,1420.43181,1592.29333,1510.08996,35.862016
11,ZEKE,1405.32234,1622.420513,1513.150497,44.090172
2,ZLM,1447.075673,1576.389978,1515.465282,30.020548


In [50]:
# draw the per-team histograms on top of each other rather than stacked
layout = go.Layout(barmode='overlay')

# one semi-transparent, probability-normalised Elo histogram per team
data = [
    go.Histogram(
        x=teamelo.elo, opacity=0.75, name=team, histnorm='probability', nbinsx=25
    )
    for (team, teamelo) in dfelo.groupby('abbrev')
]

fig = go.Figure(data=data, layout=layout)

plotly.offline.iplot(fig)