In [1]:
from bs4 import BeautifulSoup
import requests
from urllib import urlopen
from time import sleep
import pandas as pd

In [2]:
pd.set_option('display.max_columns', 500)

### Functions to scrape NFL team Data

In [17]:
def teamSite(team):
    return "https://www.pro-football-reference.com/teams/{team}/2017_games.htm".format(team=team)

def getTeamData(team):
    
    html = urlopen(teamSite(team)) # DL webpage
    soup = BeautifulSoup(html, "html5lib") # get html
    table = soup.find('table', attrs= {'class': "sortable stats_table"}) # get Team data table

    cols = ['day','Date','Time','bs','Win','OT','Rec','At','Opp','PF','PA','1stD','TotYd','PassY','RushY','TO',
            'o1stD','oTotYd','oPassY','oRushY','oTO','Off','Def','Sp.Tms'] # Columns available

    team_data = []
    for row in table.findAll('tr'): # foreach table row (game)
        game_data = []
        for cell in row.findAll('td'): #append table data (stat)
            game_data.append(cell.text)
        if (game_data != []):
            team_data.append(game_data)

    team_table = pd.DataFrame(team_data, columns=cols) # create table
    return team_table.drop(['day','bs','o1stD','oTotYd','oPassY','oRushY','oTO','Off','Def','Sp.Tms'],axis=1)
    

In [18]:
# teamList = [
#     'pit', 'rav', 'cin', 'cle', 'nwe', 'buf', 'mia', 'nyj', 'jax', 'oti', 'htx', 'clt', 'kan', 'sdg', 'rai', 'den',
#     'min', 'det', 'gnb', 'chi', 'phi', 'dal', 'was', 'nyg', 'car', 'nor', 'atl', 'tam', 'ram', 'sea', 'crd', 'sfo']
# nflData = {} # Contains data for each team
# for team in teamList:
#     try:
#         nflData[team] = getTeamData(team)
#     except:
#         print "FAILED:", team

In [19]:
def toFloat(series):
    return series.map(lambda x: 0 if x == "" else float(x))

for i in nflData:
    nflData[i]['result'] = toFloat(nflData[i]['PF']) / (toFloat(nflData[i]['PF']) + toFloat(nflData[i]['PA']))

In [20]:
# getting a list of all full team names
all_teams = []
for team in nflData.keys():
    all_teams += list(nflData[team]['Opp'])
all_teams = list(set(all_teams))

In [21]:
teamMap = {
    u'New Orleans Saints' : 'nor',
    u'Pittsburgh Steelers' : 'pit',
     u'New England Patriots' : 'nwe',
     u'Tampa Bay Buccaneers' : 'tam',
     u'Philadelphia Eagles' : 'phi',
     u'Atlanta Falcons' : 'atl',
     u'Cleveland Browns' : 'cle',
     u'Cincinnati Bengals' : 'cin',
     u'Los Angeles Chargers' : 'sdg',
     u'Oakland Raiders' : 'rai',
     u'Buffalo Bills' : 'buf',
     u'New York Giants' : 'nyg',
     u'Detroit Lions' : 'det',
     u'Bye Week' : 'None',
     u'Los Angeles Rams' : 'ram',
     u'Carolina Panthers' : 'car',
     u'San Francisco 49ers' : 'sfo',
     u'Washington Redskins' : 'was',
     u'Seattle Seahawks' : 'sea',
     u'Arizona Cardinals' : 'crd',
     u'Houston Texans' : 'htx',
     u'Tennessee Titans' : 'oti',
     u'Jacksonville Jaguars' : 'jax',
     u'Chicago Bears' : 'chi',
     u'Indianapolis Colts' : 'clt',
     u'Miami Dolphins' : 'mia',
     u'New York Jets' : 'nyj',
     u'Baltimore Ravens' : 'rav',
     u'Kansas City Chiefs' : 'kan',
     u'Denver Broncos' : 'den',
     u'Green Bay Packers' : 'gnb',
     u'Minnesota Vikings' : 'min',
     u'Dallas Cowboys' :'dal'
}

### Creating Glicko Module

In [22]:
import math

In [23]:
MU = 1500
PHI = 350
SIGMA = 0.06
TAU = 1.0
EPSILON = 0.000001
Q = math.log(10)/ 400.0

In [24]:
class Rating(object):
    
    def __init__(self, mu=MU, phi=PHI, sigma=SIGMA):
        self.mu = mu
        self.phi = phi
        self.sigma = sigma
        
    def __repr__(self):
        c = type(self)
        args = (c.__module__, c.__name__, self.mu, self.phi, self.sigma)
        return '%s.%s(mu=%.3f, phi=%.3f, sigma=%.3f)' % (args)
    


## Glicko2 Object

In [25]:
class Glicko2(object):
    
    def __init__(self, mu=MU, phi=PHI, sigma=SIGMA, tau=TAU, epsilon=EPSILON):
        self.mu = mu
        self.phi = phi
        self.sigma = sigma
        self.tau = tau
        self.epsilon = epsilon
    
    def create_rating(self, mu=None, phi=None, sigma=None):
        if mu is None:
            mu = self.mu
        if phi is None:
            phi = self.phi
        if sigma is None:
            sigma = self.sigma
        return Rating(mu, phi, sigma)
    
    def scale_up(self, rating, ratio=173.7178):
        mu = rating.mu * ratio + self.mu
        phi = rating.phi * ratio
        return self.create_rating(mu, phi, rating.sigma)
    def scale_down(self, rating, ratio=173.7178):
        mu = (rating.mu - self.mu) / ratio
        phi = rating.phi / ratio
        return self.create_rating(mu, phi, rating.sigma)
    
    def reduce_impact(self, rating):
        """The original form is `g(RD)`. This function reduces the impact of
        games as a function of an opponent's RD.
        """
        return 1 / math.sqrt(1 + (3 * rating.phi ** 2) / (math.pi ** 2))
    
    def expect_score(self, rating, other_rating, impact):
        return 1.0 / (1 + math.exp(-impact * (rating.mu - other_rating.mu)))
    
    def determine_sigma(self, rating, difference, variance):
        """Determines new sigma."""
        phi = rating.phi
        difference_squared = difference ** 2
        # 1. Let a = ln(s^2), and define f(x)
        alpha = math.log(rating.sigma ** 2)
        def f(x):
            """This function is twice the conditional log-posterior density of
            phi, and is the optimality criterion.
            """
            tmp = phi ** 2 + variance + math.exp(x)
            a = math.exp(x) * (difference_squared - tmp) / (2 * tmp ** 2)
            b = (x - alpha) / (self.tau ** 2)
            return a - b
        # 2. Set the initial values of the iterative algorithm.
        a = alpha
        if difference_squared > phi ** 2 + variance:
            b = math.log(difference_squared - phi ** 2 - variance)
        else:
            k = 1
            while f(alpha - k * math.sqrt(self.tau ** 2)) < 0:
                k += 1
            b = alpha - k * math.sqrt(self.tau ** 2)
        # 3. Let fA = f(A) and f(B) = f(B)
        f_a, f_b = f(a), f(b)
        # 4. While |B-A| > e, carry out the following steps.
        # (a) Let C = A + (A - B)fA / (fB-fA), and let fC = f(C).
        # (b) If fCfB < 0, then set A <- B and fA <- fB; otherwise, just set
        #     fA <- fA/2.
        # (c) Set B <- C and fB <- fC.
        # (d) Stop if |B-A| <= e. Repeat the above three steps otherwise.
        while abs(b - a) > self.epsilon:
            c = a + (a - b) * f_a / (f_b - f_a)
            f_c = f(c)
            if f_c * f_b < 0:
                a, f_a = b, f_b
            else:
                f_a /= 2
            b, f_b = c, f_c
        # 5. Once |B-A| <= e, set s' <- e^(A/2)
        return math.exp(1) ** (a / 2)
    
    def rate(self, rating, series):
            # Step 2. For each player, convert the rating and RD's onto the
            #         Glicko-2 scale.
            rating = self.scale_down(rating)
            # Step 3. Compute the quantity v. This is the estimated variance of the
            #         team's/player's rating based only on game outcomes.
            # Step 4. Compute the quantity difference, the estimated improvement in
            #         rating by comparing the pre-period rating to the performance
            #         rating based only on game outcomes.
            d_square_inv = 0
            variance_inv = 0
            difference = 0
            for actual_score, other_rating in series:
                other_rating = self.scale_down(other_rating)
                impact = self.reduce_impact(other_rating)
                expected_score = self.expect_score(rating, other_rating, impact)
                variance_inv += impact ** 2 * expected_score * (1 - expected_score)
                difference += impact * (actual_score - expected_score)
                d_square_inv += (
                    expected_score * (1 - expected_score) *
                    (Q ** 2) * (impact ** 2))
            difference /= variance_inv
            variance = 1. / variance_inv
            denom = rating.phi ** -2 + d_square_inv
            mu = rating.mu + Q / denom * (difference / variance_inv)
            phi = math.sqrt(1 / denom)
            # Step 5. Determine the new value, Sigma', ot the sigma. This
            #         computation requires iteration.
            sigma = self.determine_sigma(rating, difference, variance)
            # Step 6. Update the rating deviation to the new pre-rating period
            #         value, Phi*.
            phi_star = math.sqrt(phi ** 2 + sigma ** 2)
            # Step 7. Update the rating and RD to the new values, Mu' and Phi'.
            phi = 1 / math.sqrt(1 / phi_star ** 2 + 1 / variance)
            mu = rating.mu + phi ** 2 * (difference / variance)
            # Step 8. Convert ratings and RD's back to original scale.
            return self.scale_up(self.create_rating(mu, phi, sigma))

 


### Rate Player in a series

In [26]:
class Team(object):
    def toFloat(series):
        return series.map(lambda x: 0 if x == "" else float(x))
    
    def winValue(self):
        return toFloat(self.data['PF']) / (toFloat(self.data['PF']) + toFloat(self.data['PA']))
    
    def __init__(self, name):
        self.name = name
        self.rating = Rating()
        self.data = getTeamData(name)
        self.data['result'] = self.winValue()
        self.ratings = [self.rating]
        
    def __repr__(self):
        return self.name + ": " + self.rating.__repr__()
        
    def getSchedule(self):
        return map(lambda x: teamMap[x], list(nflData[self.name]['Opp']))
    
    def getResults(self):
        return list(self.data.dropna().apply(lambda row: (teamMap[row['Opp']], row['result']), axis=1)) 
    
    def getOnlyResults(self):
        return list(self.data['result'].dropna())
    
    def getMatchResult(self, opp):
        for tm in self.getResults():
            if opp == tm[0]:
                return tm[1]
            
    def setRating(self, rating):
        self.rating = rating
        self.ratings.append(rating)
        

### Ranking Teams By week

In [27]:
teamData = {}
for team in teamList:
    teamData[team] = Team(team)
    series = []

In [28]:
sea = teamData['sea']
len(sea.getSchedule())

17

In [29]:
matchups = []
for week in range(17):
    weekly_matchups = set()
    for team in teamList:
        try:
            match = tuple(sorted((team, teamData[team].getSchedule()[week])))
            weekly_matchups.add(match)
        except:
            continue
    matchups.append(weekly_matchups)

In [30]:
env = Glicko2(tau=0.5)
for week in matchups:
    for team1,team2 in week:
        if team1 == 'None' or team2 == 'None':
            continue
        result = teamData[team1].getMatchResult(team2)
        if not result:
            continue
        r1 = env.rate(teamData[team1].rating, [(result, teamData[team2].rating)])
        r2 = env.rate(teamData[team2].rating, [(1 - result, teamData[team1].rating)])
        teamData[team1].setRating(r1)
        teamData[team2].setRating(r2)
        
    

In [54]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
import plotly.graph_objs as go
init_notebook_mode(connected=True)

In [73]:
AFCNorth = ['pit', 'rav', 'cin', 'cle']
AFCEast = ['nwe', 'buf', 'mia', 'nyj']
AFCSouth = ['jax', 'oti', 'htx', 'clt']
ACFWest = ['kan', 'sdg', 'rai', 'den']
NFCNorth = ['min', 'det', 'gnb', 'chi']
NFCEast = ['phi', 'dal', 'was', 'nyg']
NFCSouth = ['car', 'nor', 'atl', 'tam']
NCFWest = ['ram', 'sea', 'crd', 'sfo']

AFC = [AFCNorth, AFCEast, AFCSouth, ACFWest]
NFC = [NFCNorth, NFCEast, NFCSouth, NCFWest]

In [90]:
# Create a trace
def teamRatingOverTime(team):
    
    trace = go.Scatter(
        name = team,
        x = teamData[team].data.index,
        y = map(lambda x: round(x.mu, 2), teamData[team].ratings)
    )
    return trace

In [102]:
# iplot([teamRatingOverTime(team) for team in AFCEast], filename='basic-line')

In [101]:
def flatten(lst):
    flat = []
    for el in lst:
        if  isinstance(el, list):
            flat += flatten(el)
        else:
            flat.append(el)
    return flat
            

a = [1, [2, [3, [4, [[5, 6, [8], 9, 10]], 11], 12], 13]]
# flatten(a)

### Team rating by Week

In [100]:
iplot([teamRatingOverTime(team) for team in flatten(AFC + NFC)], filename='basic-line')

In [31]:
def reverse_dict(dic):
    rev_dict = {}
    for key in dic.keys():
        rev_dict[dic[key]] = key
    return rev_dict

In [32]:
revteamMap = reverse_dict(teamMap)

In [33]:
def winProbability(team, opp):
    env = Glicko2(tau=0.5)
    return env.expect_score(teamData[team].rating, teamData[opp].rating, env.reduce_impact(teamData[opp].rating))
    

In [104]:
winProbability('sea', 'sea')

0.5

In [106]:
winProbability('rav', 'sea') # sucks to suck, Oscar

0.21155664813393607

In [38]:
allRatings = []
for team in teamList:
    allRatings.append([teamData[team].name, teamData[team].rating.mu])
map(lambda x: (revteamMap[x[0]], round(x[1], 2)), sorted(allRatings, key=lambda tup: tup[1]))

[(u'Cleveland Browns', 1384.98),
 (u'New York Giants', 1402.03),
 (u'Indianapolis Colts', 1426.4),
 (u'Tampa Bay Buccaneers', 1439.98),
 (u'Houston Texans', 1446.53),
 (u'Miami Dolphins', 1451.84),
 (u'Tennessee Titans', 1452.76),
 (u'Cincinnati Bengals', 1455.93),
 (u'San Francisco 49ers', 1457.54),
 (u'Oakland Raiders', 1465.19),
 (u'Denver Broncos', 1466.38),
 (u'New York Jets', 1476.77),
 (u'Baltimore Ravens', 1484.21),
 (u'Arizona Cardinals', 1488.82),
 (u'Green Bay Packers', 1491.12),
 (u'Chicago Bears', 1493.04),
 (u'Dallas Cowboys', 1500.03),
 (u'Buffalo Bills', 1500.45),
 (u'Washington Redskins', 1515.44),
 (u'Detroit Lions', 1516.76),
 (u'Los Angeles Chargers', 1520.56),
 (u'Kansas City Chiefs', 1532.62),
 (u'Los Angeles Rams', 1535.26),
 (u'Jacksonville Jaguars', 1540.8),
 (u'Atlanta Falcons', 1542.69),
 (u'Seattle Seahawks', 1555.77),
 (u'Minnesota Vikings', 1568.6),
 (u'Carolina Panthers', 1570.05),
 (u'Pittsburgh Steelers', 1572.03),
 (u'Philadelphia Eagles', 1593.17),
 (

### Ranking teams with static elo comparison

In [109]:
allData = {}
for team in teamList:
    allData[team] = Team(team)
    series = []
    for i in allData[team].getOnlyResults():
        series.append((i, env.create_rating(1500, 350, 0.06)))

    final_rating = env.rate(allData[team].rating, series)
    allData[team].rating = final_rating

In [111]:
sorted_data = []
for team in allData:
    sorted_data.append([team, allData[team].rating.mu])
map(lambda x: (revteamMap[x[0]], round(x[1], 2)), sorted(sorted_data, key=lambda tup: tup[1])) 

[(u'Cleveland Browns', 1358.0),
 (u'New York Giants', 1375.34),
 (u'Indianapolis Colts', 1389.04),
 (u'Miami Dolphins', 1411.54),
 (u'Houston Texans', 1425.42),
 (u'San Francisco 49ers', 1432.21),
 (u'Cincinnati Bengals', 1432.57),
 (u'Green Bay Packers', 1440.82),
 (u'Denver Broncos', 1442.23),
 (u'New York Jets', 1444.16),
 (u'Oakland Raiders', 1451.25),
 (u'Tampa Bay Buccaneers', 1459.2),
 (u'Buffalo Bills', 1464.2),
 (u'Arizona Cardinals', 1469.32),
 (u'Chicago Bears', 1480.3),
 (u'Tennessee Titans', 1487.52),
 (u'Washington Redskins', 1491.54),
 (u'Atlanta Falcons', 1506.83),
 (u'Dallas Cowboys', 1513.59),
 (u'Detroit Lions', 1523.19),
 (u'Seattle Seahawks', 1535.06),
 (u'Kansas City Chiefs', 1542.16),
 (u'Carolina Panthers', 1546.49),
 (u'Los Angeles Chargers', 1557.89),
 (u'Pittsburgh Steelers', 1569.39),
 (u'New Orleans Saints', 1598.47),
 (u'Minnesota Vikings', 1599.37),
 (u'Baltimore Ravens', 1600.51),
 (u'Los Angeles Rams', 1602.03),
 (u'Philadelphia Eagles', 1608.13),
 (u'N

### Rough Stuff

In [None]:
for i in table.findAll('tr'):
    for j in i.findAll('th'):
        pass
#         print j

for i in table.findAll('tr'):
    for j in i.findAll('th', attrs={"scope":"col"}):
        pass
#         print j.text
#         if 'poptip' in j:
#             print j.text