# Table of Contents
* [Libraries](#Libraries)
* [Example Requests](#Example-Requests)
	* &nbsp;
		* [Header Diagnosis](#Header-Diagnosis)
* [Accessing Play by Play Data via API](#Accessing-Play-by-Play-Data-via-API)
* [Quarterly Game Data by Team](#Quarterly-Game-Data-by-Team)
	* [With no iterations](#With-no-iterations)
		* [Grab desired arguments](#Grab-desired-arguments)
		* [Load Arguments](#Load-Arguments)
		* [Grab data](#Grab-data)
	* [Prep for Quarterly Data with Iterations](#Prep-for-Quarterly-Data-with-Iterations)
* [Data Acquisition Script](#Data-Acquisition-Script)
* [Functions and Constants](#Functions-and-Constants)


# Libraries


In [2]:
import requests
import pandas as pd
import numpy as np

import cPickle as pickle

import urllib2


%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns


# Example Requests


* Load the page
* Check network, XHR filters
* Grab the relevant request url

For example: This [initial link](http://stats.nba.com/league/player/#!/advanced/?sort=PIE&dir=1&Season=2014-15&SeasonType=Pre%20Season) leads to [target link](http://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2014-15&SeasonSegment=&SeasonType=Pre+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight=)

In [2]:
# Example

url="http://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2014-15&SeasonSegment=&SeasonType=Pre+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="

response=requests.get(url)
response.raise_for_status()
#data = response.json()['resultSets'][0]['rowSet']

### Header Diagnosis

In [228]:
# Checkout keys to find appropriate key
response.json().keys()

[u'resource', u'resultSets', u'parameters']

In [7]:
# Convert headers and stats to frame



In [29]:
    
    
# View the headers
headers = response.json()['resultSets'][0]['headers']

# Looks like a list of 512 stats
stats = response.json()['resultSets'][0]['rowSet']

df = pd.DataFrame(stats,columns = headers )

df.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,...,USG_PCT,PACE,PIE,FGM,FGA,FGM_PG,FGA_PG,FG_PCT,CFID,CFPARAMS
0,201985,A.J. Price,1610612756,PHX,28,6,4,2,0.667,13.3,...,0.217,95.68,0.106,15,33,2.5,5.5,0.455,5,2019851610612756
1,201166,Aaron Brooks,1610612741,CHI,30,8,4,4,0.5,19.5,...,0.23,94.3,0.072,22,59,2.8,7.4,0.373,5,2011661610612741
2,203905,Aaron Craft,1610612744,GSW,24,4,2,2,0.5,14.7,...,0.13,98.15,0.081,5,10,1.3,2.5,0.5,5,2039051610612744
3,203932,Aaron Gordon,1610612753,ORL,19,8,4,4,0.5,22.2,...,0.183,100.18,0.097,26,56,3.3,7.0,0.464,5,2039321610612753
4,203519,Adonis Thomas,1610612754,IND,22,4,2,2,0.5,8.9,...,0.142,94.97,0.025,2,10,0.5,2.5,0.2,5,2035191610612754


# Accessing Play by Play Data via API

Example URL:

`http://stats.nba.com/stats/playbyplayv2?EndPeriod=10&EndRange=55800&GameID=0021500020&RangeType=2&Season=2015-16&SeasonType=Regular+Season&StartPeriod=1&StartRange=0`


In [4]:
playurl = "http://stats.nba.com/stats/playbyplayv2?EndPeriod=10&EndRange=55800&GameID=0021500020&RangeType=2&Season=2015-16&SeasonType=Regular+Season&StartPeriod=1&StartRange=0"


In [5]:
response=requests.get(playurl)
response.raise_for_status()


In [8]:
df = response_to_frame(response)

In [16]:
df.head(2)

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_CITY,PLAYER2_TEAM_NICKNAME,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION
0,21500020,0,12,0,1,10:51 PM,12:00,,,,...,,,,0,0,,,,,
1,21500020,1,10,0,1,10:51 PM,12:00,Jump Ball Jordan vs. Pachulia: Tip to Griffin,,,...,Dallas,Mavericks,DAL,4,201933,Blake Griffin,1610612746.0,Los Angeles,Clippers,LAC


## Data on Per-Play Basis Excludes TO and Rebounds as Plays

In [48]:
# Determine number of plays
# Wondering if 4th quarter LAC plays = 28
print "Total Plays"
for P in [1,2,3,4]:
    print "Plays in Period %s = %s" % (str(P),str(len(df[df.PERIOD ==P])))

Total Plays
Plays in Period 1 = 134
Plays in Period 2 = 136
Plays in Period 3 = 124
Plays in Period 4 = 108


In [49]:
print "LAC Plays"
for P in [1,2,3,4]:
    print "Plays in Period %s = %s" % (str(P),str(len(df[~df.HOMEDESCRIPTION.isnull()][df.PERIOD ==P])))

LAC Plays
Plays in Period 1 = 74
Plays in Period 2 = 69
Plays in Period 3 = 56
Plays in Period 4 = 57




In [68]:
fourth = df[~df.HOMEDESCRIPTION.isnull()][df.PERIOD ==4]

In [79]:
fourth

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_CITY,PLAYER2_TEAM_NICKNAME,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION
395,21500020,454,2,45,4,12:50 AM,11:45,MISS Pierce 25' 3PT Jump Shot,,,...,,,,0,0,,,,,
397,21500020,456,2,42,4,12:51 AM,11:24,W. Johnson BLOCK (1 BLK),,MISS Nowitzki 4' Driving Layup,...,,,,4,202325,Wesley Johnson,1610612746.0,Los Angeles,Clippers,LAC
398,21500020,457,4,0,4,12:51 AM,11:22,Smith REBOUND (Off:1 Def:2),,,...,,,,0,0,,,,,
399,21500020,458,1,98,4,12:51 AM,11:00,Rivers 2' Cutting Layup Shot (9 PTS) (Pierce 2...,,,...,Los Angeles,Clippers,LAC,0,0,,,,,
400,21500020,459,6,1,4,12:51 AM,10:55,Smith P.FOUL (P4.T1) (S.Corbin),,,...,Dallas,Mavericks,DAL,1,0,,,,,
402,21500020,461,6,2,4,12:52 AM,10:43,Pierce S.FOUL (P2.T2) (E.Dalen),,,...,Dallas,Mavericks,DAL,1,0,,,,,
404,21500020,464,1,1,4,12:52 AM,10:30,Rivers 26' 3PT Jump Shot (12 PTS) (Crawford 1 ...,,,...,Los Angeles,Clippers,LAC,0,0,,,,,
406,21500020,466,4,0,4,12:53 AM,10:14,Pierce REBOUND (Off:0 Def:3),,,...,,,,0,0,,,,,
407,21500020,467,1,43,4,12:53 AM,10:08,W. Johnson 4' Alley Oop Layup (4 PTS) (Crawfor...,,,...,Los Angeles,Clippers,LAC,0,0,,,,,
409,21500020,469,4,0,4,12:53 AM,9:47,Smith REBOUND (Off:1 Def:3),,,...,,,,0,0,,,,,


In [80]:
print "Number of 4th quarter...\nfouls: %s" % fourth.HOMEDESCRIPTION.str.contains("FOUL").sum()
print "Free throws: %s" % fourth.HOMEDESCRIPTION.str.contains("Free").sum()
print "Rebounds: %s" % fourth.HOMEDESCRIPTION.str.contains("Rebound").sum()

Number of 4th quarter...
fouls: 3
Free throws: 4
Rebounds: 2


In [100]:
# Maybe exclude when EVENTMSGACTIONTYPE is 0 (rebound) or 1 (TO)
fourth[(fourth["EVENTMSGACTIONTYPE"] != 0) & (fourth["EVENTMSGACTIONTYPE"] != 1) ]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_CITY,PLAYER2_TEAM_NICKNAME,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION
395,21500020,454,2,45,4,12:50 AM,11:45,MISS Pierce 25' 3PT Jump Shot,,,...,,,,0,0,,,,,
397,21500020,456,2,42,4,12:51 AM,11:24,W. Johnson BLOCK (1 BLK),,MISS Nowitzki 4' Driving Layup,...,,,,4,202325,Wesley Johnson,1610612746.0,Los Angeles,Clippers,LAC
399,21500020,458,1,98,4,12:51 AM,11:00,Rivers 2' Cutting Layup Shot (9 PTS) (Pierce 2...,,,...,Los Angeles,Clippers,LAC,0,0,,,,,
402,21500020,461,6,2,4,12:52 AM,10:43,Pierce S.FOUL (P2.T2) (E.Dalen),,,...,Dallas,Mavericks,DAL,1,0,,,,,
407,21500020,467,1,43,4,12:53 AM,10:08,W. Johnson 4' Alley Oop Layup (4 PTS) (Crawfor...,,,...,Los Angeles,Clippers,LAC,0,0,,,,,
411,21500020,471,3,11,4,12:53 AM,9:39,MISS Smith Free Throw 1 of 2,,,...,,,,0,0,,,,,
415,21500020,476,3,12,4,12:54 AM,9:39,MISS Smith Free Throw 2 of 2,,,...,,,,0,0,,,,,
419,21500020,481,2,3,4,12:55 AM,9:12,MISS Smith 5' Hook Shot,,,...,,,,0,0,,,,,
421,21500020,483,2,97,4,12:55 AM,9:12,MISS Smith 4' Tip Layup Shot,,,...,,,,0,0,,,,,
423,21500020,485,1,97,4,12:55 AM,9:12,Smith 1' Tip Layup Shot (5 PTS),,,...,,,,0,0,,,,,


In [102]:
fourth[(fourth["EVENTMSGACTIONTYPE"] != 0) & (fourth["EVENTMSGACTIONTYPE"] != 1) ]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_CITY,PLAYER2_TEAM_NICKNAME,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION
395,21500020,454,2,45,4,12:50 AM,11:45,MISS Pierce 25' 3PT Jump Shot,,,...,,,,0,0,,,,,
397,21500020,456,2,42,4,12:51 AM,11:24,W. Johnson BLOCK (1 BLK),,MISS Nowitzki 4' Driving Layup,...,,,,4,202325,Wesley Johnson,1610612746.0,Los Angeles,Clippers,LAC
399,21500020,458,1,98,4,12:51 AM,11:00,Rivers 2' Cutting Layup Shot (9 PTS) (Pierce 2...,,,...,Los Angeles,Clippers,LAC,0,0,,,,,
402,21500020,461,6,2,4,12:52 AM,10:43,Pierce S.FOUL (P2.T2) (E.Dalen),,,...,Dallas,Mavericks,DAL,1,0,,,,,
407,21500020,467,1,43,4,12:53 AM,10:08,W. Johnson 4' Alley Oop Layup (4 PTS) (Crawfor...,,,...,Los Angeles,Clippers,LAC,0,0,,,,,
411,21500020,471,3,11,4,12:53 AM,9:39,MISS Smith Free Throw 1 of 2,,,...,,,,0,0,,,,,
415,21500020,476,3,12,4,12:54 AM,9:39,MISS Smith Free Throw 2 of 2,,,...,,,,0,0,,,,,
419,21500020,481,2,3,4,12:55 AM,9:12,MISS Smith 5' Hook Shot,,,...,,,,0,0,,,,,
421,21500020,483,2,97,4,12:55 AM,9:12,MISS Smith 4' Tip Layup Shot,,,...,,,,0,0,,,,,
423,21500020,485,1,97,4,12:55 AM,9:12,Smith 1' Tip Layup Shot (5 PTS),,,...,,,,0,0,,,,,


#  Quarterly Game Data by Team

## With no iterations

### Grab desired arguments

In [113]:
# Get gamedates as list
gamedates = get2014gamedates("Clippers","Warriors")

### Load Arguments

In [259]:
#  Initialize payload
payload = init_payload()

In [260]:
newargs = {
    'DateFrom':gamedate,
    'DateTo':gamedate,
    'MeasureType':'Base',
    'Period':'1',
    'TeamID':'1610612746',
    'OpponentTeamID':'1610612744'
    
}

### Grab data

## Prep for Quarterly Data with Iterations

In [404]:
# Set additional iterations needed
Periods = ["1","2","3","4"]
MeasureTypes = ["Base","Advanced","Misc","Four Factors","Scoring","Opponent"] # "Usage" doesn't apply as these are team stats


In [352]:
# Create a master dictionary to hold each measure's data
superdatadict = dict()
for m in MeasureTypes:
    superdatadict[m] = 0

In [438]:
#  Save Super columns for Later Work
# supercolumns = periodframe.columns.tolist()
# pickle.dump(supercolumns,open("supercolumns.p","wb"))


# Data Acquisition Script

In [112]:
# Initialize list
quarters_list = []

for D in gamedates:
        newargs.update({'DateFrom':D,'DateTo':D})
        for P in Periods:
            newargs.update({"Period":P})
            # initialize a periodframe
            periodframe = pd.DataFrame()
            for m in MeasureTypes:
                print m

                newargs.update({"MeasureType":m}) # Set MeasureType
                newline = grabdataline(payload,newargs)
                # Check for new arguments
                old_cols = periodframe.columns.tolist()
                print old_cols
                all_new_cols = newline.columns.tolist()
                newcols = list(set(all_new_cols)-set(old_cols))     
                print newcols
                if old_cols == []: # If first measure type, initialize
                    print "initializing periodframe"
                    periodframe = newline
                else:

                    # Add columns to frame
                    periodframe = periodframe.join(newline[newcols])
            
            quarters_list.append(periodframe)
            
full_frame = pd.concat(quarters_list)

NameError: name 'gamedates' is not defined

In [468]:
# Save full data
# full_frame.to_pickle("data/fullstats_lac_gsw_20142015_103015_v1.p")

# Functions and Constants

In [110]:
# API Call List
apivariables = ["DateFrom","DateTo","Division","GameScope","GameSegment","LastNGames","LeagueID","Location","MeasureType","Month","OpponentTeamID","Outcome","PORound","PaceAdjust","PerMode","Period","PlayerExperience","PlayerPosition","PlusMinus","Rank","Season","SeasonSegment","SeasonType","ShotClockRange","StarterBench","TeamID","VsConference","VsDivision"]

In [109]:
# API Initialize List
dictinit={"LastNGames":"0","LeagueID":"00","MeasureType":"Base","Month":"0","TeamID":"0","OpponentTeamID":"0","PaceAdjust":"N","PaceAdjust":"N","Period":"0","PerMode":"Totals","PlusMinus":"N","PORound":"0","Rank":"N","Season":"2014-15","SeasonType":"Regular Season"}



In [111]:
def init_payload():
    """Initialize payload
    
    >>> payload = init_payload()
    """
    # Initialize payload
    _payload = {}

    # Load blank payload
    _payload = _payload.fromkeys(apivariables,"")

    # Load initialized values from payloadinit
    for k,v in dictinit.iteritems():
        _payload[k] = v
    return(_payload)



In [107]:
def get2014gamedates(team1,team2):
    """Get gamedates in a list for two teams
    """
    # Load df
    df = pd.read_csv("data/schedule_20142015_regularseason.csv")

    # Set Date
    df['Date'] = pd.DatetimeIndex(df['Date'])

    # Get all clippers games
    df = df[df['Visitor/Neutral'].str.contains(team1) | df['Home/Neutral'].str.contains(team1)]

    # Get all warriors games
    df = df[df['Visitor/Neutral'].str.contains(team2) | df['Home/Neutral'].str.contains(team2)]
    
    # Convert to string
    
    
    # Output List
    gamedates = df['Date'].tolist()
    
    
    # Convert to string
    new_gamedates = []
    for d in gamedates:
        new_date = str(d.month) + "-" + str(d.day) + "-" + str(d.year)
        new_gamedates.append(new_date)

    gamedates = new_gamedates
    
    return(gamedates)



In [106]:
# Grab data
def grabdataline(payload,newargs):
    """
    Grab one line of stats for a quarter
    """
    payload.update(newargs)
    baseurl = "http://stats.nba.com/stats/leaguedashteamstats"
    response=requests.get(baseurl,payload)
    print response.url
    print response
    # Grab headers
    headers = response.json()['resultSets'][0]['headers']

    # Grab stats

    stats = response.json()['resultSets'][0]['rowSet']

    periodframe = pd.DataFrame(stats,columns = headers )
    # Add arguments to data line
    for k,v in newargs.iteritems():
        periodframe[k] = v
    return(periodframe)

In [104]:
def response_to_frame(requestresponse):
    """Take a request response and convert to dataframe
    Only if headers and stats conform to sample nba protocol
    
    >>> df = response_to_frame(response)
    
    """
    # Grab column heads
    headers = response.json()['resultSets'][0]['headers']

    # Grab rows
    stats = response.json()['resultSets'][0]['rowSet']
    
    # Form frame
    df = pd.DataFrame(stats,columns = headers )
    
    # Return frame
    return(df)