# AZ Cardinals Project

ETL NFL boxscore data for Arizona Cardinals games.

## Imports

In [1]:
import pandas as pd
import numpy as np
import json
import os
import requests

from dotenv import load_dotenv

## Setup API Authentication

In [2]:
#API Endpoint for NFL Box Scores
url = "https://tank01-nfl-live-in-game-real-time-statistics-nfl.p.rapidapi.com/getNFLBoxScore"

In [3]:
#Get API Key and host
load_dotenv()
api_token = os.getenv('nfl_api_key')
api_host = os.getenv('rapid_api_host')

In [4]:
#rapidapi headers
headers = {
	"X-RapidAPI-Key": "{key}".format(key=api_token),
	"X-RapidAPI-Host": "{host}".format(host=api_host)
}

### Pull Single Game Data - Test

#### Set QueryString

In [5]:
#tested with raiders game, now I'll try with one that didn't go to OT
querystring = {"gameID":"20230108_ARI@SF","fantasyPoints":"false"}

#### Call API and Review Results

In [6]:
response = requests.get(url, headers=headers, params=querystring)

In [8]:
test_list = []

In [13]:
game_list = ['20230108_ARI@SF']

In [17]:
for game in game_list:
    for score in response.json()['body']['scoringPlays']:
        sub_list = [response.json()['body']['gameID'], score]
        test_list.append(sub_list)

In [21]:
test_list[0][1]

{'score': 'A.J. Green 77 Yd pass from David Blough (Matt Prater PAT failed)',
 'scorePeriod': 'Q1',
 'homeScore': '0',
 'awayScore': '6',
 'teamID': '1',
 'scoreDetails': '2 plays, 80 yards, 0:42',
 'scoreType': 'TD',
 'scoreTime': '14:18',
 'team': 'ARI',
 'playerIDs': ['3116188', '13983', '11122']}

In [11]:
for game in response.json():
    for score in game['body']['scoringPlays']:
        test_list.append(game['gameID'], score)

TypeError: string indices must be integers, not 'str'

In [38]:
print(json.dumps(response.json(), indent=2)) #pretty print

{
  "statusCode": 200,
  "body": {
    "playerStats": {
      "4361510": {
        "gameID": "20220911_KC@ARI",
        "playerName": "Cameron Thomas",
        "teamID": "1",
        "team": "ARI",
        "teamAbv": "ARI",
        "Defense": {
          "totalTackles": "1",
          "defTD": "0",
          "soloTackles": "0",
          "tfl": "0",
          "qbHits": "0",
          "sacks": "0",
          "passDeflections": "0"
        },
        "playerID": "4361510"
      },
      "4035004": {
        "gameID": "20220911_KC@ARI",
        "Receiving": {
          "receptions": "3",
          "recTD": "1",
          "longRec": "8",
          "targets": "6",
          "recYds": "16",
          "recAvg": "5.3"
        },
        "SpecialTeams": {
          "puntReturns": "1",
          "puntReturnYds": "3",
          "puntReturnAvg": "3.0",
          "puntReturnLong": "3",
          "puntReturnTD": "0"
        },
        "playerName": "Mecole Hardman",
        "scoringPlays": [
       

#### Export Test Json

In [15]:
with open('../data/test_boxscore.json', 'w') as file:
    json.dump(response.json(), file)

### Save to DataFrame

Create a table of the game details combined with the linescore and some summary team stats

In [25]:
boxscore = response.json()['body']

In [26]:
game_list = []

In [27]:
#create dictionary for game summary
game_summary = {
    "game_id": boxscore['gameID'],
    "game_type": boxscore['seasonType'],
    "game_date_id": boxscore['gameDate'],
    "game_location": boxscore['gameLocation'],
    "away_team_id": boxscore['lineScore']['away']['teamID'],
    "away_team": boxscore['away'],
    "away_q1_score": boxscore['lineScore']['away']['Q1'],
    "away_q2_score": boxscore['lineScore']['away']['Q2'],
    "away_q3_score": boxscore['lineScore']['away']['Q3'],
    "away_q4_score": boxscore['lineScore']['away']['Q4'],
    "away_ot_score": '',
    "away_total_score": boxscore['lineScore']['away']['totalPts'],
    "away_total_plays": boxscore['teamStats']['away']['totalPlays'],
    "away_total_yards": boxscore['teamStats']['away']['totalYards'],
    "away_passing_yards": boxscore['teamStats']['away']['passingYards'],
    "away_rushing_yards": boxscore['teamStats']['away']['rushingYards'],
    "away_turnovers": boxscore['teamStats']['away']['turnovers'],
    "away_time_of_possession": boxscore['teamStats']['away']['possession'],
    "away_result": boxscore['awayResult'],
    "home_team_id": boxscore['lineScore']['home']['teamID'],
    "home_team": boxscore['home'],
    "home_q1_score": boxscore['lineScore']['home']['Q1'],
    "home_q2_score": boxscore['lineScore']['home']['Q2'],
    "home_q3_score": boxscore['lineScore']['home']['Q3'],
    "home_q4_score": boxscore['lineScore']['home']['Q4'],
    "home_ot_score": '',
    "home_total_score": boxscore['lineScore']['home']['totalPts'],
    "home_total_plays": boxscore['teamStats']['home']['totalPlays'],
    "home_total_yards": boxscore['teamStats']['home']['totalYards'],
    "home_passing_yards": boxscore['teamStats']['home']['passingYards'],
    "home_rushing_yards": boxscore['teamStats']['home']['rushingYards'],
    "home_turnovers": boxscore['teamStats']['home']['turnovers'],
    "home_time_of_possession": boxscore['teamStats']['home']['possession'],
    "home_result": boxscore['homeResult']
}

# Try to access the 'OT' field and set the value if it exists, if not set 0
try:
    game_summary["away_ot_score"] = boxscore['lineScore']['away']['OT']
except KeyError:
    game_summary["away_ot_score"]=0

try:
    game_summary["home_ot_score"] = boxscore['lineScore']['home']['OT']
except KeyError:
    game_summary["home_ot_score"]=0


In [28]:
#save this one record to the "list" of games - we'll use this to convert to a dataframe. In the future it will be an iterable
game_list.append(game_summary)

In [29]:
#convert to dataframe
summary_df = pd.DataFrame(game_list)

summary_df.head()

Unnamed: 0,game_id,game_type,game_date_id,game_location,away_team_id,away_team,away_q1_score,away_q2_score,away_q3_score,away_q4_score,...,home_q4_score,home_ot_score,home_total_score,home_total_plays,home_total_yards,home_passing_yards,home_rushing_yards,home_turnovers,home_time_of_possession,home_result
0,20230108_ARI@SF,Regular Season,20230108,"Santa Clara, CA",1,ARI,6,7,0,0,...,0,0,38,61,311,142,169,0,34:14,W


#### Update Datatypes

In [30]:
#check datatypes
summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 34 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   game_id                  1 non-null      object
 1   game_type                1 non-null      object
 2   game_date_id             1 non-null      object
 3   game_location            1 non-null      object
 4   away_team_id             1 non-null      object
 5   away_team                1 non-null      object
 6   away_q1_score            1 non-null      object
 7   away_q2_score            1 non-null      object
 8   away_q3_score            1 non-null      object
 9   away_q4_score            1 non-null      object
 10  away_ot_score            1 non-null      int64 
 11  away_total_score         1 non-null      object
 12  away_total_plays         1 non-null      object
 13  away_total_yards         1 non-null      object
 14  away_passing_yards       1 non-null      objec

In [31]:
#setup dtype mapping
dtype_mapping = {
    'game_id': 'object',
    'game_type': 'object',
    'game_date_id': 'object',
    'game_location': 'object',
    'away_team_id': 'object',
    'away_team': 'object',
    'away_q1_score': 'int64',
    'away_q2_score': 'int64',
    'away_q3_score': 'int64',
    'away_q4_score': 'int64',
    'away_ot_score': 'int64',
    'away_total_score': 'int64',
    'away_total_plays': 'int64',
    'away_total_yards': 'int64',
    'away_passing_yards': 'int64',
    'away_rushing_yards': 'int64',
    'away_turnovers': 'int64',
    'away_time_of_possession': 'object',  # Update with the correct data type
    'away_result': 'object',
    'home_team_id': 'object',
    'home_team': 'object',
    'home_q1_score': 'int64',
    'home_q2_score': 'int64',
    'home_q3_score': 'int64',
    'home_q4_score': 'int64',
    'home_ot_score': 'int64',
    'home_total_score': 'int64',
    'home_total_plays': 'int64',
    'home_total_yards': 'int64',
    'home_passing_yards': 'int64',
    'home_rushing_yards': 'int64',
    'home_turnovers': 'int64',
    'home_time_of_possession': 'object',  # Update with the correct data type
    'home_result': 'object',
}

In [32]:
#apply the mappings
summary_df = summary_df.astype(dtype_mapping)

In [61]:
#convert timestamps
summary_df['away_time_of_possession'] = pd.to_datetime(summary_df['away_time_of_possession'], format='%M:%S').dt.time
summary_df['home_time_of_possession'] = pd.to_datetime(summary_df['home_time_of_possession'], format='%M:%S').dt.time

In [18]:
summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 34 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   game_id                  1 non-null      object
 1   game_type                1 non-null      object
 2   game_date_id             1 non-null      object
 3   game_location            1 non-null      object
 4   away_team_id             1 non-null      object
 5   away_team                1 non-null      object
 6   away_q1_score            1 non-null      int64 
 7   away_q2_score            1 non-null      int64 
 8   away_q3_score            1 non-null      int64 
 9   away_q4_score            1 non-null      int64 
 10  away_ot_score            1 non-null      int64 
 11  away_total_score         1 non-null      int64 
 12  away_total_plays         1 non-null      int64 
 13  away_total_yards         1 non-null      int64 
 14  away_passing_yards       1 non-null      int64

### Build Scoring Plays Data

### Pull in saved json file
Don't waste an API call for this test. :)

In [3]:
with open('../data/test_boxscore.json') as f:
    test_box = json.load(f)

In [9]:
test_box['body']['scoringPlays'][0]

{'score': 'A.J. Green 77 Yd pass from David Blough (Matt Prater PAT failed)',
 'scorePeriod': 'Q1',
 'homeScore': '0',
 'awayScore': '6',
 'teamID': '1',
 'scoreDetails': '2 plays, 80 yards, 0:42',
 'scoreType': 'TD',
 'scoreTime': '14:18',
 'team': 'ARI',
 'playerIDs': ['3116188', '13983', '11122']}

In [10]:
#setup mapping for quarters
#it switches mid season from 1Q to Q1, so I want to adjust for this
period_mapping = {
    '1Q': 'Q1',
    '2Q': 'Q2',
    '3Q': 'Q3',
    '4Q': 'Q4',
    'OT': 'OT'
}

In [12]:
boxscore = test_box['body']

In [13]:
#loop through scoring plays and convert to dataframe
scores = [] #setup blank list to store scores

for score in boxscore['scoringPlays']:
    #map the scorePeriod to our new mapping
    score_period = period_mapping.get(score['scorePeriod'], score['scorePeriod'])


    score_detail = {
        "team_id": score['teamID'],
        #"team": score['teamAbv'], --inconsistent naming in different games. I can just map this later, so I won't adjust for it
        "score_type": score['scoreType'],
        #"score_period": score['scorePeriod'], #original score period function
        "score_period": score_period, #new method - using our mapping
        "score_time": score['scoreTime'],
        "drive_detail": score['scoreDetails'],
        "score_detail": score['score'],
        "away_team_score": score['awayScore'],
        "home_team_score": score['homeScore']
    }

    scores.append(score_detail)

scores_df = pd.DataFrame(scores)

In [14]:
scores_df.head(15)

Unnamed: 0,team_id,score_type,score_period,score_time,drive_detail,score_detail,away_team_score,home_team_score
0,1,TD,Q1,14:18,"2 plays, 80 yards, 0:42",A.J. Green 77 Yd pass from David Blough (Matt ...,6,0
1,28,TD,Q1,10:40,"6 plays, 75 yards, 3:38",Christian McCaffrey 21 Yd pass from Brock Purd...,6,7
2,28,TD,Q2,12:07,"3 plays, 18 yards, 1:43",Elijah Mitchell 5 Yd Run (Robbie Gould Kick),6,14
3,1,TD,Q2,4:41,"12 plays, 75 yards, 7:26",Corey Clement 1 Yd Run (Matt Prater Kick),13,14
4,28,TD,Q2,0:19,"9 plays, 66 yards, 4:22",George Kittle 4 Yd pass from Brock Purdy (Robb...,13,21
5,28,TD,Q3,11:32,"7 plays, 75 yards, 3:28",Elijah Mitchell 6 Yd Run (Robbie Gould Kick),13,28
6,28,FG,Q3,5:47,"8 plays, 25 yards, 4:36",Robbie Gould 27 Yd Field Goal,13,31
7,28,TD,Q3,1:01,"6 plays, 28 yards, 3:12",George Kittle 1 Yd pass from Brock Purdy (Robb...,13,38


#### Update Dtypes

In [65]:
#check datatypes
scores_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   team_id          10 non-null     object
 1   team             10 non-null     object
 2   score_type       10 non-null     object
 3   score_period     10 non-null     object
 4   score_time       10 non-null     object
 5   drive_detail     10 non-null     object
 6   score_detail     10 non-null     object
 7   away_team_score  10 non-null     object
 8   home_team_score  10 non-null     object
dtypes: object(9)
memory usage: 852.0+ bytes


In [39]:
dtype_mapping_2 = {
    'team_id': 'object',
    #'team': 'object',
    'score_type': 'category',  # Make score_type categorical
    'score_period': 'object',
    'score_time': 'object',  # convert to datetime in nextsteyp
    'drive_detail': 'object',
    'score_detail': 'object',
    'away_team_score': 'int64',
    'home_team_score': 'int64',
}

In [40]:
scores_df = scores_df.astype(dtype_mapping_2)

In [43]:
#convert timestamps
scores_df['score_time'] = pd.to_datetime(scores_df['score_time'], format='%M:%S').dt.time

#### Add Period Elapsed Time
The score time is the time remaining on the clock at the time of score, but for our purposes having the elapsed quarter will be helpful. I could do this in a BI tool, but would rather standardize it in the dataset. Circle back on game time elapsed. Wasn't able to get that yet.

In [41]:
scores_df.head(10)

Unnamed: 0,team_id,score_type,score_period,score_time,drive_detail,score_detail,away_team_score,home_team_score
0,1,TD,Q1,14:18,"2 plays, 80 yards, 0:42",A.J. Green 77 Yd pass from David Blough (Matt ...,6,0
1,28,TD,Q1,10:40,"6 plays, 75 yards, 3:38",Christian McCaffrey 21 Yd pass from Brock Purd...,6,7
2,28,TD,Q2,12:07,"3 plays, 18 yards, 1:43",Elijah Mitchell 5 Yd Run (Robbie Gould Kick),6,14
3,1,TD,Q2,4:41,"12 plays, 75 yards, 7:26",Corey Clement 1 Yd Run (Matt Prater Kick),13,14
4,28,TD,Q2,0:19,"9 plays, 66 yards, 4:22",George Kittle 4 Yd pass from Brock Purdy (Robb...,13,21
5,28,TD,Q3,11:32,"7 plays, 75 yards, 3:28",Elijah Mitchell 6 Yd Run (Robbie Gould Kick),13,28
6,28,FG,Q3,5:47,"8 plays, 25 yards, 4:36",Robbie Gould 27 Yd Field Goal,13,31
7,28,TD,Q3,1:01,"6 plays, 28 yards, 3:12",George Kittle 1 Yd pass from Brock Purdy (Robb...,13,38


In [44]:
#calculate elapsed time in quarter
scores_df['period_elapsed_time'] = np.where(
    scores_df.score_period == 'OT',
    (pd.to_datetime('00:10:00', format='%H:%M:%S') - pd.to_datetime(scores_df.score_time, format='%H:%M:%S')).dt.total_seconds(),
    (pd.to_datetime('00:15:00', format='%H:%M:%S') - pd.to_datetime(scores_df.score_time, format='%H:%M:%S')).dt.total_seconds()
)


In [45]:
scores_df.head(10)

Unnamed: 0,team_id,score_type,score_period,score_time,drive_detail,score_detail,away_team_score,home_team_score,period_elapsed_time
0,1,TD,Q1,00:14:18,"2 plays, 80 yards, 0:42",A.J. Green 77 Yd pass from David Blough (Matt ...,6,0,42.0
1,28,TD,Q1,00:10:40,"6 plays, 75 yards, 3:38",Christian McCaffrey 21 Yd pass from Brock Purd...,6,7,260.0
2,28,TD,Q2,00:12:07,"3 plays, 18 yards, 1:43",Elijah Mitchell 5 Yd Run (Robbie Gould Kick),6,14,173.0
3,1,TD,Q2,00:04:41,"12 plays, 75 yards, 7:26",Corey Clement 1 Yd Run (Matt Prater Kick),13,14,619.0
4,28,TD,Q2,00:00:19,"9 plays, 66 yards, 4:22",George Kittle 4 Yd pass from Brock Purdy (Robb...,13,21,881.0
5,28,TD,Q3,00:11:32,"7 plays, 75 yards, 3:28",Elijah Mitchell 6 Yd Run (Robbie Gould Kick),13,28,208.0
6,28,FG,Q3,00:05:47,"8 plays, 25 yards, 4:36",Robbie Gould 27 Yd Field Goal,13,31,553.0
7,28,TD,Q3,00:01:01,"6 plays, 28 yards, 3:12",George Kittle 1 Yd pass from Brock Purdy (Robb...,13,38,839.0


##### Game Elapsed

Including a try-accept here to account for the fact that someimtes it uses a Q1 format and other times a Q1.

I've not updated the dataframe creation to account for this, but I may still leave it as a good quality control step.

In [47]:
#create a custom function to apply
def game_time_calc(row):
    if row['score_period'] == 'OT':
        return (15 * 4 * 60) + row['period_elapsed_time']
    else:
        #try to grab quarter from both 'Q1' and '1Q' format - API changes mid year
        try:
            quarter = int(str(row['score_period'])[0])
        except ValueError:
            quarter = int(str(row['score_period'])[1])
            
        return ((quarter - 1) * 15 * 60) + row['period_elapsed_time']

In [48]:
# Apply the custom function to each row and store the result in a new column
scores_df['game_elapsed_time'] = scores_df.apply(game_time_calc, axis=1)

In [49]:
#convert to hh:mm:ss time format
scores_df['period_elapsed_time'] = pd.to_datetime(scores_df.period_elapsed_time, unit='s').dt.time
scores_df['game_elapsed_time'] = pd.to_datetime(scores_df.game_elapsed_time, unit='s').dt.time

In [50]:
scores_df.head(10)

Unnamed: 0,team_id,score_type,score_period,score_time,drive_detail,score_detail,away_team_score,home_team_score,period_elapsed_time,game_elapsed_time
0,1,TD,Q1,00:14:18,"2 plays, 80 yards, 0:42",A.J. Green 77 Yd pass from David Blough (Matt ...,6,0,00:00:42,00:00:42
1,28,TD,Q1,00:10:40,"6 plays, 75 yards, 3:38",Christian McCaffrey 21 Yd pass from Brock Purd...,6,7,00:04:20,00:04:20
2,28,TD,Q2,00:12:07,"3 plays, 18 yards, 1:43",Elijah Mitchell 5 Yd Run (Robbie Gould Kick),6,14,00:02:53,00:17:53
3,1,TD,Q2,00:04:41,"12 plays, 75 yards, 7:26",Corey Clement 1 Yd Run (Matt Prater Kick),13,14,00:10:19,00:25:19
4,28,TD,Q2,00:00:19,"9 plays, 66 yards, 4:22",George Kittle 4 Yd pass from Brock Purdy (Robb...,13,21,00:14:41,00:29:41
5,28,TD,Q3,00:11:32,"7 plays, 75 yards, 3:28",Elijah Mitchell 6 Yd Run (Robbie Gould Kick),13,28,00:03:28,00:33:28
6,28,FG,Q3,00:05:47,"8 plays, 25 yards, 4:36",Robbie Gould 27 Yd Field Goal,13,31,00:09:13,00:39:13
7,28,TD,Q3,00:01:01,"6 plays, 28 yards, 3:12",George Kittle 1 Yd pass from Brock Purdy (Robb...,13,38,00:13:59,00:43:59


## Test Full Season of Games

In [76]:
#ids of all Cardies 2022 games
game_list_22 = [
    '20220911_KC@ARI',
    '20220918_ARI@LV',
    '20220925_LAR@ARI',
    '20221002_ARI@CAR',
    '20221009_PHI@ARI',
    '20221016_ARI@SEA',
    '20221020_NO@ARI',
    '20221030_ARI@MIN',
    '20221106_SEA@ARI',
    '20221113_ARI@LAR',
    '20221121_SF@ARI',
    '20221127_LAC@ARI',
    '20221212_NE@ARI',
    '20221218_ARI@DEN',
    '20221225_TB@ARI',
    '20230101_ARI@ATL',
    '20230108_ARI@SF',
]

### BoxScore

In [87]:
#setup blank list for boxscore data
boxscore_data_list = []

for game in game_list_22:
    #setup querystring for API endpoint
    querystring = {"gameID":"{game_id}".format(game_id=game),"fantasyPoints":"false"}
    
    #call using predefined headers and querystring
    response = requests.get(url, headers=headers, params=querystring)

    #drill into json to get to boxscore data
    boxscore = response.json()['body']

    #create dictionary for game summary
    game_summary = {
        "game_id": boxscore['gameID'],
        "game_type": boxscore['seasonType'],
        "game_date_id": boxscore['gameDate'],
        "game_location": boxscore['gameLocation'],
        "away_team_id": boxscore['lineScore']['away']['teamID'],
        "away_team": boxscore['away'],
        "away_q1_score": boxscore['lineScore']['away']['Q1'],
        "away_q2_score": boxscore['lineScore']['away']['Q2'],
        "away_q3_score": boxscore['lineScore']['away']['Q3'],
        "away_q4_score": boxscore['lineScore']['away']['Q4'],
        "away_ot_score": '',
        "away_total_score": boxscore['lineScore']['away']['totalPts'],
        "away_total_plays": boxscore['teamStats']['away']['totalPlays'],
        "away_total_yards": boxscore['teamStats']['away']['totalYards'],
        "away_passing_yards": boxscore['teamStats']['away']['passingYards'],
        "away_rushing_yards": boxscore['teamStats']['away']['rushingYards'],
        "away_turnovers": boxscore['teamStats']['away']['turnovers'],
        "away_time_of_possession": boxscore['teamStats']['away']['possession'],
        "away_result": boxscore['awayResult'],
        "home_team_id": boxscore['lineScore']['home']['teamID'],
        "home_team": boxscore['home'],
        "home_q1_score": boxscore['lineScore']['home']['Q1'],
        "home_q2_score": boxscore['lineScore']['home']['Q2'],
        "home_q3_score": boxscore['lineScore']['home']['Q3'],
        "home_q4_score": boxscore['lineScore']['home']['Q4'],
        "home_ot_score": '',
        "home_total_score": boxscore['lineScore']['home']['totalPts'],
        "home_total_plays": boxscore['teamStats']['home']['totalPlays'],
        "home_total_yards": boxscore['teamStats']['home']['totalYards'],
        "home_passing_yards": boxscore['teamStats']['home']['passingYards'],
        "home_rushing_yards": boxscore['teamStats']['home']['rushingYards'],
        "home_turnovers": boxscore['teamStats']['home']['turnovers'],
        "home_time_of_possession": boxscore['teamStats']['home']['possession'],
        "home_result": boxscore['homeResult']
    }

    # Try to access the 'OT' field and set the value if it exists, if not set 0
    try:
        game_summary["away_ot_score"] = boxscore['lineScore']['away']['OT']
    except KeyError:
        game_summary["away_ot_score"]=0

    try:
        game_summary["home_ot_score"] = boxscore['lineScore']['home']['OT']
    except KeyError:
        game_summary["home_ot_score"]=0

    #append compiled data to list
    boxscore_data_list.append(game_summary)   

20220911_KC@ARI
20220918_ARI@LV
20220925_LAR@ARI
20221002_ARI@CAR
20221009_PHI@ARI
20221016_ARI@SEA
20221020_NO@ARI
20221030_ARI@MIN
20221106_SEA@ARI
20221113_ARI@LAR
20221121_SF@ARI
20221127_LAC@ARI
20221212_NE@ARI
20221218_ARI@DEN
20221225_TB@ARI
20230101_ARI@ATL
20230108_ARI@SF


In [88]:
#convert to dataframe
boxscore_df = pd.DataFrame(boxscore_data_list)

boxscore_df.head()

Unnamed: 0,game_id,game_type,game_date_id,game_location,away_team_id,away_team,away_q1_score,away_q2_score,away_q3_score,away_q4_score,...,home_q4_score,home_ot_score,home_total_score,home_total_plays,home_total_yards,home_passing_yards,home_rushing_yards,home_turnovers,home_time_of_possession,home_result
0,20220911_KC@ARI,Regular Season,20220911,"Glendale, AZ",16,KC,14,9,14,7,...,14,0,21,63,282,179,103,0,25:18,L
1,20220918_ARI@LV,Regular Season,20220918,"Las Vegas, NV",1,ARI,0,0,7,16,...,0,0,23,61,324,244,80,1,29:47,L
2,20220925_LAR@ARI,Regular Season,20220925,"Glendale, AZ",19,LAR,10,3,7,0,...,3,0,12,81,365,295,70,0,33:56,L
3,20221002_ARI@CAR,Regular Season,20221002,"Charlotte, NC",1,ARI,0,3,7,16,...,6,0,16,51,220,180,40,3,21:25,L
4,20221009_PHI@ARI,Regular Season,20221009,"Glendale, AZ",27,PHI,7,7,3,3,...,7,0,17,69,363,239,124,1,25:21,L


### Scoring Details

I'm going to set this up with it's own API call, but for a final product we should do one call and split it into seperate dataframes at once.

In [92]:
#setup blank list for boxscore data
scoring_data_list = []

for game in game_list_22:
    #setup querystring for API endpoint
    querystring = {"gameID":"{game_id}".format(game_id=game),"fantasyPoints":"false"}
    
    #call using predefined headers and querystring
    response = requests.get(url, headers=headers, params=querystring)

    #drill into json to get to boxscore data
    boxscore = response.json()['body']

    #now loop through each score in body-linescore
    for score in boxscore['scoringPlays']:
        #setup data
        score_detail = {
            "game_id": game,
            "team_id": score['teamID'],
            #"team": score['teamAbv'], #inconsistent labeling, won't need as long as teamId works. That we can join in with the other table
            "score_type": score['scoreType'],
            "score_period": score['scorePeriod'],
            "score_time": score['scoreTime'],
            "drive_detail": score['scoreDetails'],
            "score_detail": score['score'],
            "away_team_score": score['awayScore'],
            "home_team_score": score['homeScore']
        }

        scoring_data_list.append(score_detail)

20220911_KC@ARI {'scorePeriod': '1Q', 'score': 'Travis Kelce 9 Yd pass from Patrick Mahomes (Harrison Butker Kick)', 'homeScore': '0', 'awayScore': '7', 'teamID': '16', 'scoreDetails': '11 plays, 75 yards, 5:23', 'scoreType': 'TD', 'scoreTime': '9:37', 'team': 'KC', 'playerIDs': ['3139477', '15847', '3055899'], 'teamAbv': 'KC'}
20220911_KC@ARI {'scorePeriod': '1Q', 'score': 'Clyde Edwards-Helaire 3 Yd pass from Patrick Mahomes (Justin Reid Kick)', 'homeScore': '0', 'awayScore': '14', 'teamID': '16', 'scoreDetails': '7 plays, 86 yards, 4:23', 'scoreType': 'TD', 'scoreTime': '3:20', 'team': 'KC', 'playerIDs': ['3139477', '4242214', '3931399'], 'teamAbv': 'KC'}
20220911_KC@ARI {'scorePeriod': '2Q', 'score': 'James Conner 2 Yd Run (Matt Prater Kick)', 'homeScore': '7', 'awayScore': '14', 'teamID': '1', 'scoreDetails': '11 plays, 75 yards, 5:15', 'scoreType': 'TD', 'scoreTime': '13:05', 'team': 'ARI', 'playerIDs': ['3045147', '11122'], 'teamAbv': 'ARI'}
20220911_KC@ARI {'scorePeriod': '2Q',

KeyError: 'body'