# AZ Cardinals Project

Extract, transform, and load (ETL) NFL box score data for Arizona Cardinals games.

## Imports

In [296]:
import pandas as pd
import numpy as np
import json
import os
import requests

from dotenv import load_dotenv

## Setup API Authentication

In [297]:
#API Endpoint for NFL Box Scores
url = "https://tank01-nfl-live-in-game-real-time-statistics-nfl.p.rapidapi.com/getNFLBoxScore"

In [298]:
#Get API Key and host
load_dotenv()  # load variables from a local .env file (if any) into the environment
api_token = os.getenv('nfl_api_key')
api_host = os.getenv('rapid_api_host')

# Fail fast with a clear message: os.getenv returns None for an unset variable,
# and a None value would otherwise be formatted into the request headers as the
# literal text "None".
missing_settings = [
    name
    for name, value in (('nfl_api_key', api_token), ('rapid_api_host', api_host))
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

In [299]:
#rapidapi headers
# Headers passed along with the API request: the key and host values read
# from the environment.
headers = dict([
    ("X-RapidAPI-Key", f"{api_token}"),
    ("X-RapidAPI-Host", f"{api_host}"),
])

### Pull Single Game Data - Test

#### Set QueryString

In [300]:
#setup querystring to pull in the exciting 9/18 game against the Raiders (one of our few wins....)
querystring = {"gameID":"20220918_ARI@LV","fantasyPoints":"false"}

#### Call API and Review Results

In [301]:
# A timeout keeps a stalled connection from hanging the notebook indefinitely, and
# raise_for_status() surfaces HTTP error responses (4xx/5xx) right here instead of
# as a confusing KeyError when the body is parsed in later cells.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

In [302]:
print(json.dumps(response.json(), indent=2)) #pretty print

{
  "statusCode": 200,
  "body": {
    "playerStats": {
      "2991662": {
        "gameID": "20220918_ARI@LV",
        "Receiving": {
          "receptions": "5",
          "recTD": "0",
          "longRec": "23",
          "targets": "8",
          "recYds": "66",
          "recAvg": "13.2"
        },
        "playerName": "Mack Hollins",
        "teamID": "17",
        "team": "LV",
        "teamAbv": "LV",
        "Defense": {
          "totalTackles": "1",
          "defTD": "0",
          "soloTackles": "1",
          "tfl": "0",
          "qbHits": "0",
          "sacks": "0",
          "passDeflections": "0"
        },
        "playerID": "2991662"
      },
      "2971816": {
        "gameID": "20220918_ARI@LV",
        "playerName": "Nick Vigil",
        "teamID": "1",
        "team": "ARI",
        "teamAbv": "ARI",
        "Defense": {
          "totalTackles": "5",
          "defTD": "0",
          "soloTackles": "3",
          "tfl": "1",
          "qbHits": "0",
         

#### Export Test Json

In [303]:
# Create the target folder if needed so the export also works when ../data does
# not exist yet, and pass an explicit encoding instead of relying on the platform default.
os.makedirs('../data', exist_ok=True)
with open('../data/test_boxscore.json', 'w', encoding='utf-8') as file:
    json.dump(response.json(), file)

### Save to DataFrame

Create a table of the game details combined with the linescore and some summary team stats

In [304]:
boxscore = response.json()['body']

In [305]:
game_list = []

In [306]:
#create dictionary for game summary
# Game-level fields come first, followed by the same set of columns for each side
# (away, then home) so the key order matches the intended column order.
game_summary = {
    "game_id": boxscore['gameID'],
    "game_type": boxscore['seasonType'],
    "game_date_id": boxscore['gameDate'],
}

# The payload spells the two team-id keys with different casing, so each side is
# paired with its exact key rather than deriving it.
# NOTE(review): every lookup assumes the key is present (including 'OT' in the
# line score) - confirm for games that end in regulation.
for side, team_id_key in (('away', 'teamIdAway'), ('home', 'teamIDHome')):
    line_score = boxscore['lineScore'][side]
    team_stats = boxscore['teamStats'][side]

    game_summary[f"{side}_team_id"] = boxscore[team_id_key]
    game_summary[f"{side}_team"] = boxscore[side]

    # points per period, then the final total
    for period in ('Q1', 'Q2', 'Q3', 'Q4', 'OT'):
        game_summary[f"{side}_{period.lower()}_score"] = line_score[period]
    game_summary[f"{side}_total_score"] = line_score['totalPts']

    # team-level summary stats
    game_summary[f"{side}_total_plays"] = team_stats['totalPlays']
    game_summary[f"{side}_total_yards"] = team_stats['totalYards']
    game_summary[f"{side}_passing_yards"] = team_stats['passingYards']
    game_summary[f"{side}_rushing_yards"] = team_stats['rushingYards']
    game_summary[f"{side}_turnovers"] = team_stats['turnovers']
    game_summary[f"{side}_time_of_possession"] = team_stats['possession']

    game_summary[f"{side}_result"] = boxscore[f"{side}Result"]

In [307]:
#save this one record to the "list" of games - we'll use this to convert to a dataframe. In the future it will be an iterable
game_list.append(game_summary)

In [308]:
#convert to dataframe
summary_df = pd.DataFrame(game_list)

summary_df.head()

Unnamed: 0,game_id,game_type,game_date_id,away_team_id,away_team,away_q1_score,away_q2_score,away_q3_score,away_q4_score,away_ot_score,...,home_q4_score,home_ot_score,home_total_score,home_total_plays,home_total_yards,home_passing_yards,home_rushing_yards,home_turnovers,home_time_of_possession,home_result
0,20220918_ARI@LV,Regular Season,20220918,1,ARI,0,0,7,16,6,...,0,0,23,61,324,244,80,1,29:47,L


#### Update Datatypes

In [309]:
#check datatypes
summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   game_id                  1 non-null      object
 1   game_type                1 non-null      object
 2   game_date_id             1 non-null      object
 3   away_team_id             1 non-null      object
 4   away_team                1 non-null      object
 5   away_q1_score            1 non-null      object
 6   away_q2_score            1 non-null      object
 7   away_q3_score            1 non-null      object
 8   away_q4_score            1 non-null      object
 9   away_ot_score            1 non-null      object
 10  away_total_score         1 non-null      object
 11  away_total_plays         1 non-null      object
 12  away_total_yards         1 non-null      object
 13  away_passing_yards       1 non-null      object
 14  away_rushing_yards       1 non-null      objec

In [310]:
#setup dtype mapping
# Identifier/text columns stay as object; period scores and team totals become int64.
# The time-of-possession columns are kept as object here and parsed in a later step.
dtype_mapping = {
    **dict.fromkeys(('game_id', 'game_type', 'game_date_id'), 'object'),
    **{
        f'{side}_{column}': dtype
        for side in ('away', 'home')
        for column, dtype in (
            ('team_id', 'object'),
            ('team', 'object'),
            ('q1_score', 'int64'),
            ('q2_score', 'int64'),
            ('q3_score', 'int64'),
            ('q4_score', 'int64'),
            ('ot_score', 'int64'),
            ('total_score', 'int64'),
            ('total_plays', 'int64'),
            ('total_yards', 'int64'),
            ('passing_yards', 'int64'),
            ('rushing_yards', 'int64'),
            ('turnovers', 'int64'),
            ('time_of_possession', 'object'),
            ('result', 'object'),
        )
    },
}

In [311]:
#apply the mappings
summary_df = summary_df.astype(dtype_mapping)

In [312]:
#convert timestamps
# Possession is parsed with the minutes:seconds format; both sides are handled
# the same way and only the time component is kept.
for possession_col in ('away_time_of_possession', 'home_time_of_possession'):
    parsed_possession = pd.to_datetime(summary_df[possession_col], format='%M:%S')
    summary_df[possession_col] = parsed_possession.dt.time

In [313]:
summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   game_id                  1 non-null      object
 1   game_type                1 non-null      object
 2   game_date_id             1 non-null      object
 3   away_team_id             1 non-null      object
 4   away_team                1 non-null      object
 5   away_q1_score            1 non-null      int64 
 6   away_q2_score            1 non-null      int64 
 7   away_q3_score            1 non-null      int64 
 8   away_q4_score            1 non-null      int64 
 9   away_ot_score            1 non-null      int64 
 10  away_total_score         1 non-null      int64 
 11  away_total_plays         1 non-null      int64 
 12  away_total_yards         1 non-null      int64 
 13  away_passing_yards       1 non-null      int64 
 14  away_rushing_yards       1 non-null      int64

### Build Scoring Plays Data

In [314]:
boxscore['scoringPlays'][0]

{'scorePeriod': '1Q',
 'score': 'Davante Adams 1 Yd pass from Derek Carr (Daniel Carlson Kick)',
 'homeScore': '7',
 'awayScore': '0',
 'teamID': '17',
 'scoreDetails': '15 plays, 75 yards, 9:38',
 'scoreType': 'TD',
 'scoreTime': '5:22',
 'team': 'LV',
 'playerIDs': ['16757', '16800', '3051909'],
 'teamAbv': 'LV'}

In [315]:
#loop through scoring plays and convert to dataframe
# output column name -> key in each scoring-play record of the box score
score_fields = {
    "team_id": 'teamID',
    "team": 'teamAbv',
    "score_type": 'scoreType',
    "score_period": 'scorePeriod',
    "score_time": 'scoreTime',
    "drive_detail": 'scoreDetails',
    "score_detail": 'score',
    "away_team_score": 'awayScore',
    "home_team_score": 'homeScore',
}

# one flat record per scoring play, keeping only the fields listed above
scores = [
    {column: play[source_key] for column, source_key in score_fields.items()}
    for play in boxscore['scoringPlays']
]

scores_df = pd.DataFrame(scores)

In [316]:
scores_df.head(15)

Unnamed: 0,team_id,team,score_type,score_period,score_time,drive_detail,score_detail,away_team_score,home_team_score
0,17,LV,TD,1Q,5:22,"15 plays, 75 yards, 9:38",Davante Adams 1 Yd pass from Derek Carr (Danie...,0,7
1,17,LV,FG,2Q,11:40,"9 plays, 66 yards, 4:41",Daniel Carlson 32 Yd Field Goal,0,10
2,17,LV,TD,2Q,7:00,"7 plays, 59 yards, 2:02",Darren Waller 3 Yd pass from Derek Carr (Danie...,0,17
3,17,LV,FG,2Q,0:00,"9 plays, 36 yards, 1:51",Daniel Carlson 55 Yd Field Goal,0,20
4,1,ARI,TD,3Q,6:03,"11 plays, 79 yards, 5:27",Greg Dortch 5 Yd pass from Kyler Murray (Matt ...,7,20
5,17,LV,FG,3Q,1:38,"8 plays, 68 yards, 4:25",Daniel Carlson 25 Yd Field Goal,7,23
6,1,ARI,TD,4Q,8:13,"9 plays, 54 yards, 3:54",Darrel Williams 1 Yd Run (Kyler Murray Run for...,15,23
7,1,ARI,TD,4Q,0:00,"18 plays, 73 yards, 4:43",Kyler Murray 3 Yd Run (Kyler Murray Pass to A....,23,23
8,1,ARI,TD,OT,3:51,"5 plays, 23 yards, 1:39",Byron Murphy Jr. 59 Yd Fumble Return,29,23


#### Update Dtypes

In [317]:
#check datatypes
scores_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   team_id          9 non-null      object
 1   team             9 non-null      object
 2   score_type       9 non-null      object
 3   score_period     9 non-null      object
 4   score_time       9 non-null      object
 5   drive_detail     9 non-null      object
 6   score_detail     9 non-null      object
 7   away_team_score  9 non-null      object
 8   home_team_score  9 non-null      object
dtypes: object(9)
memory usage: 780.0+ bytes


In [318]:
# Target dtypes for the scoring-plays frame.
dtype_mapping_2 = dict(
    team_id='object',
    team='object',
    score_type='category',   # make score_type categorical
    score_period='object',
    score_time='object',     # converted to a time in the next step
    drive_detail='object',
    score_detail='object',
    away_team_score='int64',
    home_team_score='int64',
)

In [319]:
scores_df = scores_df.astype(dtype_mapping_2)

In [320]:
#convert timestamps
# Parse the minutes:seconds clock value, then keep only the time component.
score_clock = pd.to_datetime(scores_df['score_time'], format='%M:%S')
scores_df['score_time'] = score_clock.dt.time

#### Add Period Elapsed Time
The score time is the time remaining on the clock when the score happened, but for our purposes the time elapsed in the period is more useful. I could do this in a BI tool, but would rather standardize it in the dataset. Total game time elapsed builds on this value; I wasn't able to get it at first, so it is handled separately in the Game Elapsed section below.

In [321]:
scores_df.head(10)

Unnamed: 0,team_id,team,score_type,score_period,score_time,drive_detail,score_detail,away_team_score,home_team_score
0,17,LV,TD,1Q,00:05:22,"15 plays, 75 yards, 9:38",Davante Adams 1 Yd pass from Derek Carr (Danie...,0,7
1,17,LV,FG,2Q,00:11:40,"9 plays, 66 yards, 4:41",Daniel Carlson 32 Yd Field Goal,0,10
2,17,LV,TD,2Q,00:07:00,"7 plays, 59 yards, 2:02",Darren Waller 3 Yd pass from Derek Carr (Danie...,0,17
3,17,LV,FG,2Q,00:00:00,"9 plays, 36 yards, 1:51",Daniel Carlson 55 Yd Field Goal,0,20
4,1,ARI,TD,3Q,00:06:03,"11 plays, 79 yards, 5:27",Greg Dortch 5 Yd pass from Kyler Murray (Matt ...,7,20
5,17,LV,FG,3Q,00:01:38,"8 plays, 68 yards, 4:25",Daniel Carlson 25 Yd Field Goal,7,23
6,1,ARI,TD,4Q,00:08:13,"9 plays, 54 yards, 3:54",Darrel Williams 1 Yd Run (Kyler Murray Run for...,15,23
7,1,ARI,TD,4Q,00:00:00,"18 plays, 73 yards, 4:43",Kyler Murray 3 Yd Run (Kyler Murray Pass to A....,23,23
8,1,ARI,TD,OT,00:03:51,"5 plays, 23 yards, 1:39",Byron Murphy Jr. 59 Yd Fumble Return,29,23


In [322]:
#calculate elapsed time in quarter
# score_time is the clock time remaining in the period, so
# elapsed seconds = period length - time remaining.
clock_remaining = pd.to_datetime(scores_df.score_time, format='%H:%M:%S')
overtime_length = pd.to_datetime('00:10:00', format='%H:%M:%S')
quarter_length = pd.to_datetime('00:15:00', format='%H:%M:%S')

# NOTE(review): the 10-minute overtime length matches NFL regular-season rules;
# postseason overtime periods are 15 minutes - confirm before using on playoff games.
scores_df['period_elapsed_time'] = np.where(
    scores_df.score_period == 'OT',
    (overtime_length - clock_remaining).dt.total_seconds(),
    (quarter_length - clock_remaining).dt.total_seconds(),
)

# The value is left as seconds here; formatting as HH:MM:SS happens after the
# game elapsed time has been derived from it.


In [323]:
scores_df.head(10)

Unnamed: 0,team_id,team,score_type,score_period,score_time,drive_detail,score_detail,away_team_score,home_team_score,period_elapsed_time
0,17,LV,TD,1Q,00:05:22,"15 plays, 75 yards, 9:38",Davante Adams 1 Yd pass from Derek Carr (Danie...,0,7,578.0
1,17,LV,FG,2Q,00:11:40,"9 plays, 66 yards, 4:41",Daniel Carlson 32 Yd Field Goal,0,10,200.0
2,17,LV,TD,2Q,00:07:00,"7 plays, 59 yards, 2:02",Darren Waller 3 Yd pass from Derek Carr (Danie...,0,17,480.0
3,17,LV,FG,2Q,00:00:00,"9 plays, 36 yards, 1:51",Daniel Carlson 55 Yd Field Goal,0,20,900.0
4,1,ARI,TD,3Q,00:06:03,"11 plays, 79 yards, 5:27",Greg Dortch 5 Yd pass from Kyler Murray (Matt ...,7,20,537.0
5,17,LV,FG,3Q,00:01:38,"8 plays, 68 yards, 4:25",Daniel Carlson 25 Yd Field Goal,7,23,802.0
6,1,ARI,TD,4Q,00:08:13,"9 plays, 54 yards, 3:54",Darrel Williams 1 Yd Run (Kyler Murray Run for...,15,23,407.0
7,1,ARI,TD,4Q,00:00:00,"18 plays, 73 yards, 4:43",Kyler Murray 3 Yd Run (Kyler Murray Pass to A....,23,23,900.0
8,1,ARI,TD,OT,00:03:51,"5 plays, 23 yards, 1:39",Byron Murphy Jr. 59 Yd Fumble Return,29,23,369.0


##### Game Elapsed

In [331]:
#create a custom function to apply
def game_time_calc(row):
    """Return the seconds elapsed in the game at the time of a scoring play.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'score_period' (either 'OT' or a label whose first
        character is the quarter number, e.g. '1Q') and 'period_elapsed_time'
        (seconds elapsed within that period).

    Returns
    -------
    number
        Seconds in all completed 15-minute quarters plus the seconds elapsed
        in the current period.

    Raises
    ------
    ValueError
        If the period is not 'OT' and its first character is not a digit.
    """
    seconds_per_quarter = 15 * 60
    period = row['score_period']

    # Overtime follows four complete quarters; otherwise the leading digit of the
    # label is the current quarter, so one fewer quarters have been completed.
    if period == 'OT':
        quarters_completed = 4
    else:
        quarters_completed = int(str(period)[0]) - 1

    return (quarters_completed * seconds_per_quarter) + row['period_elapsed_time']

In [332]:
# Apply the custom function to each row and store the result in a new column
scores_df['game_elapsed_time'] = scores_df.apply(game_time_calc, axis=1)

In [334]:
#convert to hh:mm:ss time format
# Both columns hold elapsed seconds at this point; interpret them as seconds and
# keep only the time component.
for seconds_col in ('period_elapsed_time', 'game_elapsed_time'):
    scores_df[seconds_col] = pd.to_datetime(scores_df[seconds_col], unit='s').dt.time

In [335]:
scores_df.head(10)

Unnamed: 0,team_id,team,score_type,score_period,score_time,drive_detail,score_detail,away_team_score,home_team_score,period_elapsed_time,game_elapsed_time
0,17,LV,TD,1Q,00:05:22,"15 plays, 75 yards, 9:38",Davante Adams 1 Yd pass from Derek Carr (Danie...,0,7,00:09:38,00:09:38
1,17,LV,FG,2Q,00:11:40,"9 plays, 66 yards, 4:41",Daniel Carlson 32 Yd Field Goal,0,10,00:03:20,00:18:20
2,17,LV,TD,2Q,00:07:00,"7 plays, 59 yards, 2:02",Darren Waller 3 Yd pass from Derek Carr (Danie...,0,17,00:08:00,00:23:00
3,17,LV,FG,2Q,00:00:00,"9 plays, 36 yards, 1:51",Daniel Carlson 55 Yd Field Goal,0,20,00:15:00,00:30:00
4,1,ARI,TD,3Q,00:06:03,"11 plays, 79 yards, 5:27",Greg Dortch 5 Yd pass from Kyler Murray (Matt ...,7,20,00:08:57,00:38:57
5,17,LV,FG,3Q,00:01:38,"8 plays, 68 yards, 4:25",Daniel Carlson 25 Yd Field Goal,7,23,00:13:22,00:43:22
6,1,ARI,TD,4Q,00:08:13,"9 plays, 54 yards, 3:54",Darrel Williams 1 Yd Run (Kyler Murray Run for...,15,23,00:06:47,00:51:47
7,1,ARI,TD,4Q,00:00:00,"18 plays, 73 yards, 4:43",Kyler Murray 3 Yd Run (Kyler Murray Pass to A....,23,23,00:15:00,01:00:00
8,1,ARI,TD,OT,00:03:51,"5 plays, 23 yards, 1:39",Byron Murphy Jr. 59 Yd Fumble Return,29,23,00:06:09,01:06:09
