Created by: [SmirkyGraphs](https://smirkygraphs.github.io/). Code: [Github](https://github.com/SmirkyGraphs/Python-Notebooks). Source: [NHL API](https://gitlab.com/dword4/nhlapi).
<hr>

# NHL Team Timeouts - 2020-21 Season

Near the end of the season, during the playoffs, one of Tampa Bay's games against Montreal had a pretty early timeout: on `2021-07-03`, the 196th timeout of the season was called by Montreal after falling behind by 2 in the 1st period of game 3. The commentators started talking about how, while rare, many teams end up losing a game without ever using their timeout at all.

This stuck with me, so I decided to find any potentially interesting facts about timeouts that I could for the 2020-21 season. I chose to stick to a single season rather than multiple seasons because, previously, a failed coach's challenge led to the loss of a timeout, and I didn't want to mix the data.
 
The code below loops over the game event JSON files, finds any timeouts (excluding TV timeouts), and does some basic data cleaning, such as adding the game time in seconds, which team called the timeout, who was winning at the time, by how much, etc.

<hr>

In [1]:
import json
import pandas as pd
from pathlib import Path

In [2]:
def find_team_timeout(file):
    """
    Extract the non-TV timeouts and the goals from one live-feed JSON file.

    input: path object for the game file (its ``stem`` becomes ``game_id``)
    returns a dict with keys ``game_data`` (game id, type, team names and the
    goals dict of the last play), ``data`` (one entry per timeout, numbered
    from 1 in play order) and ``goals`` (every GOAL play), or None when the
    game has no matching timeout
    """
    with open(file, 'r') as handle:
        feed = json.load(handle)

    game_info = feed['gameData']
    plays = feed['liveData']['plays']['allPlays']

    game_meta_data = {
        "game_id": file.stem,
        "game_type": game_info['game']['type'],
        "home_team": game_info['teams']['home']['name'],
        "away_team": game_info['teams']['away']['name'],
        # score carried by the last recorded play
        "final_score": plays[-1]['about']['goals'],
    }

    timeouts = []
    goals = []
    for play in plays:
        kind = play['result']['eventTypeId']

        if kind == 'GOAL':
            goals.append({"goals": play})
            continue

        if kind != 'STOP':
            continue

        text = play['result']['description'].lower()
        # keep stoppages described as a timeout, except TV timeouts
        if 'tv' in text or 'timeout' not in text:
            continue

        timeouts.append({
            "timeout_id": len(timeouts) + 1,
            "events": [play],
        })

    if not timeouts:
        return None

    return {"game_data": game_meta_data, "data": timeouts, "goals": goals}
    
def get_game_seconds(row):
    """
    input: pair of (period, period clock as "MM:SS")
    returns the number of seconds elapsed in the game at that moment,
    counting every period before the current one as 20 minutes
    """
    seconds_per_period = 20 * 60

    period, clock = row
    parts = clock.split(':')
    clock_seconds = int(parts[0]) * 60 + int(parts[1])

    # only periods after the first add a whole-period offset
    earlier_periods = (period - 1) * seconds_per_period if period > 1 else 0

    return clock_seconds + earlier_periods
   
def team_timeout(df):
    """
    Add ``timeout_team`` (the side that called the timeout) and
    ``timeout_against`` (the other side) based on ``result.description``.

    Rows whose description is neither 'Home Timeout' nor 'Visitor Timeout'
    are not assigned. The frame is modified in place and returned.
    """
    # (description, column of the calling team, column of the opponent)
    sides = (
        ('Home Timeout', 'home_team', 'away_team'),
        ('Visitor Timeout', 'away_team', 'home_team'),
    )

    for label, caller_col, opponent_col in sides:
        rows = df['result.description'] == label
        df.loc[rows, 'timeout_team'] = df[caller_col]
        df.loc[rows, 'timeout_against'] = df[opponent_col]

    return df

def score_diff(df):
    """
    Add goal margins from the point of view of the team calling the timeout.

    ``score_diff`` uses the score at the time of the event and
    ``end_score_diff`` uses the final score; positive means the calling team
    is ahead. The frame is modified in place and returned.
    """
    description = df['result.description']
    home_called = description == 'Home Timeout'
    visitor_called = description == 'Visitor Timeout'

    # (output column, home goals column, away goals column)
    margins = (
        ('score_diff', 'about.goals.home', 'about.goals.away'),
        ('end_score_diff', 'home_final_score', 'away_final_score'),
    )

    for target, home_col, away_col in margins:
        df.loc[home_called, target] = df[home_col] - df[away_col]
        df.loc[visitor_called, target] = df[away_col] - df[home_col]

    return df

def pre_timeout(df):
    """
    Label each row's ``score_diff`` as 'winning', 'losing' or 'tied' in a
    ``before_timeout`` column. The frame is modified in place and returned.
    """
    margin = df['score_diff']
    states = (
        ('winning', margin > 0),
        ('losing', margin < 0),
        ('tied', margin == 0),
    )

    for label, rows in states:
        df.loc[rows, 'before_timeout'] = label

    return df

def end_result(df):
    """
    Label each row's ``end_score_diff`` as 'winning', 'losing' or 'tied' in
    an ``end_result`` column. The frame is modified in place and returned.
    """
    final_margin = df['end_score_diff']
    outcomes = (
        ('winning', final_margin > 0),
        ('losing', final_margin < 0),
        ('tied', final_margin == 0),
    )

    for label, rows in outcomes:
        df.loc[rows, 'end_result'] = label

    return df

def home_away_timeout(desc):
    """
    input: event description such as 'Home Timeout'
    returns 'home' when the first space-separated word is exactly 'Home',
    otherwise 'away'
    """
    first_word, _, _ = desc.partition(' ')
    return 'home' if first_word == 'Home' else 'away'

In [3]:
# Collect the team timeouts of every 2020-21 live-feed file, flatten them into
# one row per timeout event, derive the score/time columns and write the CSV.
game_files = Path('../data/raw/live-feed/20202021/').glob('*.json')

count = 0
all_timeouts = []
errors = []  # ids of the game files that could not be processed
for game in game_files:
    try:
        timeouts = find_team_timeout(game)
    except Exception:
        # Best effort: remember the failing game and skip it. Skipping is
        # required here, otherwise `timeouts` would be undefined (first file)
        # or still hold the previous game's result and be appended twice.
        errors.append(game.stem)
        continue

    if timeouts is not None:
        print(f"id: {game.stem} | len: {len(timeouts['data'])} | count: {count}", end='\r')
        all_timeouts.append(timeouts)
        count += 1

# event dataset: one row per timeout event, carrying the game metadata dict
# and the per-game timeout number alongside the flattened event fields
df = pd.json_normalize(
    all_timeouts,
    meta=[['game_data'], ['data', 'timeout_id']],
    record_path=['data', 'events']
)

meta_cols = list(df['game_data'].iloc[0].keys())
score_cols = ['away_final_score', 'home_final_score']

# expand the game metadata dict into one column per key
df[meta_cols] = df['game_data'].apply(pd.Series)

# Pick each final score by its key so the away/home columns cannot be swapped
# by the key order of the goals dict.
for side, col in zip(('away', 'home'), score_cols):
    df[col] = df['final_score'].map(lambda goals, side=side: goals[side])

df = df.drop(columns=['game_data', 'final_score'])

# get team that called timeout and score diff
df = (df
    .pipe(team_timeout)
    .pipe(score_diff)
    .pipe(pre_timeout)
    .pipe(end_result)
)

df['game_seconds'] = df[['about.period', 'about.periodTime']].apply(get_game_seconds, axis=1)
df.to_csv('../data/clean/timeouts_clean.csv', index=False)