# Lines Analysis

## Import

In [1]:
import os

import numpy as np
import pandas as pd

import janitor

## Constants

In [2]:
# Event-type vocabularies: each list names values of the `event_type` column
# and is used with `.isin(...)` to select rows.

# Scoring events, plus the split into the two lists used for "we score" /
# "they score" filters.
we_score = ['GOAL', 'CALLAHAN']
they_score = ['SCORED_ON', 'CALLAHAN_THROWN']
scores = [
    'GOAL',
    'SCORED_ON',
    'CALLAHAN',
    'CALLAHAN_THROWN',
]

# Possession changes.
turnovers = [
    'THROWAWAY',
    'DROP',
    'STALL',
    'CALLAHAN_THROWN',
]
takeaways = [
    'BLOCK',
    'CALLAHAN',
    'STALL_CAUSED',
    'THROWAWAY_CAUSED',
]

# NOTE(review): df['event_type'].unique() also contains 'PULL_OUR_OFFSIDES',
# which is not listed here -- confirm whether it belongs in `pulls`.
pulls = ['PULL_OUT_OF_BOUNDS', 'PULL_INBOUNDS']

# Penalties.
pens = [
    'D_PENALTY_ON_THEM',
    'D_PENALTY_ON_US',
    'O_PENALTY_ON_THEM',
    'O_PENALTY_ON_US',
]

# Game / quarter boundary markers.
quarter_ends = [
    'START_OF_GAME',
    'END_OF_Q1',
    'HALFTIME',
    'END_OF_Q3',
    'GAME_OVER',
]

# Line-set events; `line_sets` holds all four, `o_lines` / `d_lines` the
# offensive and defensive variants.
o_lines = ['SET_O_LINE', 'SET_O_LINE_NO_PULL']
d_lines = ['SET_D_LINE', 'SET_D_LINE_NO_PULL']
line_sets = [
    'SET_D_LINE',
    'SET_O_LINE',
    'SET_D_LINE_NO_PULL',
    'SET_O_LINE_NO_PULL',
]

timeouts = ['THEIR_MIDPOINT_TIMEOUT', 'OUR_MIDPOINT_TIMEOUT']

# Everything that does not fit the groups above.
other = [
    'REF_TIMEOUT_DISCUSSION???',
    'INJURY_ON_O',
    'INJURY_ON_D',
]


## Analysis

### File IO

In [3]:
def _read_events(csv_path):
    """Read one events CSV and rename its "Unnamed: 0" column to ``event_index``."""
    return pd.read_csv(csv_path).rename(columns={"Unnamed: 0": "event_index"})


df = _read_events("Flyers_Games.csv")
df_home = _read_events("Flyers_Games_home.csv")
df_away = _read_events("Flyers_Games_away.csv")

In [4]:
# Preview the first 35 rows of `df` (read from Flyers_Games.csv).
df.head(35)

Unnamed: 0,event_index,game_id,date,home_team,away_team,event_counter,team_id,current_quarter,time,event_type,player,x,y,o_point,d_point,point_id,our_score,their_score,line
0,0,2908,2022-06-25,245,242,0,245,1.0,,START_OF_GAME,,,,False,False,0,0,0,
1,1,2908,2022-06-25,245,242,1,245,1.0,,SET_D_LINE,"Michael Arbutine, Cody Coates, Garrett Knobel,...",,,False,True,1,0,0,"['Michael Arbutine', 'Cody Coates', 'Garrett K..."
2,2,2908,2022-06-25,245,242,2,245,1.0,,PULL_INBOUNDS,Michael Arbutine,7.57,80.31,False,True,1,0,0,
3,3,2908,2022-06-25,245,242,3,245,1.0,,BLOCK,Michael Arbutine,,,False,True,1,0,0,
4,4,2908,2022-06-25,245,242,4,245,1.0,,POSSESSION,Michael Arbutine,-0.48,39.95,False,True,1,0,0,
5,5,2908,2022-06-25,245,242,5,245,1.0,,POSSESSION,Sean Plunkett,7.36,44.31,False,True,1,0,0,
6,6,2908,2022-06-25,245,242,6,245,1.0,,DROP,Clayton Partlow,22.91,60.61,False,True,1,0,0,
7,7,2908,2022-06-25,245,242,7,245,1.0,Q1 11:10,SCORED_ON,,,,False,True,1,0,1,
8,8,2908,2022-06-25,245,242,8,245,1.0,,SET_O_LINE,"Tannon Hedges, Carl Johnson, Billy O'Bryan, Un...",,,True,False,2,0,1,"['Tannon Hedges', 'Carl Johnson', ""Billy O'Bry..."
9,9,2908,2022-06-25,245,242,9,245,1.0,,POSSESSION,Tannon Hedges,0.0,40.0,True,False,2,0,1,


### Cleaning

In [5]:
# Which side of the game the row's team (`team_id`) is on.
df['is_home'] = df['team_id'] == df['home_team']
df['is_away'] = df['team_id'] == df['away_team']

# Point key that is unique across games: game_id in the high digits,
# point_id in the low three.
df["game_point_id"] = df["point_id"] + df["game_id"] * 1000

# Keep the `player` text only on line-set rows; every other event gets "".
is_line_set = df['event_type'].isin(line_sets)
df['player'] = np.where(is_line_set, df['player'], "")

# 1 on rows where the row's team records one of the `we_score` events,
# attributed to the home or away side; 0 everywhere else.
is_we_score = df['event_type'].isin(we_score)
df['home_score'] = np.where(df['is_home'] & is_we_score, 1, 0)
df['away_score'] = np.where(df['is_away'] & is_we_score, 1, 0)

### LINES

In [6]:
# Line-set events only; copied so the columns added below do not touch `df`.
all_lines = df.loc[df['event_type'].isin(line_sets)].copy()

# How many lines are there per point
# cumcount() numbers the line-set rows of each game_point_id 0, 1, 2, ... in
# row order (all teams' rows for the point share one counter).
all_lines['line_number_in_point'] = all_lines.groupby('game_point_id').cumcount()
max_line_number = all_lines['line_number_in_point'].max()
# NOTE: the printed value is the largest zero-based line number, i.e. one less
# than the number of line sets in the busiest point.
print("Max Number of Lines in a point:", max_line_number)

# Make Line ID within point within game
# The id reserves a single decimal digit for the line number, so it is only
# collision-free while every line number is below 10. Check that instead of
# assuming it.
assert all_lines.empty or max_line_number < 10, (
    "line_number_in_point needs more than one digit; "
    "game_point_line_id = game_point_id*10 + line_number would collide"
)
all_lines['game_point_line_id'] = all_lines['game_point_id'] * 10 + all_lines['line_number_in_point']



Max Number of Lines in a point: 7


In [7]:
# List the columns of `all_lines` at this stage.
all_lines.columns

Index(['event_index', 'game_id', 'date', 'home_team', 'away_team',
       'event_counter', 'team_id', 'current_quarter', 'time', 'event_type',
       'player', 'x', 'y', 'o_point', 'd_point', 'point_id', 'our_score',
       'their_score', 'line', 'is_home', 'is_away', 'game_point_id',
       'home_score', 'away_score', 'line_number_in_point',
       'game_point_line_id'],
      dtype='object')

In [8]:
# Players in each line
all_lines['line_players_original'] = [[x.strip() for x in y[:-1]] for y in all_lines['player'].str.split(",")]
all_lines['line_players'] = all_lines['line_players_original'].apply(lambda x: sorted(x))

all_lines['line_players'].value_counts()

[Evan Swiatek, Jake Radack, Kyle Henke, Mark Evans, Paul Starkel, Reese Bowman, Vinay Valsaraj]                         50
[Alex Davis, Eric Taylor, Grayson Sanner, Henry Fisher, Jacob Fairfax, Matt Gouchoe-Hanas, Sol Yanuck]                  26
[Ben Lewis, Brandon Malecek, Carson Wilder, Jimmy Zuraw, Kevin Richardson, Michael Matthis, Rory Orloff]                20
[Alex Davis, Anders Juengst, Henry Fisher, Jacob Fairfax, Noah Saul, Sol Yanuck, Terrence Mitchell]                     18
[Ben Lewis, Brooks Wallace, Carson Wilder, Emmanuel Bilolo, Kevin Richardson, Michael Matthis, Rory Orloff]             17
                                                                                                                        ..
[Alex Cloud, Andrew Vande Berg, Elijah Long, Grayson Sanner, Justin Allen, Marc Rovner, Rick Hennighausen]               1
[Alex Davis, David Richardson, Dylan Hawkins, Jacob Fairfax, Rick Hennighausen, Suraj Madiraju, William Coffin]          1
[Andrew Vande Be

In [45]:
# ID for each unique line
# Deduplicate on tuples: they are hashable, so duplicate detection does not
# depend on how pandas handles list-valued cells.
line_keys = all_lines['line_players'].map(tuple)
first_seen = ~line_keys.duplicated()

deduped_lines = all_lines.loc[first_seen, 'line_players']
deduped_idx = deduped_lines.index

# One row per distinct line, with the team of its first appearance.
df_line_ids = all_lines.loc[first_seen, ['team_id', 'line_players']].reset_index(drop=True)

# line_id = team_id * 10000 + running number of the line within its team.
# The team prefix must come from df_line_ids itself: df['team_id'] would be
# aligned by index label with unrelated event rows of `df`, giving lines the
# wrong team prefix and letting ids collide across teams.
df_line_ids['line_id'] = (
    df_line_ids['team_id'] * 10000
    + df_line_ids.groupby('team_id').cumcount()
).astype(int)
df_line_ids

Unnamed: 0,team_id,line_players,line_id
0,245,"[Clayton Partlow, Cody Coates, David Witte, Ga...",2450000
1,245,"[Adam Vinson, Billy O'Bryan, Carl Johnson, Jai...",2450001
2,245,"[Austin Gaines, Billy O'Bryan, Logan Diehl, Ry...",2450002
3,245,"[Anish Indhupuru, Clayton Partlow, Cody Coates...",2450003
4,245,"[Adam Vinson, Anish Indhupuru, Billy O'Bryan, ...",2450004
...,...,...,...
433,241,"[Austin Taylor, Bradley Seuntjens, Christian O...",2410060
434,241,"[Austin Taylor, Bradley Seuntjens, Eli Motycka...",2410061
435,241,"[Austin Taylor, Bradley Seuntjens, Christian O...",2410062
436,241,"[Austin Taylor, Christian Olsen, Dean Ramsey, ...",2410063


In [10]:
# Need to get line ids
df_line_sets = all_lines.loc[:, ['team_id', 'game_id', 'line_players']]

# result = df.a.sort_values().apply(lambda x: sorted(x))

df_line_sets['sorted_line_players'] = df_line_sets['line_players'].apply(lambda x: sorted(x))
split_series = df_line_sets.sorted_line_players.apply(lambda x:pd.Series(str(x).replace("[", "").replace("]", "").split(",")))
print(split_series.shape)
split_series.drop_duplicates()

# ser = pd.get_dummies(df_line_sets['line_players'])

(885, 7)


Unnamed: 0,0,1,2,3,4,5,6
1,'Clayton Partlow','Cody Coates','David Witte','Garrett Knobel','Michael Arbutine','Sean Plunkett','Tim Lootens'
8,'Adam Vinson',"""Billy O'Bryan""",'Carl Johnson','Jaime Perez','Jorge Delgado','Tannon Hedges','Unmil Patel'
28,'Austin Gaines',"""Billy O'Bryan""",'Logan Diehl','Ryan Hiser','Sean Plunkett','Tim Lootens','Tyler Hebert'
38,'Anish Indhupuru','Clayton Partlow','Cody Coates','David Witte','Garrett Knobel','Michael Arbutine','Sean Plunkett'
42,'Adam Vinson','Anish Indhupuru',"""Billy O'Bryan""",'Carl Johnson','Jaime Perez','Tannon Hedges','Unmil Patel'
...,...,...,...,...,...,...,...
7877,'Austin Taylor','Bradley Seuntjens','Christian Olsen','JP Burns','Karl Ekwurtzel','Matt Smith','Sam Gabrielson'
7900,'Austin Taylor','Bradley Seuntjens','Eli Motycka','Elijah Jaime','JP Burns','Matt Smith','Sam Gabrielson'
7911,'Austin Taylor','Bradley Seuntjens','Christian Olsen','Elijah Jaime','Karl Ekwurtzel','Khalif El-Salaam','Matt Smith'
7947,'Austin Taylor','Christian Olsen','Dean Ramsey','JP Burns','Kelvin Williams','Khalif El-Salaam','Tanner Robinson'


In [11]:
# Last Line on for each point
last_line_by_point_idx = all_lines.groupby(['game_point_id', 'team_id'], sort=False)['game_point_line_id'].transform('max') == all_lines['game_point_line_id']
last_lines = all_lines.loc[last_line_by_point_idx].sort_values('game_point_id')
# last_lines.sort_values(['game_point_id', 'game_point_line_id']).head(5)

all_lines = all_lines.merge(last_lines.loc[:, 'game_point_line_id'], how='left', on='game_point_line_id', indicator=True)
all_lines['is_last_line'] = all_lines['_merge'] == 'both'
del all_lines['_merge']

In [12]:
# Split the line sets by side; `is_home` is the boolean flag computed in the
# cleaning step, so its negation selects the away rows.
home_mask = all_lines['is_home']
home_lines = all_lines.loc[home_mask].copy()
away_lines = all_lines.loc[~home_mask].copy()

In [13]:
# List the columns of `all_lines` at this stage.
all_lines.columns

Index(['event_index', 'game_id', 'date', 'home_team', 'away_team',
       'event_counter', 'team_id', 'current_quarter', 'time', 'event_type',
       'player', 'x', 'y', 'o_point', 'd_point', 'point_id', 'our_score',
       'their_score', 'line', 'is_home', 'is_away', 'game_point_id',
       'home_score', 'away_score', 'line_number_in_point',
       'game_point_line_id', 'line_players_original', 'line_players',
       'is_last_line'],
      dtype='object')

In [14]:
# Columns taken from the home-side line sets (side "a").
keep_cols_a = [
    'date', 'game_id', 'game_point_id', 'game_point_line_id',
    'home_team', 'away_team', 'team_id', 'current_quarter',
    'event_type', 'o_point', 'd_point', 'point_id',
    'our_score', 'their_score', 'is_home', 'is_away', 'line_players',
    'is_last_line', 'line_number_in_point',
]

# Columns taken from the away-side line sets (side "b").
keep_cols_b = [
    'game_point_id', 'game_point_line_id', 'team_id',
    'line_players', 'event_type', 'is_last_line',
]

# Side-specific names so the two halves do not clash after the merge.
rename_cols_a = {
    'team_id': 'team_id_a',
    'is_home': 'is_home_a',
    'is_away': 'is_away_a',
    'game_point_line_id': 'line_id_a',
    'line_players': 'line_players_a',
    'is_last_line': 'is_last_line_a',
    'our_score': 'our_score_point_start',
    'their_score': 'their_score_point_start',
    'event_type': 'line_set_type_a',
    'o_point': 'o_point_a',
}

rename_cols_b = {
    'team_id': 'team_id_b',
    'game_point_line_id': 'line_id_b',
    'line_players': 'line_players_b',
    'is_last_line': 'is_last_line_b',
    'event_type': 'line_set_type_b',
}

side_a = home_lines[keep_cols_a].rename(columns=rename_cols_a)
side_b = away_lines[keep_cols_b].rename(columns=rename_cols_b)

# Outer join on the point: every home line set of a point is paired with every
# away line set of the same point, and points present on only one side are kept.
final_lines = side_a.merge(side_b, how='outer', on='game_point_id')
print(final_lines.shape)
final_lines.head()

(568, 24)


Unnamed: 0,date,game_id,game_point_id,line_id_a,home_team,away_team,team_id_a,current_quarter,line_set_type_a,o_point_a,...,is_home_a,is_away_a,line_players_a,is_last_line_a,line_number_in_point,line_id_b,team_id_b,line_players_b,line_set_type_b,is_last_line_b
0,2022-06-25,2908,2908001,29080010,245,242,245,1.0,SET_D_LINE,False,...,True,False,"[Clayton Partlow, Cody Coates, David Witte, Ga...",True,0,29080011,242,"[Alex Davis, Elijah Long, Eric Taylor, Henry F...",SET_O_LINE,True
1,2022-06-25,2908,2908002,29080020,245,242,245,1.0,SET_O_LINE,True,...,True,False,"[Adam Vinson, Billy O'Bryan, Carl Johnson, Jai...",True,0,29080021,242,"[Andrew Li, Andrew McKelvey, Charlie McCutcheo...",SET_D_LINE,True
2,2022-06-25,2908,2908003,29080030,245,242,245,1.0,SET_O_LINE,True,...,True,False,"[Austin Gaines, Billy O'Bryan, Logan Diehl, Ry...",True,0,29080031,242,"[Alex Cloud, Charlie McCutcheon, Dylan Hawkins...",SET_D_LINE,True
3,2022-06-25,2908,2908004,29080040,245,242,245,1.0,SET_D_LINE,False,...,True,False,"[Anish Indhupuru, Clayton Partlow, Cody Coates...",False,0,29080042,242,"[Alex Davis, Elijah Long, Eric Taylor, Henry F...",SET_O_LINE,False
4,2022-06-25,2908,2908004,29080040,245,242,245,1.0,SET_D_LINE,False,...,True,False,"[Anish Indhupuru, Clayton Partlow, Cody Coates...",False,0,29080043,242,"[Andrew McKelvey, Charlie McCutcheon, David Ri...",SET_D_LINE_NO_PULL,True


In [15]:
# Rows whose event marks a game or quarter boundary.
is_quarter_end = df['event_type'].isin(quarter_ends)
df_quarter_ends = df.loc[is_quarter_end].copy()
df_quarter_ends.head()

Unnamed: 0,event_index,game_id,date,home_team,away_team,event_counter,team_id,current_quarter,time,event_type,...,d_point,point_id,our_score,their_score,line,is_home,is_away,game_point_id,home_score,away_score
0,0,2908,2022-06-25,245,242,0,245,1.0,,START_OF_GAME,...,False,0,0,0,,True,False,2908000,0,0
110,110,2908,2022-06-25,245,242,110,245,2.0,,END_OF_Q1,...,True,9,3,5,,True,False,2908009,0,0
227,227,2908,2022-06-25,245,242,227,245,3.0,,HALFTIME,...,False,19,6,11,,True,False,2908019,0,0
324,324,2908,2022-06-25,245,242,324,245,4.0,,END_OF_Q3,...,True,27,9,15,,True,False,2908027,0,0
422,422,2908,2022-06-25,245,242,422,245,4.0,,GAME_OVER,...,False,42,15,23,,True,False,2908042,0,0


### POINTS

In [16]:
# Rows where the row's team records one of the `we_score` events; that team is
# stored as the scorer of the point.
df_scores = df.loc[df['event_type'].isin(we_score)].assign(
    scoring_team=lambda scored: scored['team_id']
)
df_scores.sort_values('game_point_id').head(10)

Unnamed: 0,event_index,game_id,date,home_team,away_team,event_counter,team_id,current_quarter,time,event_type,...,point_id,our_score,their_score,line,is_home,is_away,game_point_id,home_score,away_score,scoring_team
438,15,2884,2022-06-17,241,242,15,241,1.0,Q1 10:55,GOAL,...,1,1,0,,True,False,2884001,1,0,241
4356,17,2884,2022-06-17,241,242,17,242,1.0,Q1 10:07,GOAL,...,2,1,1,,False,True,2884002,0,1,242
453,30,2884,2022-06-17,241,242,30,241,1.0,Q1 09:18,GOAL,...,3,2,1,,True,False,2884003,1,0,241
459,36,2884,2022-06-17,241,242,36,241,1.0,Q1 08:29,GOAL,...,4,3,1,,True,False,2884004,1,0,241
4387,48,2884,2022-06-17,241,242,48,242,1.0,Q1 07:45,GOAL,...,5,2,3,,False,True,2884005,0,1,242
4397,58,2884,2022-06-17,241,242,58,242,1.0,Q1 07:03,GOAL,...,6,3,3,,False,True,2884006,0,1,242
476,53,2884,2022-06-17,241,242,53,241,1.0,Q1 06:35,GOAL,...,7,4,3,,True,False,2884007,1,0,241
4436,97,2884,2022-06-17,241,242,97,242,1.0,Q1 04:16,GOAL,...,8,4,4,,False,True,2884008,0,1,242
4442,103,2884,2022-06-17,241,242,103,242,1.0,Q1 03:37,GOAL,...,9,5,4,,False,True,2884009,0,1,242
508,85,2884,2022-06-17,241,242,85,241,1.0,Q1 02:58,GOAL,...,10,5,5,,True,False,2884010,1,0,241


In [17]:
# List the columns of `df_scores`.
df_scores.columns

Index(['event_index', 'game_id', 'date', 'home_team', 'away_team',
       'event_counter', 'team_id', 'current_quarter', 'time', 'event_type',
       'player', 'x', 'y', 'o_point', 'd_point', 'point_id', 'our_score',
       'their_score', 'line', 'is_home', 'is_away', 'game_point_id',
       'home_score', 'away_score', 'scoring_team'],
      dtype='object')

In [18]:
# Columns of `df_scores` that go into the per-point score table.
score_keep_cols = [
    'game_point_id',
    'time',
    'event_type',
    'o_point',
    'is_home',
    'is_away',
    'our_score',
    'their_score',
    'scoring_team',
    'home_score',
    'away_score',
]

# Names that mark these values as describing the end of the point.
score_rename_cols = {
    'event_type': 'score_type',
    'time': 'time_of_score',
    'our_score': 'our_score_point_end',
    'their_score': 'their_score_point_end',
    'home_score': 'home_team_scored',
    'away_score': 'away_team_scored',
}

final_scores = (
    df_scores[score_keep_cols]
    .rename(columns=score_rename_cols)
    .sort_values('game_point_id')
)

### FINAL DF

In [19]:
# Attach the score of each point to its line pairings. The `_merge` indicator
# column records whether a row matched on both sides or only one.
df_final = final_lines.merge(
    final_scores,
    how='outer',
    on='game_point_id',
    indicator=True,
)

df_final['_merge'].value_counts()

both          527
left_only      41
right_only      0
Name: _merge, dtype: int64

In [20]:
# List the columns of `df_final` after the merge.
df_final.columns

Index(['date', 'game_id', 'game_point_id', 'line_id_a', 'home_team',
       'away_team', 'team_id_a', 'current_quarter', 'line_set_type_a',
       'o_point_a', 'd_point', 'point_id', 'our_score_point_start',
       'their_score_point_start', 'is_home_a', 'is_away_a', 'line_players_a',
       'is_last_line_a', 'line_number_in_point', 'line_id_b', 'team_id_b',
       'line_players_b', 'line_set_type_b', 'is_last_line_b', 'time_of_score',
       'score_type', 'o_point', 'is_home', 'is_away', 'our_score_point_end',
       'their_score_point_end', 'scoring_team', 'home_team_scored',
       'away_team_scored', '_merge'],
      dtype='object')

In [21]:
# Rows whose side-"a" line set is an O line, shown next to `o_point_a`.
# na=False: the outer merges can leave `line_set_type_a` missing; without it
# str.contains yields NaN for those rows and .loc rejects the mask.
is_o_line_a = df_final['line_set_type_a'].str.contains("O_LINE", na=False)
df_final.loc[is_o_line_a, ['game_point_id', 'home_team', 'away_team', 'team_id_a', 'o_point_a']]

Unnamed: 0,game_point_id,home_team,away_team,team_id_a,o_point_a
1,2908002,245,242,245,True
2,2908003,245,242,245,True
5,2908004,245,242,245,False
6,2908004,245,242,245,False
8,2908006,245,242,245,True
...,...,...,...,...,...
557,2893027,242,241,242,True
559,2893029,242,241,242,True
563,2893031,242,241,242,False
564,2893031,242,241,242,False


In [22]:
# Columns selected for the output table, in display order.
cols = [
    # game / point context
    'date', 'game_id', 'home_team', 'away_team', 'game_point_id',
    'current_quarter', 'time_of_score',
    # side "a"
    'team_id_a', 'line_id_a', 'line_players_a', 'line_set_type_a',
    'is_last_line_a', 'is_home_a', 'o_point_a',
    # side "b"
    'team_id_b', 'line_id_b', 'line_players_b', 'line_set_type_b',
    'is_last_line_b',
    # scores
    'our_score_point_start', 'their_score_point_start',
    'our_score_point_end', 'their_score_point_end',
    'home_team_scored', 'away_team_scored',
    'scoring_team', 'score_type',
]

# Final names for the point-start score columns.
final_rename = {
    'our_score_point_start': 'team_a_score_point_start',
    'their_score_point_start': 'team_b_score_point_start',
}

# Columns of df_final that are not selected above, in df_final's order.
selected = set(cols)
leftover = [name for name in df_final.columns if name not in selected]
leftover

['d_point',
 'point_id',
 'is_away_a',
 'line_number_in_point',
 'o_point',
 'is_home',
 'is_away',
 '_merge']

In [23]:
# Did side "a" / side "b" score the point? False when the point has no score
# row (score_type missing).
has_score = df_final['score_type'].notna()
df_final['team_a_scored'] = (df_final['scoring_team'] == df_final['team_id_a']) & has_score
df_final['team_b_scored'] = (df_final['scoring_team'] == df_final['team_id_b']) & has_score

# Swap the four score columns below for the two per-side flags. The list is
# rebuilt rather than edited with remove()/append(), so re-running this cell
# neither raises ValueError nor appends the flags twice.
superseded_cols = {
    'home_team_scored',
    'away_team_scored',
    'our_score_point_end',
    'their_score_point_end',
}
cols = [name for name in cols if name not in superseded_cols]
for flag_col in ('team_a_scored', 'team_b_scored'):
    if flag_col not in cols:
        cols.append(flag_col)
cols

['date',
 'game_id',
 'home_team',
 'away_team',
 'game_point_id',
 'current_quarter',
 'time_of_score',
 'team_id_a',
 'line_id_a',
 'line_players_a',
 'line_set_type_a',
 'is_last_line_a',
 'is_home_a',
 'o_point_a',
 'team_id_b',
 'line_id_b',
 'line_players_b',
 'line_set_type_b',
 'is_last_line_b',
 'our_score_point_start',
 'their_score_point_start',
 'scoring_team',
 'score_type',
 'team_a_scored',
 'team_b_scored']

In [24]:
# Output table: the selected columns of df_final under their final names.
df_output = df_final[cols].rename(columns=final_rename)
df_output.columns

Index(['date', 'game_id', 'home_team', 'away_team', 'game_point_id',
       'current_quarter', 'time_of_score', 'team_id_a', 'line_id_a',
       'line_players_a', 'line_set_type_a', 'is_last_line_a', 'is_home_a',
       'o_point_a', 'team_id_b', 'line_id_b', 'line_players_b',
       'line_set_type_b', 'is_last_line_b', 'team_a_score_point_start',
       'team_b_score_point_start', 'scoring_team', 'score_type',
       'team_a_scored', 'team_b_scored'],
      dtype='object')

In [25]:
# Preview the first rows of the output table.
df_output.head()

Unnamed: 0,date,game_id,home_team,away_team,game_point_id,current_quarter,time_of_score,team_id_a,line_id_a,line_players_a,...,line_id_b,line_players_b,line_set_type_b,is_last_line_b,team_a_score_point_start,team_b_score_point_start,scoring_team,score_type,team_a_scored,team_b_scored
0,2022-06-25,2908,245,242,2908001,1.0,Q1 11:10,245,29080010,"[Clayton Partlow, Cody Coates, David Witte, Ga...",...,29080011,"[Alex Davis, Elijah Long, Eric Taylor, Henry F...",SET_O_LINE,True,0,0,242.0,GOAL,False,True
1,2022-06-25,2908,245,242,2908002,1.0,Q1 08:06,245,29080020,"[Adam Vinson, Billy O'Bryan, Carl Johnson, Jai...",...,29080021,"[Andrew Li, Andrew McKelvey, Charlie McCutcheo...",SET_D_LINE,True,0,1,242.0,GOAL,False,True
2,2022-06-25,2908,245,242,2908003,1.0,Q1 07:17,245,29080030,"[Austin Gaines, Billy O'Bryan, Logan Diehl, Ry...",...,29080031,"[Alex Cloud, Charlie McCutcheon, Dylan Hawkins...",SET_D_LINE,True,0,2,245.0,GOAL,True,False
3,2022-06-25,2908,245,242,2908004,1.0,Q1 05:23,245,29080040,"[Anish Indhupuru, Clayton Partlow, Cody Coates...",...,29080042,"[Alex Davis, Elijah Long, Eric Taylor, Henry F...",SET_O_LINE,False,1,2,245.0,GOAL,True,False
4,2022-06-25,2908,245,242,2908004,1.0,Q1 05:23,245,29080040,"[Anish Indhupuru, Clayton Partlow, Cody Coates...",...,29080043,"[Andrew McKelvey, Charlie McCutcheon, David Ri...",SET_D_LINE_NO_PULL,True,1,2,245.0,GOAL,True,False


In [26]:
# Export is disabled; uncomment the line below to write df_output to CSV.
# df_output.to_csv("Test_Data_220730_1830.csv", index=False)

In [27]:
# Making sure there are only 41 times no one scored
# (41 is the `left_only` count of the lines/scores merge shown above.)
nobody_scored = ~(df_output['team_a_scored'] | df_output['team_b_scored'])
df_output.loc[nobody_scored].shape

(41, 25)

In [28]:
# Every distinct value of `event_type` in the data, to compare against the
# lists in the Constants section.
df['event_type'].unique()

array(['START_OF_GAME', 'SET_D_LINE', 'PULL_INBOUNDS', 'BLOCK',
       'POSSESSION', 'DROP', 'SCORED_ON', 'SET_O_LINE', 'THROWAWAY',
       'GOAL', 'OUR_MIDPOINT_TIMEOUT', 'SET_O_LINE_NO_PULL',
       'PULL_OUT_OF_BOUNDS', 'THROWAWAY_CAUSED', 'O_PENALTY_ON_US',
       'D_PENALTY_ON_US', 'END_OF_Q1', 'D_PENALTY_ON_THEM',
       'O_PENALTY_ON_THEM', 'INJURY_ON_D', 'SET_D_LINE_NO_PULL', 'STALL',
       'HALFTIME', 'THEIR_MIDPOINT_TIMEOUT', 'END_OF_Q3', 'GAME_OVER',
       nan, 'INJURY_ON_O', 'STALL_CAUSED', 'PULL_OUR_OFFSIDES',
       'REF_TIMEOUT_DISCUSSION???', 'CALLAHAN', 'CALLAHAN_THROWN'],
      dtype=object)