# Lines Analysis

## Import

In [None]:
import os

import numpy as np
import pandas as pd

import janitor

## Constants

In [None]:
# Event-type labels, grouped by category.

# Scoring events: the full set, then the two subsets it is made of.
scores = [
    "GOAL",
    "SCORED_ON",
    "CALLAHAN",
    "CALLAHAN_THROWN",
]
we_score = ["GOAL", "CALLAHAN"]
they_score = ["SCORED_ON", "CALLAHAN_THROWN"]

# Possession changes. CALLAHAN_THROWN is listed both here and in they_score;
# CALLAHAN is listed both in takeaways and in we_score.
turnovers = [
    "THROWAWAY",
    "DROP",
    "STALL",
    "CALLAHAN_THROWN",
]
takeaways = [
    "BLOCK",
    "CALLAHAN",
    "STALL_CAUSED",
    "THROWAWAY_CAUSED",
]

pulls = ["PULL_OUT_OF_BOUNDS", "PULL_INBOUNDS"]

pens = [
    "D_PENALTY_ON_THEM",
    "D_PENALTY_ON_US",
    "O_PENALTY_ON_THEM",
    "O_PENALTY_ON_US",
]

quarter_ends = [
    "START_OF_GAME",
    "END_OF_Q1",
    "HALFTIME",
    "END_OF_Q3",
    "GAME_OVER",
]

# Line-set events: the full set, then the O-line and D-line subsets.
line_sets = [
    "SET_D_LINE",
    "SET_O_LINE",
    "SET_D_LINE_NO_PULL",
    "SET_O_LINE_NO_PULL",
]
o_lines = ["SET_O_LINE", "SET_O_LINE_NO_PULL"]
d_lines = ["SET_D_LINE", "SET_D_LINE_NO_PULL"]

timeouts = ["THEIR_MIDPOINT_TIMEOUT", "OUR_MIDPOINT_TIMEOUT"]

other = ["REF_TIMEOUT_DISCUSSION???", "INJURY_ON_O", "INJURY_ON_D"]


## Analysis

### File IO

In [None]:
# Read the three event CSVs. pandas labels a column with a blank header
# "Unnamed: 0"; that column is renamed to event_index in every frame.
df = (
    pd.read_csv("Flyers_Games.csv")
    .rename(columns={"Unnamed: 0": "event_index"})
)
df_home = (
    pd.read_csv("Flyers_Games_home.csv")
    .rename(columns={"Unnamed: 0": "event_index"})
)
df_away = (
    pd.read_csv("Flyers_Games_away.csv")
    .rename(columns={"Unnamed: 0": "event_index"})
)

In [None]:
# Preview the first 35 rows of df.
df.head(35)

### Cleaning

In [None]:
# Flag rows whose team_id matches the game's home team / away team.
df['is_home'] = df['home_team'].eq(df['team_id'])
df['is_away'] = df['away_team'].eq(df['team_id'])

# Single key per point: game_id shifted three decimal digits, plus point_id.
# NOTE(review): this is only unique while point_id stays below 1000 — confirm.
df["game_point_id"] = 1000 * df["game_id"] + df["point_id"]

# Keep the player value on line-set events only; blank it everywhere else.
_is_line_set = df['event_type'].isin(line_sets)
df['player'] = np.where(_is_line_set, df['player'], "")

# 1 on rows where the home (resp. away) team logged a we_score event, else 0.
_we_scored = df['event_type'].isin(we_score)
df['home_score'] = np.where(df['is_home'] & _we_scored, 1, 0)
df['away_score'] = np.where(df['is_away'] & _we_scored, 1, 0)

### LINES

In [None]:
# Keep only the line-set events; copy so the new columns stay off df.
_line_set_rows = df['event_type'].isin(line_sets)
all_lines = df[_line_set_rows].copy()

# Zero-based position of each line-set row within its point. The grouping is
# by game_point_id alone, so rows from every team in the point share one count.
all_lines['line_number_in_point'] = (
    all_lines
    .groupby('game_point_id')['event_counter']
    .cumcount()
)
print(
    "Max Number of Lines in a point:",
    all_lines['line_number_in_point'].unique().max(),
)

# Line id = point id with the in-point position appended as the final digit.
# This stays collision-free only while the maximum printed above is below 10.
all_lines['game_point_line_id'] = (
    10 * all_lines['game_point_id'] + all_lines['line_number_in_point']
)

In [None]:
# Turn each comma-separated player string into a list of trimmed names.
# The final piece of every split is discarded ([:-1]); presumably that is the
# empty string left by a trailing comma — confirm against the data.
all_lines['line_players'] = (
    all_lines['player']
    .str.split(",")
    .map(lambda pieces: [name.strip() for name in pieces[:-1]])
)

In [None]:
# Flag, for every (game_point_id, team_id) pair, the row holding that pair's
# largest game_point_line_id, i.e. the last line set within the group.
last_line_by_point_idx = (
    all_lines
    .groupby(['game_point_id', 'team_id'], sort=False)['game_point_line_id']
    .transform('max')
    == all_lines['game_point_line_id']
)
last_lines = all_lines.loc[last_line_by_point_idx].sort_values('game_point_id')

# Mark the rows whose line id appears among the last lines. A plain membership
# test is used rather than a left merge with indicator=True: the merge would
# duplicate rows of all_lines whenever a line id occurs more than once in
# last_lines, whereas isin never changes the row count.
# reset_index(drop=True) keeps the fresh 0..n-1 index a merge would return.
all_lines = (
    all_lines
    .assign(is_last_line=all_lines['game_point_line_id'].isin(last_lines['game_point_line_id']))
    .reset_index(drop=True)
)

In [None]:
# Split the line-set rows on the is_home flag (True -> home, False -> away).
home_lines = all_lines[all_lines['is_home'].eq(True)].copy()
away_lines = all_lines[all_lines['is_home'].eq(False)].copy()

In [None]:
# Show the column labels of all_lines.
all_lines.columns

In [None]:
# Columns taken from the home-side rows (the "a" side of the join).
keep_cols_a = [
    'date',
    'game_id',
    'game_point_id',
    'game_point_line_id',
    'home_team',
    'away_team',
    'team_id',
    'current_quarter',
    'event_type',
    'o_point',
    'd_point',
    'point_id',
    'our_score',
    'their_score',
    'is_home',
    'is_away',
    'line_players',
    'is_last_line',
    'line_number_in_point',
]

# Columns taken from the away-side rows (the "b" side of the join).
keep_cols_b = [
    'game_point_id',
    'game_point_line_id',
    'team_id',
    'line_players',
    'event_type',
    'is_last_line',
]

# Side-specific names so the two halves do not clash after the join.
rename_cols_a = {
    'team_id': 'team_id_a',
    'is_home': 'is_home_a',
    'is_away': 'is_away_a',
    'game_point_line_id': 'line_id_a',
    'line_players': 'line_players_a',
    'is_last_line': 'is_last_line_a',
    'our_score': 'our_score_point_start',
    'their_score': 'their_score_point_start',
    'event_type': 'line_set_type_a',
    'o_point': 'o_point_a',
}

rename_cols_b = {
    'team_id': 'team_id_b',
    'game_point_line_id': 'line_id_b',
    'line_players': 'line_players_b',
    'is_last_line': 'is_last_line_b',
    'event_type': 'line_set_type_b',
}

_side_a = home_lines[keep_cols_a].rename(columns=rename_cols_a)
_side_b = away_lines[keep_cols_b].rename(columns=rename_cols_b)

# Outer join on the point: every home line of a point is paired with every
# away line of the same point, and points present on one side only are kept.
final_lines = pd.merge(_side_a, _side_b, how='outer', on='game_point_id')

print(final_lines.shape)
final_lines.head()

In [None]:
# Rows whose event_type is one of the quarter_ends markers.
df_quarter_ends = df.loc[df['event_type'].isin(quarter_ends)].copy()

# A bare expression in the middle of a cell is evaluated and thrown away, so
# the shape is printed explicitly; only the trailing head() is auto-displayed.
print(df_quarter_ends.shape)
df_quarter_ends.head()

### POINTS

In [None]:
# Rows whose event_type is in we_score, with team_id copied into a new
# scoring_team column.
df_scores = (
    df[df['event_type'].isin(we_score)]
    .assign(scoring_team=lambda rows: rows['team_id'])
)

# Preview the first ten scoring rows in game_point_id order.
df_scores.sort_values(by='game_point_id').head(10)

In [None]:
# Show the column labels of df_scores.
df_scores.columns

In [None]:
# Columns of df_scores that go into the per-point score table.
score_keep_cols = [
    'game_point_id',
    'time',
    'event_type',
    'o_point',
    'is_home',
    'is_away',
    'our_score',
    'their_score',
    'scoring_team',
    'home_score',
    'away_score',
]

# New names for the score-related columns.
score_rename_cols = {
    'event_type': 'score_type',
    'time': 'time_of_score',
    'our_score': 'our_score_point_end',
    'their_score': 'their_score_point_end',
    'home_score': 'home_team_scored',
    'away_score': 'away_team_scored',
}

final_scores = (
    df_scores[score_keep_cols]
    .rename(columns=score_rename_cols)
    .sort_values(by='game_point_id')
)

### FINAL DF

In [None]:
# Attach the score rows to the line pairings, one point at a time.
# how='outer' keeps points that appear on only one side, and indicator=True
# adds a _merge column recording which side each row came from.
# copy=True was dropped: it is merge()'s default.
df_final = final_lines.merge(
    final_scores,
    on='game_point_id',
    how='outer',
    indicator=True,
)

# Only the last expression of a cell is auto-displayed, so the match counts
# are printed explicitly and the preview is placed last; both are now shown.
print(df_final['_merge'].value_counts())
df_final.head(10)

In [None]:
# Show the column labels of df_final.
df_final.columns

In [None]:
# Rows where the "a" side set an O line.
# df_final comes from outer merges, so line_set_type_a can be missing; a
# .str.contains mask would then hold NaN and .loc would refuse it. isin treats
# missing values as "no match". line_set_type_a only ever holds line_sets
# values, so membership in o_lines selects the same rows as matching "O_LINE".
df_final.loc[
    df_final['line_set_type_a'].isin(o_lines),
    ['game_point_id', 'home_team', 'away_team', 'team_id_a', 'o_point_a'],
]

In [None]:
# Columns to export, in output order.
cols = [
    # game / point identification
    'date', 'game_id', 'home_team', 'away_team', 'game_point_id',
    'current_quarter', 'time_of_score',
    # "a" side line
    'team_id_a', 'line_id_a', 'line_players_a', 'line_set_type_a',
    'is_last_line_a', 'is_home_a', 'o_point_a',
    # "b" side line
    'team_id_b', 'line_id_b', 'line_players_b', 'line_set_type_b',
    'is_last_line_b',
    # score columns
    'our_score_point_start', 'their_score_point_start',
    'our_score_point_end', 'their_score_point_end',
    'home_team_scored', 'away_team_scored',
    'scoring_team', 'score_type',
]

# Columns of df_final that are not exported, in their original order.
leftover = df_final.columns[~df_final.columns.isin(cols)].tolist()
leftover

In [None]:
# Preview the first rows of df_final.
df_final.head()

In [None]:
# Write the selected columns to CSV, leaving the row index out of the file.
df_final[cols].to_csv(
    "Test_Data_220730_1600.csv",
    index=False,
)

In [None]:
# List the distinct event_type values present in df.
df['event_type'].unique()