## Cacluate The amount of times each team has spent tied, leading, trailing, ect 

In [14]:
import pandas as pd

# path to single game file
path = '../TEMP/single_game_scoring.csv'
path = '../TEMP/ss_example_3.csv'

def calculate_game_states_dataframe_ot_tie_corrected_v2(file_path):
    data = pd.read_csv(file_path)

    def period_elapsed_to_seconds(period, time_str):
        minutes, seconds = map(int, time_str.split(':'))
        elapsed_seconds = minutes * 60 + seconds
        
        if '1st' in period:
            return elapsed_seconds
        elif '2nd' in period:
            return 20 * 60 + elapsed_seconds
        elif '3rd' in period:
            return 40 * 60 + elapsed_seconds
        else:  # Overtime
            return 60 * 60 + elapsed_seconds

    data['Total_Seconds'] = data.apply(lambda row: period_elapsed_to_seconds(row['Period'], row['Time']), axis=1)

    teams = list(set(data['Home_Team'].unique().tolist() + data['Away_Team'].unique().tolist()))
    team_states = {team: {'Tied': 0, 'Lead by 1': 0, 'Lead by 2': 0, 'Lead by 3+': 0,
                          'Down by 1': 0, 'Down by 2': 0, 'Down by 3+': 0} for team in teams}
    
    current_score = {teams[0]: 0, teams[1]: 0}
    last_timestamp = 0
    game_end = 3 * 20 * 60  # Default game end, will adjust if OT is detected
    overtime_played = False

    first_goal_time = data.sort_values(by='Total_Seconds').iloc[0]['Total_Seconds'] if not data.empty else 0
    team_states[teams[0]]['Tied'] = first_goal_time
    team_states[teams[1]]['Tied'] = first_goal_time
    last_timestamp = first_goal_time

    for index, row in data.sort_values(by='Total_Seconds').iterrows():
        current_score[row['Team']] += 1
        lead_diff = current_score[teams[0]] - current_score[teams[1]]
        time_elapsed = row['Total_Seconds'] - last_timestamp

        if 'OT' in row['Period']:
            overtime_played = True
            game_end = row['Total_Seconds']  # Adjust game end to when OT goal is scored

        if lead_diff == 0:
            for team in teams:
                team_states[team]['Tied'] += time_elapsed
        else:
            leading_team, trailing_team = (teams[0], teams[1]) if lead_diff > 0 else (teams[1], teams[0])
            lead_abs = abs(lead_diff)
            update_states = lambda lead: (f'Lead by {min(lead, 3)}+' if lead >= 3 else f'Lead by {lead}',
                                          f'Down by {min(lead, 3)}+' if lead >= 3 else f'Down by {lead}')
            state_lead, state_trail = update_states(lead_abs)
            team_states[leading_team][state_lead] += time_elapsed
            team_states[trailing_team][state_trail] += time_elapsed
        
        last_timestamp = row['Total_Seconds']

    # Determine if game ended in a tie without explicit OT goal and adjust game_end correctly
    lead_diff_final = current_score[teams[0]] - current_score[teams[1]]
    if lead_diff_final == 0 and not overtime_played:
        # Adjust for games that end in a tie without 'OT' period goals
        overtime_played = True
        game_end = 65 * 60  # Adjust game end to include 5 minutes of OT

    final_time_elapsed = game_end - last_timestamp
    if lead_diff_final == 0 and not data.empty:
        for team in teams:
            team_states[team]['Tied'] += final_time_elapsed

    team_states_df = pd.DataFrame.from_dict(team_states, orient='index').reset_index().rename(columns={'index': 'Team'})
    
    return team_states_df

# Re-generate the DataFrame with the corrected function for the new file
game_states_df_ot_tie_corrected_v2 = calculate_game_states_dataframe_ot_tie_corrected_v2(path)
game_states_df_ot_tie_corrected_v2


Unnamed: 0,Team,Tied,Lead by 1,Lead by 2,Lead by 3+,Down by 1,Down by 2,Down by 3+
0,St. Thomas,2040,913,0,0,718,0,0
1,St. Cloud State,2040,718,0,0,913,0,0


In [17]:
season_file_path = '../TEMP/scoring_summary_example.csv'

from tqdm import tqdm  # For progress tracking

# Function to process each game in the dataset and aggregate game states
def process_season_game_states(file_path):
    season_data = pd.read_csv(file_path)
    unique_games = season_data['Game_ID'].unique()
    
    # Initialize an empty DataFrame for aggregating results
    season_aggregate_states = pd.DataFrame()
    
    for game_id in tqdm(unique_games, desc="Processing Games"):
        game_data = season_data[season_data['Game_ID'] == game_id]
        
        # Save individual game data to a temporary CSV to use with the existing function
        temp_game_path = '../TEMP/temp_game.csv'
        game_data.to_csv(temp_game_path, index=False)
        
        # Calculate game states for the current game
        game_states_df = calculate_game_states_dataframe_ot_tie_corrected_v2(temp_game_path)
        
        # Aggregate the results
        if season_aggregate_states.empty:
            season_aggregate_states = game_states_df
        else:
            season_aggregate_states = season_aggregate_states.merge(game_states_df, on='Team', how='outer')
            for state in ['Tied', 'Lead by 1', 'Lead by 2', 'Lead by 3+', 'Down by 1', 'Down by 2', 'Down by 3+']:
                season_aggregate_states[state] = season_aggregate_states[f'{state}_x'].fillna(0) + season_aggregate_states[f'{state}_y'].fillna(0)
                season_aggregate_states.drop([f'{state}_x', f'{state}_y'], axis=1, inplace=True)
    
    return season_aggregate_states

# Process the season dataset
season_aggregate_states_df = process_season_game_states(season_file_path)

season_aggregate_states_df




Processing Games: 100%|██████████| 949/949 [00:14<00:00, 64.71it/s]


Unnamed: 0,Team,Tied,Lead by 1,Lead by 2,Lead by 3+,Down by 1,Down by 2,Down by 3+
0,Michigan State,37306.0,11575.0,16762.0,15615.0,6522.0,7398.0,9076.0
1,Lake Superior,42732.0,5041.0,9740.0,10163.0,12381.0,12215.0,8420.0
2,Clarkson,43168.0,13894.0,8768.0,7265.0,10748.0,9360.0,9947.0
3,Notre Dame,38696.0,12298.0,9300.0,14070.0,10975.0,9200.0,11125.0
4,St. Lawrence,44779.0,10003.0,6121.0,1764.0,15015.0,10175.0,12540.0
...,...,...,...,...,...,...,...,...
59,Harvard,37949.0,7778.0,2807.0,1674.0,10991.0,11827.0,8045.0
60,Brown,41847.0,10080.0,6267.0,1444.0,5585.0,9305.0,12833.0
61,Yale,40284.0,4130.0,6330.0,5472.0,9553.0,9754.0,7444.0
62,Cornell,39702.0,8719.0,13281.0,13734.0,5317.0,2142.0,1287.0


In [21]:
# Sort by Down by 3+ time to find the teams that were trailing by 3+ goals the most
season_aggregate_states_df.sort_values(by='Down by 3+', ascending=True).head(10)


Unnamed: 0,Team,Tied,Lead by 1,Lead by 2,Lead by 3+,Down by 1,Down by 2,Down by 3+
14,Quinnipiac,44010.0,12005.0,13185.0,22966.0,9036.0,277.0,0.0
26,Boston University,38900.0,14559.0,15283.0,17556.0,8227.0,4381.0,209.0
41,Western Michigan,34793.0,11557.0,10982.0,16218.0,14565.0,6006.0,337.0
15,Boston College,32695.0,15381.0,17377.0,17825.0,9288.0,4069.0,1059.0
31,Wisconsin,45887.0,10226.0,15350.0,17234.0,9372.0,5100.0,1097.0
62,Cornell,39702.0,8719.0,13281.0,13734.0,5317.0,2142.0,1287.0
56,Arizona State,56470.0,12342.0,9784.0,9025.0,12103.0,7272.0,1805.0
7,St. Cloud State,48115.0,9103.0,13268.0,6496.0,7255.0,8768.0,2034.0
42,Maine,33543.0,15194.0,12445.0,10574.0,11815.0,5221.0,2104.0
40,Holy Cross,50350.0,12131.0,16086.0,11219.0,11374.0,8069.0,2349.0
