In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neural_network import MLPClassifier

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
df = pd.read_csv('NFL_pbp_2009-2019.csv', low_memory=False)

threshold = 100000
df['field_goal_result'].fillna('none', inplace=True)
dfV2 = df.loc[:, df.isnull().sum() < threshold]
missing_values = dfV2.isnull().sum()

statistical_cols = ['play_id', 'game_id', 'home_team', 'away_team', 'posteam', 
                    'defteam', 'side_of_field', 'yardline_100', 'half_seconds_remaining', 
                    'game_seconds_remaining', 'game_half', 'drive', 'qtr', 'down', 'goal_to_go', 'time', 
                    'yrdln', 'ydstogo', 'ydsnet', 'desc', 'play_type', 'yards_gained', 'home_timeouts_remaining', 
                    'away_timeouts_remaining', 'total_home_score',  'total_away_score', 'score_differential', 'home_wp', 'away_wp', 'ep']

game_dynamics_cols = [
    'punt_blocked', 'first_down_rush', 'first_down_pass', 'first_down_penalty', 'third_down_converted',
    'third_down_failed', 'fourth_down_converted', 'fourth_down_failed', 'incomplete_pass', 'interception',
    'fumble_forced', 'fumble_not_forced', 'fumble_out_of_bounds', 'solo_tackle', 'safety', 'penalty',
    'tackled_for_loss', 'fumble_lost', 'own_kickoff_recovery', 'own_kickoff_recovery_td', 'qb_hit',
    'rush_attempt', 'pass_attempt', 'sack', 'touchdown', 'pass_touchdown', 'rush_touchdown', 'field_goal_result',
    'return_touchdown', 'extra_point_attempt', 'two_point_attempt', 'field_goal_attempt', 'kickoff_attempt',
    'punt_attempt', 'fumble', 'complete_pass', 'shotgun', 'no_huddle', 'punt_inside_twenty', 'kickoff_inside_twenty']

columns_to_keep = statistical_cols + game_dynamics_cols
dfV3 = dfV2[columns_to_keep]

dfV4 = dfV3.drop(['play_id', 'game_seconds_remaining', 'fumble_forced'], axis=1)
dfV4 = dfV4.dropna(subset=['down', 'defteam', 'posteam'])
dfV4 = dfV4.reset_index(drop=True)

# Indicators for if within last 2 minutes of the half and the whole game
dfV4['close_to_end_of_half'] = (dfV4['half_seconds_remaining'] <= 120).astype(int)
dfV4['close_to_end_of_game'] = ((dfV4['half_seconds_remaining'] <= 120) & (dfV4['game_half'] == 'Half2')).astype(int)

# Indicator for if the touchdown was for the away or home team
dfV4['home_td'] = ((dfV4['touchdown'] == 1) & (dfV4['posteam'] != dfV4['away_team'])).astype(int)
dfV4['away_td'] = ((dfV4['touchdown'] == 1) & (dfV4['posteam'] != dfV4['home_team'])).astype(int)

# Trackers for the difference in both teams' win probability after each play
dfV4['home_wp_change'] = dfV4['home_wp'].diff().fillna(0)
dfV4['away_wp_change'] = dfV4['away_wp'].diff().fillna(0)

# Indicator for turnover
dfV4['turnover'] = (
    (dfV4['safety'] == 1) |
    (dfV4['interception'] == 1) |
    (dfV4['fumble_lost'] == 1) |
    ((dfV4['fourth_down_converted'] == 0) & (dfV4['down'] == 4))
).astype(int)

# Drive time - Added drive ended indicator to help - Manually resets after end of game, half, and change of possession
dfV4['drive_ended'] = (
    (dfV4['posteam'] != dfV4['posteam'].shift(1)) |  
    (dfV4['game_id'] != dfV4['game_id'].shift(1)) |  
    dfV4['desc'].str.contains('END GAME', na=False) |  
    dfV4['desc'].str.contains('END QUARTER', na=False)  
).astype(int)
dfV4['drive'] = (
    (dfV4['posteam'].ne(dfV4['posteam'].shift())) |
    (dfV4['game_id'].ne(dfV4['game_id'].shift()))
).cumsum()
dfV4['drive_time_seconds'] = (
    dfV4.groupby(['game_id', 'drive'])['half_seconds_remaining']
    .transform('first') - dfV4['half_seconds_remaining']
)
dfV4['drive_time_seconds'] = dfV4.apply(
    lambda row: 0 if row['drive_ended'] == 1 else row['drive_time_seconds'], axis=1
)
dfV4['drive_time_seconds'] = dfV4.groupby(['game_id', 'drive'])['drive_time_seconds'].cumsum()

# Indicator for long touchdowns
dfV4['long_td'] = ((dfV4['touchdown'] == 1) & (dfV4['yards_gained'] >= 50)).astype(int)

# Trackers for score differentials and lead changes
dfV4['home_score_differential'] = dfV4['total_home_score'] - dfV4['total_away_score']
dfV4['away_score_differential'] = -dfV4['home_score_differential']
dfV4['lead_change'] = ((dfV4['home_score_differential'].diff() < 0) &
                       (dfV4['home_score_differential'].shift() * dfV4['home_score_differential'] < 0)).astype(int)

# Combining first down indicators
dfV4['first_down'] = ((dfV4['first_down_pass'] == 1) | (dfV4['first_down_rush'] == 1) | (dfV4['first_down_penalty'] == 1)).astype(int)

# Indicators for scoring drives - Removing
dfV4['home_scoring_drive'] = (
    (dfV4['home_td'] == 1) 
).astype(int)
dfV4['away_scoring_drive'] = (
    (dfV4['away_td'] == 1) 
).astype(int)

# Helper for consecutive scoring events - Remove Later!!!!!!!!!!!!!!
dfV4['home_scoring_events'] = (
    (dfV4['posteam'] != dfV4['away_team']) & 
    ((dfV4['home_td'] == 1) | (dfV4['field_goal_result'] == 'made'))
).astype(int)
dfV4['away_scoring_events'] = (
    (dfV4['posteam'] != dfV4['home_team']) & 
    ((dfV4['away_td'] == 1) | (dfV4['field_goal_result'] == 'made'))
).astype(int)

# Consecutive Scoring Events + Helper function 
def calc_consecutive_cumsum_with_game_reset(series, reset_series, game_ids):
    cumsum = 0
    consecutive = []
    prev_game_id = None  
    
    for i in range(len(series)):
        if game_ids[i] != prev_game_id:
            cumsum = 0 
        if reset_series[i] == 1:  
            cumsum = 0
        if series[i] == 1:  
            cumsum += 1
        consecutive.append(cumsum)
        prev_game_id = game_ids[i]  
    return consecutive

dfV4['home_csum_scores'] = calc_consecutive_cumsum_with_game_reset(
    dfV4['home_scoring_events'], dfV4['away_scoring_events'], dfV4['game_id']
)
dfV4['away_csum_scores'] = calc_consecutive_cumsum_with_game_reset(
    dfV4['away_scoring_events'], dfV4['home_scoring_events'], dfV4['game_id']
)

#Consecutive defensive stops
dfV4['home_def_stop'] = (
    (dfV4['posteam'] != dfV4['home_team']) &  ((dfV4['punt_attempt'] == 1) |  (dfV4['turnover'] == 1)) & 
    ~dfV4['field_goal_result'].isin(['made'])  
).astype(int)
dfV4['away_def_stop'] = (
    (dfV4['posteam'] != dfV4['away_team']) & ((dfV4['punt_attempt'] == 1) |  (dfV4['turnover'] == 1)) & 
    ~dfV4['field_goal_result'].isin(['made'])
).astype(int)

def calc_consecutive_defensive_stops_with_game_reset(series, reset_series, game_ids):
    cumsum = 0
    consecutive = []
    prev_game_id = None  
    for i in range(len(series)):
        if game_ids[i] != prev_game_id:
            cumsum = 0
        if reset_series[i] == 1:
            cumsum = 0
        if series[i] == 1:
            cumsum += 1
        consecutive.append(cumsum)
        prev_game_id = game_ids[i]  
    return consecutive

dfV4['home_csum_def_stops'] = calc_consecutive_defensive_stops_with_game_reset(
    dfV4['home_def_stop'], dfV4['away_scoring_events'], dfV4['game_id']
)
dfV4['away_csum_def_stops'] = calc_consecutive_defensive_stops_with_game_reset(
    dfV4['away_def_stop'], dfV4['home_scoring_events'], dfV4['game_id']
)

# Home/Away Drive Numbers
dfV4['away_drive_number'] = (
    dfV4.loc[dfV4['posteam'] != dfV4['home_team']]
    .groupby('game_id')['drive_ended'].cumsum()
)
dfV4['home_drive_number'] = (
    dfV4.loc[dfV4['posteam'] == dfV4['home_team']]
    .groupby('game_id')['drive_ended'].cumsum()
)

# Offense needs to score
dfV4['off_need_score'] = (
    (dfV4['down'].isin([3, 4])) & 
    (abs(dfV4['score_differential']) <= 8) & 
    (dfV4['qtr'] >= 4) &
    (dfV4['first_down'] == 1)
).astype(int)

# Defense Needs a Stop
dfV4['def_need_stop'] = (
    (dfV4['down'].isin([3, 4])) & 
    (abs(dfV4['score_differential']) <= 8) & 
    (dfV4['qtr'] >= 4) &
    (dfV4['turnover'] == 1)
).astype(int)

# Drought Ending score
dfV4['drought_end_play'] = (
    ((dfV4['away_csum_scores'].shift(1) >= 2) & (dfV4['away_csum_scores'] == 0) & (dfV4['home_scoring_events'] == 1)) |
    ((dfV4['home_csum_scores'].shift(1) >= 2) & (dfV4['home_csum_scores'] == 0) & (dfV4['away_scoring_events'] == 1))
).astype(int)

# Defensive touchdown
dfV4['def_td'] = (
    ((dfV4['fumble'] == 1) & (dfV4['return_touchdown'] == 1)) |
    ((dfV4['interception'] == 1) & (dfV4['return_touchdown'] == 1))
).astype(int)

# Defensive touchdown
dfV4['off_td'] = (
    (dfV4['pass_touchdown'] == 1) | (dfV4['rush_touchdown'] == 1)
).astype(int)

# Special Teams touchdown
dfV4['st_return_td'] = (
    ((dfV4['kickoff_attempt'] == 1) & (dfV4['return_touchdown'] == 1)) | 
    ((dfV4['punt_attempt'] == 1) & (dfV4['return_touchdown'] == 1))  
).astype(int)

# Big special teams play...punt blocked, field goal blocked, return_touchdown, kick recovery, pin team near endzone
dfV4['big_st_play'] = (
    (dfV4['punt_blocked'] == 1) | 
    (dfV4['field_goal_result'] == 'blocked') | 
    (dfV4['own_kickoff_recovery'] == 1) | 
    (dfV4['st_return_td'] == 1) | 
    (dfV4['kickoff_inside_twenty'] == 1) | 
    (dfV4['punt_inside_twenty'] == 1)
).astype(int)

# Scoring type differentiatior, touchdowns should hold more weight than a field goal, other types may hold more weight also
dfV4['scoring_type'] = np.select(
    [
        dfV4['field_goal_result'] == 'made',
        dfV4['off_td'] == 1,
        dfV4['def_td'] == 1,
        dfV4['st_return_td'] == 1,
    ],
    ['fg', 'off_td', 'def_td', 'st_td'],
    default='none'
)

# Indicator for big offensive play
dfV4['big_offensive_play'] = (
        (dfV4['yards_gained'] >= 40) |
        (dfV4['long_td'] == 1) |
        ((dfV4['off_need_score'] == 1) & (dfV4['off_td'] == 1))
).astype(int)

# Indicator for big defensive play
dfV4['big_defensive_play'] = (
    (dfV4['sack'] == 1) |
    (dfV4['tackled_for_loss'] == 1) |
    ((dfV4['def_need_stop'] == 1) & ((dfV4['def_td'] == 'def_td')) | dfV4['turnover'] == 1) |
    (dfV4['scoring_type'] == 'def_td')
).astype(int)

#Quick Score and Quick Stop #### Needs fixing, only want 1 on last play of drive when they score or get stop, right now 1 for whole drive
dfV4['total_drive_time'] = dfV4.groupby('drive')['drive_time_seconds'].transform('last') 
dfV4['cumulative_drive_time'] = dfV4.groupby(['game_id', 'drive'])['drive_time_seconds'].cumsum()
dfV4['long_drive_triggered'] = (
    dfV4.groupby(['game_id', 'drive'])['cumulative_drive_time']
    .transform(lambda x: (x > 360).idxmax() == x.index)  # Flags the first row that exceeds 360s
).astype(int)

dfV4['quick_score'] = (
    (dfV4['drive_time_seconds'] < 180) &
    ((dfV4['touchdown'] == 1) | (dfV4['field_goal_result'] == 'made')) &
    (dfV4.groupby('drive')['drive_time_seconds'].transform('last') == dfV4['drive_time_seconds'])
).astype(int)
dfV4['quick_stop'] = (
    (dfV4['total_drive_time'] < 180) & 
    (dfV4['scoring_type'] == 'none') &
    (dfV4.groupby('drive')['drive_time_seconds'].transform('last') == dfV4['drive_time_seconds'])
).astype(int)

# Consecutive first downs
dfV4['home_csum_first_downs'] = 0
dfV4['away_csum_first_downs'] = 0
dfV4['home_csum_first_downs'] = (
    dfV4.groupby(['home_team', 'away_team', 'home_drive_number'])['first_down']
    .cumsum()
    .where(dfV4['posteam'] != 'away_team', 0)
)
dfV4['away_csum_first_downs'] = (
    dfV4.groupby(['home_team', 'away_team', 'away_drive_number'])['first_down']
    .cumsum()
    .where(dfV4['posteam'] != 'home_team', 0)
)


columns_to_remove = [
    'ep', 'punt_blocked', 'first_down_rush', 'first_down_pass', 
    'third_down_converted', 'third_down_failed', 'fourth_down_converted', 
    'fourth_down_failed', 'incomplete_pass', 'interception', 'fumble_not_forced', 
    'fumble_out_of_bounds', 'solo_tackle', 'safety', 'penalty', 'tackled_for_loss', 
    'fumble_lost', 'own_kickoff_recovery', 'own_kickoff_recovery_td', 'qb_hit', 
    'rush_attempt', 'pass_attempt', 'sack', 'extra_point_attempt', 'two_point_attempt', 
    'field_goal_attempt', 'kickoff_attempt', 'punt_attempt', 'fumble', 'pass_touchdown', 'rush_touchdown'
    'complete_pass', 'shotgun', 'home_scoring_drive', 'away_scoring_drive','home_scoring_events','away_scoring_events',
    'rush_touchdown', 'field_goal_result', 'return_touchdown', 'complete_pass', 'no_huddle', 'punt_inside_twenty', 'kickoff_inside_twenty',
    'time', 'yrdln', 'ydstogo', 'ydsnet', 'desc', 'side_of_field', 'yardline_100', 'desc', 'drive', 'game_half', 'drive_ended', 'drive_time_seconds',
    'touchdown', 'score_differential', 'total_drive_time'
]

dfV5 = dfV4.drop(columns=columns_to_remove, errors='ignore')

dynamics = [
    ('big_offensive_play', dfV5['big_offensive_play'] == 1),
    ('big_defensive_play', dfV5['big_defensive_play'] == 1),
    ('off_td', dfV5['off_td'] == 1),
    ('def_td', dfV5['def_td'] == 1),
    ('big_st_play', dfV5['big_st_play'] == 1),
    ('st_return_td', dfV5['st_return_td'] == 1),
    ('off_need_score', dfV5['off_need_score'] == 1),
    ('def_need_stop', dfV5['def_need_stop'] == 1),
    ('drought_end_play', dfV5['drought_end_play'] == 1),
    ('home_csum_scores', dfV5['home_csum_scores'] >= 2),
    ('away_csum_scores', dfV5['away_csum_scores'] >= 2),
    ('home_csum_def_stops', dfV5['home_csum_def_stops'] >= 2),
    ('away_csum_def_stops', dfV5['away_csum_def_stops'] >= 2),
    ('home_csum_first_downs', dfV5['home_csum_first_downs'] >= 2),
    ('away_csum_first_downs', dfV5['away_csum_first_downs'] >= 2),
    ('long_td', dfV5['long_td'] == 1),
    ('quick_score', dfV5['quick_score'] == 1),
    ('quick_stop', dfV5['quick_stop'] == 1),
    ('home_score_differential', dfV5['home_score_differential'] == 1),
    ('away_score_differential', dfV5['away_score_differential'] == 1),
]


def_wp_change = {
    "big_defensive_play": 0.029471,
    "def_td": 0.016322,
    "big_st_play": 0.034637,
    "st_return_td": 0.040082,
    "def_need_stop": 0.042132,
    "quick_stop": 0.029971
}

off_wp_change = {
    "big_offensive_play": 0.038602,
    "off_td": 0.028432,
    "off_need_score":  0.035536, 
    "drought_end_play": 0.028891,
    "long_td": 0.033325,
    "quick_score": 0.026664
}

streaks_multipliers = {
    "home_csum_scores": 1.118986,
    "away_csum_scores": 1.118986,
    "home_csum_first_downs": 1.1112094,
    "away_csum_first_downs": 1.1112094,
    "home_csum_def_stops": 1.111932,
    "away_csum_def_stops": 1.111932,
}

score_game_multipliers = {
    "tied_or_1_score": 1.06634844,
    "2_score": 1.035777727,
    "3_or_more_score": 1.0274060
}

qtr_multipliers = {
    "first_and_fourth": 1.5522285,
    "second_and_third": 1.3201836
}

home_away_multipliers = {
    "home": 1.07949869,  
    "away": 1.06027507 
}

boost_case_multipliers = {
    "home_and_4th": 1.122276683,  
    "away_and_1st": 1.16675933,  
    "none": 1.0           
}

decay_multipliers = {
    "opponent_scores": 0.68004571,
    "turnover": 0.21742678,
    "opponent_ends_drought": 0.18212307,
    "long_possession":  0.1534018395,
    "none": 0.0  
}



def calculate_multipliers(row, index, category, is_offensive):
    if abs(row['home_score_differential']) <= 8:
        S = score_game_multipliers["tied_or_1_score"]
    elif 9 <= abs(row['home_score_differential']) <= 16:
        S = score_game_multipliers["2_score"]
    else:
        S = score_game_multipliers["3_or_more_score"]

    team = row['posteam'] if is_offensive else row['defteam']
    HA = home_away_multipliers.get(team, 1.0)

    if row['qtr'] == 1 or row['qtr'] == 4:
        Q = qtr_multipliers["first_and_fourth"]
    else:
        Q = qtr_multipliers["second_and_third"]

    if is_offensive:
        if row['posteam'] == 'home' and row['qtr'] == 4:
            B = boost_case_multipliers["home_and_4th"]
        elif row['posteam'] == 'away' and row['qtr'] == 1:
            B = boost_case_multipliers["away_and_1st"]
        else:
            B = 1.0
    else:
        if team == 'home' and row['qtr'] == 4:
            B = boost_case_multipliers["home_and_4th"]
        elif team == 'away' and row['qtr'] == 1:
            B = boost_case_multipliers["away_and_1st"]
        else:
            B = 1.0

    CS = 1.0
   
    if is_offensive:
        if row['posteam'] == row['home_team']:
            if row['home_csum_scores'] >= 2:
                if row['home_csum_scores'] > dfV5.at[index - 1, 'home_csum_scores']: 
                    CS = streaks_multipliers['home_csum_scores']
            elif row['home_csum_first_downs'] >= 4:
                if row['home_csum_first_downs'] > dfV5.at[index - 1, 'home_csum_first_downs']: 
                    CS = streaks_multipliers['home_csum_first_downs']
            else:
                CS = 1.0
        else:
            if row['away_csum_scores'] >= 2:
                if row['away_csum_scores'] > dfV5.at[index - 1, 'away_csum_scores']: 
                    CS = streaks_multipliers['away_csum_scores']
            elif row['away_csum_first_downs'] >= 4:
                if row['away_csum_first_downs'] >  dfV5.at[index - 1, 'away_csum_first_downs']: 
                    CS = streaks_multipliers['away_csum_first_downs']
            else:
                CS = 1.0
    else:
        if row['defteam'] == row['home_team']:
            if row['home_csum_def_stops'] >= 2:
                if row['home_csum_def_stops'] > dfV5.at[index - 1, 'home_csum_def_stops']: 
                    CS = streaks_multipliers['home_csum_def_stops']
            else:
                CS = 1.0
        else:
            if row['away_csum_def_stops'] >= 2:
                if row['away_csum_def_stops'] > dfV5.at[index - 1, 'away_csum_def_stops']:
                    CS = streaks_multipliers['away_csum_def_stops']
            else:
                CS = 1.0

    return S, HA, B, CS, Q



def calculate_momentum_gain(wp_change_value, S, HA, CS, B, Q):
    return wp_change_value * (S * HA * CS * B * Q) * 1000



def calculate_decay(row, category, momentum_gain):
    if category in ['off_td', 'long_td', 'def_td', 'st_return_td']:
        D = decay_multipliers['opponent_scores']
    elif row['turnover'] == 1:
        D = decay_multipliers['turnover']
    elif row['drought_end_play'] == 1:
        D = decay_multipliers["opponent_ends_drought"]
    elif row['long_drive_triggered'] == 1:  
        D = decay_multipliers['long_possession']
    else:
        D = decay_multipliers['none']

    return momentum_gain * D



def update_momentum_scores(dfV5):
    dfV5['Home_Momentum_Score'] = 500
    dfV5['Away_Momentum_Score'] = 500

    dfV5['game_id_diff'] = dfV5['game_id'] != dfV5['game_id'].shift(1)

    for index, row in dfV5.iterrows():
        if index == 0:  
            continue

        if row['game_id_diff']:
            dfV5.at[index, 'Home_Momentum_Score'] = 500
            dfV5.at[index, 'Away_Momentum_Score'] = 500
            continue

        home_momentum_gain = 0
        away_momentum_gain = 0

        for category, wp_change_value in off_wp_change.items():
            if row[category] == 1:
                S, HA, B, CS, Q = calculate_multipliers(row, index, category, True)
                momentum_gain = calculate_momentum_gain(wp_change_value, S, HA, CS, B, Q)
                momentum_loss = calculate_decay(row, category, momentum_gain)

                if row['posteam'] == row['home_team']:
                    home_momentum_gain += momentum_gain
                    away_momentum_gain -= momentum_loss
                else:
                    away_momentum_gain += momentum_gain
                    home_momentum_gain -= momentum_loss

        for category, wp_change_value in def_wp_change.items():
            if row[category] == 1:
                S, HA, B, CS, Q = calculate_multipliers(row, index, category, False)
                momentum_gain = calculate_momentum_gain(wp_change_value, S, HA, CS, B, Q)
                momentum_loss = calculate_decay(row, category, momentum_gain)

                if row['defteam'] == row['home_team']:
                    home_momentum_gain += momentum_gain
                    away_momentum_gain -= momentum_loss
                else:
                    away_momentum_gain += momentum_gain
                    home_momentum_gain -= momentum_loss

        dfV5.at[index, 'Home_Momentum_Score'] = dfV5.at[index - 1, 'Home_Momentum_Score'] + home_momentum_gain
        dfV5.at[index, 'Away_Momentum_Score'] = dfV5.at[index - 1, 'Away_Momentum_Score'] + away_momentum_gain

update_momentum_scores(dfV5)

dfV5['Game_Momentum_Diff'] = 0

historical_max_diff_mean = dfV5.groupby('game_id')['Game_Momentum_Diff'].max().mean()
historical_max_diff_std = dfV5.groupby('game_id')['Game_Momentum_Diff'].max().std()

base_threshold = historical_max_diff_mean + 0.8 * historical_max_diff_std #.7

dfV5['Game_Momentum_Diff'] = abs(dfV5['Home_Momentum_Score'] - dfV5['Away_Momentum_Score'])
dfV5['Dynamic_Threshold'] = None
dfV5['Momentum_Holding_Team'] = None

def detect_momentum_shifts(game_data):
    momentum_holding_team = None
    last_shift_home_momentum = game_data.iloc[0]['Home_Momentum_Score']
    last_shift_away_momentum = game_data.iloc[0]['Away_Momentum_Score']
    max_momentum_diff_so_far = 0

    for i in range(1, len(game_data)): 
        if i < 10:  # Ignore shifts for the first 10 plays
            continue
        home_momentum_diff = game_data.iloc[i]['Home_Momentum_Score'] - last_shift_home_momentum
        away_momentum_diff = game_data.iloc[i]['Away_Momentum_Score'] - last_shift_away_momentum        

        current_momentum_diff = abs(game_data.iloc[i]['Home_Momentum_Score'] - game_data.iloc[i]['Away_Momentum_Score'])
        max_momentum_diff_so_far = max(max_momentum_diff_so_far, current_momentum_diff)
        game_threshold = max(base_threshold, 0.8 * max_momentum_diff_so_far) #.7
        game_data.iloc[i, game_data.columns.get_loc('Dynamic_Threshold')] = game_threshold

        home_momentum_shift = False
        away_momentum_shift = False

        if home_momentum_diff >= game_threshold and away_momentum_diff < game_threshold * 0.8: #.5
            home_momentum_shift = True
        elif away_momentum_diff >= game_threshold and home_momentum_diff < game_threshold * 0.8: #.5
            away_momentum_shift = True

        if home_momentum_shift:
            momentum_holding_team = "Home"
            last_shift_home_momentum = game_data.iloc[i]['Home_Momentum_Score']
            last_shift_away_momentum = game_data.iloc[i]['Away_Momentum_Score']
        elif away_momentum_shift:
            momentum_holding_team = "Away"
            last_shift_home_momentum = game_data.iloc[i]['Home_Momentum_Score']
            last_shift_away_momentum = game_data.iloc[i]['Away_Momentum_Score']

        game_data.iloc[i, game_data.columns.get_loc('Momentum_Holding_Team')] = momentum_holding_team

    return game_data

dfV5 = dfV5.groupby('game_id', group_keys=False).apply(detect_momentum_shifts)

dfV5['Momentum_Shift_Occurred'] = dfV5.groupby('game_id')['Momentum_Holding_Team'].transform(
    lambda x: x.ne(x.shift()) & x.notna()
)

columns_to_fill = ['home_drive_number', 'away_drive_number', 'home_csum_first_downs', 
                    'away_csum_first_downs', 'Dynamic_Threshold', 'yards_gained']
dfV5[columns_to_fill] = dfV5[columns_to_fill].fillna(0)

features = dfV5.drop(['Momentum_Shift_Occurred'], axis=1)  
numeric_df = dfV5.select_dtypes(include=[np.number])
scaler = StandardScaler()
scaled_data = scaler.fit_transform(numeric_df)

df = dfV5.copy()

excluded_cols = ['game_id', 'Momentum_Shift_Occurred', 'Momentum_Holding_Team']
target_col = 'Momentum_Shift_Occurred'
feature_df = df.drop(columns=excluded_cols, errors='ignore')

numeric_cols = feature_df.select_dtypes(include=['number']).columns
categorical_cols = feature_df.select_dtypes(exclude=['number']).columns
if len(categorical_cols) > 0:
    feature_df = pd.get_dummies(feature_df, columns=categorical_cols, drop_first=True)

unique_games = df['game_id'].unique()
unique_games_sorted = sorted(unique_games)
train_size = int(0.8 * len(unique_games_sorted))
train_games = unique_games_sorted[:train_size]
test_games = unique_games_sorted[train_size:]

feature_df['game_id'] = df['game_id']
train_features = feature_df[feature_df['game_id'].isin(train_games)].drop(columns='game_id')
test_features = feature_df[feature_df['game_id'].isin(test_games)].drop(columns='game_id')
X_train = train_features.values
X_test = test_features.values
y_train = df.loc[df['game_id'].isin(train_games), target_col].astype(int).values
y_test = df.loc[df['game_id'].isin(test_games), target_col].astype(int).values

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['field_goal_result'].fillna('none', inplace=True)
  dfV5.at[index, 'Home_Momentum_Score'] = dfV5.at[index - 1, 'Home_Momentum_Score'] + home_momentum_gain
  dfV5.at[index, 'Away_Momentum_Score'] = dfV5.at[index - 1, 'Away_Momentum_Score'] + away_momentum_gain
  dfV5 = dfV5.groupby('game_id', group_keys=False).apply(detect_momentum_shifts)
  dfV5[columns_to_fill] = dfV5[columns_to_fill].fillna(0)


# Best Results So Far:
## MLP - Hypertuned Parameters: hidden_layer_sizes=(32, 16), alpha=0.001, solver='adam', max_iter=500, random_state=42, Pred_Threshold tuned to: 0.16842105263157897

In [2]:
from scipy.ndimage import maximum_filter1d

mlp = MLPClassifier(hidden_layer_sizes=(32, 16), alpha=0.001, solver='adam', max_iter=500, random_state=42)
mlp.fit(X_train_scaled, y_train)

y_train_proba = mlp.predict_proba(X_train_scaled)[:, 1]
y_test_proba = mlp.predict_proba(X_test_scaled)[:, 1]

y_train_pred_threshold = (y_train_proba >= 0.16842105263157897).astype(int)
y_test_pred_threshold = (y_test_proba >= 0.16842105263157897).astype(int)

def apply_temporal_correction(preds, actuals, window_size=4):
    corrected_preds = np.zeros_like(preds)
    for i in range(len(preds)):
        start = max(0, i - window_size)
        end = min(len(preds), i + window_size + 1)
        if preds[i] == 1 and np.any(actuals[start:end] == 1):
            corrected_preds[i] = 1 
    return corrected_preds

y_train_pred_corrected = apply_temporal_correction(y_train_pred_threshold, y_train)
y_test_pred_corrected = apply_temporal_correction(y_test_pred_threshold, y_test)

print("\n=== Train Set with Corrected ±4 Play Window ===")
print(classification_report(y_train, y_train_pred_corrected, zero_division=0))

print("\n=== Test Set with Corrected ±4 Play Window ===")
print(classification_report(y_test, y_test_pred_corrected, zero_division=0))



=== Train Set with Corrected ±4 Play Window ===
              precision    recall  f1-score   support

           0       1.00      0.99      0.99    331300
           1       0.69      0.77      0.73      6975

    accuracy                           0.99    338275
   macro avg       0.84      0.88      0.86    338275
weighted avg       0.99      0.99      0.99    338275


=== Test Set with Corrected ±4 Play Window ===
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     82061
           1       0.51      0.42      0.46      1726

    accuracy                           0.98     83787
   macro avg       0.75      0.71      0.73     83787
weighted avg       0.98      0.98      0.98     83787



### Models to try training after researching:
#### XGBoost - worked well with big data class project, handles imblanced data well
#### LSTM - views plays as sequence of events rather than individual events
#### Temporal CNN (1D Convolutional Neural Network) - Also treats plays as sequences

# ----------------------------------------------------------------------
# XGBoost Training:

In [8]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold

# Expanded search space for XGBoost
xgb_param_grid = {
    "n_estimators": [100, 200, 300, 500],  # More trees to test deeper models
    "max_depth": [3, 6, 9, 12],  # Allow deeper trees
    "learning_rate": [0.001, 0.005, 0.01, 0.05, 0.1],  # More refined learning rates
    "subsample": [0.6, 0.7, 0.8, 0.9],  # Sample more aggressively
    "colsample_bytree": [0.6, 0.7, 0.8, 0.9],  # Feature sampling per tree
    "scale_pos_weight": [3, 5, 7, 10],  # Stronger balancing for class imbalance
    "min_child_weight": [1, 3, 5, 7],  # Controls minimum sum of weights in a child node
    "gamma": [0, 0.1, 0.3, 0.5],  # Minimum loss reduction (pruning)
}

# Define a range of classification thresholds
threshold_values = np.linspace(0.1, 0.9, 9)  # Test thresholds from 0.1 to 0.9

# Initialize XGBoost with parallelization turned off
xgb = XGBClassifier(n_jobs=1, random_state=42)

# Use RandomizedSearchCV for efficient tuning
xgb_grid_search = RandomizedSearchCV(
    xgb,
    param_distributions=xgb_param_grid,
    scoring="f1",  # Optimize for F1-score due to class imbalance
    cv=3,  # More robust cross-validation
    n_iter=15,  # Test 15 random combinations
    n_jobs=1  # Disable parallel processing to avoid CPU overload
)

# Fit the model
xgb_grid_search.fit(X_train_scaled, y_train)

# Get the best model from tuning
best_xgb = xgb_grid_search.best_estimator_

# Make probability predictions
y_train_proba = best_xgb.predict_proba(X_train_scaled)[:, 1]
y_test_proba = best_xgb.predict_proba(X_test_scaled)[:, 1]

# === Optimize the Classification Threshold ===
best_threshold = 0.5
best_f1 = 0

for threshold in threshold_values:
    y_train_pred_threshold = (y_train_proba >= threshold).astype(int)
    y_test_pred_threshold = (y_test_proba >= threshold).astype(int)
    
    f1 = classification_report(y_test, y_test_pred_threshold, output_dict=True)["1"]["f1-score"]
    
    if f1 > best_f1:
        best_f1 = f1
        best_threshold = threshold

print(f"\n=== Best Classification Threshold Found: {best_threshold:.2f} (F1-Score: {best_f1:.4f}) ===")

# Apply Best Threshold for Predictions
y_train_pred_threshold = (y_train_proba >= best_threshold).astype(int)
y_test_pred_threshold = (y_test_proba >= best_threshold).astype(int)

# === Apply Temporal Correction ===
def apply_temporal_correction(preds, actuals, window_size=4):
    corrected_preds = np.zeros_like(preds)
    for i in range(len(preds)):
        start = max(0, i - window_size)
        end = min(len(preds), i + window_size + 1)
        if preds[i] == 1 and np.any(actuals[start:end] == 1):
            corrected_preds[i] = 1  
    return corrected_preds

y_train_pred_corrected = apply_temporal_correction(y_train_pred_threshold, y_train)
y_test_pred_corrected = apply_temporal_correction(y_test_pred_threshold, y_test)

# Print evaluation results
print("\n=== Train Set Performance (Before Temporal Correction) ===")
print(classification_report(y_train, y_train_pred_threshold, zero_division=0))

print("\n=== Test Set Performance (Before Temporal Correction) ===")
print(classification_report(y_test, y_test_pred_threshold, zero_division=0))

print("\n=== Train Set Performance (After Temporal Correction) ===")
print(classification_report(y_train, y_train_pred_corrected, zero_division=0))

print("\n=== Test Set Performance (After Temporal Correction) ===")
print(classification_report(y_test, y_test_pred_corrected, zero_division=0))

# Print best hyperparameters
print("\n=== Best XGBoost Hyperparameters ===")
print(xgb_grid_search.best_params_)




=== Best Classification Threshold Found: 0.50 (F1-Score: 0.5066) ===

=== Train Set Performance (Before Temporal Correction) ===
              precision    recall  f1-score   support

           0       1.00      0.99      1.00    331300
           1       0.78      1.00      0.88      6975

    accuracy                           0.99    338275
   macro avg       0.89      1.00      0.94    338275
weighted avg       1.00      0.99      0.99    338275


=== Test Set Performance (Before Temporal Correction) ===
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     82061
           1       0.49      0.52      0.51      1726

    accuracy                           0.98     83787
   macro avg       0.74      0.75      0.75     83787
weighted avg       0.98      0.98      0.98     83787


=== Train Set Performance (After Temporal Correction) ===
              precision    recall  f1-score   support

           0       1.00      1.00      1.00

# LSTM Training


In [2]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
import pandas as pd

sequence_length = 10  
threshold = 0.5 

def create_sequences(X, y, seq_length):
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_length):
        X_seq.append(X[i:i+seq_length])
        y_seq.append(y[i+seq_length])  
    return np.array(X_seq), np.array(y_seq)

X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train, sequence_length)
X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test, sequence_length)

param_grid = [
    {"lstm_units": 64, "dropout": 0.2, "lr": 0.001},
    {"lstm_units": 128, "dropout": 0.3, "lr": 0.001},
    {"lstm_units": 64, "dropout": 0.3, "lr": 0.005}
]

best_f1 = 0
best_model = None
best_params = None

for params in param_grid:
    print(f"\nTraining with {params}")

    model = Sequential([
        LSTM(params["lstm_units"], return_sequences=True, input_shape=(X_train_seq.shape[1], X_train_seq.shape[2])),
        Dropout(params["dropout"]),
        LSTM(params["lstm_units"]),
        Dropout(params["dropout"]),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(learning_rate=params["lr"]),
                  loss='binary_crossentropy', metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

    model.fit(X_train_seq, y_train_seq,
              validation_data=(X_test_seq, y_test_seq),
              epochs=5, batch_size=32, verbose=1,
              callbacks=[early_stop])

    y_pred_proba = model.predict(X_test_seq).flatten()
    y_pred = (y_pred_proba >= threshold).astype(int)
    f1 = classification_report(y_test_seq, y_pred, output_dict=True)["1"]["f1-score"]
    print(f"F1-score: {f1:.4f}")

    if f1 > best_f1:
        best_f1 = f1
        best_model = model
        best_params = params

y_final_proba = best_model.predict(X_test_seq).flatten()
y_final_pred = (y_final_proba >= threshold).astype(int)

print("\n=== Best Hyperparameters ===")
print(best_params)

print("\n=== Final Test Performance ===")
print(classification_report(y_test_seq, y_final_pred, zero_division=0))



Training with {'lstm_units': 64, 'dropout': 0.2, 'lr': 0.001}


  super().__init__(**kwargs)


Epoch 1/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 13ms/step - accuracy: 0.9770 - loss: 0.0920 - val_accuracy: 0.9809 - val_loss: 0.0726
Epoch 2/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 9ms/step - accuracy: 0.9809 - loss: 0.0712 - val_accuracy: 0.9808 - val_loss: 0.0714
Epoch 3/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 10ms/step - accuracy: 0.9811 - loss: 0.0682 - val_accuracy: 0.9812 - val_loss: 0.0707
Epoch 4/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 10ms/step - accuracy: 0.9813 - loss: 0.0651 - val_accuracy: 0.9812 - val_loss: 0.0710
Epoch 5/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 10ms/step - accuracy: 0.9820 - loss: 0.0618 - val_accuracy: 0.9813 - val_loss: 0.0721
[1m2619/2619[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step
F1-score: 0.2360

Training with {'lstm_units': 128, 'dropout': 0.3, 'lr': 0.001}


  super().__init__(**kwargs)


Epoch 1/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 17ms/step - accuracy: 0.9774 - loss: 0.0902 - val_accuracy: 0.9799 - val_loss: 0.0747
Epoch 2/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m159s[0m 15ms/step - accuracy: 0.9806 - loss: 0.0721 - val_accuracy: 0.9807 - val_loss: 0.0723
Epoch 3/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 15ms/step - accuracy: 0.9805 - loss: 0.0702 - val_accuracy: 0.9811 - val_loss: 0.0709
Epoch 4/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 15ms/step - accuracy: 0.9808 - loss: 0.0673 - val_accuracy: 0.9810 - val_loss: 0.0716
Epoch 5/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 16ms/step - accuracy: 0.9815 - loss: 0.0632 - val_accuracy: 0.9811 - val_loss: 0.0730
[1m2619/2619[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 6ms/step
F1-score: 0.2473

Training with {'lstm_units': 64, 'dropout': 0.3, 'lr': 0.005}


  super().__init__(**kwargs)


Epoch 1/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 12ms/step - accuracy: 0.9784 - loss: 0.0905 - val_accuracy: 0.9807 - val_loss: 0.0764
Epoch 2/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 11ms/step - accuracy: 0.9803 - loss: 0.0769 - val_accuracy: 0.9798 - val_loss: 0.0763
Epoch 3/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 11ms/step - accuracy: 0.9798 - loss: 0.0766 - val_accuracy: 0.9806 - val_loss: 0.0756
Epoch 4/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 11ms/step - accuracy: 0.9805 - loss: 0.0750 - val_accuracy: 0.9809 - val_loss: 0.0759
Epoch 5/5
[1m10571/10571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 11ms/step - accuracy: 0.9800 - loss: 0.0762 - val_accuracy: 0.9807 - val_loss: 0.0756
[1m2619/2619[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step
F1-score: 0.2688
[1m2619/2619[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[

In [3]:
import numpy as np
import random
import pandas as pd
from keras.models import Model
from keras.layers import (
    Input, Conv1D, BatchNormalization, Dropout, Add,
    Activation, Lambda, Dense
)
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight

X_train_tcn = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
X_test_tcn = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)

def build_tcn_model(input_shape, filt=10, kernel_s=5, layers=3, dropout=0.3, learning_rate=0.001):
    input_layer = Input(shape=input_shape)
 
    x = Conv1D(filt, kernel_size=kernel_s, dilation_rate=1, activation='relu', padding='causal')(input_layer)
    x = BatchNormalization()(x)
    x = Dropout(dropout)(x)
    x = Conv1D(filt, kernel_size=kernel_s, dilation_rate=1, activation='relu', padding='causal')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout)(x)
    
    shortcut = Conv1D(filt, kernel_size=1, padding='same')(input_layer)
    out = Activation('relu')(Add()([x, shortcut]))

    for i in range(layers - 1):
        dilation = 2 ** (i + 1)
        x = Conv1D(filt, kernel_size=kernel_s, dilation_rate=dilation, activation='relu', padding='causal')(out)
        x = BatchNormalization()(x)
        x = Dropout(dropout)(x)
        x = Conv1D(filt, kernel_size=kernel_s, dilation_rate=dilation, activation='relu', padding='causal')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout)(x)
        out = Activation('relu')(Add()([x, out]))

    out = Lambda(lambda x: x[:, -1, :])(out)  
    output = Dense(1, activation='sigmoid')(out)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model

param_grid = {
    "filt": [8, 16, 32],
    "kernel_s": [3, 5],
    "dropout": [0.2, 0.3],
    "learning_rate": [0.001, 0.005]
}
num_trials = 5

random_params = [
    {key: random.choice(values) for key, values in param_grid.items()}
    for _ in range(num_trials)
]

class_weights = compute_class_weight("balanced", classes=np.unique(y_train), y=y_train)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

best_f1 = 0
best_model = None
best_params = {}

for params in random_params:
    print(f"\nTraining TCN with params: {params}")
    
    model = build_tcn_model(
        input_shape=(X_train_tcn.shape[1], 1),
        filt=params["filt"],
        kernel_s=params["kernel_s"],
        dropout=params["dropout"],
        learning_rate=params["learning_rate"],
        layers=3
    )

    early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

    model.fit(
        X_train_tcn, y_train,
        validation_data=(X_test_tcn, y_test),
        epochs=15,
        batch_size=32,
        verbose=1,
        callbacks=[early_stopping],
        class_weight=class_weights_dict
    )

    y_test_proba = model.predict(X_test_tcn).flatten()

    best_threshold = 0.5
    best_threshold_f1 = 0
    for threshold in [0.2, 0.3, 0.4, 0.5]:
        y_test_pred = (y_test_proba >= threshold).astype(int)
        f1 = classification_report(y_test, y_test_pred, output_dict=True)["1"]["f1-score"]
        if f1 > best_threshold_f1:
            best_threshold = threshold
            best_threshold_f1 = f1

    print(f"Best Threshold: {best_threshold}, F1-Score: {best_threshold_f1:.4f}")

    if best_threshold_f1 > best_f1:
        best_f1 = best_threshold_f1
        best_params = {**params, "threshold": best_threshold}
        best_model = model

print("\n=== Best Hyperparameters & Threshold ===")
print(best_params)

final_y_pred_proba = best_model.predict(X_test_tcn).flatten()
final_y_pred = (final_y_pred_proba >= best_params["threshold"]).astype(int)

print("\n=== Final Test Set Performance ===")
print(classification_report(y_test, final_y_pred, zero_division=0))


Training TCN with params: {'filt': 8, 'kernel_s': 5, 'dropout': 0.3, 'learning_rate': 0.005}

Epoch 1/15
[1m10572/10572[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 16ms/step - accuracy: 0.8120 - loss: 0.6363 - val_accuracy: 0.8891 - val_loss: 0.6190
Epoch 2/15
[1m10572/10572[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 17ms/step - accuracy: 0.8831 - loss: 0.5791 - val_accuracy: 0.8860 - val_loss: 0.4846
Epoch 3/15
[1m10572/10572[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 18ms/step - accuracy: 0.8829 - loss: 0.5723 - val_accuracy: 0.8855 - val_loss: 0.5121
Epoch 4/15
[1m10572/10572[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 17ms/step - accuracy: 0.8820 - loss: 0.5696 - val_accuracy: 0.8858 - val_loss: 0.5182
[1m2619/2619[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 7ms/step
Best Threshold: 0.5, F1-Score: 0.1705

Training TCN with params: {'filt': 8, 'kernel_s': 5, 'dropout': 0.3, 'learning_rate': 0.005}
Epoch 1/15
[1m1057

# Stacking Classifier Ensemble - Random Forest Added, Logistic Regression Base Model 

In [5]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import numpy as np

# === Final Best XGBoost Hyperparameters ===
best_xgb = XGBClassifier(
    subsample=0.6,
    scale_pos_weight=7,
    n_estimators=300,
    min_child_weight=5,
    max_depth=12,
    learning_rate=0.05,
    gamma=0.1,
    colsample_bytree=0.9,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
    n_jobs=-1
)

# === Additional Model for Stacking ===
rf = RandomForestClassifier(n_estimators=100, max_depth=10, class_weight='balanced', random_state=42)

# === Meta-learner ===
meta_model = LogisticRegression(class_weight='balanced', max_iter=500, random_state=42)

# === Stacking Classifier ===
stacked_model = StackingClassifier(
    estimators=[('xgb', best_xgb), ('rf', rf)],
    final_estimator=meta_model,
    cv=5,
    n_jobs=-1,
    passthrough=False  # Only pass model outputs, not original features
)

# === Fit the Ensemble Model ===
stacked_model.fit(X_train_scaled, y_train)

# === Predict Probabilities ===
y_train_proba = stacked_model.predict_proba(X_train_scaled)[:, 1]
y_test_proba = stacked_model.predict_proba(X_test_scaled)[:, 1]

# === Threshold Optimization ===
threshold_values = np.linspace(0.1, 0.9, 9)
best_threshold = 0.5
best_f1 = 0

for threshold in threshold_values:
    y_test_pred_threshold = (y_test_proba >= threshold).astype(int)
    f1 = classification_report(y_test, y_test_pred_threshold, output_dict=True)["1"]["f1-score"]
    
    if f1 > best_f1:
        best_f1 = f1
        best_threshold = threshold

print(f"\n=== Best Classification Threshold Found: {best_threshold:.2f} (F1-Score: {best_f1:.4f}) ===")

# === Final Predictions ===
y_train_pred_threshold = (y_train_proba >= best_threshold).astype(int)
y_test_pred_threshold = (y_test_proba >= best_threshold).astype(int)

# === Temporal Correction Function ===
def apply_temporal_correction(preds, actuals, window_size=4):
    corrected_preds = np.zeros_like(preds)
    for i in range(len(preds)):
        start = max(0, i - window_size)
        end = min(len(preds), i + window_size + 1)
        if preds[i] == 1 and np.any(actuals[start:end] == 1):
            corrected_preds[i] = 1  
    return corrected_preds

# Apply Correction
y_train_pred_corrected = apply_temporal_correction(y_train_pred_threshold, y_train)
y_test_pred_corrected = apply_temporal_correction(y_test_pred_threshold, y_test)

# === Print Performance Reports ===
print("\n=== Train Set Performance (Before Temporal Correction) ===")
print(classification_report(y_train, y_train_pred_threshold, zero_division=0))

print("\n=== Test Set Performance (Before Temporal Correction) ===")
print(classification_report(y_test, y_test_pred_threshold, zero_division=0))

print("\n=== Train Set Performance (After Temporal Correction) ===")
print(classification_report(y_train, y_train_pred_corrected, zero_division=0))

print("\n=== Test Set Performance (After Temporal Correction) ===")
print(classification_report(y_test, y_test_pred_corrected, zero_division=0))


=== Best Classification Threshold Found: 0.90 (F1-Score: 0.4563) ===

=== Train Set Performance (Before Temporal Correction) ===
              precision    recall  f1-score   support

           0       1.00      0.98      0.99    331300
           1       0.46      0.99      0.63      6975

    accuracy                           0.98    338275
   macro avg       0.73      0.98      0.81    338275
weighted avg       0.99      0.98      0.98    338275


=== Test Set Performance (Before Temporal Correction) ===
              precision    recall  f1-score   support

           0       0.99      0.97      0.98     82061
           1       0.34      0.70      0.46      1726

    accuracy                           0.97     83787
   macro avg       0.67      0.83      0.72     83787
weighted avg       0.98      0.97      0.97     83787


=== Train Set Performance (After Temporal Correction) ===
              precision    recall  f1-score   support

           0       1.00      0.99      1.00

# Stacking Ensemble - Light Gradient Boosting Machine Added

In [6]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import numpy as np

# === Final Best XGBoost Hyperparameters ===
best_xgb = XGBClassifier(
    subsample=0.6,
    scale_pos_weight=7,
    n_estimators=300,
    min_child_weight=5,
    max_depth=12,
    learning_rate=0.05,
    gamma=0.1,
    colsample_bytree=0.9,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
    n_jobs=-1
)

# === Additional Model for Stacking ===
rf = RandomForestClassifier(n_estimators=100, max_depth=10, class_weight='balanced', random_state=42)

# === Meta-learner ===
meta_model = LogisticRegression(class_weight='balanced', max_iter=500, random_state=42)

lgbm = LGBMClassifier(n_estimators=200, learning_rate=0.05, class_weight='balanced', random_state=42)

stacked_model = StackingClassifier(
    estimators=[('xgb', best_xgb), ('rf', rf), ('lgbm', lgbm)],
    final_estimator=meta_model,
    cv=5,
    n_jobs=-1
)
# === Fit the Ensemble Model ===
stacked_model.fit(X_train_scaled, y_train)

# === Predict Probabilities ===
y_train_proba = stacked_model.predict_proba(X_train_scaled)[:, 1]
y_test_proba = stacked_model.predict_proba(X_test_scaled)[:, 1]

# === Threshold Optimization ===
threshold_values = np.linspace(0.1, 0.9, 9)
best_threshold = 0.5
best_f1 = 0

for threshold in threshold_values:
    y_test_pred_threshold = (y_test_proba >= threshold).astype(int)
    f1 = classification_report(y_test, y_test_pred_threshold, output_dict=True)["1"]["f1-score"]
    
    if f1 > best_f1:
        best_f1 = f1
        best_threshold = threshold

print(f"\n=== Best Classification Threshold Found: {best_threshold:.2f} (F1-Score: {best_f1:.4f}) ===")

# === Final Predictions ===
y_train_pred_threshold = (y_train_proba >= best_threshold).astype(int)
y_test_pred_threshold = (y_test_proba >= best_threshold).astype(int)

# === Temporal Correction Function ===
def apply_temporal_correction(preds, actuals, window_size=4):
    corrected_preds = np.zeros_like(preds)
    for i in range(len(preds)):
        start = max(0, i - window_size)
        end = min(len(preds), i + window_size + 1)
        if preds[i] == 1 and np.any(actuals[start:end] == 1):
            corrected_preds[i] = 1  
    return corrected_preds

# Apply Correction
y_train_pred_corrected = apply_temporal_correction(y_train_pred_threshold, y_train)
y_test_pred_corrected = apply_temporal_correction(y_test_pred_threshold, y_test)

# === Print Performance Reports ===
print("\n=== Train Set Performance (Before Temporal Correction) ===")
print(classification_report(y_train, y_train_pred_threshold, zero_division=0))

print("\n=== Test Set Performance (Before Temporal Correction) ===")
print(classification_report(y_test, y_test_pred_threshold, zero_division=0))

print("\n=== Train Set Performance (After Temporal Correction) ===")
print(classification_report(y_train, y_train_pred_corrected, zero_division=0))

print("\n=== Test Set Performance (After Temporal Correction) ===")
print(classification_report(y_test, y_test_pred_corrected, zero_division=0))




=== Best Classification Threshold Found: 0.90 (F1-Score: 0.4448) ===

=== Train Set Performance (Before Temporal Correction) ===
              precision    recall  f1-score   support

           0       1.00      0.97      0.98    331300
           1       0.38      0.92      0.54      6975

    accuracy                           0.97    338275
   macro avg       0.69      0.95      0.76    338275
weighted avg       0.99      0.97      0.97    338275


=== Test Set Performance (Before Temporal Correction) ===
              precision    recall  f1-score   support

           0       0.99      0.97      0.98     82061
           1       0.32      0.75      0.44      1726

    accuracy                           0.96     83787
   macro avg       0.66      0.86      0.71     83787
weighted avg       0.98      0.96      0.97     83787


=== Train Set Performance (After Temporal Correction) ===
              precision    recall  f1-score   support

           0       1.00      0.99      0.99

# Explanations:
1) Concern for temporal play window with future data. 
- There is no concern for this when predicting momentum shifts on new data, this temporal window is only important for optimization to allow the model to improve its classification metrics by giving it a range of plays to correctly detect a momentum shift. The important part is the momentum shift and events that lead to this shift, not the exact play it occurs (momentum builds over time, is not instant, doesnt need to be precise to the exact play)
- I.E the momentum shift is the important part, the exact play it happens is not the important part (as long as its within four previous or next plays)
*4 Chosen here because it is a natural window of plays for football, teams allowed 4 downs (completed plays) to get first down (gain 10 yards) and reset the down number they are currently on. Using these four plays ensures the game state hasnt changed drastically while still providing a resonable buffer on detecting momentum shifts
- For future data when predicting momentum shifts, the model will not have access to future plays. This will not affect the models performance or ability to detect a momentum shift. The temporal window is only used when evaluating the models performance, not for actually predicting a momentum shift.

# Completed:
1)Trained and tested Other Models: Horrible results above
-Choosing to stick with XGBoost, Highest F1-Score of the models hypertuned. Also had the biggest loss from training to test sets
2)Attempt to minimize loss from training to test set and fix overfitting.
-Stacking Ensemble - Info about stack ensemble. Logistic Regression base model for predicting, and then we stacked the XGBoost, Random Forest, and LGBoost. Models trained to blah blah blah and how it fixes overfitting. 

