# Idea
## Grabbing both home and away mean changes in WP in 2nd quarter tied game for base values. These will be the base momentum score gained/lost for each dynamic category. Using tied game situations to emphasize the difference between the win probability model and the future research model, the score difference and time remaining isnt a main factor anymore but instead will play a small part
## Will have multipliers: Current Quarter, Score Difference, Home/Away

In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
df = pd.read_csv('NFL_pbp_2009-2019.csv', low_memory=False)

threshold = 100000
df['field_goal_result'].fillna('none', inplace=True)
dfV2 = df.loc[:, df.isnull().sum() < threshold]
missing_values = dfV2.isnull().sum()

statistical_cols = ['play_id', 'game_id', 'home_team', 'away_team', 'posteam', 
                    'defteam', 'side_of_field', 'yardline_100', 'half_seconds_remaining', 
                    'game_seconds_remaining', 'game_half', 'drive', 'qtr', 'down', 'goal_to_go', 'time', 
                    'yrdln', 'ydstogo', 'ydsnet', 'desc', 'play_type', 'yards_gained', 'home_timeouts_remaining', 
                    'away_timeouts_remaining', 'total_home_score',  'total_away_score', 'score_differential', 'home_wp', 'away_wp', 'ep']

game_dynamics_cols = [
    'punt_blocked', 'first_down_rush', 'first_down_pass', 'first_down_penalty', 'third_down_converted',
    'third_down_failed', 'fourth_down_converted', 'fourth_down_failed', 'incomplete_pass', 'interception',
    'fumble_forced', 'fumble_not_forced', 'fumble_out_of_bounds', 'solo_tackle', 'safety', 'penalty',
    'tackled_for_loss', 'fumble_lost', 'own_kickoff_recovery', 'own_kickoff_recovery_td', 'qb_hit',
    'rush_attempt', 'pass_attempt', 'sack', 'touchdown', 'pass_touchdown', 'rush_touchdown', 'field_goal_result',
    'return_touchdown', 'extra_point_attempt', 'two_point_attempt', 'field_goal_attempt', 'kickoff_attempt',
    'punt_attempt', 'fumble', 'complete_pass', 'shotgun', 'no_huddle', 'punt_inside_twenty', 'kickoff_inside_twenty']

columns_to_keep = statistical_cols + game_dynamics_cols
dfV3 = dfV2[columns_to_keep]

dfV4 = dfV3.drop(['play_id', 'game_seconds_remaining', 'fumble_forced'], axis=1)
dfV4 = dfV4.dropna(subset=['down', 'defteam', 'posteam'])
dfV4 = dfV4.reset_index(drop=True)

# Indicators for if within last 2 minutes of the half and the whole game
dfV4['close_to_end_of_half'] = (dfV4['half_seconds_remaining'] <= 120).astype(int)
dfV4['close_to_end_of_game'] = ((dfV4['half_seconds_remaining'] <= 120) & (dfV4['game_half'] == 'Half2')).astype(int)

# Indicator for if the touchdown was for the away or home team
dfV4['home_td'] = ((dfV4['touchdown'] == 1) & (dfV4['posteam'] != dfV4['away_team'])).astype(int)
dfV4['away_td'] = ((dfV4['touchdown'] == 1) & (dfV4['posteam'] != dfV4['home_team'])).astype(int)

# Trackers for the difference in both teams' win probability after each play
dfV4['home_wp_change'] = dfV4['home_wp'].diff().fillna(0)
dfV4['away_wp_change'] = dfV4['away_wp'].diff().fillna(0)

# Indicator for turnover
dfV4['turnover'] = (
    (dfV4['safety'] == 1) |
    (dfV4['interception'] == 1) |
    (dfV4['fumble_lost'] == 1) |
    ((dfV4['fourth_down_converted'] == 0) & (dfV4['down'] == 4))
).astype(int)

# Drive time - Added drive ended indicator to help - Manually resets after end of game, half, and change of possession
dfV4['drive_ended'] = (
    (dfV4['posteam'] != dfV4['posteam'].shift(1)) |  
    (dfV4['game_id'] != dfV4['game_id'].shift(1)) |  
    dfV4['desc'].str.contains('END GAME', na=False) |  
    dfV4['desc'].str.contains('END QUARTER', na=False)  
).astype(int)
dfV4['drive'] = (
    (dfV4['posteam'].ne(dfV4['posteam'].shift())) |
    (dfV4['game_id'].ne(dfV4['game_id'].shift()))
).cumsum()
dfV4['drive_time_seconds'] = (
    dfV4.groupby(['game_id', 'drive'])['half_seconds_remaining']
    .transform('first') - dfV4['half_seconds_remaining']
)
dfV4['drive_time_seconds'] = dfV4.apply(
    lambda row: 0 if row['drive_ended'] == 1 else row['drive_time_seconds'], axis=1
)
dfV4['drive_time_seconds'] = dfV4.groupby(['game_id', 'drive'])['drive_time_seconds'].cumsum()

# Indicator for long touchdowns
dfV4['long_td'] = ((dfV4['touchdown'] == 1) & (dfV4['yards_gained'] >= 50)).astype(int)

# Trackers for score differentials and lead changes
dfV4['home_score_differential'] = dfV4['total_home_score'] - dfV4['total_away_score']
dfV4['away_score_differential'] = -dfV4['home_score_differential']
dfV4['lead_change'] = ((dfV4['home_score_differential'].diff() < 0) &
                       (dfV4['home_score_differential'].shift() * dfV4['home_score_differential'] < 0)).astype(int)

# Combining first down indicators
dfV4['first_down'] = ((dfV4['first_down_pass'] == 1) | (dfV4['first_down_rush'] == 1) | (dfV4['first_down_penalty'] == 1)).astype(int)

# Indicators for scoring drives - Removing
dfV4['home_scoring_drive'] = (
    (dfV4['home_td'] == 1) 
).astype(int)
dfV4['away_scoring_drive'] = (
    (dfV4['away_td'] == 1) 
).astype(int)

# Helper for consecutive scoring events - Remove Later!!!!!!!!!!!!!!
dfV4['home_scoring_events'] = (
    (dfV4['posteam'] != dfV4['away_team']) & 
    ((dfV4['home_td'] == 1) | (dfV4['field_goal_result'] == 'made'))
).astype(int)
dfV4['away_scoring_events'] = (
    (dfV4['posteam'] != dfV4['home_team']) & 
    ((dfV4['away_td'] == 1) | (dfV4['field_goal_result'] == 'made'))
).astype(int)

# Consecutive Scoring Events + Helper function 
def calc_consecutive_cumsum_with_game_reset(series, reset_series, game_ids):
    cumsum = 0
    consecutive = []
    prev_game_id = None  
    
    for i in range(len(series)):
        if game_ids[i] != prev_game_id:
            cumsum = 0 
        if reset_series[i] == 1:  
            cumsum = 0
        if series[i] == 1:  
            cumsum += 1
        consecutive.append(cumsum)
        prev_game_id = game_ids[i]  
    return consecutive

dfV4['home_csum_scores'] = calc_consecutive_cumsum_with_game_reset(
    dfV4['home_scoring_events'], dfV4['away_scoring_events'], dfV4['game_id']
)
dfV4['away_csum_scores'] = calc_consecutive_cumsum_with_game_reset(
    dfV4['away_scoring_events'], dfV4['home_scoring_events'], dfV4['game_id']
)

#Consecutive defensive stops
dfV4['home_def_stop'] = (
    (dfV4['posteam'] != dfV4['home_team']) &  ((dfV4['punt_attempt'] == 1) |  (dfV4['turnover'] == 1)) & 
    ~dfV4['field_goal_result'].isin(['made'])  
).astype(int)
dfV4['away_def_stop'] = (
    (dfV4['posteam'] != dfV4['away_team']) & ((dfV4['punt_attempt'] == 1) |  (dfV4['turnover'] == 1)) & 
    ~dfV4['field_goal_result'].isin(['made'])
).astype(int)

def calc_consecutive_defensive_stops_with_game_reset(series, reset_series, game_ids):
    cumsum = 0
    consecutive = []
    prev_game_id = None  
    for i in range(len(series)):
        if game_ids[i] != prev_game_id:
            cumsum = 0
        if reset_series[i] == 1:
            cumsum = 0
        if series[i] == 1:
            cumsum += 1
        consecutive.append(cumsum)
        prev_game_id = game_ids[i]  
    return consecutive

dfV4['home_csum_def_stops'] = calc_consecutive_defensive_stops_with_game_reset(
    dfV4['home_def_stop'], dfV4['away_scoring_events'], dfV4['game_id']
)
dfV4['away_csum_def_stops'] = calc_consecutive_defensive_stops_with_game_reset(
    dfV4['away_def_stop'], dfV4['home_scoring_events'], dfV4['game_id']
)

# Home/Away Drive Numbers
dfV4['away_drive_number'] = (
    dfV4.loc[dfV4['posteam'] != dfV4['home_team']]
    .groupby('game_id')['drive_ended'].cumsum()
)
dfV4['home_drive_number'] = (
    dfV4.loc[dfV4['posteam'] == dfV4['home_team']]
    .groupby('game_id')['drive_ended'].cumsum()
)

# High_leverage_play moment, game on the line in a 1 score game
dfV4['high_leverage_moment'] = (
    (dfV4['down'].isin([3, 4])) & 
    (abs(dfV4['score_differential']) <= 8) & 
    (dfV4['qtr'] >= 4)
).astype(int)

# Drought Ending score
dfV4['drought_end_play'] = (
    ((dfV4['away_csum_scores'].shift(1) >= 2) & (dfV4['away_csum_scores'] == 0) & (dfV4['home_scoring_events'] == 1)) |
    ((dfV4['home_csum_scores'].shift(1) >= 2) & (dfV4['home_csum_scores'] == 0) & (dfV4['away_scoring_events'] == 1))
).astype(int)

# Defensive touchdown
dfV4['def_td'] = (
    ((dfV4['fumble'] == 1) & (dfV4['return_touchdown'] == 1)) |
    ((dfV4['interception'] == 1) & (dfV4['return_touchdown'] == 1))
).astype(int)

# Defensive touchdown
dfV4['off_td'] = (
    (dfV4['pass_touchdown'] == 1) | (dfV4['rush_touchdown'] == 1)
).astype(int)

# Special Teams touchdown
dfV4['st_return_td'] = (
    ((dfV4['kickoff_attempt'] == 1) & (dfV4['return_touchdown'] == 1)) | 
    ((dfV4['punt_attempt'] == 1) & (dfV4['return_touchdown'] == 1))  
).astype(int)

# Big special teams play...punt blocked, field goal blocked, return_touchdown, kick recovery, pin team near endzone
dfV4['big_st_play'] = (
    (dfV4['punt_blocked'] == 1) | 
    (dfV4['field_goal_result'] == 'blocked') | 
    (dfV4['own_kickoff_recovery'] == 1) | 
    (dfV4['st_return_td'] == 1) | 
    (dfV4['kickoff_inside_twenty'] == 1) | 
    (dfV4['punt_inside_twenty'] == 1)
).astype(int)

# Scoring type differentiatior, touchdowns should hold more weight than a field goal, other types may hold more weight also
dfV4['scoring_type'] = np.select(
    [
        dfV4['field_goal_result'] == 'made',
        dfV4['off_td'] == 1,
        dfV4['def_td'] == 1,
        dfV4['st_return_td'] == 1,
    ],
    ['fg', 'off_td', 'def_td', 'st_td'],
    default='none'
)

# Indicator for big offensive play
dfV4['big_offensive_play'] = (
        (dfV4['yards_gained'] >= 40) |
        (dfV4['long_td'] == 1) |
        ((dfV4['high_leverage_moment'] == 1) & (dfV4['off_td'] == 1))
).astype(int)

# Indicator for big defensive play
dfV4['big_defensive_play'] = (
    (dfV4['sack'] == 1) |
    (dfV4['tackled_for_loss'] == 1) |
    ((dfV4['high_leverage_moment'] == 1) & (dfV4['def_td'] == 'def_td')) |
    (dfV4['scoring_type'] == 'def_td')
).astype(int)

#Quick Score and Quick Stop
dfV4['quick_score'] = ((dfV4['drive_time_seconds'] < 180) & 
                               ((dfV4['touchdown'] == 1) | (dfV4['field_goal_result'] == 'made'))).astype(int)

dfV4['total_drive_time'] = dfV4.groupby('drive')['drive_time_seconds'].transform('last') 
dfV4['quick_stop'] = ((dfV4['total_drive_time'] < 180) & (dfV4['scoring_type'] == 'none')).astype(int)


# Consecutive first downs
dfV4['home_csum_first_downs'] = 0
dfV4['away_csum_first_downs'] = 0
dfV4['home_csum_first_downs'] = (
    dfV4.groupby(['home_team', 'away_team', 'home_drive_number'])['first_down']
    .cumsum()
    .where(dfV4['posteam'] != 'away_team', 0)
)
dfV4['away_csum_first_downs'] = (
    dfV4.groupby(['home_team', 'away_team', 'away_drive_number'])['first_down']
    .cumsum()
    .where(dfV4['posteam'] != 'home_team', 0)
)


columns_to_remove = [
    'ep', 'punt_blocked', 'first_down_rush', 'first_down_pass', 
    'third_down_converted', 'third_down_failed', 'fourth_down_converted', 
    'fourth_down_failed', 'incomplete_pass', 'interception', 'fumble_not_forced', 
    'fumble_out_of_bounds', 'solo_tackle', 'safety', 'penalty', 'tackled_for_loss', 
    'fumble_lost', 'own_kickoff_recovery', 'own_kickoff_recovery_td', 'qb_hit', 
    'rush_attempt', 'pass_attempt', 'sack', 'extra_point_attempt', 'two_point_attempt', 
    'field_goal_attempt', 'kickoff_attempt', 'punt_attempt', 'fumble', 'pass_touchdown', 'rush_touchdown'
    'complete_pass', 'shotgun', 'home_scoring_drive', 'away_scoring_drive','home_scoring_events','away_scoring_events',
    'rush_touchdown', 'field_goal_result', 'return_touchdown', 'complete_pass', 'no_huddle', 'punt_inside_twenty', 'kickoff_inside_twenty',
    'time', 'yrdln', 'ydstogo', 'ydsnet', 'desc', 'side_of_field', 'yardline_100', 'desc', 'drive', 'game_half', 'drive_ended', 'drive_time_seconds',
    'touchdown', 'score_differential', 'total_drive_time', 'game_id'
]

dfV5 = dfV4.drop(columns=columns_to_remove, errors='ignore')

dynamics = [
    ('big_offensive_play', dfV5['big_offensive_play'] == 1),
    ('big_defensive_play', dfV5['big_defensive_play'] == 1),
    ('off_td', dfV5['off_td'] == 1),
    ('def_td', dfV5['def_td'] == 1),
    ('big_st_play', dfV5['big_st_play'] == 1),
    ('st_return_td', dfV5['st_return_td'] == 1),
    ('high_leverage_moment', dfV5['high_leverage_moment'] == 1),
    ('drought_end_play', dfV5['drought_end_play'] == 1),
    ('home_csum_scores', dfV5['home_csum_scores'] == 1),
    ('away_csum_scores', dfV5['away_csum_scores'] == 1),
    ('home_csum_def_stops', dfV5['home_csum_def_stops'] == 1),
    ('away_csum_def_stops', dfV5['away_csum_def_stops'] == 1),
    ('home_csum_first_downs', dfV5['home_csum_first_downs'] == 1),
    ('away_csum_first_downs', dfV5['away_csum_first_downs'] == 1),
    ('long_td', dfV5['long_td'] == 1),
    ('quick_score', dfV5['quick_score'] == 1),
    ('quick_stop', dfV5['quick_stop'] == 1),
    ('home_score_differential', dfV5['home_score_differential'] == 1),
    ('away_score_differential', dfV5['away_score_differential'] == 1),
]

wp_change_analysis = {}
for dynamic_name, mask in dynamics:
    wp_change_analysis[dynamic_name] = {
        'mean_home_wp_change': dfV5.loc[mask, 'home_wp_change'].mean(),
        'max_home_wp_change': dfV5.loc[mask, 'home_wp_change'].max(),
        'mean_away_wp_change': dfV5.loc[mask, 'away_wp_change'].mean(),
        'max_away_wp_change': dfV5.loc[mask, 'away_wp_change'].max(),
        'count': mask.sum(),
        'qtr': dfV5.loc[mask, 'qtr'].mode()[0] if not mask.sum() == 0 else None,  # Use qtr directly
    }

wp_change_analysis_df = pd.DataFrame(wp_change_analysis).T.reset_index()
wp_change_analysis_df.columns = [
    'game_dynamic',
    'mean_home_wp_change',
    'max_home_wp_change',
    'mean_away_wp_change',
    'max_away_wp_change',
    'count',
    'qtr',
]

# Base Values: mean difference in WP in 2nd quarter, tied game for any team

In [24]:
dynamics = [
    ('big_offensive_play', dfV5['big_offensive_play'] == 1),
    ('big_defensive_play', dfV5['big_defensive_play'] == 1),
    ('off_td', dfV5['off_td'] == 1),
    ('def_td', dfV5['def_td'] == 1),
    ('big_st_play', dfV5['big_st_play'] == 1),
    ('st_return_td', dfV5['st_return_td'] == 1),
    ('high_leverage_moment', dfV5['high_leverage_moment'] == 1),
    ('drought_end_play', dfV5['drought_end_play'] == 1),
    ('home_csum_scores', dfV5['home_csum_scores'] >= 2),
    ('away_csum_scores', dfV5['away_csum_scores'] >= 2),
    ('home_csum_def_stops', dfV5['home_csum_def_stops'] >= 2),
    ('away_csum_def_stops', dfV5['away_csum_def_stops'] >= 2),
    ('home_csum_first_downs', dfV5['home_csum_first_downs'] >= 2),
    ('away_csum_first_downs', dfV5['away_csum_first_downs'] >= 2),
    ('long_td', dfV5['long_td'] == 1),
    ('quick_score', dfV5['quick_score'] == 1),
    ('quick_stop', dfV5['quick_stop'] == 1)
]

mean_wp_changes = []

for dynamic_name, dynamic_condition in dynamics:
    if dynamic_name == 'def_td' or dynamic_name == 'st_return_td':
        dynamic_df = dfV5[
            dynamic_condition 
            & (dfV5['qtr'] == 2) 
            & (dfV5['home_score_differential'].shift(1) == 0)
        ]
    elif dynamic_name == 'high_leverage_moment':
        dynamic_df = dfV5[
            dynamic_condition 
            & (dfV5['home_score_differential'].abs() <= 8)
        ]
    else:
        dynamic_df = dfV5[
            dynamic_condition 
            & (dfV5['qtr'] == 2) 
            & (dfV5['home_score_differential'] == 0)
        ]

    wp_changes = pd.concat([dynamic_df['home_wp_change'], dynamic_df['away_wp_change']])
    wp_changes = wp_changes[wp_changes >= 0] 

    mean_wp_change = wp_changes.mean() if not wp_changes.empty else None
    mean_wp_changes.append((dynamic_name, 'All', mean_wp_change))

columns = ['Dynamic Category', 'Team', 'Mean WP Change']
mean_wp_df = pd.DataFrame(mean_wp_changes, columns=columns)

mean_wp_df

Unnamed: 0,Dynamic Category,Team,Mean WP Change
0,big_offensive_play,All,0.038602
1,big_defensive_play,All,0.029471
2,off_td,All,0.028432
3,def_td,All,0.016322
4,big_st_play,All,0.034637
5,st_return_td,All,0.040082
6,high_leverage_moment,All,0.035536
7,drought_end_play,All,0.028891
8,home_csum_scores,All,0.026948
9,away_csum_scores,All,0.030347


# Difference between quarters in tied game: Base value difference is quarter 2

In [30]:
mean_wp_changes_diff = []

for dynamic_name, dynamic_condition in dynamics:
    tied_game_df = dfV5[
        dynamic_condition 
        & (dfV5['home_score_differential'] == 0)
    ]
    
    wp_changes_by_quarter = {}
    
    for quarter in range(1, 5):
        dynamic_df_quarter = tied_game_df[tied_game_df['qtr'] == quarter]
        
        wp_changes = pd.concat([dynamic_df_quarter['home_wp_change'], dynamic_df_quarter['away_wp_change']])
        wp_changes = wp_changes[wp_changes >= 0] 
        
        wp_changes_by_quarter[quarter] = wp_changes.mean() if not wp_changes.empty else None
    
    base_wp_change = wp_changes_by_quarter.get(2, 0) or 0 
    
    for quarter in range(1, 5):
        if quarter != 2:  
            current_wp_change = wp_changes_by_quarter.get(quarter, 0) or 0  
            wp_change_diff = current_wp_change - base_wp_change
            mean_wp_changes_diff.append((dynamic_name, quarter, wp_change_diff))

columns = ['Dynamic Category', 'Quarter', 'WP Change Difference']
mean_wp_changes_diff_df = pd.DataFrame(mean_wp_changes_diff, columns=columns)

mean_wp_changes_diff_df

Unnamed: 0,Dynamic Category,Quarter,WP Change Difference
0,big_offensive_play,1,0.030241
1,big_offensive_play,3,0.003283
2,big_offensive_play,4,0.006589
3,big_defensive_play,1,0.022405
4,big_defensive_play,3,0.008265
5,big_defensive_play,4,0.02496
6,off_td,1,0.003655
7,off_td,3,0.00471
8,off_td,4,0.035857
9,def_td,1,0.020964


# Difference between score differentials in 2nd quarter: Base value for difference is tied game

In [33]:
mean_wp_changes_diff = []

scoring_ranges = {
    '1-score_game': (dfV5['home_score_differential'] > 0) & (dfV5['home_score_differential'] <= 8),
    '2-score_game': (dfV5['home_score_differential'] > 8) & (dfV5['home_score_differential'] <= 16),
    '3+_score_game': (dfV5['home_score_differential'] > 16),
}

for dynamic_name, dynamic_condition in dynamics:
    wp_changes_by_score_game = {}
    
    for range_label, condition in scoring_ranges.items():
        # Filter for the second quarter in addition to the score game condition
        dynamic_df_range = dfV5[dynamic_condition & condition & (dfV5['qtr'] == 2)]
        wp_changes = pd.concat([dynamic_df_range['home_wp_change'], dynamic_df_range['away_wp_change']])
        wp_changes = wp_changes[wp_changes >= 0] 
        
        wp_changes_by_score_game[range_label] = wp_changes.mean() if not wp_changes.empty else None

    base_wp_change = wp_changes_by_score_game.get('1-score_game', 0) or 0 
    for range_label in scoring_ranges.keys():
        if range_label != '1-score_game':  
            current_wp_change = wp_changes_by_score_game.get(range_label, 0) or 0  
            wp_change_diff = current_wp_change - base_wp_change
            mean_wp_changes_diff.append((dynamic_name, range_label, wp_change_diff))

columns = ['Dynamic Category', 'Score Game Type', 'WP Change Difference']
mean_wp_changes_diff_df = pd.DataFrame(mean_wp_changes_diff, columns=columns)

mean_wp_changes_diff_df

Unnamed: 0,Dynamic Category,Score Game Type,WP Change Difference
0,big_offensive_play,2-score_game,-0.005473
1,big_offensive_play,3+_score_game,-0.020124
2,big_defensive_play,2-score_game,-0.008878
3,big_defensive_play,3+_score_game,-0.019168
4,off_td,2-score_game,-0.005779
5,off_td,3+_score_game,-0.01912
6,def_td,2-score_game,0.004609
7,def_td,3+_score_game,-0.013528
8,big_st_play,2-score_game,-0.014903
9,big_st_play,3+_score_game,-0.027743


# Difference between Home and Away in tied game

In [None]:
mean_wp_changes_diff = []

tied_game_condition = dfV5['home_score_differential'] == 0

for dynamic_name, dynamic_condition in dynamics:
    dynamic_df_tied_game = dfV5[dynamic_condition & tied_game_condition]
    
    home_wp_changes = dynamic_df_tied_game['home_wp_change'][dynamic_df_tied_game['home_wp_change'] >= 0]
    away_wp_changes = dynamic_df_tied_game['away_wp_change'][dynamic_df_tied_game['away_wp_change'] >= 0]
    
    mean_home_wp_change = home_wp_changes.mean() if not home_wp_changes.empty else None
    mean_away_wp_change = away_wp_changes.mean() if not away_wp_changes.empty else None

    wp_change_diff = mean_home_wp_change - mean_away_wp_change if mean_home_wp_change is not None and mean_away_wp_change is not None else None
    
    mean_wp_changes_diff.append((dynamic_name, 'Home', mean_home_wp_change))
    mean_wp_changes_diff.append((dynamic_name, 'Away', mean_away_wp_change))
    mean_wp_changes_diff.append((dynamic_name, 'Difference', wp_change_diff))

columns = ['Dynamic Category', 'Team', 'Mean WP Change']
mean_wp_changes_diff_df = pd.DataFrame(mean_wp_changes_diff, columns=columns)

mean_wp_changes_diff_df

Unnamed: 0,Dynamic Category,Team,Mean WP Change
0,big_offensive_play,Home,0.058847
1,big_offensive_play,Away,0.061298
2,big_offensive_play,Difference,-0.002451
3,big_defensive_play,Home,0.043834
4,big_defensive_play,Away,0.051955
5,big_defensive_play,Difference,-0.00812
6,off_td,Home,0.043393
7,off_td,Away,0.042808
8,off_td,Difference,0.000585
9,def_td,Home,0.011292


# Multiplier Ranges

## 1,2,3 or more score games
### Tied/1 Score: 0.1...1.1
### 2 Score: 0.07.......1.07
### 3 or more Score 0.05........1.05

## Quarters 1,2,3,4... Quarters 1 and 4 have slight momentum gaining advantage over the middle of the game quarters
### 1,4: 0.2.....1.1
### 2,3: 0.1.....1.0

## Home/Away... Home team has a slight advantage over away team (Exception of away team in 1st quarter)
### Home: 0.5...1.5
### Away: 0.3...1.3

## Boost Cases: Combined Home/Away + Quarters? Learned from visualizing the data previously
### Home + 4th Quarter: 1.4
### Away + First Quarter: 1.4
### None: 1.0

## Decay Factors: Negatively effects the respectives teams momentum
### Opponent Scores                   |   0.5
### Opponent Consecutive Stops > 2    |   1.0
### Turnover                          |   0.8
### Opponent Ends a Scoring Drought	  |   1.0
### None                              |   0

## Base Values
### Dynamic Category	Team	    Mean    WP Change
### 0	big_offensive_play	    All	    0.038602
### 1	big_defensive_play	    All	    0.029471
### 2	off_td	                All	    0.028432
### 3	def_td	                All	    0.016322
### 4	big_st_play	            All	    0.034637
### 5	st_return_td	        All	    0.040082
### 6	high_leverage_moment	All	    0.035536
### 7	drought_end_play	    All	    0.028891
### 8	home_csum_scores	    All	    0.026948
### 9	away_csum_scores	    All	    0.030347
### 10	home_csum_def_stops	    All	    0.028362
### 11	away_csum_def_stops	    All	    0.028139
### 12	home_csum_first_downs	All	    0.027913
### 13	away_csum_first_downs	All	    0.027763
### 14	long_td	                All	    0.033325
### 15	quick_score	            All	    0.026664
### 16	quick_stop	            All	    0.029971

# Momentum Gain Equation (M_gain): C * (Q' * HA' * S' * B) * Scale
## C = Change in Win Probability: Difference between actual win probability model calculations after each play. Used the least impactful (tied, 2nd quarter), same variables on winning probability and grabbed the mean values to extract the plays and situations true win probability change.
## Multipliers: Multipliers used as game context weights. Used to influence momentum differently based on state of the game
### Q' = Quarter Multiplier, HA' = Home/Away Multiplier, S' = Score Multiplier, B = Boost Multiplier (1.0 if no boosted special case). These numbers were chosen using the max differences in win probability for the same plays when comparing the different situations for each multipliers (Different quarters, Home vs Away, Score Difference, Combined Special Situations i.e Home in 4th quarter vs away in 4th quarter much different changes in win probability)
## Scale: = 1,000. Scaling mechanism to turn Momentum Scores into larger, more interpretable number to work with.
## EX: 0.038602 * (1.1 * 1.1 * 1.5 * 1.2 * 1.05) * 1000 = 88.27

# Momentum Decay Equation (opposite_M_decay): (opposite_M_prev + M_gain) * d)
## d = decay factor, this only happens to the opposite teams momentum after the current team has momentum gain. Note, we have the loss coincide with the team that is losing momentum's momentum score.
## EX: 0.038602 * (1.1 * 1.1 * 1.5 * 1.2 * 1.05) * 1000 = 88.27.... 500 + 88.27 = 588.27
## 88.27 * 0.5 = 44.135..... 600 - 44.135 = Momemntum loss

# Full Conditional Equation: M_total = M_prev + M_gain
# if Decay Factor Event triggered: opposite_M_total = opposite_M_prev - opposite_M_decay
## Home/Away both start at 500. Momentum for both teams are independent of one another, so one team gaining momentum doesnt always mean the other team loses momentum. Certain key events do slow/stop momentum so this is where the momentum decay and decay factors will come in

# Model: going to use some type of neural network most likely: RNN or LSTM

# Momentum Shift: Perception/Theory so our definition is going to be a certain threshold range (Will need to test/optimize this range, planning on both starting at 1500, each defined category event gains from ~85 to ~350, while the team that is negatively affected can be from ~50 to ~200). Plan to validate future model by checking if the respective team triggering a momentum shift: 
## 1) goes on a scoring streak 
## 2) goes on a stopping streak + scores atleast once 
## 3) takes the lead