In [79]:
import pandas as pd
import numpy as np
from IPython.display import display

# LOAD DATA ------------------------------------------------------------------------------------------------
# Read the merged pre-metrics CSV into the working frame `df`; every column derived below is added to it.
PRE_METRICS_CSV = '../data/complete_merge_preMetrics.csv'
df = pd.read_csv(PRE_METRICS_CSV)
#-----------------------------------------------------------------------------------------------------------



# possessionTeam-related VARIABLE MANIPULATION -------------------------------------------------------------
# True on plays where the team with the ball (possessionTeam) is the home team.
df['isHome'] = df['possessionTeam'] == df['homeTeamAbbr']

# Each "possession team" column is the home-side or visitor-side source column, chosen per play
# by isHome. np.where does the selection column-wise in one pass; a row-wise df.apply(axis=1)
# would repeat the team comparison for every row in Python and, on a zero-row frame, would hand
# back a DataFrame that cannot be assigned to a single column.

# Pre-snap win probability of the team with the ball.
df['possessionTeamWinProbability'] = np.where(
    df['isHome'],
    df['preSnapHomeTeamWinProbability'],
    df['preSnapVisitorTeamWinProbability'],
)

# Win probability added by the play, from the possession team's side.
df['possessionTeamImpact'] = np.where(
    df['isHome'],
    df['homeTeamWinProbabilityAdded'],
    df['visitorTeamWinProbabilityAdded'],
)

# Win probability added by the play, from the opponent's side (the other column of the pair).
df['opponentTeamImpact'] = np.where(
    df['isHome'],
    df['visitorTeamWinProbabilityAdded'],
    df['homeTeamWinProbabilityAdded'],
)
#-----------------------------------------------------------------------------------------------------------



# FIELD POSITION -------------------------------------------------------------------------------------------
# Bucket every play's yard line into a named field-position category, judged from the offense's
# side. np.select keeps the FIRST rule that matches, so within each half of the field the rules
# run from the tightest yard-line cutoff to a catch-all.
# NOTE: a missing yardlineSide compares as "not equal" to possessionTeam, so such rows are
# handled by the opponent-side rules.
on_opponent_side = df['yardlineSide'] != df['possessionTeam']
on_own_side = df['yardlineSide'] == df['possessionTeam']
yardline = df['yardlineNumber']

field_position_rules = [
    # --- opponent's half -------------------------------------------------------------------
    (on_opponent_side & (yardline == 1), 'Heaven'),           # opponent's 1-yard line
    (on_opponent_side & (yardline <= 10), 'GoldZone'),        # opponent's 2-10
    (on_opponent_side & (yardline <= 20), 'RedZone'),         # opponent's 11-20
    (on_opponent_side & (yardline <= 45), 'FieldGoalRange'),  # opponent's 21-45
    (on_opponent_side, 'Decent'),                             # rest of enemy territory (beyond the 45)
    # --- own half --------------------------------------------------------------------------
    (on_own_side & (yardline == 1), 'Hell'),                  # own 1-yard line
    (on_own_side & (yardline <= 10), 'Horrible'),             # own 2-10
    (on_own_side & (yardline <= 25), 'Bad'),                  # own 11-25
    (on_own_side & (yardline <= 45), 'Neutral'),              # own 26-45
    (on_own_side, 'Decent'),                                  # rest of own half (beyond the 45)
]

# np.select wants the masks and the labels as two parallel lists.
conditions = [rule for rule, _ in field_position_rules]
choices = [label for _, label in field_position_rules]

# Label each play; 'Neutral' is the fallback when no rule matches.
df['field_position'] = np.select(conditions, choices, default='Neutral')

# Updated Field position weight logic based on refined categories
def field_position_weight(position, expectedPoints):
    """Return the multiplier attached to a field-position category.

    Args:
        position: Category label ('Heaven', 'GoldZone', 'RedZone', 'FieldGoalRange',
            'Decent', 'Neutral', 'Bad', 'Horrible' or 'Hell').
        expectedPoints: Expected-points value of the play; it scales the four
            scoring-range categories and is ignored by the others.

    Returns:
        The weight for `position`, or 1.0 (the 'Neutral' weight) for an unknown label.
    """
    # Scoring-range weights: a fixed base plus a share of expected points.
    heaven = 2.5 + expectedPoints * 0.3
    gold_zone = 2.2 + expectedPoints * 0.2
    red_zone = 2.0 + expectedPoints * 0.2
    field_goal_range = 1.7 + expectedPoints * 0.1

    weight_by_position = {
        # flat weights, worst to best
        'Hell': 0.2,        # own 1-yard line
        'Horrible': 0.5,    # own 2-10
        'Bad': 0.8,         # own 11-25
        'Neutral': 1.0,     # baseline
        'Decent': 1.2,      # a little better than baseline
        # expected-points-scaled weights
        'FieldGoalRange': field_goal_range,
        'RedZone': red_zone,
        'GoldZone': gold_zone,
        'Heaven': heaven,   # opponent's 1-yard line, the largest base weight
    }

    if position in weight_by_position:
        return weight_by_position[position]
    return 1.0  # unclassified labels are treated like 'Neutral'

# Weight every play by its field-position category and expected points. Only the two columns
# field_position_weight needs are walked, so no full row object is built per play (as
# df.apply(axis=1) would do), and a zero-row frame simply yields an empty column.
df['field_position_weight'] = [
    field_position_weight(position, expected_points)
    for position, expected_points in zip(df['field_position'], df['expectedPoints'])
]
#-----------------------------------------------------------------------------------------------------------



# GAME SCORE SCENARIO --------------------------------------------------------------------------------------
# Pre-snap score margin from the possession team's point of view: positive when the team with
# the ball is ahead, negative when it trails.
home_minus_visitor = df['preSnapHomeScore'] - df['preSnapVisitorScore']
visitor_minus_home = df['preSnapVisitorScore'] - df['preSnapHomeScore']
df['scoreDifferential'] = np.where(df['isHome'], home_minus_visitor, visitor_minus_home)


def _clock_parts_to_seconds(parts):
    """Turn the split pieces of a clock string, [minutes, seconds, ...], into total seconds."""
    return int(parts[0]) * 60 + int(parts[1])


# Clock value in seconds. Assumes gameClock is text in "MM:SS" form (e.g. '02:30' -> 150).
df['gameClockSeconds'] = df['gameClock'].str.split(':').apply(_clock_parts_to_seconds)

# Game Quarter Weight (reuse or expand the existing logic if already defined)
def game_quarter_weight(quarter, gameClock):
    """Return the importance multiplier for a play's quarter and clock.

    Args:
        quarter: Quarter number (5 is treated as overtime).
        gameClock: Clock value in seconds; only consulted in quarters 2 and 4.

    Returns:
        Q1 -> 1.0, Q3 -> 1.5, overtime (5) -> 2.5.
        Q2 -> 1.4 at or under 120 seconds, otherwise 1.2.
        Q4 -> 2.4 at or under 120 seconds, otherwise 2.0.
        Any other quarter value -> 1.0.
    """
    # Quarters that end a half get a bump at the two-minute mark:
    # (weight at <= 120 seconds, weight with more time on the clock)
    two_minute_weights = {2: (1.4, 1.2), 4: (2.4, 2.0)}.get(quarter)
    if two_minute_weights is not None:
        inside_two_minutes, earlier = two_minute_weights
        return inside_two_minutes if gameClock <= 120 else earlier

    # Every other quarter has a single flat weight; unmapped quarters fall back to 1.0.
    flat_weights = {1: 1.0, 3: 1.5, 5: 2.5}
    return flat_weights.get(quarter, 1.0)

# Quarter/clock multiplier for every play. Only the two columns game_quarter_weight needs are
# walked, so no full row object is built per play (as df.apply(axis=1) would do), and a
# zero-row frame simply yields an empty column.
df['gameQuarterWeight'] = [
    game_quarter_weight(quarter, clock_seconds)
    for quarter, clock_seconds in zip(df['quarter'], df['gameClockSeconds'])
]

# Define Game Score Scenario Weight Logic
def calculate_game_score_scenario_weight(df):
    """Add 'baseWeight' and 'gameScoreScenarioWeight' columns to `df` and return it.

    Reads 'scoreDifferential' and 'possessionTeamWinProbability', plus either an existing
    'gameQuarterWeight' column or, when that is absent, 'quarter' and 'gameClockSeconds'
    (fed to game_quarter_weight).

    Args:
        df: Play-level DataFrame. It is modified in place.

    Returns:
        The same DataFrame object, with the two new columns.
    """
    # Reuse the per-play quarter weight when it has already been stored on the frame instead of
    # recomputing it row by row; otherwise derive it from the two columns it depends on.
    if 'gameQuarterWeight' in df.columns:
        quarter_weight = df['gameQuarterWeight']
    else:
        quarter_weight = pd.Series(
            [
                game_quarter_weight(quarter, clock_seconds)
                for quarter, clock_seconds in zip(df['quarter'], df['gameClockSeconds'])
            ],
            index=df.index,
            dtype=float,
        )

    margin = df['scoreDifferential']
    win_probability = df['possessionTeamWinProbability']

    # Shrinks the weight as the absolute margin grows, faster when the quarter weight is larger.
    # The 0.1 floor keeps lopsided situations from losing all of their weight.
    penalty_factor = np.maximum(1.0 - (np.abs(margin) / 40) * quarter_weight, 0.1)

    # Score bands, checked in order, each paired with its base weight below.
    # NOTE(review): margins of 4-6 and -6 to -4 match no band and therefore get the 1.0 default,
    # which is lower than the weight for a 1-3 point lead. Confirm this gap is intended.
    conditions = [
        margin > 8,               # ahead by more than 8
        margin.between(7, 8),     # ahead by 7-8
        margin.between(1, 3),     # ahead by 1-3
        margin == 0,              # tied
        margin.between(-3, -1),   # behind by 1-3
        margin.between(-8, -7),   # behind by 7-8
        margin < -8,              # behind by more than 8
    ]

    # Each band: a fixed base plus a share of the possession team's win probability.
    base_weights = [
        2.0 + win_probability * 0.5,
        1.8 + win_probability * 0.4,
        1.5 + win_probability * 0.3,
        1.2 + win_probability * 0.2,
        1.0 + win_probability * 0.1,
        0.8 + win_probability * 0.075,
        0.6 + win_probability * 0.05,
    ]

    df['baseWeight'] = np.select(conditions, base_weights, default=1.0)

    # Final scenario weight: band weight scaled down by the margin/quarter penalty.
    df['gameScoreScenarioWeight'] = df['baseWeight'] * penalty_factor

    return df

# Run the scenario weighting on the working frame and keep the frame it hands back.
df = df.pipe(calculate_game_score_scenario_weight)
#-----------------------------------------------------------------------------------------------------------



# PLAY SUCCESS WEIGHT --------------------------------------------------------------------------------------
# Define situational success based on down and yards gained
def situational_success(row):
    """Decide whether a play met the yardage target for its down.

    Args:
        row: Mapping-like play record with 'down' and 'yardsGained', plus 'yardsToGo'
            for downs 2-4.

    Returns:
        True when the gain reaches the target for the down:
        1st down needs 4 yards; 2nd down needs the larger of half the distance and 3 yards;
        3rd and 4th down need the full distance. Any other down value gives False.
    """
    down = row['down']
    if down not in (1, 2, 3, 4):
        return False

    gained = row['yardsGained']
    if down == 1:
        return gained >= 4

    to_go = row['yardsToGo']
    if down == 2:
        target = max(to_go / 2, 3)
    else:
        target = to_go  # 3rd/4th down: the play has to convert
    return gained >= target

# Evaluate the per-down yardage target on every play and store the resulting flags.
situational_flags = df.apply(situational_success, axis=1)
df['situationalSuccess'] = situational_flags

# Updated weighted play success metric (removing base_weight)
def weighted_play_success(row):
    """Score a single play's outcome.

    The score is
        expectedPointsAdded * 2.0
        + situational boost (1.5 on situational success, else 0.5)
        + big-play weight * fumble penalty * interception penalty
        + touchdown bonus.

    Args:
        row: Mapping-like play record with 'situationalSuccess', 'yardsGained',
            'fumbleLost', 'passResult', 'playDescription' and 'expectedPointsAdded'.

    Returns:
        The play success weight as a number.
    """
    # Reward plays that met their down-and-distance target, discount those that did not.
    situational_boost = 1.5 if row['situationalSuccess'] else 0.5

    # Larger gains earn a larger big-play weight.
    yards_gained = row['yardsGained']
    if yards_gained >= 50:
        big_play_weight = 2.0
    elif yards_gained >= 25:
        big_play_weight = 1.5
    elif yards_gained >= 10:
        big_play_weight = 1.2
    else:
        big_play_weight = 1.0

    # Turnovers halve the big-play component (both together quarter it).
    fumble_lost = row['fumbleLost'] == 1
    intercepted = row['passResult'] == 'IN'
    fumble_penalty = 0.5 if fumble_lost else 1.0
    interception_penalty = 0.5 if intercepted else 1.0

    # Touchdown bonus:
    #  * the description may be missing (NaN), so check it is text before searching it;
    #  * a TOUCHDOWN on a play where the offense lost the ball (lost fumble / interception)
    #    is not credited to the offense;
    #  * detection relies on the description only. Matching yardsGained == yardsToGo while
    #    leading by 6 does not identify a touchdown and would reward ordinary conversions.
    # NOTE(review): descriptions of touchdowns that were later reversed may still contain
    # the word TOUCHDOWN; verify against the data if that matters.
    description = row['playDescription']
    touchdown_in_description = isinstance(description, str) and "TOUCHDOWN" in description
    offense_kept_ball = not (fumble_lost or intercepted)
    touchdown_bonus = 3 if touchdown_in_description and offense_kept_ball else 0

    final_success_weight = (
        (row['expectedPointsAdded'] * 2.0)
        + situational_boost
        + big_play_weight * fumble_penalty * interception_penalty
        + touchdown_bonus
    )

    return final_success_weight

# Score every play's outcome and store the result as the play-success component.
play_success_scores = df.apply(weighted_play_success, axis=1)
df['playSuccessWeight'] = play_success_scores
#-----------------------------------------------------------------------------------------------------------



# FORMAL PLAY SCORE ----------------------------------------------------------------------------------------
# FPS is the product of the three component weights:
# field position x game/score scenario x play success.
df['FPS'] = (
    df['field_position_weight']
    .mul(df['gameScoreScenarioWeight'])
    .mul(df['playSuccessWeight'])
)
#-----------------------------------------------------------------------------------------------------------



# Columns shown when previewing results, grouped by theme (the order is the display order).
selected = [
    # play identity
    'gameId', 'playId', 'playDescription', 'keyPlayer',
    # teams and pre-snap score
    'possessionTeam', 'homeTeamAbbr', 'visitorTeamAbbr',
    'preSnapHomeScore', 'preSnapVisitorScore',
    # pre-snap movement
    'motion_players', 'shift_players',
    # field-position component
    'field_position', 'field_position_weight',
    # clock
    'quarter', 'gameClock', 'gameClockSeconds', 'gameQuarterWeight',
    # game/score scenario component
    'down', 'scoreDifferential', 'gameScoreScenarioWeight',
    # play-success component
    'yardsToGo', 'yardsGained', 'situationalSuccess', 'playSuccessWeight',
    # final score
    'FPS',
]

# Preview the last five plays, then list every column now present on the frame.
display(df.loc[:, selected].tail(5))
print(df.columns.tolist())

Unnamed: 0,gameId,playId,playDescription,keyPlayer,possessionTeam,homeTeamAbbr,visitorTeamAbbr,preSnapHomeScore,preSnapVisitorScore,motion_players,...,gameClockSeconds,gameQuarterWeight,down,scoreDifferential,gameScoreScenarioWeight,yardsToGo,yardsGained,situationalSuccess,playSuccessWeight,FPS
16119,2022110700,3658,(3:14) (Shotgun) K.Drake left tackle to BLT 46...,Kenyan Drake,BAL,NO,BAL,13,27,,...,194,2.0,3,14,0.749289,1,2,True,2.965891,2.222309
16120,2022110700,3686,(2:31) K.Drake right guard to NO 49 for 5 yard...,Kenyan Drake,BAL,NO,BAL,13,27,,...,151,2.0,1,14,0.749523,10,5,True,2.187565,1.967557
16121,2022110700,3707,(2:00) (Shotgun) K.Drake right end to NO 45 fo...,Kenyan Drake,BAL,NO,BAL,13,27,,...,120,2.4,2,14,0.399783,5,4,True,2.892652,1.387719
16122,2022110700,3740,(1:15) (Shotgun) K.Drake left end to BLT 48 fo...,Kenyan Drake,BAL,NO,BAL,13,27,,...,75,2.4,3,14,0.399752,1,-7,False,-0.467378,-0.33821
16123,2022110700,3787,(:23) (Shotgun) D.Washington right guard to NO...,Dwayne Washington,NO,NO,BAL,13,27,,...,23,2.4,1,-14,0.096059,10,5,True,2.35554,0.181017


['gameId', 'playId', 'week', 'quarter', 'gameClock', 'down', 'yardsToGo', 'yardsGained', 'playDescription', 'possessionTeam', 'homeTeamAbbr', 'visitorTeamAbbr', 'preSnapHomeScore', 'preSnapVisitorScore', 'keyPlayer', 'motion_players', 'shift_players', 'inMotionAtBallSnap', 'motionSinceLineset', 'shiftSinceLineset', 'absoluteYardlineNumber', 'yardlineSide', 'passResult', 'isDropback', 'yardlineNumber', 'expectedPoints', 'expectedPointsAdded', 'playNullifiedByPenalty', 'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability', 'homeTeamWinProbabilityAdded', 'visitorTeamWinProbabilityAdded', 'pff_passCoverage', 'pff_manZone', 'offenseFormation', 'homeFinalScore', 'visitorFinalScore', 'fumbleLost', 'isHome', 'possessionTeamWinProbability', 'possessionTeamImpact', 'opponentTeamImpact', 'field_position', 'field_position_weight', 'scoreDifferential', 'gameClockSeconds', 'gameQuarterWeight', 'baseWeight', 'gameScoreScenarioWeight', 'situationalSuccess', 'playSuccessWeight', 'FPS']


In [80]:
# Rank plays by the 'FPS' column, highest score first.
df_sorted = df.sort_values('FPS', ascending=False)

# Save the full ranked table (all columns, without the index) to FINAL_FPS.csv in the current
# working directory.
df_sorted.to_csv('FINAL_FPS.csv', index=False)

# Show the top-ranked plays as a quick check of the ordering.
display(df_sorted.loc[:, selected].head())

Unnamed: 0,gameId,playId,playDescription,keyPlayer,possessionTeam,homeTeamAbbr,visitorTeamAbbr,preSnapHomeScore,preSnapVisitorScore,motion_players,...,gameClockSeconds,gameQuarterWeight,down,scoreDifferential,gameScoreScenarioWeight,yardsToGo,yardsGained,situationalSuccess,playSuccessWeight,FPS
1414,2022091110,1338,(7:37) (Shotgun) P.Mahomes pass short right to...,Clyde Edwards-Helaire,KC,ARI,KC,7,14,,...,457,1.2,4,7,1.69515,2,4,True,13.678092,64.509029
3858,2022092200,3242,(9:33) M.Dunn and H.Froholdt reported in as el...,Nick Chubb,CLE,CLE,PIT,16,14,,...,573,2.0,4,2,1.565084,1,1,True,10.853609,64.498199
4920,2022092508,1722,(3:00) (Shotgun) J.Hurts pass short right to A...,A.J. Brown,PHI,WAS,PHI,0,10,,...,180,1.2,3,10,1.719248,8,9,True,12.234783,61.558281
11524,2022102302,604,(4:55) H.Adeniji reported in as eligible. J.M...,Joe Mixon,CIN,CIN,ATL,7,0,,...,295,1.0,3,7,1.776132,1,1,True,7.838459,59.158383
11345,2022102300,2118,"(7:43) G.Edwards left tackle for 1 yard, TOUCH...",Gus Edwards,BAL,BAL,CLE,13,10,['Nick Boyle'],...,463,1.5,4,3,1.536788,1,1,True,9.666281,59.049585
