In [57]:
import pandas as pd
import numpy as np
import ast
from IPython.display import display

# LOAD DATA ------------------------------------------------------------------------------------------------
df = pd.read_csv('FINAL_FPS.csv')
#-----------------------------------------------------------------------------------------------------------

# MOTION TYPE SUMMARY

# Build one boolean mask per raw tracking flag. Comparing explicitly against
# True/False means a missing value matches neither side of a flag.
moving_at_snap = df['inMotionAtBallSnap'] == True
set_at_snap = df['inMotionAtBallSnap'] == False
motion_since_lineset = df['motionSinceLineset'] == True
no_motion_since_lineset = df['motionSinceLineset'] == False
shift_since_lineset = df['shiftSinceLineset'] == True
no_shift_since_lineset = df['shiftSinceLineset'] == False

# Play-type categories:
#   motionPlay   - a player is in motion at the snap
#   shiftPlay    - motion or shift since the line set, but nobody moving at the snap
#   noMotionPlay - none of the three flags is set
df['motionPlay'] = moving_at_snap
df['shiftPlay'] = (motion_since_lineset | shift_since_lineset) & set_at_snap
df['noMotionPlay'] = set_at_snap & no_motion_since_lineset & no_shift_since_lineset

# Define a function to calculate the summary metrics
def calculate_summary(df, group_name=None):
    """Summarise yards gained, success rate, FPS and play count.

    Parameters
    ----------
    df : pandas.DataFrame
        Play-level rows with 'yardsGained', 'situationalSuccess' and 'FPS' columns.
    group_name : str or list of str, optional
        Column(s) to group by. When omitted (or falsy) a single overall row is returned.

    Returns
    -------
    pandas.DataFrame
        Columns 'avg_yards_gained', 'success_rate', 'avg_fps' and 'total_plays',
        preceded by the grouping column(s) when `group_name` is given.

    Both branches use the same metric definitions so an overall row can be
    compared directly with grouped rows: success rate is the share of rows whose
    'situationalSuccess' equals True (missing values count as not successful),
    and total plays is the number of rows with a non-null 'FPS'.
    """
    def _success_rate(flags):
        # Fraction of True values; NaN compares unequal to True, so it counts as a failure.
        return (flags == True).mean()

    if group_name:  # grouped summary: one row per group
        return df.groupby(group_name).agg(
            avg_yards_gained=('yardsGained', 'mean'),
            success_rate=('situationalSuccess', _success_rate),
            avg_fps=('FPS', 'mean'),
            total_plays=('FPS', 'count'),
        ).reset_index()

    # Overall summary: a single row built with the same definitions as above.
    return pd.DataFrame({
        'avg_yards_gained': [df['yardsGained'].mean()],
        'success_rate': [_success_rate(df['situationalSuccess'])],
        'avg_fps': [df['FPS'].mean()],
        'total_plays': [df['FPS'].count()],
    })

# League-wide baseline: every play, no grouping
league_avg_summary = calculate_summary(df)

# One summary per play type. Each subset is grouped by its own (constant) flag
# column, which yields a single row; the flag column is dropped again below.
motion_plays_summary = calculate_summary(df.loc[df['motionPlay']], 'motionPlay')
# Shift plays explicitly exclude plays with motion at the snap
shift_plays_summary = calculate_summary(df.loc[df['shiftPlay'] & ~df['motionPlay']], 'shiftPlay')
no_motion_plays_summary = calculate_summary(df.loc[df['noMotionPlay']], 'noMotionPlay')

# Label each summary and stack them in display order
labelled_summaries = {
    'League Average': league_avg_summary,
    'Motion Plays': motion_plays_summary,
    'Shift Plays': shift_plays_summary,
    'No Motion Plays': no_motion_plays_summary,
}
final_summary_df = pd.concat(
    [summary.assign(play_type=label) for label, summary in labelled_summaries.items()],
    ignore_index=True,
)

# Keep only the reporting columns, label first
final_summary_df = final_summary_df.loc[
    :, ['play_type', 'avg_yards_gained', 'success_rate', 'avg_fps', 'total_plays']
]

# Display the summary
display(final_summary_df)


Unnamed: 0,play_type,avg_yards_gained,success_rate,avg_fps,total_plays
0,League Average,5.460618,0.476433,3.10612,16124
1,Motion Plays,5.394866,0.500446,3.566405,4480
2,Shift Plays,5.536471,0.471782,3.004767,4483
3,No Motion Plays,5.454266,0.464321,2.881611,7161


In [58]:
#final_summary_df.to_csv('../data/Grouped Analysis/metric_summary.csv', index=False)

In [59]:
# PLAYER LEVEL ANALYSIS

players = pd.read_csv('../data/players.csv')

# Join positions on exactly one row per displayName. If players.csv lists a name
# more than once, merging on the raw table emits one summary row per match, which
# duplicates that player's row (and play count) in the leaderboards.
# NOTE(review): same-name players cannot be told apart by name alone, so the first
# listed position is used for all of them -- join on a player id if one is available.
player_positions = players[['displayName', 'position']].drop_duplicates(subset='displayName')


def _to_player_list(value):
    """Return `value` as a list: parse stringified lists, map anything non-list to []."""
    if isinstance(value, str):
        value = ast.literal_eval(value)
    return value if isinstance(value, list) else []


def summarize_players(plays, player_col, output_col, positions, min_plays=0):
    """Build a per-player leaderboard sorted by average FPS (descending).

    Parameters
    ----------
    plays : pandas.DataFrame
        One row per (play, player) with 'yardsGained', 'playSuccessWeight', 'FPS'
        and 'possessionTeam' columns.
    player_col : str
        Column in `plays` holding the player name to group by.
    output_col : str
        Name given to the player column in the result.
    positions : pandas.DataFrame
        Columns 'displayName' and 'position', one row per displayName.
    min_plays : int, optional
        Drop players with fewer than this many plays. The default of 0 keeps everyone.

    Returns
    -------
    pandas.DataFrame
        Columns: `output_col`, 'Avg Yards', 'Success Rate', 'Avg FPS', 'Play Count',
        'position', 'possessionTeam'.
    """
    by_player = plays.groupby(player_col)
    summary = pd.DataFrame({
        'Avg Yards': by_player['yardsGained'].mean(),
        # Success here means a positive 'playSuccessWeight'.
        # NOTE(review): the play-type summaries use 'situationalSuccess' instead --
        # confirm the two definitions are meant to differ.
        'Success Rate': by_player['playSuccessWeight'].apply(lambda weights: (weights > 0).mean()),
        'Avg FPS': by_player['FPS'].mean(),
        'Play Count': by_player.size(),
    }).reset_index()  # move the player name from the index into a column

    if min_plays:
        summary = summary[summary['Play Count'] >= min_plays]

    summary = summary.sort_values(by='Avg FPS', ascending=False)

    # Attach position (from players.csv) and team (first possessionTeam seen for the player)
    summary = summary.rename(columns={player_col: 'displayName'})
    summary = summary.merge(positions, on='displayName', how='left')
    teams = (
        by_player['possessionTeam'].first()
        .reset_index()
        .rename(columns={player_col: 'displayName'})
    )
    summary = summary.merge(teams, on='displayName', how='left')

    return summary.rename(columns={'displayName': output_col})


# No minimum play count is applied, so the top of each table can be dominated by
# players with only one or two plays; pass min_plays=... to summarize_players to filter.

# KEY PLAYER Analysis -------------------------------------------------------------------------------------
key_player_metrics = summarize_players(df, 'keyPlayer', 'keyPlayer', player_positions)
# Key-player table lists the play count first
key_player_metrics = key_player_metrics[
    ['keyPlayer', 'Play Count', 'Avg Yards', 'Success Rate', 'Avg FPS', 'position', 'possessionTeam']
]

# Display the metrics for Key Players
display(key_player_metrics.head(5))
#-----------------------------------------------------------------------------------------------------------


# MOTION PLAYER Analysis -----------------------------------------------------------------------------------
# Ensure 'motion_players' is a list, then explode so each listed player gets its own row
df['motion_players'] = df['motion_players'].apply(_to_player_list)
motion_players_exploded = df.explode('motion_players')

motion_player_metrics = summarize_players(
    motion_players_exploded, 'motion_players', 'motionPlayer', player_positions
)

# Display the metrics for Motion Players
display(motion_player_metrics.head(5))
#-----------------------------------------------------------------------------------------------------------


# SHIFT PLAYER Analysis ------------------------------------------------------------------------------------
# Ensure 'shift_players' is a list, then explode so each listed player gets its own row
df['shift_players'] = df['shift_players'].apply(_to_player_list)
shift_players_exploded = df.explode('shift_players')

shift_player_metrics = summarize_players(
    shift_players_exploded, 'shift_players', 'shiftPlayer', player_positions
)

# Display the metrics for Shift Players
display(shift_player_metrics.head(5))
#-----------------------------------------------------------------------------------------------------------

Unnamed: 0,keyPlayer,Play Count,Avg Yards,Success Rate,Avg FPS,position,possessionTeam
0,Shane Zylstra,1,1.0,1.0,36.610558,TE,DET
1,Marcedes Lewis,2,10.5,1.0,27.677767,TE,GB
2,Teagan Quitoriano,1,2.0,1.0,27.638452,TE,HOU
3,James Mitchell,2,4.0,1.0,26.198713,TE,DET
4,MyCole Pruitt,2,4.0,1.0,23.694397,TE,ATL


Unnamed: 0,motionPlayer,Avg Yards,Success Rate,Avg FPS,Play Count,position,possessionTeam
0,Jesper Horsted,1.0,1.0,43.499679,1,TE,LV
1,Kyle Rudolph,1.0,1.0,31.835236,1,TE,TB
2,Nick Boyle,2.5,1.0,30.293772,2,TE,BAL
3,Derrick Henry,17.0,1.0,21.46742,2,RB,TEN
4,Devin Asiasi,5.5,1.0,21.160007,2,TE,CIN


Unnamed: 0,shiftPlayer,Avg Yards,Success Rate,Avg FPS,Play Count,position,possessionTeam
0,Denzel Mims,4.0,0.5,22.493469,2,WR,NYJ
1,N'Keal Harry,25.0,1.0,21.710638,1,WR,CHI
2,Kenneth Gainwell,12.571429,0.857143,16.952712,7,RB,PHI
3,Tylan Wallace,18.0,1.0,15.843393,1,WR,BAL
4,Zack Moss,2.5,1.0,14.80725,4,RB,BUF


In [60]:
# key_player_metrics.to_csv('../data/Grouped Analysis/key_player_summary.csv', index=False)
# motion_player_metrics.to_csv('../data/Grouped Analysis/motion_player_summary.csv', index=False)
# shift_player_metrics.to_csv('../data/Grouped Analysis/shift_player_summary.csv', index=False)

In [78]:
# TEAM LEVEL ANALYSIS

# Function to calculate metrics for each team
def _team_play_metrics(plays, team_column, suffix=''):
    """Aggregate yards, success rate, FPS and play count per team, with `suffix` appended to each metric name."""
    return plays.groupby(team_column).agg(**{
        f'avg_yards_gained{suffix}': ('yardsGained', 'mean'),
        # Share of rows whose 'situationalSuccess' equals True
        f'success_rate{suffix}': ('situationalSuccess', lambda flags: (flags == True).mean()),
        f'avg_fps{suffix}': ('FPS', 'mean'),
        # Play count is the number of rows with a non-null FPS
        f'total_plays{suffix}': ('FPS', 'count'),
    }).reset_index()


def calculate_team_summary(df, team_column):
    """Build a per-team table of overall, motion, shift, no-motion and combined metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Play-level rows with 'yardsGained', 'situationalSuccess', 'FPS' and the
        boolean flag columns 'motionPlay', 'shiftPlay' and 'noMotionPlay'.
    team_column : str
        Column identifying the team to group by.

    Returns
    -------
    pandas.DataFrame
        One row per team. Columns are the four base metrics, the same four with
        '_motion', '_shift' and '_no_motion' suffixes, then
        'combined_motion_total_plays' and the play-weighted
        '<metric>_combinedMotion' averages over motion + shift plays.
        Teams with no plays in a category get 0 for that category's metrics.
    """
    # Overall team-level summary, then left-join one summary per play type
    team_df = _team_play_metrics(df, team_column)
    for flag_column, suffix in [
        ('motionPlay', '_motion'),
        ('shiftPlay', '_shift'),
        ('noMotionPlay', '_no_motion'),
    ]:
        category_summary = _team_play_metrics(df[df[flag_column]], team_column, suffix)
        team_df = team_df.merge(category_summary, on=team_column, how='left')

    # Teams missing from a category come back as NaN after the left joins
    team_df = team_df.fillna(0)

    # Combined motion + shift metrics: average of the two categories weighted by play count
    motion_plays = team_df['total_plays_motion']
    shift_plays = team_df['total_plays_shift']
    team_df['combined_motion_total_plays'] = motion_plays + shift_plays

    for metric in ('avg_yards_gained', 'success_rate', 'avg_fps'):
        weighted_total = (
            team_df[f'{metric}_motion'] * motion_plays +
            team_df[f'{metric}_shift'] * shift_plays
        )
        # 0/0 (no motion or shift plays at all) yields NaN, reported as 0
        team_df[f'{metric}_combinedMotion'] = (
            weighted_total / team_df['combined_motion_total_plays']
        ).fillna(0)

    return team_df

# Calculate team-level summary
team_summary_df = calculate_team_summary(df, 'possessionTeam')  # Change 'possessionTeam' to 'homeTeamAbbr' or 'visitorTeamAbbr' if needed

# Column order for display: team, then the four metrics for each play-type block
# (overall, motion, shift, no motion), then the combined motion + shift metrics
team_metric_names = ['avg_yards_gained', 'success_rate', 'avg_fps', 'total_plays']
team_metric_suffixes = ['', '_motion', '_shift', '_no_motion']
team_column_order = (
    ['possessionTeam']
    + [f'{metric}{suffix}' for suffix in team_metric_suffixes for metric in team_metric_names]
    + ['combined_motion_total_plays', 'avg_yards_gained_combinedMotion',
       'success_rate_combinedMotion', 'avg_fps_combinedMotion']
)
team_summary_df = team_summary_df.loc[:, team_column_order]

# Display the first few teams
display(team_summary_df.head(3))

Unnamed: 0,possessionTeam,avg_yards_gained,success_rate,avg_fps,total_plays,avg_yards_gained_motion,success_rate_motion,avg_fps_motion,total_plays_motion,avg_yards_gained_shift,...,avg_fps_shift,total_plays_shift,avg_yards_gained_no_motion,success_rate_no_motion,avg_fps_no_motion,total_plays_no_motion,combined_motion_total_plays,avg_yards_gained_combinedMotion,success_rate_combinedMotion,avg_fps_combinedMotion
0,ARI,4.94849,0.493783,1.936362,563,4.901786,0.473214,2.018091,112,4.0,...,1.369803,130,5.34891,0.523364,2.137293,321,242,4.417355,0.454545,1.669837
1,ATL,5.645545,0.516832,3.422615,505,4.99359,0.544872,3.786414,156,5.572115,...,3.265497,208,6.475177,0.48227,3.251891,141,364,5.324176,0.53022,3.488747
2,BAL,5.722986,0.518664,4.860597,509,6.815217,0.548913,5.276171,184,5.017647,...,4.895207,170,5.2,0.483871,4.329313,155,354,5.951977,0.533898,5.093222


In [79]:
#team_summary_df.to_csv('../data/Grouped Analysis/team_summary.csv', index=False)

In [63]:
# MAN VS ZONE PLAY SUMMARY ANALYSIS

# Function to calculate average yards, play count, success rate, and avg FPS by motion type for a given coverage
def calculate_metrics_by_coverage(df):
    """Summarise plays for every (coverage type, motion type) combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Play-level rows with 'pff_manZone', 'yardsGained', 'situationalSuccess',
        'FPS' and the boolean flag columns 'noMotionPlay', 'motionPlay', 'shiftPlay'.

    Returns
    -------
    pandas.DataFrame
        One row per coverage type and motion type with columns 'Coverage Type',
        'Motion Type', 'Avg Yards', 'Play Count', 'Success Rate' and 'Avg FPS'.
        Combinations with no plays have a play count of 0 and NaN averages.
        Plays with a missing coverage type are left out.
    """
    columns = ['Coverage Type', 'Motion Type', 'Avg Yards', 'Play Count', 'Success Rate', 'Avg FPS']
    motion_flags = [
        ('NoMotion', 'noMotionPlay'),
        ('Motion', 'motionPlay'),
        ('Shift', 'shiftPlay'),
    ]
    results = []

    # Drop missing coverage labels first: NaN never compares equal to itself, so
    # iterating over it would only add empty rows with a NaN coverage type.
    for coverage in df['pff_manZone'].dropna().unique():
        coverage_data = df[df['pff_manZone'] == coverage]

        for motion_type, flag_column in motion_flags:
            motion_data = coverage_data[coverage_data[flag_column]]

            results.append({
                'Coverage Type': coverage,
                'Motion Type': motion_type,
                'Avg Yards': motion_data['yardsGained'].mean(),
                'Play Count': motion_data.shape[0],
                # Mean of the success flag; missing flags are skipped by mean()
                'Success Rate': motion_data['situationalSuccess'].mean(),
                'Avg FPS': motion_data['FPS'].mean(),
            })

    # Passing the column list keeps the schema stable even when there are no rows
    return pd.DataFrame(results, columns=columns)

# Long-format summary: one row per (coverage type, motion type)
summary_df = calculate_metrics_by_coverage(df)

# Reshape to one row per coverage type with a column per metric/motion-type pair
coverage_metric_names = ['Avg Yards', 'Play Count', 'Success Rate', 'Avg FPS']
pivot_df = summary_df.pivot(
    index='Coverage Type',
    columns='Motion Type',
    values=coverage_metric_names,
)

# Collapse the two-level (metric, motion type) header into 'metric_motionType' names
pivot_df.columns = [f'{metric}_{motion_type}'.strip() for metric, motion_type in pivot_df.columns]

# Bring 'Coverage Type' back as a column, then discard rows with any missing value
pivot_df = pivot_df.reset_index().dropna()

# Display the summary
display(pivot_df)


Unnamed: 0,Coverage Type,Avg Yards_Motion,Avg Yards_NoMotion,Avg Yards_Shift,Play Count_Motion,Play Count_NoMotion,Play Count_Shift,Success Rate_Motion,Success Rate_NoMotion,Success Rate_Shift,Avg FPS_Motion,Avg FPS_NoMotion,Avg FPS_Shift
1,Man,5.302778,5.471955,5.469231,1080.0,1765.0,1300.0,0.490741,0.445892,0.443846,4.560455,3.535099,3.450865
2,Other,2.81203,3.362667,2.067797,266.0,375.0,177.0,0.37218,0.36,0.316384,9.556496,6.415722,6.999222
3,Zone,5.653355,5.854012,5.774892,3130.0,4836.0,3003.0,0.515016,0.496691,0.493506,2.718397,2.4017,2.570675


In [64]:
#pivot_df.to_csv('../data/Grouped Analysis/coverageType_vs_motionType_summary.csv', index=False)

In [65]:
# MAN VS ZONE PLAYER ANALYSIS

key_player_summary = pd.read_csv('../data/Grouped Analysis/key_player_summary.csv')
motion_player_summary = pd.read_csv('../data/Grouped Analysis/motion_player_summary.csv')
shift_player_summary = pd.read_csv('../data/Grouped Analysis/shift_player_summary.csv')

# Keep only plays with a known coverage type. This is a filtered COPY under its own
# name: rebinding `df` would silently change the input of every other cell that is
# (re-)run afterwards, and assigning columns on an un-copied slice triggers
# SettingWithCopyWarning.
coverage_plays = df[df['pff_manZone'].notna()].copy()


def _parse_player_list(value):
    """Return `value` as a list: parse stringified lists, map anything non-list to []."""
    if isinstance(value, str):
        value = ast.literal_eval(value)
    return value if isinstance(value, list) else []


def summarize_players_by_coverage(plays, player_col, label, player_summary, summary_col):
    """Pivot per-player metrics into one row per player with a column per coverage type.

    Parameters
    ----------
    plays : pandas.DataFrame
        One row per (play, player) with 'pff_manZone', 'yardsGained',
        'situationalSuccess' and 'FPS' columns.
    player_col : str
        Column in `plays` holding the player name.
    label : str
        Name of the player column in the result (e.g. 'Key Player').
    player_summary : pandas.DataFrame
        Previously saved per-player summary with `summary_col`, 'position',
        'possessionTeam' and 'Play Count' columns.
    summary_col : str
        Player-name column in `player_summary`.

    Returns
    -------
    pandas.DataFrame
        `label`, then '<coverage>_Avg Yards', '<coverage>_Success Rate' and
        '<coverage>_Avg FPS' for each coverage type, then 'position', 'Team' and
        'Play Count'. Only players with both Man and Zone plays are kept.
        Raises KeyError if `plays` contains no 'Man' or no 'Zone' coverage.
    """
    by_player_coverage = plays.groupby([player_col, 'pff_manZone'])
    metrics = pd.DataFrame({
        'Avg Yards': by_player_coverage['yardsGained'].mean(),
        'Success Rate': by_player_coverage['situationalSuccess'].mean(),
        'Avg FPS': by_player_coverage['FPS'].mean(),
    }).reset_index()

    pivoted = metrics.pivot(
        index=player_col,
        columns='pff_manZone',
        values=['Avg Yards', 'Success Rate', 'Avg FPS'],
    )
    # Flatten the (metric, coverage) header straight into '<coverage>_<metric>' names,
    # e.g. ('Avg Yards', 'Man') -> 'Man_Avg Yards'; works for any coverage label.
    pivoted.columns = [f'{coverage}_{metric}' for metric, coverage in pivoted.columns]
    pivoted = pivoted.reset_index().rename(columns={player_col: label})

    # Position, team and play count from the saved summary. 'first' (not 'sum') for
    # the play count: a player listed on several summary rows carries the same total
    # on each of them, so summing would multiply the count.
    info = player_summary.groupby(summary_col).agg({
        'position': 'first',
        'possessionTeam': 'first',
        'Play Count': 'first',
    }).reset_index().rename(columns={summary_col: label, 'possessionTeam': 'Team'})

    pivoted = pivoted.merge(info, on=label, how='left')

    # Keep only players observed against both man and zone coverage
    return pivoted.dropna(subset=['Man_Avg Yards', 'Zone_Avg Yards'])


# Key Players ---------------------------------------------------------------------------------------
keyplayer_metrics_pivoted = summarize_players_by_coverage(
    coverage_plays, 'keyPlayer', 'Key Player', key_player_summary, 'keyPlayer'
)
display(keyplayer_metrics_pivoted.head(10))  # Display top 10 for brevity


# Motion Players ------------------------------------------------------------------------------------
# Ensure 'motion_players' is a list, then explode so each listed player gets its own row
coverage_plays['motion_players'] = coverage_plays['motion_players'].apply(_parse_player_list)
motion_player_metrics_pivoted = summarize_players_by_coverage(
    coverage_plays.explode('motion_players'),
    'motion_players', 'Motion Player', motion_player_summary, 'motionPlayer'
)
display(motion_player_metrics_pivoted.head(10))  # Display top 10 for brevity


# Shift Players -------------------------------------------------------------------------------------
# Ensure 'shift_players' is a list, then explode so each listed player gets its own row
coverage_plays['shift_players'] = coverage_plays['shift_players'].apply(_parse_player_list)
shift_player_metrics_pivoted = summarize_players_by_coverage(
    coverage_plays.explode('shift_players'),
    'shift_players', 'Shift Player', shift_player_summary, 'shiftPlayer'
)
display(shift_player_metrics_pivoted.head(10))  # Display top 10 for brevity


Unnamed: 0,Key Player,Man_Avg Yards,Other_Avg Yards,Zone_Avg Yards,Man_Success Rate,Other_Success Rate,Zone_Success Rate,Man_Avg FPS,Other_Avg FPS,Zone_Avg FPS,position,Team,Play Count
0,A.J. Brown,10.857143,3.0,10.09375,0.535714,0.2,0.5625,9.261472,14.090899,5.82823,WR,PHI,65
1,A.J. Dillon,3.725,1.8,4.2,0.375,0.0,0.56,1.364419,1.266298,2.518201,RB,GB,120
2,A.J. Green,0.75,0.0,4.090909,0.125,0.0,0.454545,-1.347537,-3.450712,0.492776,WR,ARI,42
3,Aaron Jones,5.644444,-2.0,5.88172,0.6,0.2,0.548387,3.687907,-0.449755,2.838098,RB,GB,143
4,Aaron Rodgers,-5.6,-12.0,-0.6875,0.2,0.0,0.3125,-4.847444,0.857199,-2.09231,QB,GB,31
6,Adam Thielen,3.736842,0.5,9.794118,0.315789,0.0,0.617647,3.185657,-0.708928,2.983865,WR,MIN,55
7,Adam Trautman,0.0,,11.75,0.0,,0.75,1.907902,,5.516086,TE,NO,9
9,Alec Ingold,0.0,1.0,3.909091,0.0,0.0,0.636364,1.42782,9.319384,2.476256,FB,MIA,13
10,Alec Pierce,12.166667,0.0,10.166667,0.666667,0.0,0.666667,4.087677,0.065202,3.249373,WR,IND,39
11,Alexander Mattison,2.0,2.8,4.4,0.666667,0.4,0.425,3.038143,11.12745,3.712804,RB,MIN,48


Unnamed: 0,Motion Player,Man_Avg Yards,Other_Avg Yards,Zone_Avg Yards,Man_Success Rate,Other_Success Rate,Zone_Success Rate,Man_Avg FPS,Other_Avg FPS,Zone_Avg FPS,position,Team,Play Count
0,A.J. Brown,9.857143,0.0,1.888889,0.714286,0.0,0.222222,8.312818,0.152195,0.599892,WR,PHI,17
1,A.J. Dillon,1.6,2.0,9.4,0.4,0.0,0.6,3.55819,6.061106,1.750623,RB,GB,16
2,A.J. Green,3.0,,3.0,1.0,,0.5,15.718433,,1.209642,WR,ARI,6
3,Aaron Jones,2.666667,,4.375,0.333333,,0.375,16.188286,,1.931626,RB,GB,27
5,Adam Thielen,1.571429,3.0,5.545455,0.428571,0.25,0.484848,6.214567,10.237105,3.77594,WR,MIN,44
6,Adam Trautman,1.0,2.0,4.5,0.0,0.0,0.5,0.453692,15.687824,0.889247,TE,NO,4
7,Albert Okwuegbunam,51.0,,6.0,1.0,,1.0,14.838525,,5.761281,TE,DEN,2
8,Alec Ingold,2.0,,5.933333,0.0,,0.555556,0.996295,,2.680166,FB,MIA,46
10,Allen Lazard,5.0,-15.0,5.285714,0.545455,0.0,0.428571,1.476636,-47.161554,4.580381,WR,GB,19
11,Allen Robinson,10.0,,12.5,1.0,,1.0,19.647127,,4.115819,WR,LA,3


Unnamed: 0,Shift Player,Man_Avg Yards,Other_Avg Yards,Zone_Avg Yards,Man_Success Rate,Other_Success Rate,Zone_Success Rate,Man_Avg FPS,Other_Avg FPS,Zone_Avg FPS,position,Team,Play Count
0,A.J. Brown,11.4,0.0,9.6,0.8,0.0,0.8,21.881314,2.434601,6.933426,WR,PHI,11
1,A.J. Dillon,3.25,4.0,6.0,0.5,1.0,0.666667,3.164799,43.959885,2.052279,RB,GB,8
3,Aaron Jones,5.666667,,5.705882,0.666667,,0.470588,7.955898,,1.937735,RB,GB,20
5,Adam Thielen,6.166667,,7.722222,0.333333,,0.5,2.783159,,5.58454,WR,MIN,24
6,Adam Trautman,2.0,,5.25,0.0,,0.5,0.958179,,2.751393,TE,NO,17
8,Alec Ingold,0.0,,5.416667,0.0,,0.5,1.42782,,2.945268,FB,MIA,13
9,Alec Pierce,1.0,,0.75,0.0,,0.0,0.600662,,-0.395709,WR,IND,5
10,Alexander Mattison,4.4,,3.0,0.8,,0.285714,24.714359,,-0.394384,RB,MIN,12
11,Allen Lazard,6.333333,2.0,8.0,0.555556,0.5,0.666667,7.752548,23.609165,5.13882,WR,GB,17
13,Alvin Kamara,3.0,0.0,3.666667,0.333333,0.0,0.333333,3.80699,-0.294148,1.499915,RB,NO,13


In [66]:
# keyplayer_metrics_pivoted.to_csv('../data/Grouped Analysis/coverageType_keyPlayer_summary.csv', index=False)
# motion_player_metrics_pivoted.to_csv('../data/Grouped Analysis/coverageType_motionPlayer_summary.csv', index=False)
# shift_player_metrics_pivoted.to_csv('../data/Grouped Analysis/coverageType_shiftPlayer_summary.csv', index=False)