# 49ers Play-Calling EDA (Shanahan Era)

This notebook explores the play-calling tendencies of Kyle Shanahan from 2017-2025, focusing specifically on **49ers offensive plays**.

In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import sys
import os

# Add src to path
sys.path.append(os.path.abspath(os.path.join('..')))

from src.features import load_data, preprocess_features

%matplotlib inline
sns.set_style('whitegrid')

ModuleNotFoundError: No module named 'src'

In [None]:
# Read the processed play file.
# NOTE(review): the path is relative, so it resolves against the kernel's
# working directory — confirm the notebook is launched from its own folder.
raw_df = load_data('../data/processed/49ers_plays.csv')

# Keep only the rows flagged as 49ers plays; copy so later column
# assignments do not touch raw_df.
sfo_df = raw_df.loc[raw_df['is_49ers'].eq(True)].copy()

# Run the project's feature preprocessing on the filtered plays.
df = preprocess_features(sfo_df)

print(f"Total 49ers Plays: {len(df)}")
df.head()

## 1. Overall Run/Pass Ratio

In [None]:
# Count of plays per `target` value, with the 0/1 ticks relabelled.
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(x='target', data=df, palette='viridis', ax=ax)
ax.set_title('Overall Run (0) vs Pass (1) Distribution')
ax.set_xlabel('Play Type')
ax.set_xticks([0, 1])
ax.set_xticklabels(['Run', 'Pass'])

# Annotate every bar with its share of all rows in df.
total = len(df)
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_center = bar.get_x() + bar.get_width()/2
    ax.annotate(
        '{:.1f}%'.format(100 * bar_height/total),
        (bar_center, bar_height),
        ha='center',
        va='bottom',
    )

plt.show()

## 2. Play Calling by Field Position
How does play calling change as the team approaches the endzone?

In [None]:
# Bucket yards_to_endzone into ten 10-yard bins, each labelled by its midpoint
# (5, 15, ..., 95). pd.cut's default bins are right-closed: (0, 10], (10, 20], ...
bin_edges = range(0, 101, 10)
bin_centers = range(5, 100, 10)
df['field_pos_bin'] = pd.cut(df['yards_to_endzone'], bins=bin_edges, labels=bin_centers)

# Mean of `target` per bin, drawn as a line with markers.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x='field_pos_bin', y='target', data=df, marker='o', ax=ax)
ax.set_title('Pass Probability by Field Position (Yards to Endzone)')
ax.set_xlabel('Yards to Endzone (Bin Center)')
ax.set_ylabel('Pass Probability')
ax.set_ylim(0, 1)
# Dashed reference line at an even run/pass split.
ax.axhline(0.5, color='red', linestyle='--', alpha=0.5)
plt.show()

## 3. Play Calling by Down & Distance
This section looks at how often a pass is called on each down, then breaks 3rd down out by yards to go.

In [None]:
# One bar per down showing the mean of `target`; error bars are disabled.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='down', y='target', data=df, ci=None, palette='coolwarm', ax=ax)
ax.set_title('Pass Probability by Down')
ax.set_ylabel('Pass Probability')
ax.set_ylim(0, 1)
plt.show()

In [None]:
# Restrict to 3rd-down plays; copy so the subset is independent of df.
third_down = df.loc[df['down'] == 3].copy()

# Stacked histogram normalised per bin (multiple='fill'), so each 1-yard
# bin shows the split between `target` values rather than raw counts.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(
    data=third_down,
    x='ydstogo',
    hue='target',
    multiple='fill',
    binwidth=1,
    palette='coolwarm',
    ax=ax,
)
ax.set_title('3rd Down Run/Pass Ratio by Yards to Go')
ax.set_ylabel('Proportion')
# Only the 0-15 yard range is shown; longer distances are off-axis.
ax.set_xlim(0, 15)
plt.show()

## 4. Play Calling by Score Differential
How does the score affect play calling?

In [None]:
# Bin score differential
# Negative = Trailing, Positive = Leading
#
# pd.cut bins are right-closed, so with edges -21..21 alone every play with a
# differential <= -21 or > 21 falls outside all bins, becomes NaN and is
# silently dropped from the chart. Open-ended outer bins keep those plays.
score_edges = [-np.inf, -21, -14, -7, 0, 7, 14, 21, np.inf]
score_labels = [
    '<= -21',
    '(-21, -14]',
    '(-14, -7]',
    '(-7, 0]',
    '(0, 7]',
    '(7, 14]',
    '(14, 21]',
    '> 21',
]
df['score_diff_bin'] = pd.cut(df['score_differential'], bins=score_edges, labels=score_labels)

# One bar per bin showing the mean of `target`; error bars are disabled.
plt.figure(figsize=(12, 6))
sns.barplot(x='score_diff_bin', y='target', data=df, ci=None, palette='RdBu')
plt.title('Pass Probability by Score Differential')
# NOTE(review): this label says "Home - Away", while the comment above reads the
# sign as trailing/leading — confirm how score_differential is defined upstream.
plt.xlabel('Score Differential (Home - Away for Home Team)')
plt.ylabel('Pass Probability')
plt.xticks(rotation=45)
plt.show()