In [80]:
import pandas as pd

In [81]:
def load_data(years=None):
    """Download nflverse play-by-play files and stack them into one DataFrame.

    One gzip-compressed CSV is read per season from the nflverse-data
    ``pbp`` GitHub release, and the per-season frames are concatenated
    row-wise in the order given.

    Parameters
    ----------
    years : iterable of int, optional
        Seasons to download. When omitted, the notebook's original list
        (2020, 2022, 2023, 2024) is used.

    Returns
    -------
    pandas.DataFrame
        All requested seasons stacked vertically. Each season keeps the row
        index it was read with, so index labels repeat across seasons. An
        empty DataFrame is returned when ``years`` is empty.
    """
    if years is None:
        # NOTE(review): 2021 is absent from the default list -- confirm
        # whether skipping that season is intentional.
        years = [2020, 2022, 2023, 2024]

    base_url = 'https://github.com/nflverse/nflverse-data/releases/download/pbp/'

    # Read every season first and concatenate once: calling pd.concat inside
    # the loop re-copies all previously loaded rows on every iteration.
    season_frames = [
        pd.read_csv(base_url + 'play_by_play_' + str(year) + '.csv.gz',
                    compression='gzip', low_memory=False)
        for year in years
    ]

    # pd.concat raises on an empty list; keep the "no seasons -> empty frame"
    # result the loop-based version produced.
    if not season_frames:
        return pd.DataFrame()

    return pd.concat(season_frames)


In [82]:
def preprocess_data(data):
    """Filter play-by-play rows and append derived per-play columns.

    Steps, in order:

    1. Keep rows whose ``play_type`` is ``'pass'`` or ``'run'`` and whose
       ``epa`` is not missing.
    2. Overwrite ``play_type`` with ``'pass'`` where the ``pass`` column is 1,
       then with ``'run'`` where ``rush`` is 1.
    3. Reset the index, drop rows with a missing ``posteam`` and keep only
       rows whose ``season_type`` is ``'REG'``.
    4. Append the derived columns listed below.

    Parameters
    ----------
    data : pandas.DataFrame
        Play-by-play rows. Must contain ``play_type``, ``epa``, ``pass``,
        ``rush``, ``posteam``, ``season_type``, ``interception``,
        ``fumble_lost``, ``yards_gained``, ``air_yards``, ``complete_pass``,
        ``touchdown`` and ``yardline_100``. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The filtered rows plus ``turnover``, ``20+_play``, ``short_pass``,
        ``medium_pass``, ``deep_pass``, ``fantasy_points``,
        ``end_zone_target`` and ``distance_to_EZ_after_target``. Because the
        index is reset before the ``posteam`` / ``season_type`` filters, the
        returned index can contain gaps.
    """
    keep_play = data['play_type'].isin(['pass', 'run']) & data['epa'].notna()

    # Explicit copy: the .loc assignments below must write to an independent
    # frame, not to a slice of the caller's data (the slice form triggers
    # SettingWithCopyWarning on pandas without Copy-on-Write).
    plays = data.loc[keep_play].copy()

    # The pass / rush flags take precedence over the original play_type label;
    # rush is applied second, so it wins if both flags are set.
    plays.loc[plays['pass'] == 1, 'play_type'] = 'pass'
    plays.loc[plays['rush'] == 1, 'play_type'] = 'run'

    plays = plays.reset_index(drop=True)
    plays = plays.dropna(subset=['posteam'])
    plays = plays[plays['season_type'] == 'REG']

    air_yards = plays['air_yards']
    yards_gained = plays['yards_gained']
    # Yards left to the goal line from where the pass was targeted; NaN when
    # air_yards is missing.
    distance_after_target = plays['yardline_100'] - air_yards

    # Comparisons against NaN are False, so rows with missing air_yards get 0
    # in every pass-depth flag and False for end_zone_target.
    derived = pd.DataFrame({
        'turnover': plays['interception'] + plays['fumble_lost'],
        '20+_play': (yards_gained > 19).astype(int),
        'short_pass': (air_yards < 10).astype(int),
        'medium_pass': ((air_yards > 9) & (air_yards < 20)).astype(int),
        'deep_pass': (air_yards > 19).astype(int),
        'fantasy_points': (
            plays['complete_pass'] * 1 +   # 1 point per completion
            plays['fumble_lost'] * -2 +    # -2 points per lost fumble
            plays['touchdown'] * 6 +       # 6 points per touchdown
            yards_gained * 0.1             # 0.1 points per yard gained
        ),
        'end_zone_target': distance_after_target <= 0,
        'distance_to_EZ_after_target': distance_after_target,
    })

    # Both frames share the same index, so the column-wise concat aligns
    # row-for-row.
    return pd.concat([plays, derived], axis=1)

In [83]:
# Run the load + preprocess pipeline once; df_all keeps every preprocessed play.
df_all = preprocess_data(load_data())

# Rows flagged as two-point attempts get their own frame ...
two_point_df = df_all[df_all['two_point_attempt'] == 1]

# ... and df holds the rows where the flag is 0, used by the
# down-and-distance cells below.
df = df_all[df_all['two_point_attempt'] == 0]



In [84]:
# Mean of first_down over 3rd-and-2 plays labelled 'pass'.
pass_fd_rate = df.loc[
    (df['down'] == 3) & (df['ydstogo'] == 2) & (df['play_type'] == 'pass'),
    'first_down',
].mean()


In [85]:
# Mean of first_down over 3rd-and-2 plays labelled 'run'.
run_fd_rate = df.loc[
    (df['down'] == 3) & (df['ydstogo'] == 2) & (df['play_type'] == 'run'),
    'first_down',
].mean()


In [86]:
# Mean of the pass flag over all 3rd-and-2 plays.
pass_rate = df.loc[(df['down'] == 3) & (df['ydstogo'] == 2), 'pass'].mean()

In [87]:
# One print call; sep='\n' reproduces the line breaks (including the blank
# line after the heading) of printing each item separately.
print(
    'Third & 2\n',
    f'Pass Rate: {round(pass_rate,3)}',
    f'Pass 1D%: {round(pass_fd_rate,3)}',
    f'Run 1D%: {round(run_fd_rate,3)}',
    sep='\n',
)

Third & 2

Pass Rate: 0.63
Pass 1D%: 0.56
Run 1D%: 0.609


In [88]:
# Mean of the pass flag across two-point attempts.
two_point_pass_rate = two_point_df['pass'].mean()

# Mean of the success column, split by pass flag (1 -> pass, 0 -> run).
two_point_pass_success_rate = two_point_df.loc[two_point_df['pass'] == 1, 'success'].mean()
two_point_run_success_rate = two_point_df.loc[two_point_df['pass'] == 0, 'success'].mean()

In [89]:
# One print call; sep='\n' reproduces the line breaks of printing each item
# separately.
print(
    '\nTwo-Point Conversions\n',
    f'Pass Rate: {round(two_point_pass_rate,3)}',
    f'Pass Success Rate: {round(two_point_pass_success_rate,3)}',
    f'Run Success Rate: {round(two_point_run_success_rate,3)}',
    sep='\n',
)


Two-Point Conversions

Pass Rate: 0.707
Pass Success Rate: 0.437
Run Success Rate: 0.551


In [90]:
# Filter to 3rd-and-3 plays once, then summarise pass vs. run outcomes.
third_and_3_plays = df[(df['down'] == 3) & (df['ydstogo'] == 3)]

pass_fd_rate_third_and_3 = third_and_3_plays.loc[
    third_and_3_plays['play_type'] == 'pass', 'first_down'
].mean()
run_fd_rate_third_and_3 = third_and_3_plays.loc[
    third_and_3_plays['play_type'] == 'run', 'first_down'
].mean()
pass_rate_third_and_3 = third_and_3_plays['pass'].mean()

print(
    '\nThird & 3\n',
    f'Pass Rate: {round(pass_rate_third_and_3,3)}',
    f'Pass 1D%: {round(pass_fd_rate_third_and_3,3)}',
    f'Run 1D%: {round(run_fd_rate_third_and_3,3)}',
    sep='\n',
)



Third & 3

Pass Rate: 0.785
Pass 1D%: 0.504
Run 1D%: 0.514


In [91]:
# Filter to 3rd-and-4 plays once, then summarise pass vs. run outcomes.
third_and_4_plays = df[(df['down'] == 3) & (df['ydstogo'] == 4)]

pass_fd_rate_third_and_4 = third_and_4_plays.loc[
    third_and_4_plays['play_type'] == 'pass', 'first_down'
].mean()
run_fd_rate_third_and_4 = third_and_4_plays.loc[
    third_and_4_plays['play_type'] == 'run', 'first_down'
].mean()
pass_rate_third_and_4 = third_and_4_plays['pass'].mean()

print(
    '\nThird & 4\n',
    f'Pass Rate: {round(pass_rate_third_and_4,3)}',
    f'Pass 1D%: {round(pass_fd_rate_third_and_4,3)}',
    f'Run 1D%: {round(run_fd_rate_third_and_4,3)}',
    sep='\n',
)


Third & 4

Pass Rate: 0.888
Pass 1D%: 0.479
Run 1D%: 0.473


In [95]:
# Filter to 3rd-and-5 plays once, then summarise pass vs. run outcomes.
third_and_5_plays = df[(df['down'] == 3) & (df['ydstogo'] == 5)]

pass_fd_rate_third_and_5 = third_and_5_plays.loc[
    third_and_5_plays['play_type'] == 'pass', 'first_down'
].mean()
run_fd_rate_third_and_5 = third_and_5_plays.loc[
    third_and_5_plays['play_type'] == 'run', 'first_down'
].mean()
pass_rate_third_and_5 = third_and_5_plays['pass'].mean()

print(
    '\nThird & 5\n',
    f'Pass Rate: {round(pass_rate_third_and_5,3)}',
    f'Pass 1D%: {round(pass_fd_rate_third_and_5,3)}',
    f'Run 1D%: {round(run_fd_rate_third_and_5,3)}',
    sep='\n',
)


Third & 5

Pass Rate: 0.915
Pass 1D%: 0.437
Run 1D%: 0.37


In [93]:
# Filter to 3rd-and-1 plays once, then summarise pass vs. run outcomes.
third_and_1_plays = df[(df['down'] == 3) & (df['ydstogo'] == 1)]

pass_fd_rate_third_and_1 = third_and_1_plays.loc[
    third_and_1_plays['play_type'] == 'pass', 'first_down'
].mean()
run_fd_rate_third_and_1 = third_and_1_plays.loc[
    third_and_1_plays['play_type'] == 'run', 'first_down'
].mean()
pass_rate_third_and_1 = third_and_1_plays['pass'].mean()

print(
    '\nThird & 1\n',
    f'Pass Rate: {round(pass_rate_third_and_1,3)}',
    f'Pass 1D%: {round(pass_fd_rate_third_and_1,3)}',
    f'Run 1D%: {round(run_fd_rate_third_and_1,3)}',
    sep='\n',
)


Third & 1

Pass Rate: 0.256
Pass 1D%: 0.58
Run 1D%: 0.728
