In [538]:
# %%
import pandas as pd
import xgboost as xgb
from xgboost import XGBRegressor

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

import matplotlib.pyplot as plt
import pickle



# %%
YEARS = [2018, 2019, 2020, 2021, 2022, 2023,2024]

# %%
data_all = pd.DataFrame()

def calculate_seconds(row):
    if row['qtr'] != 5:
        return 3600 - row['game_seconds_remaining']
    else:
        return 600 - row['game_seconds_remaining'] + 3600


def get_quarter_value(dataf):
    if 'END QUARTER' in dataf['desc']:
        return dataf['level_0']
    else:
        return None

for i in YEARS:  
    i_data = pd.read_csv('https://github.com/nflverse/nflverse-data/releases/download/pbp/' \
                   'play_by_play_' + str(i) + '.csv.gz',
                   compression= 'gzip', low_memory= False)

    data_all = pd.concat([data_all,i_data])

ppr = 1

data = data_all.loc[data_all.season_type=='REG']
#data = data_all.loc[(data_all.play_type.isin(['no_play','pass','run'])) & (data_all.epa.isna()==False)]
#data.loc[data['pass']==1, 'play_type'] = 'pass'
#data.loc[data.rush==1, 'play_type'] = 'run'
data.reset_index(drop=True, inplace=True)
data['turnover'] = data['interception'] + data['fumble_lost']
data = data.dropna(subset=['posteam'])
data['inside_10'] = (data['yardline_100'] < 10).astype(int)
data['20+_play'] = (data['yards_gained'] > 19).astype(int)
data['short_pass'] = (data['air_yards'] < 10).astype(int)
data['medium_pass'] = ((data['air_yards'] > 9)&(data['air_yards']<20)).astype(int)
data['deep_pass'] = (data['air_yards'] > 19).astype(int)
data['end_zone_target'] = (data['yardline_100'] - data['air_yards']) <= 0
data['fantasy_points'] = (
    data['complete_pass'] * ppr +          # 1 point per completion
    data['touchdown'] * 6 +           # 6 points per touchdown
    data['yards_gained'] * 0.1        # 0.1 points per yard gained
)
data['distance_to_EZ_after_target'] = data['yardline_100'] - data['air_yards']


  data['turnover'] = data['interception'] + data['fumble_lost']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['turnover'] = data['interception'] + data['fumble_lost']
  data['inside_10'] = (data['yardline_100'] < 10).astype(int)
  data['20+_play'] = (data['yards_gained'] > 19).astype(int)
  data['short_pass'] = (data['air_yards'] < 10).astype(int)
  data['medium_pass'] = ((data['air_yards'] > 9)&(data['air_yards']<20)).astype(int)
  data['deep_pass'] = (data['air_yards'] > 19).astype(int)
  data['end_zone_target'] = (data['yardline_100'] - data['air_yards']) <= 0
  data['fantasy_points'] = (
  data['distance_to_EZ_after_target'] = data['yardline_100'] - data['air_yards']


In [539]:
def total_finder(home_or_away,home_total,away_total):
    if home_or_away == 'home':
        total = home_total
    else:
        total = away_total 
    return total

In [540]:
    data.reset_index(drop=True, inplace=True)

    data = data[data['two_point_attempt']==0]


    # derive implied team total from betting market data
    data['home_implied_total'] = abs(data['total_line'] / 2 + data['spread_line'] / 2)
    data['away_implied_total'] = abs(data['total_line'] / 2 - data['spread_line'] / 2)

    # Use list comprehension with zip for more efficient row-wise operations
    data['implied_posteam_total'] = [
    total_finder(has_ball, home_number, away_number)
        for has_ball, home_number, away_number in zip(data['posteam_type'], data['home_implied_total'], data['away_implied_total'])
    ]

  data['home_implied_total'] = abs(data['total_line'] / 2 + data['spread_line'] / 2)
  data['away_implied_total'] = abs(data['total_line'] / 2 - data['spread_line'] / 2)
  data['implied_posteam_total'] = [


In [541]:
    
    # we only want throws to a receiver, aka plays with air yardage (no running plays, sacks, throwaways etc.)
    throws = data[data['air_yards'].notna()]
    # only data before the current szn
    throws = throws[throws['pass_location'].notna()]

    
    df=throws[['passer_player_name','passer_player_id','posteam','pass','cp','game_id','complete_pass','inside_10','air_yards','yardline_100','ydstogo','implied_posteam_total','yards_gained','fantasy_points','pass_touchdown','down','pass_location','week','season','home_implied_total','away_implied_total','posteam_type','qb_hit','end_zone_target', 'distance_to_EZ_after_target','yards_after_catch','xyac_mean_yardage']]


# Season-over-season Comparison

In [542]:
def load_models():
    with open('Models/pass_yardage_model.pkl', 'rb') as file:
        yardage_model = pickle.load(file)
    
    with open('Models/pass_touchdown_model.pkl', 'rb') as file:
        touchdown_model = pickle.load(file)

    return yardage_model, touchdown_model

In [543]:
yardage_model, touchdown_model = load_models()

In [544]:
df['season'].value_counts()

season
2021    18641
2023    18247
2022    18006
2020    17926
2019    17779
2024    17740
2018    17593
Name: count, dtype: int64

In [545]:
new_predictors = [
    'air_yards', 'yardline_100', 'ydstogo',
    'down', 'pass_location', 'qb_hit', 'end_zone_target', 'distance_to_EZ_after_target'
]

test_df = df[new_predictors]

test_df.head()

Unnamed: 0,air_yards,yardline_100,ydstogo,down,pass_location,qb_hit,end_zone_target,distance_to_EZ_after_target
2,8.0,80.0,15,1.0,right,0.0,False,72.0
5,4.0,39.0,10,1.0,right,0.0,False,35.0
6,-3.0,39.0,10,2.0,left,0.0,False,42.0
7,24.0,39.0,10,3.0,left,0.0,False,15.0
10,1.0,1.0,1,3.0,right,0.0,True,0.0


In [546]:
test_df = pd.get_dummies(test_df, columns=['pass_location'], drop_first=True)


In [547]:


# Add predictions to the new dataset (optional)
df['xYards'] = yardage_model.predict(test_df)
df['xTDs'] = touchdown_model.predict_proba(test_df)[:, 1]
#df['xFPs'] = best_model.predict(test_df)
#df['compositeXFP'] = df['cp'] * ppr + df['xTDs'] * 6 + df['xYards'] * 0.1

df_copy = df.copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['xYards'] = yardage_model.predict(test_df)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['xTDs'] = touchdown_model.predict_proba(test_df)[:, 1]


In [548]:
df[['xYards','yards_gained']].corr()

Unnamed: 0,xYards,yards_gained
xYards,1.0,0.344472
yards_gained,0.344472,1.0


In [549]:
df[['xTDs','pass_touchdown']].corr()

Unnamed: 0,xTDs,pass_touchdown
xTDs,1.0,0.502229
pass_touchdown,0.502229,1.0


In [550]:
passers = df.groupby(['passer_player_id','posteam','season']).agg({'passer_player_name':'max','game_id':'nunique','air_yards':'sum','pass':'sum','xTDs':'sum','pass_touchdown':'sum','xYards':'sum','yards_gained':'sum','yards_after_catch':'sum','complete_pass':'sum','cp':'sum','xyac_mean_yardage':'sum'}).sort_values('yards_gained',ascending=False)

In [551]:
passers['TD diff'] = passers['pass_touchdown'] - passers['xTDs']

passers.sort_values('TD diff')[['passer_player_name','pass_touchdown','xTDs','TD diff']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,passer_player_name,pass_touchdown,xTDs,TD diff
passer_player_id,posteam,season,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00-0036971,JAX,2021,T.Lawrence,12.0,24.069958,-12.069958
00-0038102,PIT,2022,K.Pickett,7.0,18.458149,-11.458149
00-0036355,LAC,2022,J.Herbert,25.0,35.416325,-10.416325
00-0027973,CIN,2019,A.Dalton,16.0,26.208084,-10.208084
00-0019596,TB,2022,T.Brady,25.0,34.637405,-9.637405
...,...,...,...,...,...,...
00-0034796,BAL,2019,L.Jackson,36.0,26.222387,9.777613
00-0037834,SF,2023,B.Purdy,31.0,19.087175,11.912825
00-0023459,GB,2020,A.Rodgers,48.0,34.401066,13.598934
00-0034796,BAL,2024,L.Jackson,41.0,26.171593,14.828407


In [552]:
passers['yardage diff'] = passers['yards_gained'] - passers['xYards']

passers['yac diff'] = passers['yards_after_catch'] - passers['xyac_mean_yardage']

passers['cpoe'] = passers['complete_pass'] - passers['cp']

passers.sort_values('cpoe')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,passer_player_name,game_id,air_yards,pass,xTDs,pass_touchdown,xYards,yards_gained,yards_after_catch,complete_pass,cp,xyac_mean_yardage,TD diff,yardage diff,yac diff,cpoe
passer_player_id,posteam,season,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
00-0037013,NYJ,2021,Z.Wilson,13,2885.0,382,13.778047,9.0,2776.214111,2334.0,1077.0,213.0,248.280056,1920.765889,-4.778047,-442.214111,-843.765889,-35.280056
00-0034343,ARI,2018,J.Rosen,14,3262.0,392,15.317440,11.0,2903.294922,2278.0,974.0,217.0,247.589823,1959.181271,-4.317440,-625.294922,-985.181271,-30.589823
00-0036971,JAX,2021,T.Lawrence,17,4738.0,599,24.069958,12.0,4352.516602,3641.0,1634.0,359.0,386.888468,2913.473096,-12.069958,-711.516602,-1279.473096,-27.888468
00-0031407,JAX,2018,B.Bortles,13,2889.0,403,15.410169,13.0,2885.009521,2718.0,1472.0,243.0,270.727772,2119.078694,-2.410169,-167.009521,-647.078694,-27.727772
00-0039164,IND,2024,A.Richardson,11,3224.0,262,11.033096,8.0,2206.872559,1814.0,776.0,126.0,153.040383,1298.216121,-3.033096,-392.872559,-522.216121,-27.040383
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
00-0029263,SEA,2019,R.Wilson,16,4830.0,513,34.552357,31.0,3648.159424,4110.0,1723.0,341.0,306.610145,2362.177999,-3.552357,461.840576,-639.177999,34.389855
00-0023459,GB,2020,A.Rodgers,16,4135.0,523,34.401066,48.0,3665.318359,4299.0,2248.0,372.0,336.000369,2673.048576,13.598934,633.681641,-425.048576,35.999631
00-0034857,BUF,2020,J.Allen,16,4857.0,572,34.585648,37.0,4107.173828,4544.0,1871.0,396.0,359.870112,2600.406139,2.414352,436.826172,-729.406139,36.129888
00-0029263,SEA,2020,R.Wilson,16,4809.0,557,36.011909,40.0,3867.688477,4212.0,1827.0,384.0,347.579485,2510.490953,3.988091,344.311523,-683.490953,36.420515


In [553]:
passers[passers.columns[1:]].corr()[['yards_gained']].sort_values('yards_gained',ascending=False)

Unnamed: 0,yards_gained
yards_gained,1.0
complete_pass,0.994533
xYards,0.992685
pass,0.992283
cp,0.991277
yards_after_catch,0.990311
xyac_mean_yardage,0.988417
air_yards,0.98728
xTDs,0.981395
game_id,0.969672


In [554]:
passers_qual = passers[passers['pass']>149]

In [555]:
passers_qual['TD/game'] = passers_qual['pass_touchdown']/passers_qual['game_id']

passers_qual['xTD/game'] = passers_qual['xTDs']/passers_qual['game_id']

passers_qual['TD_delta/game'] = passers_qual['TD/game'] - passers_qual['xTD/game']

passers_qual['yards/game'] = passers_qual['yards_gained']/passers_qual['game_id']

passers_qual['xYards/game'] = passers_qual['xYards']/passers_qual['game_id']

passers_qual['yards_delta/game'] = passers_qual['yards/game'] - passers_qual['xYards/game']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passers_qual['TD/game'] = passers_qual['pass_touchdown']/passers_qual['game_id']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passers_qual['xTD/game'] = passers_qual['xTDs']/passers_qual['game_id']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passers_qual['TD_delta/game'] = passers_qual['TD/gam

In [556]:
passers_qual['yards_delta'] = passers_qual['yards_gained'] - passers_qual['xYards']

passers_qual['td_delta'] = passers_qual['pass_touchdown'] - passers_qual['xTDs']

passers_qual['cperc_oe'] = passers_qual['cpoe']/passers_qual['pass']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passers_qual['yards_delta'] = passers_qual['yards_gained'] - passers_qual['xYards']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passers_qual['td_delta'] = passers_qual['pass_touchdown'] - passers_qual['xTDs']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passers_qual['cperc_oe'] = passers_qual[

In [557]:
df = passers_qual.reset_index().sort_values(['passer_player_id', 'season'])

# Create lagged columns for the next season's stats
for col in ['xTDs', 'pass_touchdown', 'xYards', 'yards_gained', 'cp', 'complete_pass','cpoe','yards_delta','td_delta','cperc_oe','pass']:
    df[f'{col}_next'] = df.groupby('passer_player_id')[col].shift(-1)

# Drop rows where the next season's data is missing (last season for each player)
df_lagged = df.dropna(subset=[f'{col}_next' for col in ['xTDs', 'pass_touchdown', 'xYards', 'yards_gained', 'cp', 'complete_pass','cpoe','yards_delta','td_delta','cperc_oe','pass']])

In [558]:
correlations = {}

# Loop through each stat to calculate its correlation with the following season
for col in ['xTDs', 'pass_touchdown', 'xYards', 'yards_gained', 'cp', 'complete_pass','cpoe','yards_delta','td_delta','cperc_oe','pass']:
    correlation = df_lagged[col].corr(df_lagged[f'{col}_next'])
    #print(df_lagged)
    correlations[col] = correlation

# Display the results
correlations_df = pd.DataFrame.from_dict(correlations, orient='index', columns=['Correlation with Next Season'])
print(correlations_df.sort_values("Correlation with Next Season",ascending=False))

                Correlation with Next Season
pass_touchdown                      0.487336
complete_pass                       0.481746
yards_gained                        0.479293
xTDs                                0.478657
cp                                  0.462310
pass                                0.451993
xYards                              0.431411
yards_delta                         0.401556
cperc_oe                            0.389694
cpoe                                0.347077
td_delta                            0.170454


In [559]:
correlations = {}

# Loop through each stat to calculate its correlation with the following season
for metric in ['pass_touchdown', 'yards_gained', 'complete_pass']:
    if metric == 'pass_touchdown':
        predictor = 'xTDs'
    if metric == 'yards_gained':
        predictor = 'xYards'
    if metric == 'complete_pass':
        predictor = 'cp'
    #print(f"{predictor} to predict {metric}")
    correlation = df_lagged[predictor].corr(df_lagged[f'{metric}_next'])
    correlations[metric] = correlation

# Display the results
x_stat_correlations_df = pd.DataFrame.from_dict(correlations, orient='index', columns=['Expected Stat Correlation with Actual Next Season Target'])
print(x_stat_correlations_df)

                Expected Stat Correlation with Actual Next Season Target
pass_touchdown                                           0.429418       
yards_gained                                             0.427620       
complete_pass                                            0.465268       


In [560]:
correlations_df.merge(x_stat_correlations_df,right_index=True,left_index=True,how='outer').sort_values('Correlation with Next Season',ascending=False)

Unnamed: 0,Correlation with Next Season,Expected Stat Correlation with Actual Next Season Target
pass_touchdown,0.487336,0.429418
complete_pass,0.481746,0.465268
yards_gained,0.479293,0.42762
xTDs,0.478657,
cp,0.46231,
pass,0.451993,
xYards,0.431411,
yards_delta,0.401556,
cperc_oe,0.389694,
cpoe,0.347077,


In [561]:
df = passers_qual.reset_index().sort_values(['passer_player_id', 'season'])

# Create lagged columns for the next season's stats
for col in ['xTD/game', 'TD/game', 'xYards/game', 'yards/game','TD_delta/game','yards_delta/game']:
    df[f'{col}_next'] = df.groupby('passer_player_id')[col].shift(-1)

# Drop rows where the next season's data is missing (last season for each player)
df_lagged = df.dropna(subset=[f'{col}_next' for col in ['xTD/game', 'TD/game', 'xYards/game', 'yards/game','TD_delta/game','yards_delta/game']])

In [562]:
correlations = {}

# Loop through each stat to calculate its correlation with the following season
for col in ['xTD/game', 'TD/game', 'xYards/game', 'yards/game','TD_delta/game','yards_delta/game']:
    correlation = df_lagged[col].corr(df_lagged[f'{col}_next'])
    #print(df_lagged)
    correlations[col] = correlation

# Display the results
correlations_df = pd.DataFrame.from_dict(correlations, orient='index', columns=['Correlation with Next Season'])
print(correlations_df.sort_values("Correlation with Next Season",ascending=False))

                  Correlation with Next Season
yards/game                            0.548550
xYards/game                           0.496288
xTD/game                              0.456638
TD/game                               0.449468
yards_delta/game                      0.428772
TD_delta/game                         0.214504


In [563]:
correlations = {}

# Loop through each stat to calculate its correlation with the following season
for metric in ['yards/game', 'TD/game']:
    if metric == 'yards/game':
        predictor = 'xYards/game'
    if metric == 'TD/game':
        predictor = 'xTD/game'
    #print(f"{predictor} to predict {metric}")
    correlation = df_lagged[predictor].corr(df_lagged[f'{metric}_next'])
    correlations[metric] = correlation

# Display the results
x_stat_correlations_df = pd.DataFrame.from_dict(correlations, orient='index', columns=['Expected Stat Correlation with Actual Next Season Target'])
print(x_stat_correlations_df)

            Expected Stat Correlation with Actual Next Season Target
yards/game                                           0.407989       
TD/game                                              0.360000       


In [564]:
correlations_df.merge(x_stat_correlations_df,right_index=True,left_index=True,how='outer').sort_values('Correlation with Next Season',ascending=False)

Unnamed: 0,Correlation with Next Season,Expected Stat Correlation with Actual Next Season Target
yards/game,0.54855,0.407989
xYards/game,0.496288,
xTD/game,0.456638,
TD/game,0.449468,0.36
yards_delta/game,0.428772,
TD_delta/game,0.214504,


## Weekly

In [565]:
weekly_passers = df_copy.groupby(['passer_player_id','posteam','season','week']).agg({'passer_player_name':'max','air_yards':'sum','pass':'sum','xTDs':'sum','pass_touchdown':'sum','xYards':'sum','yards_gained':'sum','yards_after_catch':'sum','complete_pass':'sum','cp':'sum','xyac_mean_yardage':'sum'}).sort_values('yards_gained',ascending=False)

In [566]:
weekly_passers[['xYards','xTDs','pass_touchdown','pass','yards_gained']].corr()

Unnamed: 0,xYards,xTDs,pass_touchdown,pass,yards_gained
xYards,1.0,0.643298,0.4314,0.980984,0.872959
xTDs,0.643298,1.0,0.580024,0.692484,0.681205
pass_touchdown,0.4314,0.580024,1.0,0.445559,0.625478
pass,0.980984,0.692484,0.445559,1.0,0.871489
yards_gained,0.872959,0.681205,0.625478,0.871489,1.0


In [567]:
weekly_passers.shape

(4534, 11)

In [568]:
def determine_half(row):
    if row['season'] < 2021:
        return 'first'  if 1 <= row['week'] <= 8 else 'second'
    else:
        return 'first'  if 1 <= row['week'] <= 9 else 'second'

In [569]:
weekly_reset = weekly_passers.reset_index()

weekly_reset['half'] = weekly_reset.apply(determine_half, axis=1)

In [570]:
first_half = weekly_reset[weekly_reset['half']=='first']

second_half = weekly_reset[weekly_reset['half']=='second']


In [571]:
first_half = first_half.groupby(['passer_player_id','posteam','season','passer_player_name']).sum()

first_half.drop(columns=['week','half'],inplace=True)

In [572]:
second_half = second_half.groupby(['passer_player_id','posteam','season','passer_player_name']).sum()

second_half.drop(columns=['week','half'],inplace=True)

In [573]:
first_half = first_half[first_half['pass']>39]

second_half = second_half[second_half['pass']>39]

In [574]:
def name_change(columns,half):
    name_list = []
    if half == 'first':
        prefix = '1H'
    else:
        prefix = '2H'
    for col in columns:
        new_name = prefix + "_" + col
        name_list.append(new_name)
    return name_list


In [575]:
first_half.columns = name_change(first_half.columns,'first')

In [576]:
second_half.columns = name_change(second_half.columns,'second')

In [577]:
split_df = pd.merge(first_half,second_half,right_index=True,left_index=True)

split_df['Total TDs'] = split_df['1H_pass_touchdown'] + split_df['2H_pass_touchdown']

In [578]:
#td_correlation

split_df[['1H_pass_touchdown','2H_pass_touchdown','1H_xTDs','2H_xTDs','1H_pass','2H_pass']].corr()[['2H_pass_touchdown']].sort_values('2H_pass_touchdown',ascending=False)

Unnamed: 0,2H_pass_touchdown
2H_pass_touchdown,1.0
2H_xTDs,0.852465
2H_pass,0.788478
1H_pass_touchdown,0.502197
1H_xTDs,0.438761
1H_pass,0.308096


In [579]:
split_df[['1H_yards_gained','2H_yards_gained','1H_xYards','2H_xYards','1H_pass','2H_pass']].corr()[['2H_yards_gained']].sort_values('2H_yards_gained',ascending=False)

Unnamed: 0,2H_yards_gained
2H_yards_gained,1.0
2H_xYards,0.964055
2H_pass,0.96263
1H_yards_gained,0.468378
1H_pass,0.393814
1H_xYards,0.39167
