# Predicting the Top 10 RBs of 2025

In [1]:
import pandas as pd
import glob
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
# Load the per-season "Top 25 RBs" tables for 2022-2024.
# DATA_DIR defaults to the original "/content" location; it is the single place
# to edit when the CSVs live somewhere else.
DATA_DIR = "/content"

df_2022 = pd.read_csv(f"{DATA_DIR}/Top 25 RBs 2022.csv")
df_2023 = pd.read_csv(f"{DATA_DIR}/Top 25 RBs 2023.csv")
# NOTE(review): the 2024 file name says "RB", not "RBs" like the other two.
# Kept exactly as in the original; confirm against the file on disk.
df_2024 = pd.read_csv(f"{DATA_DIR}/Top 25 RB 2024.csv")

In [3]:
# Peek at the first rows of the raw 2022 table to see which columns it has
df_2022.head(n=5)

Unnamed: 0,Player,Rush Yds,Att,TD,20+,40+,Lng,Rush 1st,Rush 1st%,Rush FUM
0,Josh Jacobs,1653,340,12,7,2,86,93,27.4,2
1,Derrick Henry,1538,349,13,10,4,56,65,18.6,4
2,Nick Chubb,1525,302,12,13,1,41,69,22.8,1
3,Saquon Barkley,1312,295,10,9,2,68,62,21.0,1
4,Miles Sanders,1269,259,11,9,1,40,62,23.9,1


In [4]:
# Keep only 'Player', rushing yards and rushing TDs from each season, and tag
# the two stat columns with the season year so they stay distinct once the
# frames are joined on 'Player'.
def select_season_stats(season_df, year):
    """Return 'Player' plus that season's rushing yards/TDs, suffixed with `year`.

    Parameters
    ----------
    season_df : pandas.DataFrame
        Frame with a 'Player' column and either the raw 'Rush Yds' / 'TD'
        columns or their already year-suffixed versions.
    year : int
        Season appended to the two stat column names.

    Returns
    -------
    pandas.DataFrame
        Columns 'Player', 'Rush Yds <year>' and 'Rush TDs <year>'.

    Raises
    ------
    KeyError
        If neither the raw nor the year-suffixed stat columns are present.
    """
    yds_col, td_col = f'Rush Yds {year}', f'Rush TDs {year}'
    # Rename first, then select: rename() skips labels that are absent, so
    # re-running this cell on an already-trimmed frame is a no-op instead of a
    # KeyError on the raw 'Rush Yds' / 'TD' names.
    renamed = season_df.rename(columns={'Rush Yds': yds_col, 'TD': td_col})
    return renamed[['Player', yds_col, td_col]]


df_2022 = select_season_stats(df_2022, 2022)
df_2023 = select_season_stats(df_2023, 2023)
df_2024 = select_season_stats(df_2024, 2024)

In [9]:
# Preview the trimmed 2022 frame: player name plus the year-tagged yards/TD columns
df_2022.head(n=5)

Unnamed: 0,Player,Rush Yds 2022,Rush TDs 2022
0,Josh Jacobs,1653,12
1,Derrick Henry,1538,13
2,Nick Chubb,1525,12
3,Saquon Barkley,1312,10
4,Miles Sanders,1269,11


In [10]:
# Join the three season frames on 'Player'. how='inner' keeps only players who
# appear in every one of the 2022, 2023 and 2024 tables.
# validate='one_to_one' makes pandas raise MergeError if a player name is
# duplicated within a season, instead of silently multiplying rows (which would
# distort any per-player statistic computed from the result).
merged_df = df_2022.merge(
    df_2023, on='Player', how='inner', validate='one_to_one'
).merge(
    df_2024, on='Player', how='inner', validate='one_to_one'
)

In [11]:
# Preview the joined frame (one row per player, one yards/TD column pair per season)
merged_df.head(n=5)

Unnamed: 0,Player,Rush Yds 2022,Rush TDs 2022,Rush Yds 2023,Rush TDs 2023,Rush Yds 2024,Rush TDs 2024
0,Derrick Henry,1538,13,1167,12,1921,16
1,Saquon Barkley,1312,10,962,6,2005,13
2,Najee Harris,1034,7,1035,8,1043,6
3,Tony Pollard,1007,9,1005,6,1079,5


In [12]:
# Add per-player averages across the three seasons.
# Each key is the new column to create; its value lists the season columns
# whose row-wise mean fills it.
season_columns_by_average = {
    'Avg Rush Yds': ['Rush Yds 2022', 'Rush Yds 2023', 'Rush Yds 2024'],
    'Avg Rush TDs': ['Rush TDs 2022', 'Rush TDs 2023', 'Rush TDs 2024'],
}

for average_col, season_cols in season_columns_by_average.items():
    # axis=1 -> average across the listed columns within each player's row
    merged_df[average_col] = merged_df[season_cols].mean(axis=1)

In [13]:
# Preview merged_df, now including the 'Avg Rush Yds' and 'Avg Rush TDs' columns
merged_df.head(n=5)

Unnamed: 0,Player,Rush Yds 2022,Rush TDs 2022,Rush Yds 2023,Rush TDs 2023,Rush Yds 2024,Rush TDs 2024,Avg Rush Yds,Avg Rush TDs
0,Derrick Henry,1538,13,1167,12,1921,16,1542.0,13.666667
1,Saquon Barkley,1312,10,962,6,2005,13,1426.333333,9.666667
2,Najee Harris,1034,7,1035,8,1043,6,1037.333333,7.0
3,Tony Pollard,1007,9,1005,6,1079,5,1030.333333,6.666667
