In [195]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import os
import requests

In [196]:
# get api from https://api.sportsdata.io/v3/nfl/scores/json/PlayersByAvailable?key=
load_dotenv()
API_KEY = os.getenv("API_KEY")

season = "2024REG"
player_id = 18878
number_of_games = "all"

pd.options.display.max_columns = None
pd.options.display.max_rows = None 

r = requests.get(
    f"https://api.sportsdata.io/v3/nfl/stats/json/PlayerSeasonStats/{season}?key={API_KEY}"
)

# get json data
data = r.json()

# check if the request was successful
if r.status_code == 200:
    # create dataframe
    df = pd.DataFrame(data)
else:
    error_message = data.get("message")
    print(f"Error: {error_message}")
    df = pd.DataFrame()  # create an empty dataframe

print(df.head())

   PlayerID  SeasonType  Season Team  Number        Name Position  \
0       549           1    2024  ARI       5    M.Prater        K   
1       611           1    2024  IND      15    J.Flacco       QB   
2      2593           1    2024  NYJ       8   A.Rodgers       QB   
3      3061           1    2024  CHI      84     M.Lewis       TE   
4      3341           1    2024  MIA      93  C.Campbell       DT   

  PositionCategory  Activated  Played  Started  PassingAttempts  \
0               ST          4       4        0              0.0   
1              OFF          8       8        6            423.3   
2              OFF         17      17       17            996.9   
3              OFF         17      17        3              0.0   
4              DEF         17      17       17              0.0   

   PassingCompletions  PassingYards  PassingCompletionPercentage  \
0                 0.0           0.0                          0.0   
1               276.5        3006.0           

In [197]:
# get all RBs
rbs = df[df['Position'] == 'RB']
print(rbs[["Name", "Position", "Team"]])

                   Name Position Team
63          C.Patterson       RB  PIT
126          A.Abdullah       RB   LV
159           R.Mostert       RB  MIA
181           E.Elliott       RB  DAL
194             D.Henry       RB  BAL
273             J.Mixon       RB  HOU
274              D.Cook       RB  DAL
277         C.McCaffrey       RB   SF
278            A.Kamara       RB   NO
313              K.Hunt       RB   KC
321           D.Foreman       RB  CLE
335            J.Conner       RB  ARI
339            S.Perine       RB   KC
341          J.Williams       RB   NO
356             A.Jones       RB  MIN
362         J.McNichols       RB  WAS
404            A.Ekeler       RB  WAS
408        D.Ogunbowale       RB  HOU
413           S.Barkley       RB  PHI
416             N.Chubb       RB  CLE
510             J.Kelly       RB  CLE
554             M.Boone       RB  CAR
560           G.Edwards       RB  LAC
570        J.Wilson Jr.       RB  MIA
614            M.Gaskin       RB  MIN
623         

In [198]:
# get Derrick Henry
input_name = input("Enter player name: ")
search_name = df.loc[
    df["Name"] == input_name, ["Name", "Position", "Team", "RushingYards", "RushingTouchdowns"]
]
print(search_name)

        Name Position Team  RushingYards  RushingTouchdowns
194  D.Henry       RB  BAL        3279.1               27.3


In [199]:
# import beautifulsoup4
from bs4 import BeautifulSoup

In [200]:
# get the url

new_season = "2020"

url = f"https://www.nfl.com/stats/player-stats/category/rushing/{new_season}/REG/all/rushingyards/desc"

page = requests.get(url)

print(page.status_code)

# create a BeautifulSoup object
soup = BeautifulSoup(page.content, "html.parser")

# get the table
table = soup.find("table")

# get the table headers
headers = table.find_all("th")

# get the table rows
rows = table.find_all("tr")

# get the table data
data = table.find_all("td")

# get the table headers
header_list = []

for header in headers:
    header_list.append(header.text)
    # remove /n from the list
    header_list = [header.strip() for header in header_list]

# get the table data
data_list = []

for item in data:
    data_list.append(item.text)
    # remove /n from the list
    data_list = [data.strip() for data in data_list]

# group eveery 10 items in data_list
data_list = [data_list[i:i+10] for i in range(0, len(data_list), 10)]

# create a dataframe
df = pd.DataFrame(data_list, columns=header_list)

print(df)



200
                   Player Rush Yds  Att  TD 20+ 40+ Lng Rush 1st Rush 1st%  \
0           Derrick Henry     2027  378  17  16   4  94       98      25.9   
1             Dalvin Cook     1557  312  16   6   1  70       91      29.2   
2         Jonathan Taylor     1169  232  11   7   3  62       69      29.7   
3             Aaron Jones     1104  201   9   6   4  77       52      25.9   
4        David Montgomery     1070  247   8   5   2  80       59      23.9   
5          James Robinson     1070  240   7   5   1  47       54      22.5   
6              Nick Chubb     1067  190  12  12   3  59       55        29   
7             Josh Jacobs     1065  273  12   3   0  28       61      22.3   
8           Lamar Jackson     1005  159   7  10   2  50       56      35.2   
9           Melvin Gordon      986  215   9   5   2  65       46      21.4   
10        Ezekiel Elliott      979  244   6   3   0  31       62      25.4   
11           Ronald Jones      978  192   7   6   1  98     

In [201]:
g_list = []

In [207]:

fox_season = 2018
while fox_season < 2025:
    fox_season = str(fox_season)
    fox_url = f"https://www.foxsports.com/nfl/stats?category=rushing&sort=ru_yds&season={fox_season}&seasonType=reg&sortOrder=desc"

    page = requests.get(fox_url)

    print(page.status_code)

    # create a BeautifulSoup object
    soup = BeautifulSoup(page.content, "html.parser")

    # get the table
    table = soup.find("table")

    # get the table headers
    headers = table.find_all("th")

    table_data = soup.find_all("td")

    start_list = []

    for data in table_data:
        start_list.append(data.text)
        # remove /n from the list
        start_list = [g.strip() for g in start_list]
        start_list = [g.replace("\n", "") for g in start_list]


    start_list = [start_list[i : i + 14] for i in range(0, len(start_list), 14)]


    # replace empty space in every 2nd element in the list of lists with a -
    start_list = [[g.replace("      ", " - ") for g in start_list] for start_list in start_list]

    # drop the first element in the list of lists
    start_list = [start_list[1:] for start_list in start_list]

    # split the first element in the list of lists by -
    for g in start_list:
        temp_list = []
        g[0] = g[0].split(" - ")
        # place the first two element in the list of lists in a temporary list
        temp_list.append(g[0][0:])
        # remove the first two element in the list of lists
        g.pop(0)
        # place each element in the temporary list in the list of lists
        g.insert(0,fox_season)
        for i in temp_list:
            for j in i:
                g.insert(1, j)

    print(start_list)

    header_list2 = []

    for header in headers:
        header_list2.append(header.text)
        # remove /n from the list
        header_list2 = [header.strip() for header in header_list2]

    header_list2.insert(0, "TEAM")
    header_list2.insert(0, "YEAR")

    # add each element of start_list to g_list
    for g in start_list:
        g_list.insert(0,g)

    print(g_list)

    fox_season = int(fox_season)
    fox_season += 1

# create a dataframe
df = pd.DataFrame(g_list, columns=header_list2)

print(df)

200
[['2018', 'DAL', 'Ezekiel Elliott', '15', '15', '304', '20.3', '1,434', '4.7', '95.6', '6', '41', '8', '6', '1'], ['2018', 'NYG', 'Saquon Barkley', '16', '16', '261', '16.3', '1,307', '5.0', '81.7', '11', '78', '7', '0', '0'], ['2018', 'LAR', 'Todd Gurley II', '14', '14', '256', '18.3', '1,251', '4.9', '89.4', '17', '36', '7', '1', '1'], ['2018', 'CIN', 'Joe Mixon', '14', '13', '237', '16.9', '1,168', '4.9', '83.4', '8', '51', '4', '0', '0'], ['2018', 'SEA', 'Chris Carson', '14', '14', '247', '17.6', '1,151', '4.7', '82.2', '9', '61', '6', '3', '2'], ['2018', 'CAR', 'Christian McCaffrey', '16', '16', '219', '13.7', '1,098', '5.0', '68.6', '7', '59', '4', '4', '1'], ['2018', 'TEN', 'Derrick Henry', '16', '12', '215', '13.4', '1,059', '4.9', '66.2', '12', '99', '2', '1', '1'], ['2018', 'WAS', 'Adrian Peterson', '16', '16', '251', '15.7', '1,042', '4.2', '65.1', '7', '90', '3', '3', '2'], ['2018', 'DEN', 'Phillip Lindsay', '15', '8', '192', '12.8', '1,037', '5.4', '69.1', '9', '65', '

In [208]:
# convert the RYDS, GP, GS, RATT, ATT/G, RAVG, RYDS/G columns to appropriate types
df["RYDS"] = df["RYDS"].astype(str).str.replace(",", "").astype(int)
df["GP"] = df["GP"].astype(int)
df["GS"] = df["GS"].astype(int)
df["RATT"] = df["RATT"].astype(str).str.replace(",", "").astype(float)
df["ATT/G"] = df["ATT/G"].astype(str).str.replace(",", "").astype(float)
df["RAVG"] = df["RAVG"].astype(str).str.replace(",", "").astype(float)
df["RYDS/G"] = df["RYDS/G"].astype(str).str.replace(",", "").astype(float)

# sort the dataframe by RushingYards
df = df.sort_values(by="RYDS", ascending=False)

# remove duplicate rows where the player has the same Name keep the highest RYDS row
df = df.drop_duplicates(subset="PLAYERS", keep="first")

# reset the index
df = df.reset_index(drop=True)

print(df)


    YEAR TEAM                PLAYERS  GP  GS   RATT  ATT/G  RYDS  RAVG  \
0   2020  TEN          Derrick Henry  16  16  378.0   23.6  2027   5.4   
1   2024  PHI         Saquon Barkley  16  16  345.0   21.6  2005   5.8   
2   2021  IND        Jonathan Taylor  17  17  332.0   19.5  1811   5.5   
3   2022   LV            Josh Jacobs  17  17  340.0   20.0  1653   4.9   
4   2020  MIN            Dalvin Cook  14  14  312.0   22.3  1557   5.0   
5   2022  CLE             Nick Chubb  17  17  302.0   17.8  1525   5.0   
6   2023   SF    Christian McCaffrey  16  16  272.0   17.0  1459   5.4   
7   2024  ATL         Bijan Robinson  17  17  304.0   17.9  1456   4.8   
8   2018  DAL        Ezekiel Elliott  15  15  304.0   20.3  1434   4.7   
9   2024  DET           Jahmyr Gibbs  17   4  250.0   14.7  1412   5.6   
10  2024  LAR         Kyren Williams  16  16  316.0   19.8  1299   4.1   
11  2022  CAR          Miles Sanders  17  15  259.0   15.2  1269   4.9   
12  2018  LAR         Todd Gurley II  

In [209]:
nfc_teams = ["ATL", "CAR", "NO", "TB", "DAL", "NYG", "PHI", "WAS", "CHI", "DET", "GB", "MIN", "ARI", "LAR", "SF", "SEA"]
afc_teams = ["BAL", "CIN", "CLE", "PIT", "BUF", "MIA", "NE", "NYJ", "HOU", "IND", "JAX", "TEN", "DEN", "KC", "LAC", "LV"]

In [210]:
# AfC teams
afc_df = df[df["TEAM"].isin(afc_teams)]

# NFC teams
nfc_df = df[df["TEAM"].isin(nfc_teams)]

print(afc_df)
print(nfc_df)


    YEAR TEAM                PLAYERS  GP  GS   RATT  ATT/G  RYDS  RAVG  \
0   2020  TEN          Derrick Henry  16  16  378.0   23.6  2027   5.4   
2   2021  IND        Jonathan Taylor  17  17  332.0   19.5  1811   5.5   
3   2022   LV            Josh Jacobs  17  17  340.0   20.0  1653   4.9   
5   2022  CLE             Nick Chubb  17  17  302.0   17.8  1525   5.0   
14  2019  BAL          Lamar Jackson  15  15  176.0   11.7  1206   6.9   
15  2021  CIN              Joe Mixon  16  16  292.0   18.3  1205   4.1   
16  2021  PIT           Najee Harris  17  17  307.0   18.1  1200   3.9   
18  2019  JAX      Leonard Fournette  15  15  265.0   17.7  1152   4.3   
21  2022  JAX     Travis Etienne Jr.  17  12  220.0   12.9  1125   5.1   
22  2023  BUF             James Cook  17  13  237.0   13.9  1122   4.7   
26  2019  IND            Marlon Mack  14  12  247.0   17.6  1091   4.4   
27  2024  TEN           Tony Pollard  16  16  260.0   16.3  1079   4.2   
30  2019  HOU            Carlos Hyde  