Notebook for exploring 2023 NFL team rushing stats. The goal is to find a way to identify which team was statistically the best.

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import matplotlib.pyplot as mpl
import re

In [2]:
# Season used in the nfl.com stats URL.
YEAR = '2023'
# Other STAT_TYPE values listed by the notebook author:
#   rushing, receiving, scoring, downs, tackles,
#   fumbles, interceptions, field-goals,
#   kickoff, kickoff-returns, punting, punt-returns
STAT_TYPE = 'rushing'
OFFENSE_DEFENSE = 'offense'  # or 'defense'

URL = f'https://www.nfl.com/stats/team-stats/{OFFENSE_DEFENSE}/{STAT_TYPE}/{YEAR}/reg/all'

# A timeout keeps this cell from hanging indefinitely if the site never answers.
page = requests.get(URL, timeout=30)
# Stop here on an HTTP error instead of parsing an error page in later cells.
page.raise_for_status()
html = page.content
soup = BeautifulSoup(html, "html.parser")

In [3]:
# Full class attribute of the stats <table>, passed to bs4 as a single string.
table_name = "d3-o-table d3-o-table--detailed d3-o-team-stats--detailed d3-o-table--sortable {sortlist: [[0,0]], sortinitialorder: 'asc'}"

table = soup.find("table", class_=table_name)

# soup.find returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError when a later cell calls table.find_all.
if table is None:
    raise ValueError(f"No <table> with class {table_name!r} found in the downloaded page")

print(table)

<table class="d3-o-table d3-o-table--detailed d3-o-team-stats--detailed d3-o-table--sortable {sortlist: [[0,0]], sortinitialorder: 'asc'}" data-require="modules/tableSortable">
<thead>
<tr>
<th>Team</th>
<th scope="col">Att</th>
<th scope="col">Rush Yds</th>
<th scope="col">YPC</th>
<th scope="col">TD</th>
<th scope="col">20+</th>
<th scope="col">40+</th>
<th scope="col">Lng</th>
<th scope="col">Rush 1st</th>
<th scope="col">Rush 1st%</th>
<th scope="col">Rush FUM</th>
</tr>
</thead>
<tbody>
<tr>
<td scope="row" tabindex="0">
<div class="d3-o-club-info">
<div class="d3-o-club-logo">
<picture><!--[if IE 9]><video style="display: none; "><![endif]--><source media="(min-width:1024px)" srcset="https://static.www.nfl.com/t_q-best/league/api/clubs/logos/BAL"/><source media="(min-width:768px)" srcset="https://static.www.nfl.com/t_q-best/league/api/clubs/logos/BAL"/><source srcset="https://static.www.nfl.com/t_q-best/league/api/clubs/logos/BAL"/><!--[if IE 9]></video><![endif]--><img alt="" cl

In [4]:
# Collect every <th> header cell on the page.
# find_all is the current method name (findAll is the legacy camelCase alias),
# and it matches the find_all calls used when iterating the table rows.
columns = soup.find_all('th')

print(columns)

[<th>Team</th>, <th scope="col">Att</th>, <th scope="col">Rush Yds</th>, <th scope="col">YPC</th>, <th scope="col">TD</th>, <th scope="col">20+</th>, <th scope="col">40+</th>, <th scope="col">Lng</th>, <th scope="col">Rush 1st</th>, <th scope="col">Rush 1st%</th>, <th scope="col">Rush FUM</th>]


In [11]:
# Column labels for the rushing table, in the order the stats are scraped.
rushing_columns = [
    'Team',
    'Rush Att',
    'Rush Yds',
    'YPC',
    'Rush TD',
    'Rush 20+',
    'Rush 40+',
    'Rush Lng',
    'Rush 1st',
    'Rush 1st%',
    'Rush Fum',
]

# Start from an empty frame that only carries the column labels.
df = pd.DataFrame(columns=rushing_columns)


In [8]:
def remove_html_tags(text):
    """Return ``text`` with every ``<...>`` span removed.

    Each match of the non-greedy pattern ``<.*?>`` is replaced with an empty
    string. ``.`` does not match a newline here, so a ``<...>`` span that
    contains a line break is left in place.
    """
    return re.sub(r'<.*?>', '', text)

In [12]:
# Column labels, in the same order as the <td> cells of each table row.
stat_columns = ['Team', 'Rush Att', 'Rush Yds', 'YPC',
                'Rush TD', 'Rush 20+', 'Rush 40+', 'Rush Lng', 'Rush 1st',
                'Rush 1st%', 'Rush Fum']

# Gather one dict per team and build the frame once at the end. Growing a
# DataFrame with pd.concat inside the loop copies it on every iteration, and
# re-running the cell would append the same rows a second time.
records = []
for row in table.find_all('tr'):
    cells = row.find_all('td')
    if not cells:
        # The header row holds only <th> cells, so there is nothing to record.
        continue
    # Strip the markup and newlines from each cell; values stay as strings
    # (e.g. 'Rush Lng' can hold a value such as '39T').
    values = [remove_html_tags(str(cell)).replace('\n', '') for cell in cells]
    # zip pairs labels with cells positionally and ignores any extra cells;
    # a row with fewer cells yields missing values for the trailing columns.
    records.append(dict(zip(stat_columns, values)))

df = pd.DataFrame(records, columns=stat_columns)

df.head()

Unnamed: 0,Team,Rush Att,Rush Yds,YPC,Rush TD,Rush 20+,Rush 40+,Rush Lng,Rush 1st,Rush 1st%,Rush Fum
0,Ravens ...,541,2661,4.9,26,21,4,60,144,26.6,7
1,Bears ...,534,2399,4.5,16,13,0,39T,137,25.7,5
2,Falcons ...,522,2159,4.1,14,11,0,38,121,23.2,13
3,Browns ...,518,2017,3.9,15,9,3,69,121,23.4,9
4,Bills ...,512,2212,4.3,22,9,1,42,158,30.9,9


Function used to rate the efficiency of a team's rushing stats (the rating is an arbitrary value).

NOTE: the function's inputs and the way it computes the rating still need fine-tuning.

In [22]:
#def passer_rating(comp, attempts, tds, yards, inrcpts):
#    return round(((((comp / attempts) - 0.3) * 5) 
#                  + (((yards / attempts) - 3) * 0.25) + ((tds / attempts) * 20) 
#                  + (2.375 - ((inrcpts / attempts) * 25))) / (6/100), 2)


def rushing_rating(ypc, tds, fumbles, attempts, yards):
    """Return a rushing rating rounded to two decimals.

    The rating is the sum of three per-attempt components divided by
    ``6/100``:

    * yards:      ``((yards / attempts) - 3) * 0.25``
    * touchdowns: ``(tds / attempts) * 20``
    * fumbles:    ``6.375 - ((fumbles / attempts) * 25)``

    ``ypc`` is accepted but not used; yards per attempt is recomputed from
    ``yards / attempts``. ``attempts`` is a divisor in every component.
    """
    yards_component = ((yards / attempts) - 3) * 0.25
    touchdown_component = (tds / attempts) * 20
    fumble_component = 6.375 - ((fumbles / attempts) * 25)

    combined = yards_component + touchdown_component + fumble_component
    return round(combined / (6/100), 2)

In [23]:

# Sample call using the Ravens row of the scraped table shown earlier.
print(rushing_rating(
    ypc=4.9,
    tds=26,
    fumbles=7,
    attempts=541,
    yards=2661,
))

124.87
