## Scraping the NFL standings table

In [1]:
import requests
import pandas as pd

from bs4 import BeautifulSoup

In [2]:
# Page to scrape: the nfl.com league standings URL for 2022 / "REG"
url = 'https://www.nfl.com/standings/league/2022/REG'

In [3]:
# Download the standings page. requests has no default timeout, so set one
# explicitly to keep this cell from hanging forever on a stalled connection.
page = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx response instead of parsing an error page further down
page.raise_for_status()
page

<Response [200]>

In [4]:
# Parse the response body into a BeautifulSoup tree using the lxml parser
soup = BeautifulSoup(page.text, 'lxml')

In [5]:
# Narrow the document down to the single <table> we need, identified by its
# summary attribute
table = soup.find('table', {'summary': 'Standings - Detailed View'})

# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in a later cell
if table is None:
    raise ValueError("Could not find the 'Standings - Detailed View' table in the page")

In [6]:
# Inspect the raw <th> cells of the table before extracting their text
table.find_all('th')

[<th aria-label="Division name" scope="col">NFL Team</th>,
 <th aria-label="WINS" scope="col">W</th>,
 <th aria-label="LOSSES" scope="col">L</th>,
 <th aria-label="TIES" scope="col">T</th>,
 <th aria-label="PCT" scope="col">PCT</th>,
 <th aria-label="PF" scope="col">PF</th>,
 <th aria-label="PA" scope="col">PA</th>,
 <th aria-label="NET PTS" scope="col">Net Pts</th>,
 <th aria-label="HOME" scope="col">Home</th>,
 <th aria-label="ROAD" scope="col">Road</th>,
 <th aria-label="DIV" scope="col">Div</th>,
 <th aria-label="PCT" scope="col">Pct</th>,
 <th aria-label="CONF" scope="col">Conf</th>,
 <th aria-label="PCT" scope="col">Pct</th>,
 <th aria-label="NON-CONF" scope="col">Non-Conf</th>,
 <th aria-label="STRK" scope="col">Strk</th>,
 <th aria-label="LAST 5" scope="col">Last 5</th>]

In [7]:
# Collect the text of every <th> cell in the table, trimmed of surrounding whitespace
headers = [th.text.strip() for th in table.find_all('th')]


In [8]:
# Check the extracted headers against the <th> cells of the table
headers

['NFL Team',
 'W',
 'L',
 'T',
 'PCT',
 'PF',
 'PA',
 'Net Pts',
 'Home',
 'Road',
 'Div',
 'Pct',
 'Conf',
 'Pct',
 'Non-Conf',
 'Strk',
 'Last 5']

In [9]:
# Create an empty DataFrame whose columns are the headers scraped from the table.
# Note: 'Pct' occurs twice in headers, so the frame has a duplicate column label.
df = pd.DataFrame(columns = headers)
df

Unnamed: 0,NFL Team,W,L,T,PCT,PF,PA,Net Pts,Home,Road,Div,Pct,Conf,Pct.1,Non-Conf,Strk,Last 5


In [10]:
# Walk every <tr> after the first one and store the text of its <td> cells
# as the next row of df
for tr in table.find_all('tr')[1:]:
    cells = tr.find_all('td')
    # len(df) is the next free integer label, so this appends at the end
    df.loc[len(df)] = [td.text for td in cells]

In [11]:
# Look at the scraped rows; note the first column's text repeats the club
# name (e.g. 'Arizona CardinalsCardinals')
df

Unnamed: 0,NFL Team,W,L,T,PCT,PF,PA,Net Pts,Home,Road,Div,Pct,Conf,Pct.1,Non-Conf,Strk,Last 5
0,Arizona CardinalsCardinals,0,1,0,0.0,21,44,-23,0 - 1 - 0,0 - 0 - 0,0 - 0 - 0,0.0,0 - 0 - 0,0.0,0 - 1 - 0,1L,0 - 1 - 0
1,Atlanta FalconsFalcons,0,1,0,0.0,26,27,-1,0 - 1 - 0,0 - 0 - 0,0 - 1 - 0,0.0,0 - 1 - 0,0.0,0 - 0 - 0,1L,0 - 1 - 0
2,Carolina PanthersPanthers,0,1,0,0.0,24,26,-2,0 - 1 - 0,0 - 0 - 0,0 - 0 - 0,0.0,0 - 0 - 0,0.0,0 - 1 - 0,1L,0 - 1 - 0
3,Cincinnati BengalsBengals,0,1,0,0.0,20,23,-3,0 - 1 - 0,0 - 0 - 0,0 - 1 - 0,0.0,0 - 1 - 0,0.0,0 - 0 - 0,1L,0 - 1 - 0
4,Dallas CowboysCowboys,0,1,0,0.0,3,19,-16,0 - 1 - 0,0 - 0 - 0,0 - 0 - 0,0.0,0 - 1 - 0,0.0,0 - 0 - 0,1L,0 - 1 - 0
5,Denver BroncosBroncos,0,1,0,0.0,16,17,-1,0 - 0 - 0,0 - 1 - 0,0 - 0 - 0,0.0,0 - 0 - 0,0.0,0 - 1 - 0,1L,0 - 1 - 0
6,Detroit LionsLions,0,1,0,0.0,35,38,-3,0 - 1 - 0,0 - 0 - 0,0 - 0 - 0,0.0,0 - 1 - 0,0.0,0 - 0 - 0,1L,0 - 1 - 0
7,Green Bay PackersPackers,0,1,0,0.0,7,23,-16,0 - 0 - 0,0 - 1 - 0,0 - 1 - 0,0.0,0 - 1 - 0,0.0,0 - 0 - 0,1L,0 - 1 - 0
8,Houston TexansTexans,0,0,1,0.5,20,20,0,0 - 0 - 1,0 - 0 - 0,0 - 0 - 1,0.5,0 - 0 - 1,0.5,0 - 0 - 0,1T,0 - 0 - 1
9,Indianapolis ColtsColts,0,0,1,0.5,20,20,0,0 - 0 - 0,0 - 0 - 1,0 - 0 - 1,0.5,0 - 0 - 1,0.5,0 - 0 - 0,1T,0 - 0 - 1


## Putting it all together

In [27]:
# Build the cleaned standings table in one pass.
# Every row is collected into a plain list first and the DataFrame is built
# once at the end, instead of growing it with df.loc[len(df)] on each iteration.
records = []

for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    if not cells:
        # A <tr> without <td> cells holds no team data; skip it rather than
        # failing on cells[0]
        continue

    # The full text of the first cell repeats the club name (e.g.
    # 'Arizona CardinalsCardinals'), so read only the full-name <div>
    fullname_div = cells[0].find('div', class_='d3-o-club-fullname')
    # find() returns None when that div is missing; fall back to the whole
    # cell text instead of raising AttributeError on None.text
    name_source = fullname_div if fullname_div is not None else cells[0]
    team_name = name_source.text.strip()

    # Text of the remaining cells, expected to line up with headers[1:]
    stats = [td.text.strip() for td in cells[1:]]

    # Team name first, followed by the rest of the row
    records.append([team_name] + stats)

# One DataFrame row per collected record, labelled with the scraped headers
df = pd.DataFrame(records, columns=headers)


In [28]:
# Preview the first rows to check that the team names are no longer doubled
df.head()

Unnamed: 0,NFL Team,W,L,T,PCT,PF,PA,Net Pts,Home,Road,Div,Pct,Conf,Pct.1,Non-Conf,Strk,Last 5
0,Arizona Cardinals,0,1,0,0.0,21,44,-23,0 - 1 - 0,0 - 0 - 0,0 - 0 - 0,0.0,0 - 0 - 0,0.0,0 - 1 - 0,1L,0 - 1 - 0
1,Atlanta Falcons,0,1,0,0.0,26,27,-1,0 - 1 - 0,0 - 0 - 0,0 - 1 - 0,0.0,0 - 1 - 0,0.0,0 - 0 - 0,1L,0 - 1 - 0
2,Carolina Panthers,0,1,0,0.0,24,26,-2,0 - 1 - 0,0 - 0 - 0,0 - 0 - 0,0.0,0 - 0 - 0,0.0,0 - 1 - 0,1L,0 - 1 - 0
3,Cincinnati Bengals,0,1,0,0.0,20,23,-3,0 - 1 - 0,0 - 0 - 0,0 - 1 - 0,0.0,0 - 1 - 0,0.0,0 - 0 - 0,1L,0 - 1 - 0
4,Dallas Cowboys,0,1,0,0.0,3,19,-16,0 - 1 - 0,0 - 0 - 0,0 - 0 - 0,0.0,0 - 1 - 0,0.0,0 - 0 - 0,1L,0 - 1 - 0


In [29]:
# Export the data to a CSV file; index=False leaves the row index out of the file
df.to_csv('NFLdata_2022.csv', index=False)