In [1]:
# Import Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
from pprint import pprint

import pandas as pd
import numpy as np

In [2]:
# Build the per-season URL lists for the three trend pages.
# Each base URL ends in 'yearly_' and is completed by appending a four-digit season.
ats_base_url = 'https://www.teamrankings.com/nfl/trends/ats_trends/?range=yearly_'
record_base_url = 'https://www.teamrankings.com/nfl/trends/win_trends/?range=yearly_'
ou_base_url = 'https://www.teamrankings.com/nfl/trends/ou_trends/?range=yearly_'

# First and last season to request (both inclusive)
year_i = 2003
year_f = 2023
years = np.arange(year_i, year_f + 1)

# One URL per season, in ascending season order, for each page type
ats_urls = [f'{ats_base_url}{season}' for season in years]
record_urls = [f'{record_base_url}{season}' for season in years]
ou_urls = [f'{ou_base_url}{season}' for season in years]

In [6]:
# Open a Splinter-controlled Chrome session; the scraping cells call browser.visit() on it
browser = Browser(driver_name='chrome')

In [7]:
# Script to automate browsing for Win/Loss Record
#
# Visits every URL in record_urls, reads the rows of the first <tbody> on each
# page, and collects them into record_df with a 'Year' column. The season is
# recorded per scraped row (instead of assuming a fixed number of rows per
# page), so the Year labels stay aligned with the data even if a page returns
# more or fewer rows than expected.

# Column names for the cells scraped from each table row
columns = ['Team', 'Win-Loss Record', 'Win %', 'MOV', 'ATS']

combined_data = []   # one list of cell texts per scraped table row
filled_years = []    # season of each entry in combined_data, kept in lockstep

# Visit each year in the url list
for url in record_urls:

    # The season is the suffix after the last underscore ('...yearly_2003')
    season = int(url.rsplit('_', 1)[-1])

    # Visit the page and create the soup object
    browser.visit(url)
    soup = BeautifulSoup(browser.html, 'html.parser')

    # Locate the table body; find() returns None when the tag is absent
    table_body = soup.find('tbody')
    if table_body is None:
        raise RuntimeError(f'No <tbody> element found at {url}')

    # Iterate through the rows in the table
    for row in table_body.find_all('tr'):

        # Text of every <td> cell in the current row
        row_data = [cell.get_text() for cell in row.find_all('td')]

        # Skip rows without any <td> cells so they do not become empty records
        if not row_data:
            continue

        combined_data.append(row_data)
        filled_years.append(season)

# Create a Pandas DataFrame by using the list of rows and the list of column names
record_df = pd.DataFrame(combined_data, columns=columns)

# Fill in the year information (one label per scraped row)
record_df['Year'] = filled_years

In [4]:
# Script to automate browsing for Against the Spread (ATS)
#
# Visits every URL in ats_urls, reads the rows of the first <tbody> on each
# page, and collects them into ats_df with a 'Year' column. The season is
# recorded per scraped row (instead of assuming a fixed number of rows per
# page), so the Year labels stay aligned with the data even if a page returns
# more or fewer rows than expected.

# Column names for the cells scraped from each table row
columns = ['Team', 'ATS Record', 'Cover %', 'MOV', 'ATS']

combined_data = []   # one list of cell texts per scraped table row
filled_years = []    # season of each entry in combined_data, kept in lockstep

# Visit each year in the url list
for url in ats_urls:

    # The season is the suffix after the last underscore ('...yearly_2003')
    season = int(url.rsplit('_', 1)[-1])

    # Visit the page and create the soup object
    browser.visit(url)
    soup = BeautifulSoup(browser.html, 'html.parser')

    # Locate the table body; find() returns None when the tag is absent
    table_body = soup.find('tbody')
    if table_body is None:
        raise RuntimeError(f'No <tbody> element found at {url}')

    # Iterate through the rows in the table
    for row in table_body.find_all('tr'):

        # Text of every <td> cell in the current row
        row_data = [cell.get_text() for cell in row.find_all('td')]

        # Skip rows without any <td> cells so they do not become empty records
        if not row_data:
            continue

        combined_data.append(row_data)
        filled_years.append(season)

# Create a Pandas DataFrame by using the list of rows and the list of column names
ats_df = pd.DataFrame(combined_data, columns=columns)

# Fill in the year information (one label per scraped row)
ats_df['Year'] = filled_years

In [8]:
# Script to automate browsing for Over/Under Record
#
# Visits every URL in ou_urls, reads the rows of the first <tbody> on each
# page, and collects them into ou_df with a 'Year' column. The season is
# recorded per scraped row (instead of assuming a fixed number of rows per
# page), so the Year labels stay aligned with the data even if a page returns
# more or fewer rows than expected.

# Column names for the cells scraped from each table row
columns = ['Team', 'Over Record', 'Over %', 'Under %', 'Total']

combined_data = []   # one list of cell texts per scraped table row
filled_years = []    # season of each entry in combined_data, kept in lockstep

# Visit each year in the url list
for url in ou_urls:

    # The season is the suffix after the last underscore ('...yearly_2003')
    season = int(url.rsplit('_', 1)[-1])

    # Visit the page and create the soup object
    browser.visit(url)
    soup = BeautifulSoup(browser.html, 'html.parser')

    # Locate the table body; find() returns None when the tag is absent
    table_body = soup.find('tbody')
    if table_body is None:
        raise RuntimeError(f'No <tbody> element found at {url}')

    # Iterate through the rows in the table
    for row in table_body.find_all('tr'):

        # Text of every <td> cell in the current row
        row_data = [cell.get_text() for cell in row.find_all('td')]

        # Skip rows without any <td> cells so they do not become empty records
        if not row_data:
            continue

        combined_data.append(row_data)
        filled_years.append(season)

# Create a Pandas DataFrame by using the list of rows and the list of column names
ou_df = pd.DataFrame(combined_data, columns=columns)

# Fill in the year information (one label per scraped row)
ou_df['Year'] = filled_years