In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [10]:
"""

The goal of this cell is to obtain the current contracts of 
Quarterbacks playing in the league.

"""
# @source: spotrac- very neat website to obtain player contracts
# from any sport

url = 'https://www.spotrac.com/nfl/contracts/quarterback/'

response = requests.get(url)
time.sleep(8)
soup = BeautifulSoup(response.text, 'html.parser')

table = soup.find('tbody')
players = []

# for each row in table, extract the player information.
for row in table.find_all('tr'):
    columns = row.find_all('td')

    # Extract player name from the first column (assuming it's plain text)
    player_name = columns[1].find('a', class_ = 'team-name').text.strip()

    # Extract the year info
    year_range = columns[1].find_all('div', class_ = 'rank-position')[1].text.strip()
    start_year = year_range.split('-')[0]
    end_year = year_range.split('-')[1].split("(")[0].strip()
    contract_length = int(end_year) - int(start_year)

    # Calculate the contract value
    contract_value = columns[4].find('span', class_ = 'cap').text.strip()
    guaranteed_money = columns[-1].text.strip()


    player_data = {
        'Player Name': player_name,
        'Start Year': start_year,
        'End Year': end_year,
        'Contract Length': contract_length,
        'Contract Value': contract_value,
        'Guaranteed Money': guaranteed_money
    }
    players.append(player_data)
df = pd.DataFrame(players)
df.to_csv('quarterback_contracts.csv', index=False)


In [29]:
"""

The goal of this cell is to obtain players off Pro Football
Reference and their statistics that we will use in our model.

"""
# @source: Pro Football Reference- awesome site that has every statistic
# of every football player imaginable in the NFL.

alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
alphabet = list(alphabet)

url_template = 'https://www.pro-football-reference.com/players/{}/'

df = pd.read_csv('quarterback_contracts.csv')

players_to_get = df['Player Name']

quarterback_links = []

for letter in alphabet:
    url = url_template.format(letter)
    response = requests.get(url)
    time.sleep(8)
    soup = BeautifulSoup(response.text, 'html.parser')

    for p in soup.find_all('p'):
        a_tag = p.find('a')
        if a_tag is not None:
            player_name = a_tag.text.strip()
            
            text = p.get_text()
            position = ''
            if ' (' in text and ')' in text:
                position = text.split(' (')[1].split(') ')[0]
        
            is_qb = 1 if 'QB' in position else 0

            if player_name in players_to_get.values and is_qb == 1:
                player_html = a_tag['href']
                full_html = f"https://www.pro-football-reference.com/{player_html}"
                quarterback_links.append(full_html)



['https://www.pro-football-reference.com//players/A/AlleBr00.htm', 'https://www.pro-football-reference.com//players/A/AlleJo02.htm', 'https://www.pro-football-reference.com//players/A/AlleKy00.htm', 'https://www.pro-football-reference.com//players/B/BeatC.00.htm', 'https://www.pro-football-reference.com//players/B/BoylTi00.htm', 'https://www.pro-football-reference.com//players/B/BridTe00.htm', 'https://www.pro-football-reference.com//players/B/BrisJa00.htm', 'https://www.pro-football-reference.com//players/B/BrowJa08.htm', 'https://www.pro-football-reference.com//players/B/BurrJo01.htm', 'https://www.pro-football-reference.com//players/C/CarrDe02.htm']
