<a href="https://colab.research.google.com/github/MattUran22/NFL_Stats/blob/main/NFL_QB_Stats.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL template for the stathead.com player game finder query. The trailing
# "offset={}" placeholder is filled in with the result offset of each page.
base_url = "https://stathead.com/football/player-game-finder.cgi?request=1&draft_pick_type=overall&player_game_num_career_max=400&comp_type=reg&order_by=date&season_start=1&team_game_num_season_max=17&team_game_num_season_min=1&order_by_asc=1&weight_max=500&week_num_season_max=22&rookie=N&player_game_num_season_max=18&year_min=2011&qb_start_num_career_min=1&match=player_game&year_max=2023&player_game_num_season_min=1&season_end=-1&qb_start_num_career_max=400&week_num_season_min=1&player_game_num_career_min=1&cstat[1]=pass_att&ccomp[1]=gt&cval[1]=1&offset={}"

# Accumulators that gather one entry per scraped table row across all pages.
# The W/L suffixes pair up with the "Winning"/"Losing" column labels used
# when the DataFrame is built.
all_dates = []
all_teamsW, all_teamsL = [], []
all_scoresW, all_scoresL = [], []
all_yardsW, all_yardsL = [], []
all_turnoversW, all_turnoversL = [], []

# Result offset substituted into base_url; scraping starts at the first page.
page = 0

# A data row is only usable if it has every cell read below; the highest
# index accessed is 12, so at least 13 <td> cells are required.
MIN_COLUMNS = 13
# Seconds to wait on the server before requests raises instead of hanging.
REQUEST_TIMEOUT = 30

# Fetch result pages one at a time, appending each usable table row to the
# module-level all_* lists, until there is no "Next" link or a page cannot
# be fetched/parsed.
while True:
    # Build the URL for the current page; `page` is the result offset.
    url = base_url.format(page)

    # Without a timeout a stalled connection would block this cell forever.
    response = requests.get(url, timeout=REQUEST_TIMEOUT)

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        # `page` is unchanged here, so looping again would re-request the
        # same URL endlessly. Stop and keep whatever was collected so far.
        break

    # Parse the HTML and locate the results table by its id attribute.
    soup = BeautifulSoup(response.content, 'html.parser')
    game_table = soup.find('table', {'id': 'stats'})

    if game_table is None:
        print("Couldn't find the table with id 'stats'.")
        # Same reasoning as above: retrying the identical URL cannot succeed.
        break

    # Walk the table rows, skipping the first (header) row.
    for row in game_table.find_all('tr')[1:]:
        columns = row.find_all('td')

        # Rows with too few cells (e.g. repeated in-table headers) are skipped
        # rather than raising IndexError on the lookups below.
        if len(columns) < MIN_COLUMNS:
            continue

        cells = [cell.text.strip() for cell in columns[:MIN_COLUMNS]]

        # NOTE(review): the index-to-meaning mapping below mirrors the
        # original script; confirm it against the live table layout.
        # Date (cell 1) and time (cell 2) are combined into one string.
        all_dates.append(f"{cells[1]} {cells[2]}")
        all_teamsW.append(cells[3])
        all_teamsL.append(cells[5])
        all_scoresW.append(cells[7])
        all_scoresL.append(cells[8])
        all_yardsW.append(cells[9])
        all_yardsL.append(cells[11])
        all_turnoversW.append(cells[10])
        all_turnoversL.append(cells[12])

    # Continue only while the page offers a "Next" link.
    next_link = soup.find('a', {'class': 'button2 next'})
    if not next_link:
        break  # No more pages to scrape

    page += 200  # Advance the offset to the next page of results

# Map each output column label to the list of values gathered for it; the
# insertion order here is the column order of the resulting CSV.
data = {
    'Date': all_dates,
    'Winning Team': all_teamsW,
    'Losing Team': all_teamsL,
    'WScore': all_scoresW,
    'LScore': all_scoresL,
    'WYards': all_yardsW,
    'LYards': all_yardsL,
    'WTurnovers': all_turnoversW,
    'LTurnovers': all_turnoversL,
}

# Assemble the table and write it out without the DataFrame's index column.
df = pd.DataFrame(data)
df.to_csv('nfl_player_game_stats.csv', index=False)

print("Data scraped and saved to 'nfl_player_game_stats.csv'.")

Data scraped and saved to 'nfl_player_game_stats.csv'.
