In [26]:
# imports
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
from tabulate import tabulate


# Set up the Chrome WebDriver.
# NOTE(review): despite the original "headless mode" wording, no headless
# argument is ever added to `options`; it is passed to Chrome with its defaults.
# Add the flag on `options` here if a headless run is actually intended.
options = webdriver.ChromeOptions()
# The Service is built from whatever ChromeDriverManager().install() returns.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

In [27]:
# Reuse the WebDriver created in the setup cell above. Building a second
# webdriver.Chrome here would orphan that first browser (only one
# driver.quit() is ever issued in this notebook) and would silently drop the
# `options` object the first driver was configured with.

# Navigate to the ESPN NFL scoreboard page
driver.get("https://www.espn.com/nfl/scoreboard")
time.sleep(5)  # Allow page to load

In [28]:

# Initialize WebDriverWait with a 10-second timeout.
# NOTE(review): the next cell re-creates `wait` with a 15-second timeout, so
# this value is overwritten before `wait` is first used.
wait = WebDriverWait(driver, 10)  # Define 'wait' here

In [None]:
# Initialize WebDriverWait (used by the boxscore-scraping cell further down)
wait = WebDriverWait(driver, 15)

# CSS selectors for the week carousel: the "previous" arrow and the week anchors.
PREV_ARROW_SELECTOR = ".Arrow.flex.justify-center.items-center.Arrow--left"
WEEK_LINK_SELECTOR = "div.Week.currentWeek div.Week__wrapper div.custom--week a"

# Safety cap so the loop cannot spin forever if the arrow never reports the
# 'disabled' class (for example after a markup change on the page).
MAX_CAROUSEL_PAGES = 50

all_links = []

for _ in range(MAX_CAROUSEL_PAGES):
    # Use JavaScript to get all elements matching the arrow selector
    arrows = driver.execute_script(
        "return [...document.querySelectorAll(arguments[0])]",
        PREV_ARROW_SELECTOR,
    )

    # Without a "previous" arrow there is nothing to page through
    if not arrows:
        print("No elements found.")
        break

    # Extract href attributes of the week links currently in the carousel
    week_hrefs = driver.execute_script(
        "return [...document.querySelectorAll(arguments[0])].map(e => e.href);",
        WEEK_LINK_SELECTOR,
    )

    # Keep only links containing '/seasontype/2'
    filtered_links = [href for href in week_hrefs if "/seasontype/2" in href]

    # Merge and de-duplicate while preserving first-seen order, so the
    # collected list (and everything driven from it) is deterministic.
    all_links = list(dict.fromkeys(all_links + filtered_links))

    # Use JavaScript to check if the arrow button is disabled
    is_disabled = driver.execute_script(
        "return document.querySelector(arguments[0]).classList.contains('disabled');",
        PREV_ARROW_SELECTOR,
    )

    # If the button is disabled, stop the loop
    if is_disabled:
        print("Reached the beginning of the carousel. Stopping.")
        break

    # Click the first arrow to go to the previous carousel page
    arrows[0].click()
else:
    # Only reached when the cap was hit without ever seeing a disabled arrow
    print(f"Stopped after {MAX_CAROUSEL_PAGES} carousel pages without reaching a disabled arrow.")

# Print all the collected links after the loop finishes
print("All links collected:")
for link in all_links:
    print(link)


In [None]:

# List to store boxscore links (unique, in the order they were discovered)
boxscore_links = []
# Tracks hrefs already stored, in case a page links the same boxscore more
# than once; duplicates here would become duplicate game IDs downstream.
seen_boxscores = set()

# Loop through each week link
for link in all_links:
    try:
        print(f"\nNavigating to: {link}")
        driver.get(link)  # Open the page in the browser

        # Wait for the page container to be present before scraping
        wait.until(EC.presence_of_element_located((By.ID, "fittPageContainer")))

        # Extract boxscore anchors
        anchors = driver.find_elements(By.XPATH, '//a[contains(@href, "/boxscore/")]')

        for anchor in anchors:
            href = anchor.get_attribute('href')
            if not href or '/boxscore/' not in href or href in seen_boxscores:
                continue
            seen_boxscores.add(href)
            boxscore_links.append(href)  # Append only the link (no dictionary)
            print(f"Found boxscore link: {href}")

    except Exception as e:
        # Best effort: report the failing week page and continue with the rest
        print(f"Error processing {link}: {e}")

# Print collected boxscore links
print("\nAll boxscore links collected:")
for link in boxscore_links:
    print(link)

# Derive the game IDs once (previously the full list was rebuilt and printed
# once per link) and show them a single time.
game_ids = [link.split("gameId/")[-1] for link in boxscore_links]
print(game_ids)

In [31]:
# Shut down the Selenium driver; the cells below only use the collected links.
driver.quit()

In [None]:
import requests
import pandas as pd
import os
from urllib.parse import quote_plus
from sqlalchemy import create_engine

# MySQL configuration.
# Credentials are read from the environment so that no secret lives in the
# notebook. The non-secret settings fall back to the values this notebook has
# always used. NOTE(review): a password was previously committed in this cell;
# it should be rotated.
DB_USER = os.environ.get("NFL_DB_USER", "root")
DB_PASSWORD = os.environ.get("NFL_DB_PASSWORD")
DB_HOST = os.environ.get("NFL_DB_HOST", "localhost")
DB_NAME = os.environ.get("NFL_DB_NAME", "NFL_Project2")

if DB_PASSWORD is None:
    raise RuntimeError(
        "Set the NFL_DB_PASSWORD environment variable before running this cell."
    )

# quote_plus keeps characters such as '@', ':' or '/' inside the credentials
# from being parsed as part of the URL structure.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}@{DB_HOST}/{DB_NAME}"
)

# Output CSV location; override with NFL_CSV_PATH on machines where the
# default absolute path does not exist.
csv_path = os.environ.get(
    "NFL_CSV_PATH",
    "/Users/mattatchison/Documents/SQL_Projects/raw_all_games.csv",
)

def _build_team_rows(game_id, summary_data, scoreboard_data):
    """Flatten the summary/scoreboard payloads into one row dict per team (no I/O)."""
    header = summary_data.get("header", {})

    # Game-level metadata shared by every team row
    week = header.get("week", "na")
    season_type = scoreboard_data.get("season", {}).get("slug", "na")
    game_date = scoreboard_data.get("date", "na")

    # Referees, joined into one comma-separated string
    refs = ", ".join(
        official.get("fullName", "na")
        for official in summary_data.get("gameInfo", {}).get("officials", [])
    )

    # Map each competitor's team id to its own score, so every team row gets
    # that team's score. `or [{}]` also covers an empty competitions list,
    # which would otherwise raise IndexError on [0].
    competitions = header.get("competitions") or [{}]
    score_by_team_id = {}
    for competitor in competitions[0].get("competitors", []):
        competitor_team_id = competitor.get("team", {}).get("id", competitor.get("id"))
        score_by_team_id[str(competitor_team_id)] = competitor.get("score", "na")

    rows = []
    for team in summary_data.get("boxscore", {}).get("teams", []):
        team_info = team.get("team", {})
        team_id = team_info.get("id", "na")

        row = {
            "game_id": game_id,
            "score": score_by_team_id.get(str(team_id), "na"),
            "team_name": team_info.get("displayName", "Unknown"),
            "team_abbreviation": team_info.get("abbreviation", "na"),
            "team_id": team_id,
            "referees": refs,
            "home_away": team.get("homeAway", "unknown"),
            "game_date": game_date,
            "week": week,
            "season_type": season_type,
        }

        # One column per statistic, named after the stat's name (or label),
        # lower-cased with spaces replaced by underscores.
        for stat in team.get("statistics", []):
            label = stat.get("name", stat.get("label", "Unknown"))
            row[label.lower().replace(" ", "_")] = stat.get("displayValue", "0")

        rows.append(row)

    return rows


def get_game_stats(game_id):
    """Fetch one game's team stats from the ESPN APIs and persist them.

    Requests the summary and scoreboard endpoints for ``game_id``, builds one
    row per team, appends the rows to the ``team_stats`` SQL table through
    ``engine`` and then to the CSV file at ``csv_path``.

    Args:
        game_id: ESPN event identifier interpolated into both request URLs.

    Returns:
        None. Progress and failures are reported with ``print``; HTTP errors
        and any other exception are caught here and not re-raised, so one bad
        game does not stop a batch run.
    """
    summary_url = f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/summary?event={game_id}"
    scoreboard_url = f"https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard/{game_id}"

    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

        summary_response = requests.get(summary_url, headers=headers, timeout=10)
        scoreboard_response = requests.get(scoreboard_url, headers=headers, timeout=10)

        summary_response.raise_for_status()
        scoreboard_response.raise_for_status()

        rows = _build_team_rows(
            game_id, summary_response.json(), scoreboard_response.json()
        )
        df = pd.DataFrame(rows)

        if not df.empty:
            week = rows[0]["week"]

            # Save to MySQL
            df.to_sql("team_stats", con=engine, if_exists="append", index=False)
            print(f"✅ Successfully saved game {game_id} (Week {week})")

            # Save to CSV (append mode); write the header only when creating the file
            df.to_csv(csv_path, mode='a', header=not os.path.exists(csv_path), index=False)
            print(f"✅ Game {game_id} saved to CSV.")

    except requests.exceptions.HTTPError as e:
        print(f"❌ HTTP Error for game {game_id}: {str(e)[:100]}...")
    except Exception as e:
        print(f"❌ Unexpected error for game {game_id}: {str(e)[:100]}...")

def extract_game_id(link):
    """Return the game ID that follows 'gameId/' in ``link``, or None if absent.

    Anything after the ID (a further path segment, a query string or a
    fragment) is dropped, so the value can be passed straight to the API URLs.
    """
    _, marker, tail = link.rpartition("gameId/")
    if not marker:
        return None
    for delimiter in ("/", "?", "#"):
        tail = tail.split(delimiter, 1)[0]
    return tail or None


if __name__ == "__main__":
    # Extract game IDs from boxscore_links (make sure `boxscore_links` exists).
    # Links without an ID are skipped, and each game is processed once even if
    # several links point at it, so rows are not appended twice.
    game_ids = []
    for link in boxscore_links:
        game_id = extract_game_id(link)
        if game_id and game_id not in game_ids:
            game_ids.append(game_id)

    for game_id in game_ids:
        print(f"\nProcessing Game ID: {game_id}")
        get_game_stats(game_id)
