In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import json
import os
import requests
from bs4 import BeautifulSoup

In [12]:
def _team_field(team_node, css_class):
    """Return the stripped text of the first <span> carrying ``css_class``, or None if absent."""
    span = team_node.find('span', class_=css_class)
    return span.text.strip() if span is not None else None


def get_scores(day, year=2024, timeout=30):
    """Scrape the finished men's D1 soccer games listed on the NCAA scoreboard for one day.

    Parameters
    ----------
    day : str
        Date fragment inserted verbatim into the URL path after the year
        (the notebook passes ``'MM/DD'`` strings).
    year : int or str, optional
        Year segment of the scoreboard URL. Defaults to 2024, the value that
        was previously hard-coded.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang a multi-day scraping loop. Defaults to 30.

    Returns
    -------
    list of dict
        One dict per game with the keys ``'home_team'``, ``'home_team_score'``,
        ``'away_team'`` and ``'away_team_score'``. All values are the stripped
        text found on the page (scores are strings, not numbers). An empty
        list is returned when the page cannot be fetched or lists no finished
        games.
    """
    url = f"https://www.ncaa.com/scoreboard/soccer-men/d1/{year}/{day}/all-conf"

    # Network failures are reported the same way as HTTP errors below: print
    # a message and return no rows, so one bad day does not abort a long run.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching the page {url}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching the page {url}: HTTP {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # A CSS selector matches the three classes regardless of their order or
    # of any extra classes on the element; a multi-word ``class_`` string
    # would only match that exact attribute value.
    games = soup.select('div.gamePod.gamePod-type-game.status-final')

    scores = []
    for game in games:
        teams = game.find_all('li')

        # A game pod needs two team entries to yield a result.
        if len(teams) < 2:
            continue

        # The page is taken to list the away team first and the home team second.
        away_team_name = _team_field(teams[0], 'gamePod-game-team-name')
        away_team_score = _team_field(teams[0], 'gamePod-game-team-score')
        home_team_name = _team_field(teams[1], 'gamePod-game-team-name')
        home_team_score = _team_field(teams[1], 'gamePod-game-team-score')

        # Skip pods with missing name/score markup instead of crashing on None.
        fields = (away_team_name, away_team_score, home_team_name, home_team_score)
        if any(value is None for value in fields):
            continue

        scores.append({
            'home_team': home_team_name,
            'home_team_score': home_team_score,
            'away_team': away_team_name,
            'away_team_score': away_team_score,
        })

    return scores


In [20]:
# Inclusive bounds of the scraping window, written as ISO dates.
start_date = '2024-08-22'
end_date = '2024-09-23'

# One timestamp per calendar day from start_date through end_date.
date_range = pd.date_range(start_date, end_date)

# 'MM/DD' label for each day, in chronological order.
time_range = [stamp.strftime('%m/%d') for stamp in date_range]

In [21]:
# Gather every day's games into one flat list of records and build the
# DataFrame once, rather than creating a frame per day and concatenating.
all_scores = []
for day in time_range:
    scores = get_scores(day)
    all_scores.extend(scores)

# `dfs` holds the combined results table (one row per finished game).
dfs = pd.DataFrame(all_scores)

# Create the output folder first so the export works on a fresh checkout.
output_path = rf'results/ncaa_scores_{end_date}.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
dfs.to_csv(output_path, index=False)