## Set up shell environment
Before we start, let's make sure our environment is set up to run all the code we will need.

In [1]:
# !conda init
# !conda create -n gymternet -- python 3.12
# !conda activate gymternet 
!conda install pip -y
!pip install -r ../requirements.txt

Channels:
 - defaults
 - conda-forge
Platform: osx-64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.



## Import libraries and programs

Now that we're operating in Python, import all the libraries that the code below relies on.

In [None]:
# Sanity check: print the package metadata that pip has recorded for numpy
!pip show numpy

In [2]:
import os
import json
import requests
import datetime

import numpy as np
import pandas as pd 

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from scrapy import Selector

from tqdm.notebook import tqdm
from pprint import pprint as print

In [3]:
# Program-level settings shared by the cells below

# Chrome session driven by Selenium for the browser-based scraping cells
driver = webdriver.Chrome()

# Root of the API endpoint that serves the final results of one season
year_url_root = "https://www.roadtonationals.com/api/women/finalresults/"

# Seasons we are interested in evaluating, newest first (2024 back to 2015)
years = list(range(2024, 2014, -1))

In [4]:
# Setting up a fetch page function with a retry function and error handling
# def fetch_page(url, retries=5):
#     for i in range(retries):
#         try:
#             driver.get(url)
#             return driver.page_source
#         except Exception as e:
#             # print(f"Error fetching page: {url}, retrying...")
#             # print(e)
#             pass
#     return None

# Every failed fetch attempt is recorded here so that problem URLs can be reviewed after a run
error_logs = []


def _log_fetch_error(url, status_code, error):
    """Append a record describing one failed fetch attempt to ``error_logs``."""
    error_logs.append({
        'url': url,
        'status_code': status_code,
        'error': error,
        # `datetime` is imported as a module, so the class has to be qualified
        'timestamp': datetime.datetime.now().isoformat()
    })


def fetch_page(url, retries=3, timeout=10):
    """Fetch ``url`` with a bounded number of attempts and return the response body.

    Parameters
    ----------
    url : str
        Address to request.
    retries : int, optional
        Maximum number of attempts before giving up.
    timeout : int or float, optional
        Timeout, in seconds, passed to ``requests.get`` for each attempt.

    Returns
    -------
    str or None
        The response text of the first attempt that answers with status 200,
        or ``None`` when every attempt fails. Each failed attempt (non-200
        status, timeout or other request error) adds one entry to ``error_logs``.
    """
    for _ in range(retries):
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code == 200:
                return response.text
            _log_fetch_error(url, response.status_code, 'Non-200 status code')
        except requests.exceptions.Timeout:
            _log_fetch_error(url, None, 'Timeout')
        except requests.exceptions.RequestException as e:
            _log_fetch_error(url, None, str(e))
    return None


# 1: The teams
The first step in the logic is to start to set up the data related to the teams. The teams are the 'base unit' of analysis for these data: all meets comprise teams, all gymnasts belong to teams, all scores either belong to gymnasts who belong to teams, or belong to teams directly.

On the landing page, I have scraped all the information for the past 10 years; teams are relatively static, but occasionally a new team is added to the roster or an existing team is dropped, so at this stage I'll just grab everything and drop duplicates later.

**NB: The raw data written by this step is saved in the '../Data/Raw/teams' folder**

In [None]:
# TODO - Perhaps wrap this into a method that takes a year?

# For every year, download the final-results JSON and save it to a file
for year in years:
    year_url = year_url_root + str(year)

    # NOTE(review): hard-coded session cookie; confirm whether the API actually requires it
    headers = {
        'Cookie': 'PHPSESSID=c48eb24102c0c45390a5d64809741f95'
    }

    # A timeout stops a stalled connection from hanging the notebook indefinitely
    response = requests.get(year_url, headers=headers, timeout=10)
    # Fail loudly rather than saving an error page that would later be parsed as team data
    response.raise_for_status()

    # Save the raw response body; utf-8 matches the default encoding pd.read_json reads with
    with open(f'../Data/Raw/teams/{year}_teams.json', 'w', encoding='utf-8') as f:
        f.write(response.text)

In [5]:
# Read the per-year json files into a single dataframe

# Frames are collected in a list and concatenated once at the end; concatenating
# inside the loop would re-copy every previously loaded row on each iteration
yearly_frames = []

for year in years:
    filename = f'../Data/Raw/teams/{year}_teams.json'

    # Read the json file into a temporary df and tag every row with its season
    temp_df = pd.read_json(filename)
    temp_df['year'] = year
    yearly_frames.append(temp_df)

teams_data_df = pd.concat(yearly_frames).reset_index(drop=True)

# Expand the records held in the 'data' column into one column per field
teams_df = pd.json_normalize(teams_data_df['data']).reset_index(drop=True)
# Both frames carry the same 0..n-1 index, so the season column lines up row for row
teams_df['year'] = teams_data_df['year']

In [None]:
teams_df

In [6]:
# Drop the columns that we are not interested in.
# errors='ignore' keeps this cell safe to re-run once the columns have already been removed.
unwanted_columns = ['rank', 'ncaa_final', 'nqs', 'regionals', 'rqs', 'division_id', 'average_score', 'high_score', 'ncaa']
teams_df = teams_df.drop(columns=unwanted_columns, errors='ignore')

In [None]:
# Preview the df
teams_df.head()

In [7]:
# Remove duplicates: keep only the first row for each (team_id, team_name) pair.
# The surviving row keeps the 'year' of that first occurrence; the other years are discarded.
teams_df = teams_df.drop_duplicates(subset=['team_id', 'team_name'], ignore_index=True)

In [None]:
# Preview the df
teams_df.head()

In [8]:
# Root of the API endpoint that serves a team's dashboard
base_team_url = 'https://www.roadtonationals.com/api/women/dashboard'

# Build each dashboard link as <root>/<year>/<team_id> and store it in the team_url column
year_part = teams_df['year'].astype(str)
team_part = teams_df['team_id'].astype(str)
teams_df['team_url'] = base_team_url + '/' + year_part + '/' + team_part

In [None]:
# Preview the df - this looks good to work with now
teams_df.head()

Now we go to each of the links in the teams df and scrape the data for the meets

In [9]:
# Build the list of dashboard urls to visit
base_team_url = 'https://www.roadtonationals.com/api/women/dashboard'

# Pair every season with every known team id, season by season.
# NB: Some of these will be inactive, but we will filter these out later
meet_urls = []
for year in years:
    season_root = f'{base_team_url}/{year}'
    meet_urls.extend(f'{season_root}/{team_id}' for team_id in teams_df['team_id'])

meet_urls


['https://www.roadtonationals.com/api/women/dashboard/2024/34',
 'https://www.roadtonationals.com/api/women/dashboard/2024/15',
 'https://www.roadtonationals.com/api/women/dashboard/2024/69',
 'https://www.roadtonationals.com/api/women/dashboard/2024/22',
 'https://www.roadtonationals.com/api/women/dashboard/2024/61',
 'https://www.roadtonationals.com/api/women/dashboard/2024/47',
 'https://www.roadtonationals.com/api/women/dashboard/2024/6',
 'https://www.roadtonationals.com/api/women/dashboard/2024/2',
 'https://www.roadtonationals.com/api/women/dashboard/2024/33',
 'https://www.roadtonationals.com/api/women/dashboard/2024/20',
 'https://www.roadtonationals.com/api/women/dashboard/2024/40',
 'https://www.roadtonationals.com/api/women/dashboard/2024/39',
 'https://www.roadtonationals.com/api/women/dashboard/2024/46',
 'https://www.roadtonationals.com/api/women/dashboard/2024/38',
 'https://www.roadtonationals.com/api/women/dashboard/2024/49',
 'https://www.roadtonationals.com/api/wome

In [None]:
# Get the meet info for every team in every year
def get_the_meet_info(url):
    """Download one team dashboard and save the raw response under ../Data/Raw/meets/.

    The season and the team id are taken from the last two path segments of
    ``url`` and used to name the output file ``{year}_{team}_meets.json``.

    Parameters
    ----------
    url : str
        Dashboard address ending in ``/<year>/<team_id>``.

    Returns
    -------
    None
        No file is written when ``fetch_page`` returns no content.
    """
    year, team = url.split('/')[-2:]

    # Reuse the body that fetch_page returns instead of requesting the same
    # page a second time without a timeout or a status check
    page_text = fetch_page(url)
    if not page_text:
        return

    # Save the data to a json file (pure text, unparsed)
    with open(f'../Data/Raw/meets/{year}_{team}_meets.json', 'w') as f:
        f.write(page_text)



In [10]:
# Split meet_urls into consecutive chunks so the server is not hit with everything at once
batch_size = 100
batch_starts = range(0, len(meet_urls), batch_size)
batches = [meet_urls[start:start + batch_size] for start in batch_starts]
len(batches)

9

In [None]:
# Call the method for every url in the selected batches.
#
# All batches have already been downloaded successfully, so nothing is selected
# by default and running this cell does not hit the server again. To (re)download,
# list the zero-based batch indices to run, one or a few at a time, e.g. [0] or [3, 4].
BATCHES_TO_RUN = []

for batch_index in BATCHES_TO_RUN:
    for url in tqdm(batches[batch_index], desc=f'Batch {batch_index + 1}'):
        get_the_meet_info(url)


In [11]:
# Read the per-season, per-team meet json files into a single dataframe

team_ids = teams_df['team_id'].tolist()

# Frames are collected in a list and concatenated once at the end; concatenating
# inside the loop would re-copy every previously loaded row for each file
meet_frames = []
# (year, team) combinations that have no saved file, kept so they can be inspected
missing_meet_files = []

# For every year and team, load the data from the json file
for year in years:
    for team in team_ids:
        filename = f'../Data/Raw/meets/{year}_{team}_meets.json'

        # get_the_meet_info writes nothing when a download fails, so a file may be absent
        if not os.path.exists(filename):
            missing_meet_files.append((year, team))
            continue

        with open(filename) as data_file:
            data = json.load(data_file)

        # Flatten the list stored under the 'meets' key into one row per entry
        temp_df = pd.json_normalize(data, 'meets')
        temp_df['year'] = year
        temp_df['team_id'] = team
        meet_frames.append(temp_df)

# pd.concat raises on an empty list, so fall back to an empty frame in that case
if meet_frames:
    meets_data_df = pd.concat(meet_frames).reset_index(drop=True)
else:
    meets_data_df = pd.DataFrame()

In [None]:
# Preview the df
meets_data_df.sort_values(by='meet_id', ascending=False).head()


In [12]:
# Derive each meet's results link by appending its meet_id to the results endpoint
results_url_root = "https://www.roadtonationals.com/api/women/meetresults/"
meets_data_df['meet_url'] = results_url_root + meets_data_df['meet_id'].astype(str)
meets_data_df.set_index('meet_url').head()

Unnamed: 0_level_0,team_id,team_name,meet_id,meet_date,team_score,home,opponent,meet_desc,linked_id,jas,year
meet_url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
https://www.roadtonationals.com/api/women/meetresults/28977,34,LSU,28977,"Fri, Jan-05-2024",196.975,H,Ohio State,,5986,,2024
https://www.roadtonationals.com/api/women/meetresults/29040,34,LSU,29040,"Sat, Jan-13-2024",197.15,A,"Oklahoma, UCLA, Utah",Sprouts Farmers Market Collegiate Quad,6011,,2024
https://www.roadtonationals.com/api/women/meetresults/29098,34,LSU,29098,"Fri, Jan-19-2024",198.125,H,Kentucky,,6030,,2024
https://www.roadtonationals.com/api/women/meetresults/29215,34,LSU,29215,"Fri, Jan-26-2024",197.225,A,Missouri,,6078,,2024
https://www.roadtonationals.com/api/women/meetresults/29303,34,LSU,29303,"Fri, Feb-02-2024",198.475,H,Arkansas,,6111,,2024


Unfortunately, the website I'm scraping from allocates a different meet_id for the same meet depending on which team is the originating source, so this df has a lot of duplicates that are difficult to spot. Luckily, there are only some 10,000 to sort through, so this should be no problem.

I'm in a bit of a rush, so I'm going to leave that problem on the table for now and progress to the next bit of scraping, as it will likely take some time.

In [16]:
# Build an order-independent key for each row: the reporting team plus its
# opponents, sorted alphabetically and stored as a tuple so that it can be hashed
participants = meets_data_df.apply(
    lambda row: [row['team_name'], *row['opponent'].split(', ')],
    axis=1,
)
meets_data_df['all_teams'] = participants.apply(sorted).apply(tuple)

# Rows that share both all_teams and meet_date are treated as duplicates; the first is kept
meets_df = meets_data_df.drop_duplicates(subset=['all_teams', 'meet_date'], ignore_index=True)

meets_df


Unnamed: 0,team_id,team_name,meet_id,meet_date,team_score,home,opponent,meet_desc,linked_id,jas,year,meet_url,all_teams
0,34,LSU,28977,"Fri, Jan-05-2024",196.9750,H,Ohio State,,5986,,2024,https://www.roadtonationals.com/api/women/meet...,"(LSU, Ohio State)"
1,34,LSU,29040,"Sat, Jan-13-2024",197.1500,A,"Oklahoma, UCLA, Utah",Sprouts Farmers Market Collegiate Quad,6011,,2024,https://www.roadtonationals.com/api/women/meet...,"(LSU, Oklahoma, UCLA, Utah)"
2,34,LSU,29098,"Fri, Jan-19-2024",198.1250,H,Kentucky,,6030,,2024,https://www.roadtonationals.com/api/women/meet...,"(Kentucky, LSU)"
3,34,LSU,29215,"Fri, Jan-26-2024",197.2250,A,Missouri,,6078,,2024,https://www.roadtonationals.com/api/women/meet...,"(LSU, Missouri)"
4,34,LSU,29303,"Fri, Feb-02-2024",198.4750,H,Arkansas,,6111,,2024,https://www.roadtonationals.com/api/women/meet...,"(Arkansas, LSU)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3609,77,UW-Eau Claire,18268,"Sat, Feb-14-2015",181.4500,A,Hamline,,2466,,2015,https://www.roadtonationals.com/api/women/meet...,"(Hamline, UW-Eau Claire)"
3610,77,UW-Eau Claire,18269,"Fri, Feb-20-2015",185.9250,H,Gustavus Adolphus,,2481,,2015,https://www.roadtonationals.com/api/women/meet...,"(Gustavus Adolphus, UW-Eau Claire)"
3611,76,Winona State,18257,"Tue, Feb-10-2015",183.1000,H,Gustavus Adolphus,,2439,,2015,https://www.roadtonationals.com/api/women/meet...,"(Gustavus Adolphus, Winona State)"
3612,76,Winona State,18258,"Tue, Feb-17-2015",185.9500,A,Hamline,,2477,,2015,https://www.roadtonationals.com/api/women/meet...,"(Hamline, Winona State)"


In [17]:
results_url_root = "https://www.roadtonationals.com/api/women/meetresults/"
results_links = meets_df['meet_url'].tolist()

# Get the results info for every meet
def get_the_results_info(url):
    """Download the results of one meet and save the raw response under ../Data/Raw/results/.

    The meet id is taken from the last path segment of ``url`` and used to
    name the output file ``{meet_id}_results.json``.

    Parameters
    ----------
    url : str
        Results address ending in ``/<meet_id>``.

    Returns
    -------
    None
        No file is written when ``fetch_page`` returns no content.
    """
    meet_id = url.split('/')[-1]

    # Reuse the body that fetch_page returns instead of requesting the same
    # page a second time without a timeout or a status check
    page_text = fetch_page(url)
    if not page_text:
        return

    # Save the data to a json file (pure text, unparsed)
    with open(f'../Data/Raw/results/{meet_id}_results.json', 'w') as f:
        f.write(page_text)

In [20]:
# Note for players at home - this will take a while to run (approx ~1 hr)
# Raw data as at 2024-05-25 14:30:00 UTC is available in the '../Data/Raw/results' directory

# Call the method for every url in the list.
# NOTE(review): the loop variable `url` stays defined at notebook level after this loop.
# Later cells pass a bare `url` without assigning it, so they appear to rely on the
# value left behind here - confirm before renaming it.

for url in results_links:

    get_the_results_info(url)

In [None]:
# PART 1: Go to the url, wait until everything on the page loads

def get_url_and_wait_for_elements_to_load(url, css_selector):
    """Open ``url`` in the shared Selenium driver and wait for an element to be present.

    Parameters
    ----------
    url : str
        Page to load.
    css_selector : str
        CSS selector of the element to wait for (up to 10 seconds).

    Returns
    -------
    WebElement or None
        The located element, or ``None`` when loading or waiting failed.
        Failures are reported on screen and do not raise.
    """
    try:
        driver.get(url)
        print("*****************************")
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))  # (Source: https://selenium-python.readthedocs.io/waits.html )
        )
        print(element)
        return element
    except Exception as e:
        # Catching Exception instead of everything keeps the run going when the element
        # is not found, while still letting KeyboardInterrupt stop a long scrape
        print("oh no it didn't work")
        print(repr(e))
        return None
    

# NOTE(review): `url` is not assigned in this cell; it is presumably the value left
# behind by an earlier cell's loop - confirm which page this is meant to load.
get_url_and_wait_for_elements_to_load(url, 'div.rt-table > div.rt-tbody')
# Parse the HTML currently held by the driver so that it can be queried with CSS selectors
response = Selector(text=driver.page_source)

# NOTE: `print` is pprint.pprint in this notebook (aliased in the import cell)
print(response)

In [None]:
# PART 2: Set up the dataframes we exported in the last session as variables
# NOTE(review): these reads replace any `teams_df` / `meets_df` already built earlier in the notebook.

teams_df = pd.read_csv('../Data/Raw/teams.csv')
meets_df = pd.read_csv('../Data/Raw/meets.csv')

#print(teams_df)
# Show the first 20 values of the 'link' column
for meet in meets_df['link'][0:20]:
    print(meet)

In [None]:
# Selector used both to wait for the page and to pick out the results table below
css_selector = 'div.rt-tbody'
# NOTE(review): `meet_link` is not assigned at notebook level in the visible cells - confirm where it comes from
get_url_and_wait_for_elements_to_load(meet_link, css_selector)
response = Selector(text=driver.page_source)  
meet_results_table = response.css(css_selector)
meet_results_table_rows = meet_results_table.css('div.rt-tr-group')
teams_button_clicker = driver.find_element(By.CSS_SELECTOR, '#teambtn')
row_count = 0
meet_host = ''

# Use the text of the matched <p> as the host when one is found, otherwise the placeholder string 'NaN'
if response.css('p:nth-child(4)').get():
    meet_host = response.css('p:nth-child(4)::text').get()
else:
    meet_host = 'NaN'

# NOTE(review): `meet_hosts` is only created in a later cell, so this line needs that cell to have run first
meet_hosts.append(meet_host)


In [None]:
# Reload the page and re-parse it
# NOTE(review): relies on `meet_link` and `css_selector` from earlier cells
get_url_and_wait_for_elements_to_load(meet_link, css_selector)
response = Selector(text=driver.page_source)

meet_results_table = response.css(css_selector)
meet_results_table.css('div.rt-tr-group::text').getall()

# `meet_results_table_rows` is not rebuilt here, so it still holds the rows parsed by an earlier cell.
# Only the final expression (the row count) is displayed by the notebook.
meet_results_table_rows.css('div:nth-child(4)::text').getall()
len(meet_results_table_rows)

In [None]:
def get_team_score_info(url):
    """Scrape the team rows of one meet page into the notebook-level result lists.

    Loads ``url`` with the shared driver, then appends to the notebook-level
    lists ``meet_ids``, ``meet_hosts``, ``team_meet_ids``, ``team_ids`` and the
    five ``team_*_scores`` lists. The notebook-level ``css_selector`` is used
    to wait for the page and to locate the results table.

    Parameters
    ----------
    url : str
        Meet page to scrape; its last path segment is used as the meet id.

    Returns
    -------
    tuple of list
        ``(team_ids, team_meet_ids, team_vt_scores, team_ub_scores,
        team_bb_scores, team_fx_scores, team_meet_scores, meet_hosts)`` - the
        same notebook-level lists, after this meet has been added.
    """
    # Load the page and parse the HTML that the driver now holds
    get_url_and_wait_for_elements_to_load(url, css_selector)
    response = Selector(text=driver.page_source)
    # Read the rows from the page that was just loaded. The notebook-level
    # `meet_results_table` belongs to whichever page an earlier cell parsed,
    # so using it here would return the same rows for every meet.
    meet_results_table_rows = response.css(css_selector).css('div.rt-tr-group')

    # Determine the meet id
    meet_id = url.split('/')[-1]
    meet_ids.append(meet_id)

    # Find out if there is a host for the meet; the string 'NaN' marks a missing host
    if response.css('p:nth-child(4)').get():
        meet_host = response.css('p:nth-child(4)::text').get()
    else:
        meet_host = 'NaN'
    meet_hosts.append(meet_host)

    # One row per competing team, so record the meet id once for each of them
    team_count = len(meet_results_table_rows)
    team_meet_ids.extend([meet_id] * team_count)

    # The team id is the last path segment of each team's link
    team_hrefs = meet_results_table_rows.css('div > div > a::attr(href)').getall()
    team_ids.extend(team_href.split('/')[-1] for team_href in team_hrefs)

    # Scores for each event and the total meet score, taken from the row cells by position
    team_vt_scores.extend(meet_results_table_rows.css('div:nth-child(4)::text').getall())
    team_ub_scores.extend(meet_results_table_rows.css('div:nth-child(5)::text').getall())
    team_bb_scores.extend(meet_results_table_rows.css('div:nth-child(6)::text').getall())
    team_fx_scores.extend(meet_results_table_rows.css('div:nth-child(7)::text').getall())
    team_meet_scores.extend(meet_results_table_rows.css('div:nth-child(8) > strong::text').getall())

    return team_ids, team_meet_ids, team_vt_scores, team_ub_scores, team_bb_scores, team_fx_scores, team_meet_scores, meet_hosts
    


In [None]:
def get_gymnast_score_info(url):
    """Scrape the gymnast rows, and the team rows, of one meet page.

    Loads ``url`` with the shared driver, clicks the "Teams" button and then
    each team in turn, and appends what it finds to the notebook-level
    ``gymnast_*`` lists. It also records the same team-level information as
    ``get_team_score_info`` (``meet_ids``, ``meet_hosts``, ``team_meet_ids``,
    ``team_ids`` and the ``team_*_scores`` lists).

    Parameters
    ----------
    url : str
        Meet page to scrape; its last path segment is used as the meet id.

    Returns
    -------
    tuple of list
        ``(team_ids, team_meet_ids, team_vt_scores, team_ub_scores,
        team_bb_scores, team_fx_scores, team_meet_scores, meet_hosts)``.
    """
    # Load the page and parse the HTML that the driver holds before any clicking
    get_url_and_wait_for_elements_to_load(url, css_selector)
    response = Selector(text=driver.page_source)
    meet_results_table_rows = response.css('div.rt-tr-group')

    # Determine the meet id
    meet_id = url.split('/')[-1]
    meet_ids.append(meet_id)

    # Find out how many teams are competing in the meet
    team_count = len(meet_results_table_rows)

    # Find out if there is a host for the meet; the string 'NaN' marks a missing host
    if response.css('p:nth-child(4)').get():
        meet_host = response.css('p:nth-child(4)::text').get()
    else:
        meet_host = 'NaN'

    # Click the "Teams" button
    driver.find_element(By.CSS_SELECTOR, '#teambtn').click()

    for i in range(team_count):  # Looping through the teams
        # Click on the Team Name
        driver.find_element(By.CSS_SELECTOR, "#team" + str(i)).click()

        # A Selector is a snapshot of the HTML it was built from, so the page has to be
        # parsed again after the click for the gymnast rows to be visible.
        # NOTE(review): assumes the table has finished re-rendering by the time
        # page_source is read - confirm, and add an explicit wait if it has not.
        gymnast_results_table_rows = Selector(text=driver.page_source).css('div.rt-tr-group')

        # Gymnast metadata: the last two path segments of each link are used as team id and gymnast id
        for href in gymnast_results_table_rows.css('a::attr(href)').getall():
            gymnast_ids.append(href.split('/')[-1])
            gymnast_team_ids.append(href.split('/')[-2])
            gymnast_meet_ids.append(meet_id)

        # Extend the notebook-level lists. Assigning to these names inside the function
        # would create locals that shadow the accumulators and discard the results.
        gymnast_names.extend(gymnast_results_table_rows.css('a::text').getall())

        # Gymnast scores, taken from the row cells by position
        gymnast_vt_scores.extend(gymnast_results_table_rows.css('div:nth-child(3)::text').getall())
        gymnast_ub_scores.extend(gymnast_results_table_rows.css('div:nth-child(4)::text').getall())
        gymnast_bb_scores.extend(gymnast_results_table_rows.css('div:nth-child(5)::text').getall())
        gymnast_fx_scores.extend(gymnast_results_table_rows.css('div:nth-child(6)::text').getall())
        gymnast_aa_scores.extend(gymnast_results_table_rows.css('div:nth-child(7)::text').getall())

    meet_hosts.append(meet_host)

    # Record the meet id once for each competing team
    team_meet_ids.extend([meet_id] * team_count)

    # The team id is the last path segment of each team's link
    team_hrefs = meet_results_table_rows.css('div > div > a::attr(href)').getall()
    team_ids.extend(team_href.split('/')[-1] for team_href in team_hrefs)

    # Team scores for each event and the total meet score
    team_vt_scores.extend(meet_results_table_rows.css('div:nth-child(4)::text').getall())
    team_ub_scores.extend(meet_results_table_rows.css('div:nth-child(5)::text').getall())
    team_bb_scores.extend(meet_results_table_rows.css('div:nth-child(6)::text').getall())
    team_fx_scores.extend(meet_results_table_rows.css('div:nth-child(7)::text').getall())
    team_meet_scores.extend(meet_results_table_rows.css('div:nth-child(8) > strong::text').getall())

    return team_ids, team_meet_ids, team_vt_scores, team_ub_scores, team_bb_scores, team_fx_scores, team_meet_scores, meet_hosts
    


In [None]:
# 5 Go to each of the meet's links and scrape the score information

# dfs we are adding to: meets_df, team_scores_df (meet_id, team_id), gymnast_scores_df (team_id, meet_id)

# Links to visit, taken from the 'link' column of the meets dataframe
meet_links = meets_df['link']

# Meet-level accumulators
meet_ids, meet_hosts = [], []

# Team-level accumulators, filled by the scraping functions
team_ids, team_meet_ids = [], []
team_vt_scores, team_ub_scores, team_bb_scores = [], [], []
team_fx_scores, team_meet_scores = [], []

# Gymnast-level accumulators
gymnast_ids, gymnast_names = [], []
gymnast_team_ids, gymnast_meet_ids = [], []
gymnast_vt_scores, gymnast_ub_scores, gymnast_bb_scores = [], [], []
gymnast_fx_scores, gymnast_aa_scores = [], []

meet_hosts

In [None]:
meet_links[0:20]

In [None]:
# A smaller set of links for trial runs; pass it as `links=` to the function below
subset_meet_links = meet_links[0:100]

def get_all_the_team_results_from_all_the_meets(url, links=None):
    """Scrape the team results of every meet link into the notebook-level lists.

    Parameters
    ----------
    url : object
        Unused; kept so that existing calls keep working.
    links : iterable of str, optional
        Meet links to visit. Defaults to the notebook-level ``meet_links``.

    Returns
    -------
    tuple of list
        ``(team_ids, team_meet_ids, team_vt_scores, team_ub_scores,
        team_bb_scores, team_fx_scores, team_meet_scores, meet_hosts)``.
    """
    if links is None:
        links = meet_links
    for meet_link in tqdm(links):
        get_team_score_info(meet_link)
    return team_ids, team_meet_ids, team_vt_scores, team_ub_scores, team_bb_scores, team_fx_scores, team_meet_scores, meet_hosts

# The first argument is ignored, so None is passed rather than a leftover `url` variable
get_all_the_team_results_from_all_the_meets(None)

# The lists must all have the same length for the dataframe below to be built
print(len(team_ids))
print(len(team_meet_ids))
print(len(team_vt_scores))
print(len(team_ub_scores))
print(len(team_bb_scores))
print(len(team_fx_scores))
print(len(team_meet_scores))


team_results_df = pd.DataFrame({'team_id': team_ids, 'meet_id': team_meet_ids, 'vt_score': team_vt_scores, 'ub_score': team_ub_scores, 'bb_score': team_bb_scores, 'fx_score': team_fx_scores, 'meet_score': team_meet_scores})

In [None]:
team_results_df

## Scraping using hidden APIs

In [None]:
# # Replace with the actual API endpoint you discovered
# years = [2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015]

# api_url = 'https://www.roadtonationals.com/api/women/finalresults/2024'

# # Include necessary headers, cookies, or auth tokens
# headers = {}

# response = requests.get(api_url, headers=headers)

# if response.status_code == 200:
#     data = response.json()
#     print(data)
# else:
#     print(f"Failed to retrieve data: {response.status_code}")