## Libraries

In [1]:
import os
import re
import time
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd
import requests

### Structuring File Data

In [83]:
# Group every link under the most recent non-link line, which acts as the
# heading for the links that follow it.
data = defaultdict(list)
current_track = None

with open("./output/raw_csv_links.csv", "r", encoding="utf-8") as f:
    # Strip each raw line and drop the ones that end up empty.
    for line in filter(None, map(str.strip, f)):
        if not line.startswith("http"):
            # Anything that is not a link starts a new group.
            current_track = line
            continue
        data[current_track].append(line)


### Converting to a DataFrame

In [84]:
# Wrapping each list in a Series lets groups of different lengths share one
# frame; shorter columns are padded with missing values.
df = pd.DataFrame(
    {track: pd.Series(links) for track, links in data.items()}
)
df.head()

Unnamed: 0,Sonoma Raceway,Long Beach Street Circuit,Circuit of the Americas,Sebring International Raceway,VIRginia International Raceway,Road America,Barber Motorsports Park,Indianapolis Motor Speedway RC
0,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...
1,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...
2,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...
3,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...
4,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...,http://usac.alkamelna.com/Results/06_SRO/25_20...


### Filtering Data

In [85]:
# Filter settings read by match_url(). Indexes below refer to the pieces of
# the URL after it is split on '/'.
Season = 'SRO'  # must appear in piece 4 (compared case-insensitively)
Year = '2025'  # must appear in piece 5 (compared as-is)
Races = ('Race 1','Race 2')  # the second-to-last piece must contain at least one of these (case-insensitive)
RaceEvent = 'TGRNA GR CUP NORTH AMERICA'  # must appear in piece 7 (compared case-insensitively)

In [86]:
def match_url(x):
    """Return True when ``x`` is a link that passes every notebook filter.

    The link has its ``%20`` escapes turned into spaces and is split on ``/``.
    It is accepted only if the pieces at indexes 4, 5 and 7 contain ``Season``,
    ``Year`` and ``RaceEvent`` respectively, and the second-to-last piece
    contains at least one entry of ``Races``. All comparisons except the one
    for ``Year`` ignore case.

    Args:
        x: Candidate value; anything that is not a ``str`` is rejected.

    Returns:
        bool: ``True`` if all checks pass, ``False`` otherwise (including
        links with fewer than eight ``/``-separated pieces).
    """
    if not isinstance(x, str):
        return False

    parts = x.replace('%20', ' ').split('/')
    if len(parts) < 8:
        return False

    # Reject as soon as one fixed-position check fails.
    if Season.lower() not in parts[4].lower():
        return False
    if Year not in parts[5]:
        return False
    if RaceEvent.lower() not in parts[7].lower():
        return False

    # The race label is looked up in the piece just before the file name.
    session = parts[-2].lower()
    for race in Races:
        if race.lower() in session:
            return True
    return False

In [87]:
# Keep, per column, only the links accepted by match_url. The surviving rows
# keep their original index labels.
matched = {
    column: df[column][df[column].apply(match_url)]
    for column in df.columns
}

filtered_df = pd.DataFrame(
    {track: pd.Series(links) for track, links in matched.items()}
)

In [91]:
# Preview the first rows; each column keeps the row labels of its own matches.
filtered_df.head()

Unnamed: 0,Sonoma Raceway,Long Beach Street Circuit,Circuit of the Americas,Sebring International Raceway,VIRginia International Raceway,Road America,Barber Motorsports Park,Indianapolis Motor Speedway RC
30,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...
31,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...
32,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...
33,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...
34,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...


In [90]:
# Number of non-null entries (links that passed the filter) in the 'Sonoma Raceway' column.
filtered_df['Sonoma Raceway'].notnull().sum()

np.int64(12)

### Saving Filtered Links

In [93]:
# Persist the filtered links; index=False keeps the row labels out of the file.
filtered_df.to_csv('./output/all_csv_links.csv',index = False)

### Downloading Data

In [2]:
# Reload the links saved by the filtering step. This rebinds `df`, which the
# earlier cells used for the unfiltered link table.
df = pd.read_csv('./output/all_csv_links.csv')
df.head()

Unnamed: 0,Sonoma Raceway,Long Beach Street Circuit,Circuit of the Americas,Sebring International Raceway,VIRginia International Raceway,Road America,Barber Motorsports Park,Indianapolis Motor Speedway RC
0,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...
1,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...
2,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...
3,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...
4,,,,,,,,http://usac.alkamelna.com/Results/06_SRO/25_20...


In [5]:
# Root directory for downloads; download_file() writes under BASE_PATH / out_dir / <race folder>.
BASE_PATH = Path('../data/')

In [None]:
def download_file(url, out_dir="new_event"):
    """Download ``url`` into ``BASE_PATH / out_dir / <race folder>``.

    The race folder is chosen from the URL text: ``RACE 1`` or ``RACE 2`` when
    the URL mentions that race (written with a space or with ``%20``), and
    ``UNKNOWN_RACE`` otherwise. The file keeps the last ``/``-separated piece
    of the URL as its name, exactly as it appears in the URL (percent-escapes
    included).

    Args:
        url: Link to fetch. Anything that is not a ``str`` (for example a
            missing value from the links table) is ignored.
        out_dir: Folder name under ``BASE_PATH`` for this event.

    Returns:
        The saved file's path as a string, or ``None`` when ``url`` is not a
        string or the download/write failed.
    """
    if not isinstance(url, str):
        return None

    url_lower = url.lower()

    # Determine the race from the URL. "Race 1" is checked before "Race 2", and
    # the digit must not be followed by another digit, so that e.g. "Race 10"
    # is not filed under "RACE 1".
    race_folder = "UNKNOWN_RACE"  # fallback when no known race is mentioned
    for number in ("1", "2"):
        if re.search(rf"race(?: |%20){number}(?!\d)", url_lower):
            race_folder = f"RACE {number}"
            break

    # Full path: BASE_PATH / out_dir / race_folder
    target_dir = BASE_PATH / out_dir / race_folder

    # Use the original filename
    filename = url.split("/")[-1]
    file_path = target_dir / filename

    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()

        # Create the folder only once there is something to store, so failed
        # downloads do not leave empty directories behind.
        target_dir.mkdir(parents=True, exist_ok=True)

        # Write file to disk
        file_path.write_bytes(r.content)

        print("fileurl:", url)
        return str(file_path)

    except Exception as e:
        # Best effort: report the failure and let the caller move on to the
        # next link instead of aborting the whole batch.
        print(f"Failed: {url} -> {e}")
        return None


In [6]:
# 'Sonoma Raceway' is skipped by this loop; every other column's entries are
# handed to download_file with the column name as the output folder.
for column in df.columns:
    if column == 'Sonoma Raceway':
        continue
    for url in df[column]:
        download_file(url, out_dir=column)

fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/03_Circuit%20of%20the%20Americas/17_TGRNA%20GR%20Cup%20North%20America/202504261555_Race%201/00_Results%20GR%20Cup%20Race%201%20Official.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/03_Circuit%20of%20the%20Americas/17_TGRNA%20GR%20Cup%20North%20America/202504261555_Race%201/03_Provisional%20Results_Race%201.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/03_Circuit%20of%20the%20Americas/17_TGRNA%20GR%20Cup%20North%20America/202504261555_Race%201/05_Provisional%20Results%20by%20Class_Race%201.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/03_Circuit%20of%20the%20Americas/17_TGRNA%20GR%20Cup%20North%20America/202504261555_Race%201/23_AnalysisEnduranceWithSections_Race%201.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/03_Circuit%20of%20the%20Americas/17_TGRNA%20GR%20Cup%20North%20America/202504261555_Race%201/26_Weather_Race%201.CSV
fileurl:  http://usac.alkamelna.c

### Rough Work (Scratch)

In [120]:
# Scratch check: value in the first row, last column of filtered_df.
filtered_df.iloc[0,-1]

'http://usac.alkamelna.com/Results/06_SRO/25_2025/08_Indianapolis%20Motor%20Speedway%20RC/52_TGRNA%20GR%20Cup%20North%20America/202510180850_Race%201/03_Provisional%20Results_Race%201.CSV'

In [134]:
# Download the 'Sonoma Raceway' links straight from the in-memory filtered_df
# (the main download loop skips this column). The displayed Series holds
# download_file's return value for each row.
filtered_df['Sonoma Raceway'].apply(lambda url: download_file(url,out_dir = 'Sonoma Raceway'))

fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/01_Sonoma%20Raceway/06_TGRNA%20GR%20Cup%20North%20America/202503291405_Race%201/03_Results.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/01_Sonoma%20Raceway/06_TGRNA%20GR%20Cup%20North%20America/202503291405_Race%201/03_Results_Race%201.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/01_Sonoma%20Raceway/06_TGRNA%20GR%20Cup%20North%20America/202503291405_Race%201/05_Results%20by%20Class_Race%201.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/01_Sonoma%20Raceway/06_TGRNA%20GR%20Cup%20North%20America/202503291405_Race%201/23_AnalysisEnduranceWithSections_Race%201.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/01_Sonoma%20Raceway/06_TGRNA%20GR%20Cup%20North%20America/202503291405_Race%201/26_Weather_Race%201.CSV
fileurl:  http://usac.alkamelna.com/Results/06_SRO/25_2025/01_Sonoma%20Raceway/06_TGRNA%20GR%20Cup%20North%20America/202503291405_Race%201/99_Best%2010%20Laps%

30                                                   NaN
31                                                   NaN
32                                                   NaN
33                                                   NaN
34                                                   NaN
                             ...                        
208         ..\data\Sonoma Raceway\RACE 2\03_Results.CSV
209    ..\data\Sonoma Raceway\RACE 2\05_Provisional_R...
210    ..\data\Sonoma Raceway\RACE 2\23_AnalysisEndur...
211    ..\data\Sonoma Raceway\RACE 2\26_Weather_%20Ra...
212    ..\data\Sonoma Raceway\RACE 2\99_Best%2010%20L...
Name: Sonoma Raceway, Length: 83, dtype: str

In [14]:
def fixing_filename(base_folder):
    """Replace every ``%20`` in file names under ``base_folder`` with ``_``.

    Walks ``base_folder`` recursively and renames files only; directory names
    are left as they are. A file is skipped (and reported) when the cleaned
    name is already taken in the same folder, so an existing file is never
    overwritten and one clash does not abort the rest of the walk.

    Args:
        base_folder: Root directory to scan.

    Returns:
        None. Prints ``Fixing Complete`` once the walk has finished.
    """
    for root, _dirs, files in os.walk(base_folder):
        for filename in files:
            if "%20" not in filename:
                continue

            old_path = os.path.join(root, filename)
            new_path = os.path.join(root, filename.replace("%20", "_"))

            # os.rename replaces an existing target on POSIX and raises on
            # Windows; neither is wanted here, so leave clashing files alone.
            if os.path.exists(new_path):
                print(f"Skipped: {old_path} ({new_path} already exists)")
                continue

            os.rename(old_path, new_path)
    print('Fixing Complete')

In [16]:
# Clean the same tree that download_file() writes into, using the shared
# BASE_PATH setting instead of repeating the folder as a literal.
fixing_filename(BASE_PATH)

Fixing Complete
