In [None]:
import csv
import os
import string
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def sanitize_filename(filename):
    """Return ``filename`` with every non-whitelisted character removed.

    Only ASCII letters, digits, spaces and the characters ``-_.()`` are kept;
    everything else is dropped (not replaced), so the result may be shorter
    than the input or even empty.
    """
    allowed = set("-_.() " + string.ascii_letters + string.digits)
    kept = [ch for ch in filename if ch in allowed]
    return "".join(kept)

def export_table_to_csv(table, person_name, destination_folder, table_index, athlete_id_value, exercise_name):
    """Write one HTML table to a CSV file and return its rows.

    The file name is ``{person_name}_{exercise}_{title}_{table_index}.csv``,
    where the exercise name and the title are passed through
    ``sanitize_filename``. The title is the ``<h5>`` text of the nearest
    preceding ``div.ibox-title.collapse-link``; if that div or its ``<h5>`` is
    missing, ``Table_{table_index}`` is used instead.

    The first row is treated as the header and gets an ``Athlete_ID`` column;
    every following row gets ``athlete_id_value`` appended.

    Args:
        table: Parsed table element exposing ``find_previous`` and
            ``find_all`` (a BeautifulSoup tag in this file).
        person_name: Athlete name used as the file-name prefix.
        destination_folder: Directory the CSV is written into.
        table_index: Position of the table on the page; used in the file
            name and in the fallback title.
        athlete_id_value: Value appended to every non-header row.
        exercise_name: Exercise heading used in the file name.

    Returns:
        The table as a list of rows (lists of strings) including the added
        column. An empty list is returned, and no file is written, when the
        table has no ``<tr>`` rows. The rows are returned even if writing the
        file failed; the failure is only printed.
    """
    title_tag = table.find_previous('div', class_='ibox-title collapse-link')
    # The title div can exist without an <h5> child; fall back rather than
    # raising AttributeError on ``None.text``.
    heading = title_tag.h5 if title_tag is not None else None
    title = heading.text.strip() if heading is not None else f"Table_{table_index}"
    filename_safe_title = sanitize_filename(title)
    filename_safe_exercise = sanitize_filename(exercise_name)
    filename = f"{person_name}_{filename_safe_exercise}_{filename_safe_title}_{table_index}.csv"
    full_path = os.path.join(destination_folder, filename)

    table_data = [
        [cell.text.strip() for cell in row.find_all(['td', 'th'])]
        for row in table.find_all('tr')
    ]

    # A table without any rows has no header to extend; bail out instead of
    # failing with IndexError on ``table_data[0]``.
    if not table_data:
        print(f"No rows found in table {table_index}; nothing exported to {full_path}")
        return table_data

    for row in table_data[1:]:
        row.append(athlete_id_value)
    table_data[0].append("Athlete_ID")

    try:
        # Explicit UTF-8 so the export does not depend on the machine's locale
        # encoding (which cannot represent every character a page may contain).
        with open(full_path, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(table_data)
    except (OSError, UnicodeError, csv.Error) as e:
        # Best effort: report the failure and still hand the rows back.
        print(f"Error saving data to {full_path}: {e}")
    else:
        print(f"Data exported to {full_path}")

    return table_data

def get_athlete_name(soup):
    """Build a ``First_Last`` label from the athlete page.

    The first name is the stripped text of the first ``<h2 class="no-margins">``
    and the last name is the stripped text of the first ``<h4>``. Whichever
    element is missing is replaced by the literal ``Unknown``.
    """
    def _text_or_unknown(tag):
        # Missing elements come back falsy from ``find``.
        return tag.text.strip() if tag else 'Unknown'

    first = _text_or_unknown(soup.find('h2', class_='no-margins'))
    last = _text_or_unknown(soup.find('h4'))
    return "_".join((first, last))

def process_athlete(driver, athlete_id, workout_keyword, aggregated_data, unique_rows):
    """Scrape one athlete's tracking-sheet tables that match ``workout_keyword``.

    For each of the three sub-tab URLs the page is loaded, parsed, and every
    ``table.table-hover.issue-tracker.tracking-sheet`` is inspected. A table
    matches when ``workout_keyword`` occurs (case-insensitively) in the text of
    the nearest preceding ``<h5>``. Matching tables are exported with
    ``export_table_to_csv`` and their rows are appended to ``aggregated_data``
    unless an identical row is already recorded in ``unique_rows``.

    Args:
        driver: Selenium WebDriver, already logged in.
        athlete_id: Athlete identifier used in the URL and added to each row.
        workout_keyword: Text to look for in the exercise heading.
        aggregated_data: List that receives the new unique rows (mutated).
        unique_rows: Set of row tuples seen so far (mutated).

    Returns:
        True if at least one table matched, otherwise False.

    NOTE(review): the sub-tab URLs differ only by their fragment; verify that
    each one really yields different page content, otherwise the same tables
    are exported (and overwritten) more than once.
    """
    athlete_url = f"https://traq.drivelinebaseball.com/athletes/view/{athlete_id}"
    tab_urls = [f"{athlete_url}#subTab-{tab}" for tab in (1, 2, 13)]

    out_dir = os.path.join(
        "C:/Users/benoi/OneDrive/Desktop/bea/Data/CSV/traq/BEA_Texas",
        sanitize_filename(workout_keyword),
    )
    os.makedirs(out_dir, exist_ok=True)

    matched = False

    for tab_url in tab_urls:
        print(f"Navigating to: {tab_url}")
        driver.get(tab_url)
        title_present = EC.presence_of_element_located((By.CLASS_NAME, 'ibox-title'))
        WebDriverWait(driver, 20).until(title_present)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        person_name = get_athlete_name(soup)

        tables = soup.find_all('table', class_='table table-hover issue-tracker tracking-sheet')
        print(f"Found {len(tables)} tables in {tab_url}")

        for position, table in enumerate(tables, start=1):
            heading = table.find_previous('h5')
            if heading:
                exercise_name = heading.text.strip()
            else:
                exercise_name = f"Exercise_{position}"

            print(f"Checking table {position} with exercise name: {exercise_name}")

            # Guard clause: skip tables whose heading does not mention the workout.
            if workout_keyword.lower() not in exercise_name.lower():
                continue

            print(f"Workout '{workout_keyword}' found in {tab_url}")
            matched = True
            exported = export_table_to_csv(table, person_name, out_dir, position, athlete_id, exercise_name)

            # Keep only rows not seen before; tuples are used because lists are unhashable.
            for row in exported:
                key = tuple(row)
                if key in unique_rows:
                    continue
                unique_rows.add(key)
                aggregated_data.append(row)

    if not matched:
        print(f"Workout '{workout_keyword}' not found for athlete ID {athlete_id}.")

    return matched

def main():
    """Log in to TRAQ, scrape the configured athletes and save an aggregated CSV.

    Steps: open Chrome, submit the login form, call ``process_athlete`` for
    every ID in ``athlete_ids``, then write all collected unique rows to
    ``aggregated_<keyword>.csv`` inside the workout's folder. The browser is
    always closed, even when login or scraping raises.

    The login can be overridden with the ``TRAQ_USERNAME`` and
    ``TRAQ_PASSWORD`` environment variables.
    """
    url = "https://traq.drivelinebaseball.com/login"
    # SECURITY NOTE(review): credentials are hard-coded in source. Environment
    # variables take precedence; the literals remain only as a fallback so
    # existing runs keep working. Rotate the password and remove them.
    username = os.environ.get("TRAQ_USERNAME", "benoitadrian@gmail.com")
    password = os.environ.get("TRAQ_PASSWORD", "Baseball")
    workout_keyword = "VBT Testing"
    athlete_ids = ["8909", "40152", "34702", "63126"]  # Add more athlete IDs as needed

    login_form_selector = 'form[method="post"][role="form"]'

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, login_form_selector))
        )

        email_input = driver.find_element(By.CSS_SELECTOR, 'input[name="email"]')
        password_input = driver.find_element(By.CSS_SELECTOR, 'input[name="password"]')

        email_input.send_keys(username)
        password_input.send_keys(password)
        driver.find_element(By.CSS_SELECTOR, login_form_selector).submit()

        # Leaving the login URL is used as the signal that the form was processed.
        WebDriverWait(driver, 20).until(EC.url_changes(url))
        print(f"Current URL after login: {driver.current_url}")

        aggregated_data = []
        unique_rows = set()  # Row tuples already collected, for de-duplication

        for athlete_id in athlete_ids:
            process_athlete(driver, athlete_id, workout_keyword, aggregated_data, unique_rows)

        if aggregated_data:
            sanitized_workout_keyword = sanitize_filename(workout_keyword)
            aggregated_filename = f"aggregated_{sanitized_workout_keyword}.csv"
            aggregated_filepath = os.path.join("C:/Users/benoi/OneDrive/Desktop/bea/Data/CSV/traq/BEA_Texas", sanitized_workout_keyword, aggregated_filename)

            try:
                # Explicit UTF-8 so the output does not depend on the locale encoding.
                with open(aggregated_filepath, 'w', newline='', encoding='utf-8') as csvfile:
                    csv.writer(csvfile).writerows(aggregated_data)
            except (OSError, UnicodeError, csv.Error) as e:
                print(f"Error saving aggregated data to {aggregated_filepath}: {e}")
            else:
                print(f"Aggregated data saved to {aggregated_filepath}")
    finally:
        # Always release the browser, including when a wait times out.
        driver.quit()

# Entry point: run the scraper only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Current URL after login: https://traq.drivelinebaseball.com/athletes?view=table
Navigating to: https://traq.drivelinebaseball.com/athletes/view/112633#subTab-1
Found 66 tables in https://traq.drivelinebaseball.com/athletes/view/112633#subTab-1
Checking table 1 with exercise name: Blast Bat Speed Tracking Sheet
Checking table 2 with exercise name: Initial Front Toss EV Tracking Sheet
Checking table 3 with exercise name: WO5_Exercise1: 1 Arm Bottoms Up KB Carry
Checking table 4 with exercise name: WO5_Exercise2: 1 Arm Kettlebell Turkish Get Up
Checking table 5 with exercise name: WO5_Exercise3: 1 Arm Low Incline DB Press
Checking table 6 with exercise name: WO5_Exercise4: Dumbbell Pullover
Checking table 7 with exercise name: WO5_Exercise5: INPUT + SAVE YOUR DATA + RERACK YOUR WEIGHTS
Checking table 8 with exercise name: WO5_Exercise6: Prone Plate Switch
Checking table 9 with exercise name: WO5_Exercise7: Standing 1 Arm Cable Row
Checking table 10 with exercise name: WO5_Exercise8: Supin

"117411", "118112", "106356", "81698", "118166", "118006", "108440", "53288", "95103"
"112631", "97210", "108470", "117221", "117183", "109444", "114403", "117320", "98377"
"101027", "95948", "83540", "113029", "106086", "37264", "114601", "102411", "108677"
"112633", "94345", "110819", "110916", "101144", "116165", "96740", "112657", "29699"
"87093", "15950", "35081", "", "", "", "", "", "", 
"8909", "40152", "34702", "63126", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", 

In [27]:
import csv
import os
import string
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def sanitize_filename(filename):
    """Strip every character that is not safe for use in a file name.

    The whitelist is ASCII letters, digits, the space character and
    ``-``, ``_``, ``.``, ``(``, ``)``. Characters outside it are removed
    without substitution.
    """
    whitelist = "-_.() " + string.ascii_letters + string.digits
    return "".join(filter(whitelist.__contains__, filename))

def export_table_to_csv(table, person_name, destination_folder, table_index, athlete_id_value, exercise_name):
    """Write one HTML table to a CSV file and return its rows.

    The file is named ``{person_name}_{exercise}_{title}_{table_index}.csv``;
    the exercise name and title are cleaned with ``sanitize_filename``. The
    title comes from the ``<h5>`` inside the nearest preceding
    ``div.ibox-title.collapse-link`` and falls back to
    ``Table_{table_index}`` when that div or its ``<h5>`` is absent.

    The first row is treated as the header and receives an ``Athlete_ID``
    column; all later rows receive ``athlete_id_value``.

    Args:
        table: Parsed table element exposing ``find_previous`` and
            ``find_all`` (a BeautifulSoup tag in this file).
        person_name: Athlete name used as the file-name prefix.
        destination_folder: Directory the CSV is written into.
        table_index: Position of the table on the page; used in the file
            name and in the fallback title.
        athlete_id_value: Value appended to every non-header row.
        exercise_name: Exercise heading used in the file name.

    Returns:
        The rows (lists of strings) including the added column. Returns an
        empty list without writing a file when the table contains no
        ``<tr>`` rows. A write failure is printed, not raised, and the rows
        are still returned.
    """
    title_tag = table.find_previous('div', class_='ibox-title collapse-link')
    # Guard against a title div that has no <h5>, which would otherwise
    # raise AttributeError.
    heading = title_tag.h5 if title_tag is not None else None
    title = heading.text.strip() if heading is not None else f"Table_{table_index}"
    filename_safe_title = sanitize_filename(title)
    filename_safe_exercise = sanitize_filename(exercise_name)
    filename = f"{person_name}_{filename_safe_exercise}_{filename_safe_title}_{table_index}.csv"
    full_path = os.path.join(destination_folder, filename)

    table_data = [
        [cell.text.strip() for cell in row.find_all(['td', 'th'])]
        for row in table.find_all('tr')
    ]

    # Without rows there is no header to extend (``table_data[0]`` would
    # raise IndexError), so report it and return early.
    if not table_data:
        print(f"No rows found in table {table_index}; nothing exported to {full_path}")
        return table_data

    for row in table_data[1:]:
        row.append(athlete_id_value)
    table_data[0].append("Athlete_ID")

    try:
        # Explicit UTF-8: the locale-default encoding cannot represent every
        # character a scraped page may contain.
        with open(full_path, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(table_data)
    except (OSError, UnicodeError, csv.Error) as e:
        # Best effort: report the failure but still return the rows.
        print(f"Error saving data to {full_path}: {e}")
    else:
        print(f"Data exported to {full_path}")

    return table_data

def get_athlete_name(soup):
    """Return the athlete label ``<first>_<last>`` read from the parsed page.

    ``<first>`` is taken from the first ``h2.no-margins`` element and
    ``<last>`` from the first ``h4`` element, both whitespace-stripped. A
    missing element contributes the placeholder ``Unknown``.
    """
    found_tags = (soup.find('h2', class_='no-margins'), soup.find('h4'))

    parts = []
    for tag in found_tags:
        if tag:
            parts.append(tag.text.strip())
        else:
            parts.append('Unknown')

    return f"{parts[0]}_{parts[1]}"

def process_athlete(driver, athlete_id, workout_keywords, aggregated_data_dict, unique_rows_dict,
                    *, destination_root="C:/Users/benoi/OneDrive/Desktop/bea/Data/CSV/traq/"):
    """Scrape one athlete's tables for every keyword in ``workout_keywords``.

    Each of the three sub-tab URLs is loaded and parsed. For every
    ``table.table-hover.issue-tracker.tracking-sheet`` the text of the nearest
    preceding ``<h5>`` is compared (case-insensitively, substring match)
    against each keyword. On a match the table is exported with
    ``export_table_to_csv`` into ``<destination_root>/<sanitized keyword>`` and
    its not-yet-seen rows are added to that keyword's aggregate.

    Args:
        driver: Selenium WebDriver, already logged in.
        athlete_id: Athlete identifier used in the URL and added to each row.
        workout_keywords: Iterable of keyword strings to search for.
        aggregated_data_dict: Maps sanitized keyword -> list of rows (mutated).
        unique_rows_dict: Maps sanitized keyword -> set of row tuples (mutated).
        destination_root: Folder under which the per-keyword folders are
            created. Defaults to the path this script has always used.

    Returns:
        None. Results are delivered through the two dict arguments and the
        exported files.

    NOTE(review): the sub-tab URLs differ only by their fragment; verify that
    each one really yields different page content, otherwise the same tables
    are exported (and overwritten) more than once.
    """
    base_url = f"https://traq.drivelinebaseball.com/athletes/view/{athlete_id}"
    sub_tab_urls = [f"{base_url}#subTab-1", f"{base_url}#subTab-2", f"{base_url}#subTab-13"]

    # Lower-casing and sanitizing depend only on the keyword, so do them once
    # instead of once per table. Materializing also makes a one-shot iterable
    # safe to reuse across tables.
    keyword_info = [
        (keyword, keyword.lower(), sanitize_filename(keyword))
        for keyword in workout_keywords
    ]

    for sub_tab_url in sub_tab_urls:
        print(f"Navigating to: {sub_tab_url}")
        driver.get(sub_tab_url)
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CLASS_NAME, 'ibox-title')))
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        person_name = get_athlete_name(soup)

        tables = soup.find_all('table', class_='table table-hover issue-tracker tracking-sheet')
        print(f"Found {len(tables)} tables in {sub_tab_url}")

        for index, table in enumerate(tables, start=1):
            exercise_name_tag = table.find_previous('h5')
            exercise_name = exercise_name_tag.text.strip() if exercise_name_tag else f"Exercise_{index}"
            exercise_name_lower = exercise_name.lower()

            for keyword, keyword_lower, sanitized_keyword in keyword_info:
                if keyword_lower not in exercise_name_lower:
                    continue

                # The folder is created lazily, only for keywords that matched.
                destination_folder = os.path.join(destination_root, sanitized_keyword)
                os.makedirs(destination_folder, exist_ok=True)

                print(f"Workout '{keyword}' found in {sub_tab_url}")
                table_data = export_table_to_csv(table, person_name, destination_folder, index, athlete_id, exercise_name)

                # setdefault tolerates dicts that were not pre-seeded with
                # every keyword (previously a KeyError).
                seen_rows = unique_rows_dict.setdefault(sanitized_keyword, set())
                collected_rows = aggregated_data_dict.setdefault(sanitized_keyword, [])

                # Add rows to this workout's aggregate, skipping duplicates.
                for row in table_data:
                    row_tuple = tuple(row)
                    if row_tuple not in seen_rows:
                        seen_rows.add(row_tuple)
                        collected_rows.append(row)

def main():
    """Log in to TRAQ, scrape the configured athletes and save per-keyword CSVs.

    Steps: open Chrome, submit the login form, call ``process_athlete`` for
    every ID in ``athlete_ids``, then write each keyword's collected rows to
    ``Howard_<keyword>.csv`` inside that keyword's folder. The browser is
    always closed, even when login or scraping raises.

    The login can be overridden with the ``TRAQ_USERNAME`` and
    ``TRAQ_PASSWORD`` environment variables.
    """
    url = "https://traq.drivelinebaseball.com/login"
    # SECURITY NOTE(review): credentials are hard-coded in source. Environment
    # variables take precedence; the literals remain only as a fallback so
    # existing runs keep working. Rotate the password and remove them.
    username = os.environ.get("TRAQ_USERNAME", "benoitadrian@gmail.com")
    password = os.environ.get("TRAQ_PASSWORD", "Baseball")
    workout_keywords = ["Beimel Roll Ins"
                        ]  # Multiple workout keywords
    athlete_ids = ["100192"]
    # Athlete ID batches kept for reference (not used by this run):
    #"117411", "118112", "106356", "81698", "118166", "118006", "108440", "53288", "95103"
    #"112631", "97210", "108470", "117221", "117183", "109444", "114403", "117320", "98377"
    #"101027", "95948", "83540", "113029", "106086", "37264", "114601", "102411", "108677"
    #"112633", "94345", "110819", "110916", "101144", "116165", "96740", "112657", "29699"
    #"87093", "15950", "35081"
    #"8909", "40152", "34702", "63126"

    login_form_selector = 'form[method="post"][role="form"]'

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, login_form_selector))
        )

        email_input = driver.find_element(By.CSS_SELECTOR, 'input[name="email"]')
        password_input = driver.find_element(By.CSS_SELECTOR, 'input[name="password"]')

        email_input.send_keys(username)
        password_input.send_keys(password)
        driver.find_element(By.CSS_SELECTOR, login_form_selector).submit()

        # Leaving the login URL is used as the signal that the form was processed.
        WebDriverWait(driver, 20).until(EC.url_changes(url))
        print(f"Current URL after login: {driver.current_url}")

        # Both dicts are keyed by the sanitized keyword, which is also the folder name.
        aggregated_data_dict = {sanitize_filename(keyword): [] for keyword in workout_keywords}
        unique_rows_dict = {sanitize_filename(keyword): set() for keyword in workout_keywords}

        for athlete_id in athlete_ids:
            process_athlete(driver, athlete_id, workout_keywords, aggregated_data_dict, unique_rows_dict)

        for keyword, data in aggregated_data_dict.items():
            if not data:
                continue

            aggregated_filename = f"Howard_{keyword}.csv"
            aggregated_filepath = os.path.join("C:/Users/benoi/OneDrive/Desktop/bea/Data/CSV/traq/", keyword, aggregated_filename)

            try:
                # Explicit UTF-8 so the output does not depend on the locale encoding.
                with open(aggregated_filepath, 'w', newline='', encoding='utf-8') as csvfile:
                    csv.writer(csvfile).writerows(data)
            except (OSError, UnicodeError, csv.Error) as e:
                print(f"Error saving aggregated data for '{keyword}' to {aggregated_filepath}: {e}")
            else:
                print(f"Aggregated data for '{keyword}' saved to {aggregated_filepath}")
    finally:
        # Always release the browser, including when a wait times out.
        driver.quit()

# Entry point: run the scraper only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Current URL after login: https://traq.drivelinebaseball.com/athletes?view=table
Navigating to: https://traq.drivelinebaseball.com/athletes/view/100192#subTab-1
Found 158 tables in https://traq.drivelinebaseball.com/athletes/view/100192#subTab-1
Workout 'Beimel Roll Ins' found in https://traq.drivelinebaseball.com/athletes/view/100192#subTab-1
Data exported to C:/Users/benoi/OneDrive/Desktop/bea/Data/CSV/traq/Beimel Roll Ins\Alex_Howard_Beimel Roll Ins Tracking Sheet_Beimel Roll Ins Tracking Sheet_3.csv
Navigating to: https://traq.drivelinebaseball.com/athletes/view/100192#subTab-2
Found 158 tables in https://traq.drivelinebaseball.com/athletes/view/100192#subTab-2
Workout 'Beimel Roll Ins' found in https://traq.drivelinebaseball.com/athletes/view/100192#subTab-2
Data exported to C:/Users/benoi/OneDrive/Desktop/bea/Data/CSV/traq/Beimel Roll Ins\Alex_Howard_Beimel Roll Ins Tracking Sheet_Beimel Roll Ins Tracking Sheet_3.csv
Navigating to: https://traq.drivelinebaseball.com/athletes/view/