# Data Collection

## 1. Import Libraries and Set Up WebDriver:

In [104]:
# Import necessary libraries
import csv
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# Resolve the ChromeDriver executable through webdriver_manager and wrap it in a Service
chromedriver_path = ChromeDriverManager().install()
service = Service(executable_path=chromedriver_path)

# Start the Chrome session that the following cells drive
driver = webdriver.Chrome(service=service)

# Report the ChromeDriver version advertised in the session capabilities
chromedriver_version = driver.capabilities['chrome']['chromedriverVersion']
print("ChromeDriver version:", chromedriver_version)

ChromeDriver version: 125.0.6422.76 (67dcf7562b8fb4ab0819135589e37a97bcc8942c-refs/branch-heads/6422@{#1086})


## 2. Define Muscle Groups and Initialize Data List:

In [105]:
# Muscle groups to scrape, in processing order (the order below is the order
# in which the checkboxes are clicked and the rows are written out).
muscle_groups = [
    # Biceps
    "Biceps", "Long Head Bicep", "Short Head Bicep",
    # Back
    "Traps (mid-back)", "Lower back",
    # Abdominals
    "Abdominals", "Lower Abdominals", "Upper Abdominals",
    # Lower legs
    "Calves", "Tibialis", "Soleus", "Gastrocnemius",
    # Forearms
    "Forearms", "Wrist Extensors", "Wrist Flexors",
    # Glutes
    "Glutes", "Gluteus Medius", "Gluteus Maximus",
    # Hamstrings
    "Hamstrings", "Medial Hamstrings", "Lateral Hamstrings",
    # Lats
    "Lats",
    # Shoulders
    "Shoulders", "Lateral Deltoid", "Anterior Deltoid", "Posterior Deltoid",
    # Triceps
    "Triceps", "Long Head Tricep", "Lateral Head Triceps", "Medial Head Triceps",
    # Traps
    "Traps", "Upper Traps", "Lower Traps",
    # Quads / thighs
    "Quads", "Inner Thigh", "Inner Quadriceps", "Outer Quadricep", "Rectus Femoris",
    # Chest
    "Chest", "Upper Pectoralis", "Mid and Lower Chest",
    # Remaining groups
    "Obliques", "Hands", "Front Shoulders", "Rear Shoulders",
]

# Accumulator for scraped rows; each entry is one exercise record
data = list()

## 3. Open Webpage and Wait for Header:

In [106]:
# Navigate to the MuscleWiki directory and block until its heading is shown.
# Any failure is reported rather than raised, so the cell itself never errors.
try:
    driver.get("https://musclewiki.com/directory")

    # The page counts as loaded once an <h2> containing "Directory" is visible (10 s timeout)
    directory_heading = (By.XPATH, "//h2[contains(text(), 'Directory')]")
    page_wait = WebDriverWait(driver, 10)
    page_wait.until(EC.visibility_of_element_located(directory_heading))
    print("Webpage loaded successfully")
except Exception as e:
    print(f"Error loading the webpage: {str(e)}")

Webpage loaded successfully


## 4. Loop Through Muscle Groups, Extract Data, and Write to CSV

In [107]:
# Scrape the exercise table for every muscle group, then save the results to CSV.
# The outer try/finally closes the browser and writes out whatever has been
# collected so far, even if the loop is interrupted part-way through.
try:
    for muscle in muscle_groups:
        # Click the "Muscles" legend, which is expected to reveal the checkboxes.
        # Best-effort: a failure is reported and the checkbox lookup is still attempted.
        try:
            muscles_legend = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable((By.XPATH, "//legend[contains(text(), 'Muscles')]"))
            )
            muscles_legend.click()
        except Exception:
            # `Exception` instead of a bare `except:` so a kernel interrupt still stops the run
            print(f"Muscles legend not found for {muscle}")

        # Tracks whether this iteration ticked the checkbox, so it can always be
        # unticked afterwards and never leaks into the next muscle group's filter.
        checkbox_checked = False
        try:
            # NOTE(review): contains() is a substring match, so e.g. 'Traps' would also
            # match a label reading 'Upper Traps' if the page has one - confirm the
            # first match is the intended checkbox.
            muscle_checkbox = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable(
                    (By.XPATH, f"//label[contains(text(), '{muscle}')]/preceding-sibling::input[@type='checkbox']")
                )
            )
            muscle_checkbox.click()
            checkbox_checked = True
            print(f"{muscle} checkbox clicked")

            # Wait for a table header mentioning the muscle, then pause for the rows to render
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, f"//th[contains(text(), '{muscle}')]"))
            )
            time.sleep(2)  # Give time for the exercises to load

            # Rows that follow the first <tr> after the muscle's header cell
            exercise_rows = driver.find_elements(
                By.XPATH, f"//th[contains(text(), '{muscle}')]/following::tr[1]/following-sibling::tr"
            )
            print(f"Found {len(exercise_rows)} exercises for {muscle}")

            for row in exercise_rows:
                # A row that is missing any expected element is reported and skipped
                try:
                    exercise_name = row.find_element(By.XPATH, ".//td[contains(@class, 'font-medium')]/a").text
                    video_link_male = row.find_element(By.XPATH, ".//a[contains(text(), 'Male')]").get_attribute('href')
                    video_link_female = row.find_element(By.XPATH, ".//a[contains(text(), 'Female')]").get_attribute('href')
                    equipment_html = row.find_element(By.XPATH, ".//td[contains(@class, 'px-3')]/div").get_attribute('innerHTML').strip()
                    equipment_name = row.find_element(By.XPATH, ".//td[contains(@class, 'px-3')]/span").text.strip()

                    # The Equipment column stores the icon markup followed by the equipment name
                    equipment_combined = equipment_html + " " + equipment_name
                    difficulty = row.find_element(By.XPATH, ".//td[contains(@class, 'whitespace-normal') and not(contains(@class, 'px-3'))]/span").text.strip()

                    data.append([muscle, exercise_name, video_link_male, video_link_female, equipment_combined, difficulty])
                    print(f"Added exercise: {exercise_name} for muscle: {muscle}")
                except Exception as e:
                    print(f"Error processing exercise row: {str(e)}")
        except Exception as e:
            print(f"Error processing {muscle}: {str(e)}")
        finally:
            # Untick the checkbox even when scraping this muscle failed part-way;
            # otherwise its filter would stay active for every later iteration.
            if checkbox_checked:
                try:
                    muscle_checkbox.click()
                except Exception as e:
                    print(f"Error unchecking {muscle}: {str(e)}")
finally:
    # Close the browser session
    driver.quit()

    # Write the collected rows (possibly partial) to a CSV file
    csv_filename = 'MuscleWiki_data_collection.csv'
    with open(csv_filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        # Header row first, then one row per exercise
        writer.writerow(['Muscle Group', 'Exercise', 'Video Link (Male)', 'Video Link (Female)', 'Equipment', 'Difficulty'])
        writer.writerows(data)
        print(f"Data written to {csv_filename}")

    # Read the file back and show the first rows for verification
    # (relies on `import pandas as pd` in the imports cell)
    df = pd.read_csv(csv_filename)
    print(df.head(10))

Biceps checkbox clicked
Found 132 exercises for Biceps
Added exercise: Dumbbell Curl for muscle: Biceps
Added exercise: Dumbbell Hammer Curl for muscle: Biceps
Added exercise: Kettlebell Concentration Curl for muscle: Biceps
Added exercise: Kettlebell Preacher Curl for muscle: Biceps
Added exercise: Kettlebell Single Arm Curl for muscle: Biceps
Added exercise: Biceps Stretch Variation Five for muscle: Biceps
Added exercise: Biceps Stretch Variation Four for muscle: Biceps
Added exercise: Biceps Stretch Variation Three for muscle: Biceps
Added exercise: Biceps Stretch Variation Two for muscle: Biceps
Added exercise: Biceps Stretch Variation One for muscle: Biceps
Added exercise: Cable Twisting Curl for muscle: Biceps
Added exercise: Cable Single Arm Bayesian Curl for muscle: Biceps
Added exercise: Cable Single Arm Reverse Curl for muscle: Biceps
Added exercise: Cable Single Arm Hammer Curl for muscle: Biceps
Added exercise: Band Bayesian Curl for muscle: Biceps
Added exercise: Band Baye