In [1]:
import time
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Launch the Chrome browser that the rest of the notebook drives.
# The chromedriver binary is referenced by a relative path, so it is resolved
# against the notebook's working directory.
chromedriver_path = r'chromedriver-win64\chromedriver.exe'
service = Service(chromedriver_path)

# Browser options: open the window maximized.
options = webdriver.ChromeOptions()
options.add_argument('--start-maximized')

driver = webdriver.Chrome(options=options, service=service)

In [3]:
# Scrape the address shown on every gym page linked from the PureGym index
# and save the (url, address) pairs to a CSV file.
#
# NOTE(review): the CSS selectors below contain generated-looking class-name
# suffixes (e.g. "czPyUO"); presumably they change when the site is rebuilt,
# so re-check them in the browser dev tools if the waits start timing out.
GYMS_INDEX_URL = "https://www.puregym.com/gyms/"
OUTPUT_CSV = Path("Results/puregym_addresses.csv")
REQUEST_DELAY_SECONDS = 2  # pause between consecutive gym pages

driver.get(GYMS_INDEX_URL)
wait = WebDriverWait(driver, 10)  # 10 s timeout, reused for every wait below

# Block until the gym list container is in the DOM so that the link lookup
# below does not run against a page that has not rendered the list yet.
wait.until(EC.presence_of_element_located(
    (By.CSS_SELECTOR, "div.styles__ListWrapper-sc-1n23ymn-1.czPyUO")
))

# Read all hrefs into plain strings up front: once we navigate away, the
# element handles are no longer needed, and there is no reason to come back
# to the index page between gyms.
gym_link_elements = driver.find_elements(
    By.CSS_SELECTOR,
    "ul.list__List-sc-ysyucw-0.erbNjs li.list__ListItem-sc-ysyucw-1.kIYjls a.sc-1tbcnxx-5.fXaSor"
)
gym_links = [
    href
    for href in (elem.get_attribute("href") for elem in gym_link_elements)
    if href  # skip anchors without an href instead of passing None to driver.get
]

data = []

for url in gym_links:
    try:
        # Navigation is inside the try so that one unreachable gym page is
        # logged and skipped rather than aborting the run and losing all rows.
        driver.get(url)
        address_elem = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "address.sc-1mjfvd9-5.govTZt")
        ))
        address_text = address_elem.text
    except WebDriverException as e:
        # Covers wait timeouts and other Selenium failures; unrelated
        # programming errors are deliberately left to propagate.
        address_text = "Address not found"
        print(f"Error on {url}: {e}")

    # One record per gym, including the failures (with the placeholder text).
    data.append({"url": url, "address": address_text})

    # Pace the requests so the site is not hit in a tight loop.
    time.sleep(REQUEST_DELAY_SECONDS)

# Fix the column set explicitly so an empty scrape still yields a valid header.
df = pd.DataFrame(data, columns=["url", "address"])
print(df.head())

# to_csv does not create missing directories; make sure the target exists so
# a completed scrape is not lost at the very last step.
OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(OUTPUT_CSV, index=False)

                                                 url  \
0             https://www.puregym.com/gyms/aberdare/   
1  https://www.puregym.com/gyms/aberdeen-kittybre...   
2    https://www.puregym.com/gyms/aberdeen-rubislaw/   
3     https://www.puregym.com/gyms/aberdeen-shiprow/   
4  https://www.puregym.com/gyms/aberdeen-wellingt...   

                                             address  
0  Unit 1b, Riverside Retail Park, Aberdare, Sout...  
1  Kittybrewster Retail Park, Bedford Road, AB24 3LJ  
2  H1 Building, Hill of Rubislaw, Anderson Drive,...  
3                        Shiprow, Aberdeen, AB11 5BW  
4      Unit B, Wellington Circle, Aberdeen, AB12 3QW  


In [4]:
# Shut down the Chrome session created in the configuration cell now that
# scraping is finished.
# NOTE(review): run this cell even if the scraping cell raised; otherwise the
# browser and chromedriver processes presumably stay alive — confirm.
driver.quit()