In [121]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from setups import get_local_safe_setup
import re
import time
import pandas as pd

# Create the browser driver through the project's setup helper.
driver = get_local_safe_setup()

# Open the club directory page that the scrape starts from.
CLUBS_URL = 'https://www.equinox.com/clubs'
driver.get(CLUBS_URL)

In [22]:

cities_links = []

# Locator shared by the wait and the lookup below.
card_locator = (By.CLASS_NAME, "card-list-view___LIB5Q")

# Block (up to 10 s) until at least one card element is present in the DOM.
WebDriverWait(driver, 10).until(EC.presence_of_element_located(card_locator))

# Each card contributes the href of its first <a> element.
cities_links.extend(
    card.find_element(By.TAG_NAME, 'a').get_attribute('href')
    for card in driver.find_elements(*card_locator)
)

print(f"{len(cities_links)} Cities")

13 Cities


In [23]:
clubs_links = []

# Locator shared by the wait and the lookup inside the loop.
card_locator = (By.CLASS_NAME, "card-list-view___LIB5Q")

for city_url in cities_links:
    driver.get(city_url)

    # Block (up to 10 s) until at least one card element is present in the DOM.
    WebDriverWait(driver, 10).until(EC.presence_of_element_located(card_locator))

    # Each card contributes the href of its first <a> element.
    clubs_links.extend(
        card.find_element(By.TAG_NAME, 'a').get_attribute('href')
        for card in driver.find_elements(*card_locator)
    )

    # Fixed pause between page loads.
    time.sleep(2)


print(f"{len(clubs_links)} Clubs")

111 Clubs


In [117]:
# Per-club columns. All five lists must stay the same length because they are
# later combined column-wise, so each club is appended to all of them at once
# (or to none of them when the page could not be read).
names = []
managers = []
addresses = []
phones = []
club_hours = []

for link in clubs_links:
    driver.get(link)
    time.sleep(2)

    # --- Required fields: name, address, phone -------------------------------
    try:
        # Wait until the element labelled "Club Address" is present
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, '//*[@aria-label="Club Address"]'))
        )

        name = driver.find_element(By.CSS_SELECTOR, '.primary-headline___3xR8O').text
        # First link of the address block; only its first text line is kept
        address = driver.find_element(
            By.XPATH, '//*[@aria-label="Club Address"]/a[1]'
        ).text.split('\n')[0]
        # Second link of the address block holds the phone number text
        phone = driver.find_element(By.XPATH, '//*[@aria-label="Club Address"]/a[2]').text
    except Exception as exc:
        # Skip the club entirely rather than appending to only some of the
        # lists; report it so the missing row is visible in the cell output.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print(f"Skipping {link}: {exc!r}")
        continue

    # --- Optional fields: manager, opening hours -----------------------------
    manager = None
    hours = None
    try:
        club_info = driver.find_element(By.CSS_SELECTOR, '.club-data-container___3-XIb')

        # scroll down for club info
        driver.execute_script("arguments[0].scrollIntoView();", club_info)
        time.sleep(2)

        # Second text line of the manager element
        manager = club_info.find_element(By.CLASS_NAME, "club-manager___VWYgB").text.split('\n')[1]

        hours_card = driver.find_element(By.CSS_SELECTOR, '.accordion-contents___KOJyz')
        hours_list = hours_card.find_elements(By.TAG_NAME, 'li')
        # One string per <li>: newlines flattened, em dash replaced by a hyphen
        hours = [i.text.replace("\n", " ").replace("—", "-") for i in hours_list]
    except Exception:
        # Whatever could not be read stays None; a manager that was already
        # read before the failure is kept.
        pass

    # Single append point: exactly one entry per club in every list.
    names.append(name)
    addresses.append(address)
    phones.append(phone)
    managers.append(manager)
    club_hours.append(hours)



# Build one Google Maps search URL per scraped club (base + name + address).
url = "https://www.google.com/maps/search/Equinox"
locations = [f"{url} {i} {j}" for i, j in zip(names, addresses)]

print(f"{len(locations)} Locations")

110 Locations


In [123]:
# Open the first generated Maps search URL in the browser.
first_location = locations[0]
driver.get(first_location)


In [166]:
# Per-location columns. Every list receives exactly one entry per element of
# `locations` (None when a value could not be determined) so that they stay
# aligned with the club lists they are later combined with.
cities = []
states = []
zip_codes = []
countries = []
latitude = []
longitude = []
geo_accuracy = []


def _split_google_address(text):
    """Split a comma-separated address line into (city, state, zip_code, country).

    Two layouts are handled:

    * four or more parts -> ``..., city, "<state> <zip>", country``; the last
      three parts are used, so extra leading parts (e.g. a suite) are ignored.
    * exactly three parts -> ``street, "<city> <zip...>", country``; the first
      word of the middle part is taken as the city and ``state`` is None.

    Postal-code tokens are joined without spaces in both layouts. Every piece
    is stripped of surrounding whitespace.

    Raises:
        IndexError: if the text has fewer than three parts or the state/city
            part is empty.
    """
    parts = [part.strip() for part in text.split(",")]

    if len(parts) >= 4:
        region = parts[-2].split()
        # `or None`: a region with a state but no postal code yields None, not "".
        return parts[-3], region[0], "".join(region[1:]) or None, parts[-1]

    # Explicit positive indices so that too-short input raises instead of
    # silently wrapping around.
    city_zip = parts[1].split()
    return city_zip[0], None, "".join(city_zip[1:]), parts[2]


for location in locations:
    city = state = zip_code = country = None
    lat = lng = None
    accuracy = None

    try:
        driver.get(location)

        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.Io6YTe'))
            )
        except Exception:
            # The address element never appeared (presumably the search showed
            # a result list instead of a single place — TODO confirm). Search
            # again using the aria-label of the first '.hfpxzc' element.
            best_match = driver.find_element(By.CLASS_NAME, 'hfpxzc').get_attribute('aria-label')
            driver.get("https://www.google.com/maps/search/" + best_match)
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.Io6YTe'))
            )

        address_text = driver.find_element(By.CSS_SELECTOR, '.Io6YTe').text
        city, state, zip_code, country = _split_google_address(address_text)

        map_url = driver.find_element(By.CSS_SELECTOR, 'meta[itemprop=image]').get_attribute('content')

        # Extract the latitude and longitude coordinates from the URL using regex
        match = re.search(r'center=(-?\d+\.\d+)%2C(-?\d+\.\d+)', map_url)
        if match is not None:
            lat, lng = match.group(1), match.group(2)
    except Exception as exc:
        # Keep going with the remaining locations; values read before the
        # failure are kept, the rest stay None.
        print(f"Could not fully resolve {location}: {exc!r}")

    try:
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="U5ELMd"]'))
        )
        footer = driver.find_element(By.CLASS_NAME, "scene-footer")
        # scroll down for footer
        driver.execute_script("arguments[0].scrollIntoView();", footer)
        time.sleep(2)

        # NOTE(review): an XPath starting with '//' searches the whole document
        # even when called on `footer`; kept as-is because the id lookup does
        # not depend on the footer.
        geo_element = footer.find_element(By.XPATH, '//*[@id="U5ELMd"]')
        accuracy = geo_element.text
    except Exception:
        accuracy = None

    # Single append point: exactly one entry per location in every list.
    cities.append(city)
    states.append(state)
    zip_codes.append(zip_code)
    countries.append(country)
    latitude.append(lat)
    longitude.append(lng)
    geo_accuracy.append(accuracy)






In [164]:
# Column label -> scraped list; the insertion order is the column order of the
# resulting frame and of the exported CSV.
club_columns = {
    'Club Name': names,
    "Phone Number": phones,
    "Manager": managers,
    "Address": addresses,
    "City": cities,
    "State": states,
    "Zip Code": zip_codes,
    "Country": countries,
    "Latitude": latitude,
    "Longitude": longitude,
    "Geo Accuracy": geo_accuracy,
    "Location": locations,
    "Club Hours": club_hours,
}
df = pd.DataFrame(club_columns)

# Write the table to disk without the positional index, then display it.
df.to_csv("Equinox Clubs Data.csv", index=False)
df

Unnamed: 0,Club Name,Phone Number,Manager,Address,City,State,Zip Code,Country,Latitude,Longitude,Geo Accuracy,Location,Club Hours
0,CHESTNUT HILL,(617) 531-7077,Ana Kostadinova,200 Boylston Street,Chestnut Hill,MA,02467,United States,42.318743,-71.1755946,100 m,https://www.google.com/maps/search/Equinox CHE...,"[Mon 5:00am - 10:00pm, Tue 5:00am - 10:00pm, W..."
1,DARTMOUTH STREET,(617) 578-8918,Lindsey Cardin,131 Dartmouth Street,Boston,MA,02116,United States,42.3470414,-71.0753582,100 m,https://www.google.com/maps/search/Equinox DAR...,"[Mon 5:30am - 10:00pm, Tue 5:30am - 10:00pm, W..."
2,FRANKLIN STREET,(617) 426-2140,Matt Strutt,225 Franklin Street,Boston,MA,02110,United States,42.3559701,-71.0539553,100 m,https://www.google.com/maps/search/Equinox FRA...,"[Mon 5:30am - 9:00pm, Tue 5:30am - 9:00pm, Wed..."
3,SEAPORT,(617) 206-2641,Curtis Duffy,27 Northern Avenue,Boston,MA,02210,United States,42.3533219,-71.0476806,100 m,https://www.google.com/maps/search/Equinox SEA...,"[Mon 5:30am - 10:00pm, Tue 5:30am - 10:00pm, W..."
4,SPORTS CLUB BOSTON,(617) 375-8200,Gianna Rebosio,4 Avery St,Boston,MA,02111,United States,42.353014,-71.063256,100 m,https://www.google.com/maps/search/Equinox SPO...,"[Mon 5:30am - 10:00pm, Tue 5:30am - 10:00pm, W..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,ANTHEM ROW,(202) 741-0015,Megan Cooke,"800 K Street NW, Suite 90",Washington,DC,20001,United States,38.9017024,-77.0227305,100 m,https://www.google.com/maps/search/Equinox ANT...,"[Mon 5:30am - 10:00pm, Tue 5:30am - 10:00pm, W..."
106,BETHESDA,(301) 652-1078,Ross Poirier,4905 Elm Street,Bethesda,MD,20814,United States,38.9822266,-77.0964747,100 m,https://www.google.com/maps/search/Equinox BET...,"[Mon 5:00am - 10:00pm, Tue 5:00am - 10:00pm, W..."
107,SPORTS CLUB WASHINGTON D.C.,(202) 974-6600,Joe Vito,1170 22nd St NW,Washington,DC,20037,United States,38.9047761,-77.0490104,100 m,https://www.google.com/maps/search/Equinox SPO...,"[Mon 5:30am - 10:00pm, Tue 5:30am - 10:00pm, W..."
108,TYSONS CORNER,(703) 790-6193,Anthony Evans,8065 Leesburg Pike.,Vienna,VA,22182,United States,38.9127212,-77.2247514,100 m,https://www.google.com/maps/search/Equinox TYS...,"[Mon 5:30am - 9:00pm, Tue 5:30am - 9:00pm, Wed..."
