In [None]:
# Import the necessary libraries
import time  # For adding delays

import pandas as pd  # For working with dataframes
import undetected_chromedriver as uc  # For using Chrome browser
from selenium.common.exceptions import WebDriverException  # Base class of Selenium errors
from selenium.webdriver.common.by import By  # For locating elements
from tqdm.notebook import tqdm  # For progress bar

# Column accumulators for the scraped data; one entry per listing in each list
names, locations, websites = [], [], []

# Create a ChromeOptions object
options = uc.ChromeOptions()

# Disable Chrome's pop-up blocking (presumably so that tabs opened later via
# window.open() are not suppressed -- confirm if the scraping strategy changes)
options.add_argument("--disable-popup-blocking")

# Initialize a Chrome browser instance
driver = uc.Chrome(options=options)

# Maximize the browser window
driver.maximize_window()

# Open the target website
driver.get('https://clutch.co/agencies/digital-marketing?client_type=field_pp_cs_enterprise&client_type=field_pp_cs_midmarket')

# Wait for 1 second to allow the page to load
time.sleep(1)

# Close the pop-up on a best-effort basis: if it is absent, not yet rendered, or
# not clickable, carry on instead of aborting the whole run.
try:
    driver.find_element(By.XPATH, '/html/body/div/div[1]/div[4]/a[2]').click()
except WebDriverException as exc:
    print(f'Pop-up not dismissed ({type(exc).__name__}); continuing')

# Number of result pages processed so far
n = 0

# Loop control flag: cleared once no "next page" button can be clicked
next_button_enabled = True

def _text_or_none(parent, xpath):
    """Return the text of the first element under *parent* matching *xpath*, or None on any Selenium error."""
    try:
        return parent.find_element(By.XPATH, xpath).text
    except WebDriverException:
        return None


def _href_or_none(parent, xpath):
    """Return the href of the first element under *parent* matching *xpath*, or None on any Selenium error."""
    try:
        return parent.find_element(By.XPATH, xpath).get_attribute('href')
    except WebDriverException:
        return None


# Main loop to scrape data from each page
while next_button_enabled:
    # Wait for 3 seconds before proceeding
    time.sleep(3)

    # Find the container element that holds the listings
    container1 = driver.find_element(By.XPATH, '//*[@id="providers"]/div[2]/ul')

    # Find all the listing elements within the container
    listings = container1.find_elements(By.XPATH, './li[@data-position]')

    # Remember the results tab so we can always return to it by handle rather
    # than by position in window_handles
    results_tab = driver.current_window_handle

    # Loop through each listing
    for listing in tqdm(listings):

        # Scroll the page to bring the listing into view
        driver.execute_script("arguments[0].scrollIntoView();", listing)
        time.sleep(1)

        # Missing fields are recorded as None rather than aborting the run
        name = _text_or_none(listing, './/h3[@class="company_info"]')
        location = _text_or_none(listing, './/span[@class="locality"]')
        website = None

        # Get the link for the listing's profile page and open it in a new tab
        web_page_link = _href_or_none(listing, './/h3[@class="company_info"]/a')
        if web_page_link:
            # Pass the URL as a script argument instead of splicing it into the
            # JavaScript source, so quotes in the URL cannot break the script
            driver.execute_script("window.open(arguments[0], '_blank');", web_page_link)

            # Only switch if a tab other than the results tab actually exists;
            # otherwise driver.close() below would close the results tab itself
            new_tabs = [handle for handle in driver.window_handles if handle != results_tab]
            if new_tabs:
                driver.switch_to.window(new_tabs[-1])
                try:
                    # Add 1 sec delay
                    time.sleep(1)

                    # Try to extract the website link from the new tab
                    website = _href_or_none(driver, '//a[@title="Visit website"]')
                finally:
                    # Always close the profile tab and return to the results tab
                    driver.close()
                    driver.switch_to.window(results_tab)

        # Append all three columns together so the lists stay aligned
        names.append(name)
        locations.append(location)
        websites.append(website)

        # Print the extracted data for the current listing
        print(name, ":", location, ":", website)

    # Click the next page button; if it is missing or cannot be clicked, stop
    try:
        driver.find_element(By.XPATH, '//*[@id="providers"]/nav/ul/li[@class="page-item next"]/a').click()
    except WebDriverException:
        next_button_enabled = False

    # Increment the page number
    n += 1

    # Print the status of the current page
    print(f'page{n}: Done')

# Assemble one (name, location, website) row per scraped listing
rows = list(zip(names, locations, websites))
column_labels = ["Name", "Location", "Website"]
df = pd.DataFrame(rows, columns=column_labels)

# Write the table to CSV without the index column; the "utf-8-sig" codec
# prepends a byte-order mark to the file
df.to_csv("agencies.csv", encoding="utf-8-sig", index=False)

# Shut down the browser session
driver.quit()