In [69]:
import csv
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Scrape the Upwork "data analyst" job search results into upwork_jobs.csv:
# one CSV row per job tile, following the 'Next' control for up to max_pages
# result pages.

NOT_AVAILABLE = 'Not Available'
EST_TIME_PREFIX = 'Est. time:'
JOB_TILES_XPATH = '//*[@id="main"]/div/div/div/div[2]/div[2]/section/article'
NEXT_BUTTON_XPATH = '//span[@aria-hidden="true" and contains(text(), "Next")]'
COOKIE_BANNER_CSS = 'button.onetrust-close-btn-handler'


def _optional_text(parent, xpath, default=NOT_AVAILABLE):
    """Return the text of the first element under *parent* matching *xpath*.

    find_elements returns an empty list when nothing matches, whereas
    find_element raises NoSuchElementException. Using the former lets a
    missing field fall back to *default* instead of aborting the whole row.
    """
    matches = parent.find_elements(By.XPATH, xpath)
    return matches[0].text if matches else default


def _estimated_time(job_info_list):
    """Return the value of the 'Est. time:' entry in *job_info_list*.

    Scans every <li> of the info list for one whose text starts with the
    'Est. time:' prefix and returns the remainder, stripped. Returns
    NOT_AVAILABLE when no such entry exists or when reading the list fails.
    """
    try:
        for li in job_info_list.find_elements(By.XPATH, './/li'):
            text = li.text.strip()
            if text.startswith(EST_TIME_PREFIX):
                return text[len(EST_TIME_PREFIX):].strip()
    except WebDriverException as e:
        print(f"Error extracting estimated time: {e}")
    return NOT_AVAILABLE


# Initialize the WebDriver
driver = webdriver.Chrome()

# The try/finally wraps the file handling as well, so the browser is shut
# down even if the CSV file cannot be opened.
try:
    # Open a CSV file to write data
    with open('upwork_jobs.csv', mode='w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)
        # Write the header row
        csv_writer.writerow(['Job Title', 'Hourly Rate', 'Experience Level', 'Estimated Time'])

        # Open the Upwork job search page
        driver.get("https://www.upwork.com/nx/search/jobs/?nbs=1&q=data%20analyst")

        # Initialize page counter
        current_page = 1
        max_pages = 10  # Set to 10 pages

        # Waits are loop-invariant, so build them once.
        wait = WebDriverWait(driver, 20)
        click_wait = WebDriverWait(driver, 10)
        # The cookie banner is optional; a short timeout avoids stalling for
        # the full 20 seconds on every page where it is absent.
        banner_wait = WebDriverWait(driver, 3)

        while current_page <= max_pages:
            print(f"Scraping page {current_page}...")

            # Wait for the job listings to load
            jobs = wait.until(EC.presence_of_all_elements_located((By.XPATH, JOB_TILES_XPATH)))

            # Iterate over each job listing
            for job in jobs:
                try:
                    # Title and info list are required: if either is missing
                    # find_element raises and the job is reported and skipped.
                    job_title = job.find_element(By.XPATH, './/h2[contains(@class, "h5 mb-0 mr-2 job-tile-title")]').text
                    job_info_list = job.find_element(By.XPATH, './/ul[contains(@class, "job-tile-info-list text-base-sm mb-4")]')

                    # Optional fields fall back to 'Not Available'.
                    hourly_rate = _optional_text(job_info_list, './/li[@data-test="job-type-label"]')
                    experience_level = _optional_text(job_info_list, './/li[@data-test="experience-level"]')
                    estimated_time = _estimated_time(job_info_list)

                    # Write the job data to CSV file
                    csv_writer.writerow([job_title, hourly_rate, experience_level, estimated_time])

                except Exception as e:
                    # Per-job boundary: report the failure and keep scraping.
                    print(f"An error occurred while processing a job: {e}")

            # The last requested page has been scraped; do not load a further
            # page that would never be read.
            if current_page >= max_pages:
                break

            # Move to the next page
            try:
                # Handle potential pop-ups or banners (best effort).
                try:
                    cookie_banner = banner_wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, COOKIE_BANNER_CSS)))
                    cookie_banner.click()
                except WebDriverException:
                    pass  # No cookie banner, already closed, or not clickable

                # Ensure the 'Next' button is in view
                next_button = wait.until(EC.presence_of_element_located((By.XPATH, NEXT_BUTTON_XPATH)))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button)  # Scroll into view

                # Wait for the element to be clickable, then click it via
                # JavaScript so an overlapping element cannot intercept it.
                next_button = click_wait.until(EC.element_to_be_clickable((By.XPATH, NEXT_BUTTON_XPATH)))
                driver.execute_script("arguments[0].click();", next_button)

                time.sleep(2)  # Wait for the new page to load
                current_page += 1
            except Exception as e:
                print(f"Error navigating to the next page: {e}")
                break

finally:
    # Close the WebDriver
    driver.quit()


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
