## 1. Importing Necessary Libraries

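First, we import the libraries used in this notebook. Selenium is a third-party package, so it must be installed in the kernel's environment (for example with `%pip install selenium`) before this cell will run; otherwise the import fails with `ModuleNotFoundError`.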

In [3]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import os
import time


ModuleNotFoundError: No module named 'selenium'

## 2. Writing Data to a CSV File

We need a function to write the collected job information to a CSV file. It takes a list of row dictionaries (one per job) and appends them to `internships.csv`.


In [None]:
def write_to_csv(data, csv_file_path='internships.csv'):
    """Append job rows to a CSV file, writing the header row when needed.

    Args:
        data: Iterable of dictionaries, one per job, keyed by the column
            names in ``fieldnames`` below. A dictionary containing a key
            that is not a column name makes ``csv.DictWriter`` raise
            ``ValueError``.
        csv_file_path: Path of the CSV file to append to. Defaults to
            ``'internships.csv'`` in the current working directory.
    """
    fieldnames = ['Job Title', 'Job URL', 'Company', 'Location', 'Job Description', 'Salary']

    # The file is opened in append mode on every call, so the header must be
    # written only once: when the file does not exist yet or is still empty.
    needs_header = (
        not os.path.exists(csv_file_path)
        or os.path.getsize(csv_file_path) == 0
    )

    with open(csv_file_path, 'a', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()

        # Write the data rows
        writer.writerows(data)


## 3. Configuring and Initializing the Web Driver

Next, we will configure and initialize the Selenium web driver. We will use Chrome in incognito mode to scrape the job listings.


In [None]:
if __name__ == "__main__":
    # Build the Chrome launch options; "--incognito" asks Chrome to start in
    # incognito mode. More flags can be added with further add_argument calls.
    # NOTE(review): the later code cells are indented as a continuation of this
    # `if` body; confirm they still run when the cells are executed one by one.
    options = Options()
    options.add_argument("--incognito") # Add any other options
    # Start the Chrome session; `driver` is the browser handle the scraping
    # code below navigates with and finally closes.
    driver = webdriver.Chrome(options=options)


## 4. Scraping Job Listings from Indeed

We will define the job title and location we are interested in and construct the URL for the job search. Then, we will scrape the job listings from the search results.


In [None]:
    # Search parameters and the results URL built from them.
    job_ = 'Data+Engineer'
    location = 'Boston'
    url = f"https://www.indeed.com/jobs?q={job_}&l={location}&vjk=d8e1abf4ae32e95e"
    numOfPages = 5
    driver.get(url)

    # Number of the results page currently shown; the pagination step that
    # follows this loop body advances it.
    page_num = 1

    for i in range(numOfPages):
        wait = WebDriverWait(driver, 10)
        p_element = wait.until(EC.presence_of_element_located((By.ID, 'jobsearch-JapanPage')))

        # Only the number of listings is taken from this first lookup: every
        # iteration below leaves the results page, so the elements themselves
        # have to be located again each time.
        listing_count = len(p_element.find_elements(By.CSS_SELECTOR, 'li.css-5lfssm'))
        for job_counter in range(listing_count):
            driver.implicitly_wait(3)

            # Re-locate the listings on the freshly loaded results page.
            p_element = wait.until(EC.presence_of_element_located((By.ID, 'jobsearch-JapanPage')))
            job_listings = p_element.find_elements(By.CSS_SELECTOR, 'li.css-5lfssm')

            # The reloaded page may contain fewer entries than the first
            # lookup reported; stop instead of raising IndexError.
            if job_counter >= len(job_listings):
                break
            current_job = job_listings[job_counter]

            # Entries without a job title are skipped.
            try:
                title = current_job.find_element(By.CSS_SELECTOR, 'h2.jobTitle').text
            except Exception:
                continue

            # NOTE: this XPath is absolute (it starts at /html), so it is resolved
            # from the document root and selects the entry through the li[...] index.
            job_link_element = current_job.find_element(By.XPATH, f'/html/body/main/div/div[2]/div/div[5]/div/div[1]/div[5]/div/ul/li[{job_counter + 1}]/div/div[1]/div/div/div/table[1]/tbody/tr/td/div[1]/h2/a')
            job_link = job_link_element.get_attribute('href')

            # Open the job's own page and give it time to render.
            driver.get(job_link)
            time.sleep(3)
            main = driver.find_element(By.CSS_SELECTOR, 'div.jobsearch-JobComponent')

            # Salary is optional; fall back to a placeholder when it is absent.
            try:
                salary = main.find_element(By.CSS_SELECTOR, 'span.css-19j1a75').text
            except Exception:
                salary = 'undefined'

            mini_main = main.find_element(By.CSS_SELECTOR, 'div.css-kyg8or')
            # A job without a location is skipped. The scraped value gets its own
            # name so the `location` search parameter above is not overwritten.
            try:
                job_location = mini_main.find_element(By.XPATH, '//*[@id="viewJobSSRRoot"]/div[2]/div[3]/div/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div[2]').text
            except Exception:
                driver.get(url)
                continue

            company = main.find_element(By.CSS_SELECTOR, 'a.e19afand0').text

            # Flatten the description to a single line for the CSV cell.
            summary = main.find_element(By.ID, 'jobDescriptionText').text
            summary = summary.replace("\n", " ")

            data = [{'Job Title': title, 'Job URL': job_link, 'Company': company, 'Location': job_location, 'Job Description': summary, 'Salary': salary}]
            write_to_csv(data)

            # Go back to the results page for the next listing.
            driver.get(url)


## 5. Navigating to the Next Page

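We also need to handle pagination to scrape multiple pages of job listings. The following snippet runs at the end of each iteration of the page loop from the previous section: it finds the link for the next page number, clicks it, and records the new results URL. After the loop finishes, the browser is closed.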


In [None]:
        page_list = p_element.find_element(By.CSS_SELECTOR, 'ul.css-1g90gv6')

        pageNums = page_list.find_elements(By.CSS_SELECTOR,'li.css-227srf')

        for num in pageNums:
            try:
                num_in_list = int(num.text)
            except:
                continue

            if (page_num + 1)  == num_in_list:
                page_num += 1
                num.click()
                driver.implicitly_wait(3)
                url = driver.current_url
                break
            else: 
                continue

    driver.quit()
