In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC


import time
from selenium.webdriver.common.keys import Keys

# data manipulation library
import pandas as pd

import csv

# address
from geopy.geocoders import Nominatim

from win10toast import ToastNotifier

In [None]:
# Attach to a Chrome instance that is already running with remote debugging
# enabled on port 8000. Start it first from cmd:
# chrome.exe --remote-debugging-port=8000 --user-data-dir="D:\Chromedata"
chrome_options = Options()
chrome_options.add_experimental_option("debuggerAddress","localhost:8000")

# `options=` is the supported keyword; `chrome_options=` is its deprecated alias.
# NOTE(review): `executable_path` is kept for compatibility with older Selenium
# releases; newer releases expect a Service object instead — confirm the
# installed Selenium version before migrating.
driver = webdriver.Chrome(executable_path="chromedriver.exe", options=chrome_options)

In [None]:
# XPath reference: Basic Job and Company Information.
# Notes only — the triple-quoted text below is a bare string literal that is
# never assigned to a name, so no code in this notebook reads it.
'''
Job Card = job-preview style__details___paBkq
Job_Title           = //*[@id="skip-to-content"]/div[3]/div/div[1]/div/form/div[2]/div/div/div[2]/div[1]/div[1]/h1
Job_Info            = //*[@id="skip-to-content"]/div[3]/div/div[1]/div/form/div[2]/div/div/div[2]/div[1]/div[1]/div[1]
Company_Name        = //*[@id="skip-to-content"]/div[3]/div/div[1]/div/form/div[2]/div/div/div[2]/div[1]/div[1]/div[2]/div/div[2]/a
Company_Location    = //*[@id="skip-to-content"]/div[3]/div/div[1]/div/form/div[2]/div/div/div[2]/div[1]/div[1]/div[2]/div/div[2]/div/div
Container           = //*[@id="skip-to-content"]/div[3]/div/div[1]/div/form/div[2]/div/div/div[2]/div[1]/div[1]
'''

# CSS class reference: Basic Job and Company Information.
# Notes only (bare string literal). These four class names are the ones
# get_general_information() passes to By.CLASS_NAME.
'''
Job_Title           = style__job-title___P7PJV
Job_Info            = style__job-type-info___MJ8oT
Company_Name        = style__employer-name___54lqg
Company_Location    = style__list-with-tooltip___Js1g4
'''

In [None]:
# General Information
general_information_keys = ['Job Title', 'Job Info', 'Company Name', 'Company Location']

def get_general_information():
    """Read the headline fields of the job currently open in the browser.

    Every field is looked up by CSS class name through the module-level
    ``driver``.

    Returns:
        pandas.DataFrame: a single-column frame indexed by
        ``general_information_keys`` (job title, job info, company name,
        company location).

    Raises:
        Exception: whatever the driver raises when the title, info or
            employer element cannot be found.
        IndexError: when the fallback location text has no '∙' separator.
    """
    job_title = driver.find_element(By.CLASS_NAME, 'style__job-title___P7PJV').text
    job_info = driver.find_element(By.CLASS_NAME, 'style__job-type-info___MJ8oT').text
    company_name = driver.find_element(By.CLASS_NAME, 'style__employer-name___54lqg').text

    try:
        # Look the element up once and reuse it for both its text and its tooltip.
        location_element = driver.find_element(By.CLASS_NAME, 'style__list-with-tooltip___Js1g4')
        company_location = location_element.text
        if 'more' in company_location:
            # Presumably the visible text is a truncated "... N more" list — TODO confirm.
            # The `title` attribute is split on ' - ' and re-joined one entry per line.
            full_list = location_element.get_attribute('title')
            company_location = '\n'.join(full_list.split(' - '))
    except Exception:
        # Deliberately broad (any lookup/parsing failure falls back), but no longer
        # a bare `except:` so Ctrl-C / SystemExit are not swallowed.
        # Fallback: take the second '∙'-separated part of the media-body text.
        company_location = driver.find_element(By.CLASS_NAME, 'style__media-body___MV2ef').text.split('∙')[1]

    return pd.Series([job_title, job_info, company_name, company_location], index=general_information_keys).to_frame()

In [None]:
# Company Information
company_information_keys = ['Employees', 'Industry', 'Headquarters']

def get_company_information():
    """Read the company facts shown for the job currently open in the browser.

    Collects all elements with class ``style__content___sE2EK`` via the
    module-level ``driver`` and uses the text of the first three, in the
    order of ``company_information_keys``.

    Returns:
        pandas.DataFrame: a single-column frame indexed by
        ``company_information_keys``.

    Raises:
        IndexError: when fewer than three matching elements are found.
    """
    fields = driver.find_elements(By.CLASS_NAME, 'style__content___sE2EK')
    # Positions 0, 1, 2 map to employees, industry and headquarters respectively.
    values = [fields[position].text for position in range(3)]
    return pd.Series(values, index=company_information_keys).to_frame()

In [None]:
# About the Role: CSS class reference.
# Notes only — a bare string literal that is never assigned to a name.
# `title` and `content` are the classes looked up by get_card_titles() and
# get_card_content(); `class` is presumably the card container — TODO confirm.
'''
class = style__col___5FTI6
title = style__title___5XGGK
content = style__content___w3TUd
'''

In [None]:
# Card Title
def get_card_titles():
    """Return the text of every card title on the currently open job.

    Finds all elements with class ``style__title___5XGGK`` via the
    module-level ``driver``.

    Returns:
        list[str]: one entry per matching element, in the order the driver
        returns them; empty when nothing matches.
    """
    headings = driver.find_elements(By.CLASS_NAME, "style__title___5XGGK")
    return [heading.text for heading in headings]

In [None]:
# Card Content
def get_card_content():
    """Return the text of every card body on the currently open job.

    Finds all elements with class ``style__content___w3TUd`` via the
    module-level ``driver``.

    Returns:
        list[str]: one entry per matching element, in the order the driver
        returns them; empty when nothing matches.
    """
    bodies = driver.find_elements(By.CLASS_NAME, "style__content___w3TUd")
    return [body.text for body in bodies]

In [None]:
# Go to next page
def next_page(wait_seconds=20):
    """Click the "next page" button and pause while the next page loads.

    Args:
        wait_seconds: how long to sleep after the click. Defaults to 20, the
            value that used to be hard-coded, so existing calls behave the same.

    Raises:
        Exception: whatever the driver raises when no button with
            ``aria-label="next page"`` can be found or clicked.
    """
    driver.find_element(By.XPATH, '//button[@aria-label="next page"]').click()
    # Fixed pause rather than an explicit wait condition.
    time.sleep(wait_seconds)

In [None]:
def print_case(page, case, df=None):
    """Print one scraped case framed by a dashed banner.

    Args:
        page: page number shown in the banner header.
        case: case number shown in the banner header.
        df: object printed between the header and the closing rule; it is
            passed straight to ``print``, so the default ``None`` prints the
            text ``None``.
    """
    banner_width = 50
    header = 'Page {}: Case {}'.format(page, case).center(banner_width, '-')
    closing_rule = '-' * banner_width
    # One print call, three newline-separated lines: header, payload, rule.
    print(header, df, closing_rule, sep='\n')

In [None]:
def refresh(wait_seconds=10):
    """Reload the current browser page and pause while it loads.

    Args:
        wait_seconds: how long to sleep after the reload. Defaults to 10, the
            value that used to be hard-coded, so existing calls behave the same.
    """
    driver.refresh()
    # Fixed pause rather than an explicit wait condition.
    time.sleep(wait_seconds)

In [None]:
def _collect_section(getter, label):
    """Run one section scraper; on failure print ``Error: <label>`` and return None."""
    try:
        return getter()
    except Exception:
        # Not a bare `except:` so Ctrl-C still stops the run.
        print("Error: " + label)
        return None


def _scrape_open_job():
    """Gather every section of the currently opened job into one flat dict.

    A section that fails is simply left out of the dict, so a job never
    inherits values scraped from the previous job. Duplicate labels collapse
    to the last value seen.
    """
    record = {}
    for getter, label in (
        (get_general_information, "General Information"),
        (get_company_information, "Company Information"),
    ):
        section = _collect_section(getter, label)
        if section is not None:
            # The getters return a single-column frame; flatten it to label -> value.
            record.update(section.iloc[:, 0].to_dict())

    # About the Role: titles and contents are scraped separately and paired by position.
    titles = _collect_section(get_card_titles, "Card Titles")
    contents = _collect_section(get_card_content, "Card Contents")
    if titles is not None and contents is not None:
        if len(titles) == len(contents):
            record.update(zip(titles, contents))
        else:
            # Pairing by position would be unreliable, so skip the section
            # instead of aborting the whole run.
            print("Error: Card Titles and Card Contents differ in length")
    return record


def _save_cases(rows):
    """Append the collected job records to output/output.csv."""
    import os

    # Build the frame once from all records instead of growing it per job.
    data = pd.DataFrame(rows)
    # Without the directory to_csv fails and the whole scrape would be lost.
    os.makedirs('output', exist_ok=True)
    # NOTE: mode='a' appends to an existing file and, with pandas' default
    # header=True, writes a header row on every call.
    data.to_csv('output/output.csv', mode='a', encoding='utf-8', index=False)


def main_program(trial=1):
    """Scrape job listings page by page from the attached browser.

    Starting from the page currently open in ``driver``, clicks every job
    card, scrapes it, prints it, then moves to the next page — repeated for
    ``trial`` pages. The page is refreshed after more than five consecutive
    page turns.

    Whatever has been collected is always appended to ``output/output.csv``
    and the number of collected jobs is printed, whether the run finishes,
    stops on an error, or is interrupted with Ctrl-C.

    Args:
        trial: number of result pages to process (default 1).
    """
    page = 1
    count = 0  # jobs collected on the current page
    rest = 0   # page turns since the last refresh
    rows = []

    try:
        while page <= trial:
            if rest > 5:
                refresh()
                rest = 0

            job_cards = driver.find_elements(By.XPATH, '//div/a[@data-hook="jobs-card"]')

            for job_card in job_cards:
                job_card.click()
                time.sleep(2)

                record = _scrape_open_job()
                if not record:
                    # Every section failed (errors already printed): nothing to store.
                    continue

                rows.append(record)
                count += 1
                print_case(page, count, pd.Series(record).to_frame())

            # The next-page click also happens after the last requested page,
            # so a later call continues from the following page.
            next_page()
            rest += 1
            page += 1
            count = 0

    except Exception as e:
        print("Stop at page " + str(page) + " (" + str(count) + " cases)" + " due to " + str(e))
    finally:
        # Runs on success, on error and on KeyboardInterrupt, so data is never dropped.
        _save_cases(rows)
        # len(rows) also counts jobs from a page that was only partly scraped.
        print("Total successful cases: " + str(len(rows)))

In [None]:
# Scrape up to 50 result pages, starting from whatever page is currently open
# in the attached browser.
main_program(50)