In [16]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, ElementNotInteractableException
import pandas as pd
import time

def _text_or_default(scope, by, locator, default=-1):
    """Return the text of the first element under ``scope`` matching the locator.

    ``default`` is returned when no such element exists, so optional fields
    always end up with an explicit "not found" value instead of raising.
    """
    try:
        return scope.find_element(by, locator).text
    except NoSuchElementException:
        return default


def _dismiss_popup(driver):
    """Click the overlay's "X" button if it is currently present.

    A missing, covered or non-interactable button is not an error: the overlay
    is simply not in the way (or cannot be closed right now), so scraping goes on.
    """
    try:
        driver.find_element(By.XPATH, "/html/body/div[11]/div[2]/div[2]/div[1]/div[1]/button").click()  # clicking to the X.
        print("clicked the cross")
    except (NoSuchElementException, ElementClickInterceptedException, ElementNotInteractableException):
        pass


def _read_card_fields(job_card, max_attempts, retry_delay=5):
    """Read the four mandatory fields of a job card, retrying a bounded number of times.

    Returns a dict with the keys "Company Name", "Location", "Job Title" and
    "Job Description", or ``None`` when any of them is still missing after
    ``max_attempts`` tries (``retry_delay`` seconds are slept between tries).
    """
    attempts = max(1, max_attempts)
    for attempt in range(attempts):
        try:
            return {
                "Company Name": job_card.find_element(By.CLASS_NAME, 'EmployerProfile_compactEmployerName__LE242').text,
                "Location": job_card.find_element(By.CLASS_NAME, 'JobCard_location__rCz3x').text,
                "Job Title": job_card.find_element(By.CLASS_NAME, 'JobCard_jobTitle___7I6y').text,
                "Job Description": job_card.find_element(By.CLASS_NAME, 'JobCard_jobDescriptionSnippet__yWW8q').text,
            }
        except NoSuchElementException:
            if attempt + 1 < attempts:
                time.sleep(retry_delay)
    return None


def _read_company_overview(driver):
    """Read the company overview values shown in the job details pane.

    The values are taken positionally, in document order, for Size, Founded,
    Type of ownership, Industry, Sector and Revenue; positions that are not
    present on the page get the "not found" value -1.
    """
    labels = ("Size", "Founded", "Type of ownership", "Industry", "Sector", "Revenue")
    elements = driver.find_elements(By.XPATH, '//div[@class="JobDetails_overviewItemValue__xn8EF"]')
    values = [element.text for element in elements[:len(labels)]]
    values.extend([-1] * (len(labels) - len(values)))
    return dict(zip(labels, values))


def get_jobs(keyword, num_jobs, verbose, driver_path="msedgedriver.exe", max_attempts=3):
    '''Gathers jobs as a dataframe, scraped from Glassdoor.

    Args:
        keyword: Search term inserted into the Glassdoor job-search URL.
        num_jobs: Number of job records to collect before stopping.
        verbose: When truthy, every collected record is printed as it is scraped.
        driver_path: Path to the Edge WebDriver executable (msedgedriver).
        max_attempts: How many times the mandatory fields of one job card are
            looked up before that card is skipped.

    Returns:
        A pandas DataFrame with one row per collected job. It may hold fewer
        than ``num_jobs`` rows when no further job cards can be loaded.
    '''
    # Initializing the webdriver
    options = Options()

    # Uncomment the line below if you'd like to scrape without a new Edge window every time.
    # options.add_argument('headless')

    service = Service(driver_path)
    driver = webdriver.Edge(service=service, options=options)

    jobs = []
    processed = set()  # URLs of cards that were already handled, so re-listed cards are not scraped twice

    # try/finally guarantees the browser is closed on every exit path,
    # including reaching the target mid-page and unexpected exceptions.
    try:
        driver.set_window_size(1120, 1000)
        char = str(len(keyword))
        url = 'https://www.glassdoor.com/Job/' + keyword + '-jobs-SRCH_KO0,' + char + '.htm'
        driver.get(url)
        time.sleep(5)  # give the page time to load

        while len(jobs) < num_jobs:
            try:
                job_cards = driver.find_elements(By.CLASS_NAME, 'JobCard_jobCardContainer___hKKI')
            except Exception as exc:
                # Retrying immediately would spin forever if the browser is gone,
                # so stop and return what has been collected so far.
                print("Could not list job cards: {}".format(exc))
                break
            print("Found job cards:", len(job_cards))
            print("Progress: {}/{}".format(len(jobs), num_jobs))

            # Going through each job in this page
            for job_card in job_cards:
                if len(jobs) >= num_jobs:
                    break

                _dismiss_popup(driver)

                try:
                    job_url = job_card.find_element(By.CLASS_NAME, 'JobCard_jobTitle___7I6y').get_attribute('href')
                except NoSuchElementException:
                    # A card without a title link cannot be identified; skip it.
                    continue
                if job_url in processed:
                    continue

                try:
                    job_card.click()
                except ElementClickInterceptedException:
                    print("Error clicking on job card")
                    continue
                except ElementNotInteractableException:
                    print("Job card not interactable")
                    continue
                time.sleep(2)

                card_fields = _read_card_fields(job_card, max_attempts)
                # Mark the card as handled whether or not it could be read, so an
                # incomplete card is not retried on every later pass.
                processed.add(job_url)
                if card_fields is None:
                    print("Skipping job card with missing fields: {}".format(job_url))
                    continue

                # Optional fields: -1 is the "not found" value.
                salary_estimate = _text_or_default(job_card, By.CLASS_NAME, 'JobCard_salaryEstimate__arV5J')
                rating = _text_or_default(job_card, By.CLASS_NAME, 'EmployerProfile_ratingContainer__ul0Ef')

                # Printing for debugging
                if verbose:
                    print("Job Title: {}".format(card_fields["Job Title"]))
                    print("Salary Estimate: {}".format(salary_estimate))
                    print("Job Description: {}".format(card_fields["Job Description"][:500]))
                    print("Rating: {}".format(rating))
                    print("Company Name: {}".format(card_fields["Company Name"]))
                    print("Location: {}".format(card_fields["Location"]))

                # Company overview in the details pane; wait for it to render.
                time.sleep(3)
                overview = _read_company_overview(driver)

                if verbose:
                    print("Size: {}".format(overview["Size"]))
                    print("Founded: {}".format(overview["Founded"]))
                    print("Type of Ownership: {}".format(overview["Type of ownership"]))
                    print("Industry: {}".format(overview["Industry"]))
                    print("Sector: {}".format(overview["Sector"]))
                    print("Revenue: {}".format(overview["Revenue"]))
                    print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")

                jobs.append({
                    "Job Title": card_fields["Job Title"],
                    "Salary Estimate": salary_estimate,
                    "Job Description": card_fields["Job Description"],
                    "Rating": rating,
                    "Company Name": card_fields["Company Name"],
                    "Location": card_fields["Location"],
                    "Size": overview["Size"],
                    "Founded": overview["Founded"],
                    "Type of ownership": overview["Type of ownership"],
                    "Industry": overview["Industry"],
                    "Sector": overview["Sector"],
                    "Revenue": overview["Revenue"],
                    "URL": job_url,
                })

            if len(jobs) >= num_jobs:
                break

            # Ask the page for more job cards (presumably a "next"/"show more"
            # button -- confirm against the live page); stop when that fails.
            try:
                driver.find_element(By.XPATH, '(//button[@class="button_Button__MlD2g button-base_Button__knLaX"])[2]').click()
                time.sleep(3)
            except Exception:
                print("Scraping terminated before reaching target number of jobs. Needed {}, got {}.".format(num_jobs, len(jobs)))
                break
    finally:
        driver.quit()

    return pd.DataFrame(jobs)  # This line converts the list of job dicts into a pandas DataFrame.

# Example usage: ask for the search parameters, then run the scraper.
keyword = input("Enter the keyword you want to search")
num_jobs = int(input("Enter the number of jobs you want"))
# bool() of any non-empty string is True, so answers such as "no" or "False"
# used to switch verbose output ON. Treat an empty answer and the usual
# negative answers as False; every other answer still enables verbose output.
verbose_answer = input("Do you want the output to be displayed simultaneously").strip().lower()
verbose = verbose_answer not in ("", "n", "no", "f", "false", "0")
df = get_jobs(keyword, num_jobs, verbose)


Found job cards: 30
Progress: 0/35
Job Title: Data Analyst
Salary Estimate: $75K - $198K (Employer est.)
Job Description: If you have a foundation in online platform trust and safety issues, skills in data analysis and and an empathetic approach to mentoring and collaborating with……
Skills: Data analysis skills, SQL, Machine learning, GitHub, Data science
Rating: 3.8
Company Name: GitHub, Inc.
Location: Remote
Size: 1001 to 5000 Employees
Founded: 2008
Type of Ownership: Subsidiary or Business Segment
Industry: Internet & Web Services
Sector: Information Technology
Revenue: Unknown / Non-Applicable
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Job Title: Data Analytics
Salary Estimate: $38.00 - $45.00 Per Hour (Employer est.)
Job Description: Minimum three years experience in data science applications. Very strong preference for SAS programming experience.…
Rating: 3.9
Company Name: J & S Consulting
Location: Tucker, GA
Size: 51 to 200 Employees
Founded: --
Type of Ownership: Co

In [17]:
df

Unnamed: 0,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Size,Founded,Type of ownership,Industry,Sector,Revenue,URL
0,Data Analyst,$75K - $198K (Employer est.),If you have a foundation in online platform tr...,3.8,"GitHub, Inc.",Remote,1001 to 5000 Employees,2008,Subsidiary or Business Segment,Internet & Web Services,Information Technology,Unknown / Non-Applicable,https://www.glassdoor.com/job-listing/data-ana...
1,Data Analytics,$38.00 - $45.00 Per Hour (Employer est.),Minimum three years experience in data science...,3.9,J & S Consulting,"Tucker, GA",51 to 200 Employees,--,Company - Private,Information Technology Support Services,Information Technology,$5 to $25 million (USD),https://www.glassdoor.com/job-listing/data-ana...
2,Data Science Analyst,$55K - $60K (Employer est.),"Bachelor's degree in Computer Science, Statist...",4.0,TRESUME,"Washington, DC",1 to 50 Employees,--,Company - Public,--,--,Unknown / Non-Applicable,https://www.glassdoor.com/job-listing/data-sci...
3,Research Data Analyst,$98K - $108K (Employer est.),"Select, adapt, and apply appropriate quantitat...",3.8,Calbright College,California,51 to 200 Employees,2019,Company - Public,Colleges & Universities,Education,Unknown / Non-Applicable,https://www.glassdoor.com/job-listing/research...
4,Data Scientist,$66.82 - $80.54 Per Hour (Employer est.),End-to-end data and model pipeline deployment ...,4.4,Boston Technology,"Malvern, PA",51 to 200 Employees,2004,Company - Private,Information Technology Support Services,Information Technology,$5 to $25 million (USD),https://www.glassdoor.com/job-listing/data-sci...
5,Data Scientist,$120K - $130K (Employer est.),Data science: 1 year (Preferred). Expertise in...,4.0,element technologies,Remote,51 to 200 Employees,2000,Company - Private,Information Technology Support Services,Information Technology,Unknown / Non-Applicable,https://www.glassdoor.com/job-listing/data-sci...
6,Data Engineer,$112K - $140K (Employer est.),"A minimum of a BS degree in computer science, ...",2.3,Verana Health,Remote,51 to 200 Employees,2018,Company - Private,Enterprise Software & Network Solutions,Information Technology,Unknown / Non-Applicable,https://www.glassdoor.com/job-listing/data-eng...
7,Data Engineer,$105K - $160K (Employer est.),Communicates technical concepts to non-technic...,3.9,Costco Wholesale,"Dallas, TX",10000+ Employees,1976,Company - Public,General Merchandise & Superstores,Retail & Wholesale,Unknown / Non-Applicable,https://www.glassdoor.com/job-listing/data-eng...
8,Data Science Analyst,$55K - $65K (Employer est.),"Bachelor's degree in Computer Science, Statist...",4.0,TRESUME,"Ashburn, VA",1 to 50 Employees,--,Company - Public,--,--,Unknown / Non-Applicable,https://www.glassdoor.com/job-listing/data-sci...
9,Data Engineer,$63K - $105K (Glassdoor est.),Experience with a wide range of data warehousi...,4.4,DotCMS,"Miami, FL",1 to 50 Employees,--,Company - Private,Research & Development,Management & Consulting,Unknown / Non-Applicable,https://www.glassdoor.com/job-listing/data-eng...


In [None]:
df.to_csv('Data Science Jobs updated.csv')

Unnamed: 0,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Size,Founded,Type of ownership,Industry,Sector,Revenue
0,Data Analytics,$38.00 - $45.00 Per Hour (Employer est.),Minimum three years experience in data science...,3.9,J & S Consulting,"Tucker, GA",51 to 200 Employees,--,Company - Private,Information Technology Support Services,Information Technology,$5 to $25 million (USD)
1,DATA SCIENCE: SAP Next Talent - Rotational Pro...,,,4.2,SAP,,10000+ Employees,1972,Company - Public,Enterprise Software & Network Solutions,Information Technology,$10+ billion (USD)
2,Data Engineer,$105K - $160K (Employer est.),Communicates technical concepts to non-technic...,3.9,Costco Wholesale,"Dallas, TX",10000+ Employees,1976,Company - Public,General Merchandise & Superstores,Retail & Wholesale,Unknown / Non-Applicable
3,Data Scientist (Hourly Remote),$45.00 - $80.00 Per Hour (Employer est.),They will be encouraged to propose innovative ...,4.1,ARES Corporation,United States,501 to 1000 Employees,1992,Company - Private,Aerospace & Defense,Aerospace & Defense,$100 to $500 million (USD)
4,Data Scientist,$120K - $130K (Employer est.),Data science: 1 year (Preferred). Expertise in...,4.0,element technologies,Remote,51 to 200 Employees,2000,Company - Private,Information Technology Support Services,Information Technology,Unknown / Non-Applicable
5,"Manager, Data Science",$154K - $220K (Employer est.),The engineering organization at ServiceTitan i...,3.4,ServiceTitan,Remote,1001 to 5000 Employees,2013,Company - Private,Software Development,Information Technology,Unknown / Non-Applicable
6,Data Science Manager,$96K - $156K (Glassdoor est.),Encourages innovation within the team to explo...,4.0,Boston Children's Hospital,"Boston, MA",5001 to 10000 Employees,1869,Nonprofit Organization,Health Care Services & Hospitals,Healthcare,Unknown / Non-Applicable
7,Data Science,$117K - $229K (Employer est.),"Doctorate in Data Science, Mathematics, Statis...",4.2,Microsoft,"Redmond, WA",10000+ Employees,1975,Company - Public,Computer Hardware Development,Information Technology,$10+ billion (USD)
8,Data Scientist,$92K - $151K (Glassdoor est.),Experience with data engineering and data scie...,4.0,RADAR,"San Jose, CA",501 to 1000 Employees,--,Self-employed,Civic & Social Services,Nonprofit & NGO,$25 to $100 million (USD)
