# Glassdoor U.S. Company Job Listing Scrapper

### Introduction


### Dataset


### Import Libraries

In [5]:
import sys
import os
import re
import time
import glob
import requests
import warnings
import numpy as np
import pandas as pd

sys.path.insert(0, "../utils")

from driver_setup import driver_setup
from save_file import save_file
from tqdm.auto import tqdm # works for both terminal and notebook
from IPython.display import display

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException, WebDriverException, StaleElementReferenceException 
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select

warnings.filterwarnings('ignore')

### Scrape Jobs

In [7]:
def get_jobs(function_id, num_jobs=30, verbose=False, slp_time=5):

    url='https://www.glassdoor.com/Job/remote-jobs-SRCH_KO0,6.htm?jobType=fulltime&sgocId={}&employerSizes=4&remoteWorkType=1'.format(function_id)
    
    driver = driver_setup(url)
    
    jobs = []
    jobs_count = len(jobs)
    
    filename ='{}_listings'.format(function_id)
    
    
    time.sleep(slp_time)
    
    
    try:
        total_listings = driver.find_element(By.XPATH, "//p[@data-test='jobsCount']").text.split()
            
        if len(total_listings) == 0:
            total_listings = int(driver.find_element(By.XPATH, "//h1[@data-test='jobCount-H1title']").text.split()[0])
        else:
            total_listings = int(total_listings[0])
            
    except NoSuchElementException:
            total_listings = np.nan 
            
    
    if num_jobs > total_listings:
        
        print("The number of jobs to be scrapped: {} exceeds the number of listings: {}".format(num_jobs, total_listings))
        
        num_jobs = total_listings
        
        print("The number of jobs has been updated to reflect the number of listings")
        print("")
    
    
    print("Total number of job listings: {}, number of jobs to be scraped: {}".format(total_listings, num_jobs))
    print("")
    
    pbar = tqdm(total=num_jobs) # Init progress bar

    while jobs_count < num_jobs:
        
        time.sleep(slp_time)
        time.sleep(.1)
        
        job_listings = driver.find_elements(By.CLASS_NAME, "react-job-listing")
        
        for listing in job_listings:
            
            pbar.update(1)
            
            if jobs_count >= num_jobs:
                print("Scraping completed, scraped {} of {} jobs".format(jobs_count, num_jobs))
                break
            
            try:
                driver.find_element(By.XPATH, "//div[@class='qual_x_close']").click()  #In case survey pops up. 
            except NoSuchElementException:
                pass
            
            listing.click()
            time.sleep(2)
            
            try:
                driver.find_element(By.XPATH, '//button[text()="Retry your search"]').click()
                time.sleep(2)
                
                try:
                    driver.find_element(By.XPATH, "//div[@class='qual_x_close']").click()  #In case survey pops up. 
                except NoSuchElementException:
                    pass
                               
            except NoSuchElementException:
                pass 
            
            try:
                driver.find_element(By.XPATH, "//div[@class='qual_x_close']").click()  #In case survey pops up. 
            except NoSuchElementException:
                pass

            try:
                driver.find_element(By.XPATH, "//div[@class='qual_x_close']").click()  #In case survey pops up. 
            except NoSuchElementException:
                pass

            try: 
                driver.find_element(By.XPATH, "//span[@alt='Close']").click()  #clicking to the X.   
            except NoSuchElementException:
                pass

            
            collected_successfully = False
            
            while not collected_successfully: 
                try: 
                    job_title = driver.find_element(By.XPATH,'//div[@class="css-1vg6q84 e1tk4kwz4"]').text.strip()
                    location = driver.find_element(By.XPATH,'//div[@class="css-56kyx5 e1tk4kwz5"]').text.strip()
                    job_description = driver.find_element(By.XPATH,'//div[@class="jobDescriptionContent desc"]').text
                    collected_successfully = True        
                except NoSuchElementException:
                    time.sleep(slp_time)
                        
          
            try: # sometimes there are listings that are posted without a company name
                company_name = driver.find_element(By.XPATH,'(//div[@class="css-87uc0g e1tk4kwz1"])').text.strip() #returns any element which is direct parent.
            except:
                company_name = np.nan 
            
            try:
                salary_range = driver.find_element(By.XPATH, '//div[@class="css-w04er4 e1tk4kwz6"]/div[4]/span').text.strip()
            except NoSuchElementException:  
                salary_range = np.nan

            try:
                salary_avg = driver.find_element(By.XPATH, '//div[@class="css-1bluz6i e2u4hf13"]').text.strip()
                salary_avg = salary_avg.split()[0]
            except NoSuchElementException:
                salary_avg = np.nan

            
            # Search for Company Container

            try:
                driver.find_element(By.ID, 'CompanyContainer')
                
                try: 
                    size = driver.find_element(By.XPATH, 
                                               '(//div[@class="d-flex justify-content-start css-rmzuhb e1pvx6aw0"])[1]//span[2]').text.strip()
    
                except NoSuchElementException:
                    size = np.nan

                try:
                    industry = driver.find_element(By.XPATH, 
                                               '(//div[@class="d-flex justify-content-start css-rmzuhb e1pvx6aw0"])[4]//span[2]').text.strip()
                except NoSuchElementException:
                    industry = np.nan

                try:
                    sector = driver.find_element(By.XPATH, 
                                               '(//div[@class="d-flex justify-content-start css-rmzuhb e1pvx6aw0"])[5]//span[2]').text.strip()
                except NoSuchElementException:
                    sector = np.nan

                try:
                    revenue = driver.find_element(By.XPATH, 
                                               '(//div[@class="d-flex justify-content-start css-rmzuhb e1pvx6aw0"])[6]//span[2]').text.strip()
                except NoSuchElementException:
                    revenue = np.nan


            except NoSuchElementException:
                size = np.nan
                industry = np.nan
                sector = np.nan
                revenue = np.nan

            
            # Search for Reviews Container
            try:
                driver.find_element(By.XPATH, '//div[@data-test="company-ratings"]')

                try: 
                    rating = float(driver.find_element(By.XPATH, '//div[@class="mr-sm css-ey2fjr e1pr2f4f2"]').text.strip())
                except NoSuchElementException:
                    rating = np.nan

                try:
                    recommend = driver.find_element(By.XPATH, '(//div[@class="d-flex top css-1efnr4n e1o78bat2"])[1]//div[1]').text.strip()
                except NoSuchElementException: # 
                    recommend = np.nan

                try:
                    ceo = driver.find_element(By.XPATH, '//div[@class="css-ztsow4 ceoApprove"]').text.strip()
                except NoSuchElementException:
                    ceo = np.nan

                try:
                    opportunities = float(driver.find_element(By.XPATH, '//ul[@class="css-38kpu8 erz4gkm0"]/span[3]').text.strip())        
                except NoSuchElementException:
                    opportunities = np.nan
                try:
                    comp_benefits = float(driver.find_element(By.XPATH, '//ul[@class="css-38kpu8 erz4gkm0"]/span[6]').text.strip())        
                except NoSuchElementException:
                    comp_benefits = np.nan

                try:
                    culture = float(driver.find_element(By.XPATH, '//ul[@class="css-38kpu8 erz4gkm0"]/span[9]').text.strip())        
                except NoSuchElementException:
                    culture = np.nan

                try:
                    management = float(driver.find_element(By.XPATH, '//ul[@class="css-38kpu8 erz4gkm0"]/span[12]').text.strip())        
                except NoSuchElementException:
                    management = np.nan

                try:
                    worklife = float(driver.find_element(By.XPATH, '//ul[@class="css-38kpu8 erz4gkm0"]/span[15]').text.strip())        
                except NoSuchElementException:
                    worklife = np.nan


            except NoSuchElementException:
                rating = np.nan
                recommend = np.nan
                ceo = np.nan
                opportunities = np.nan
                comp_benefits = np.nan
                culture = np.nan
                management = np.nan
                worklife = np.nan


            # Get Employee Reviews
            try: 
                driver.find_element(By.ID, 'ReviewsContainer')

                try:
                    
                    pro_reviews = driver.find_element(By.XPATH, 
                                                      '(//div[@class="css-r14ud0 e1vn3ovn4"])[1]//div') # check for pros

                    pro_reviews = pro_reviews.find_elements(By.XPATH, "following-sibling::p")
                    pros = [review.text for review in pro_reviews]  

                except NoSuchElementException: 
                    pros = np.nan

                try:
                    con_reviews = driver.find_element(By.XPATH, 
                                                      '(//div[@class="css-r14ud0 e1vn3ovn4"])[2]//div')

                    con_reviews = con_reviews.find_elements(By.XPATH, "following-sibling::p")
                    cons = [review.text for review in con_reviews]    

                except NoSuchElementException:
                    cons = np.nan
                
                try: 
                    reviews_url = driver.find_element(By.XPATH, '//a[@class="seeAll pb-0 pt-std css-922fyb euq8tqg0"]').get_attribute('href')
                except NoSuchElementException:
                    reviews_url = np.nan

            except NoSuchElementException: 
                pros = np.nan
                cons = np.nan
                reviews_url = np.nan

            # Get Benefits Rating and Reviews
            try: 
                driver.find_element(By.CLASS_NAME, 'p-std')

                try: 
                    benefits_rating = float(driver.find_element(By.XPATH, '//div[@class="ratingNum mr-sm"]').text.strip())

                except NoSuchElementException: 
                    benefits_rating = np.nan
                
                try: 
                    benefits_url = driver.find_element(By.XPATH, '//a[@class="css-zuof7g mt-0 p-std d-flex justify-content-center"]').get_attribute('href')
                except NoSuchElementException:
                    benefits_url = np.nan

            except NoSuchElementException: 
                benefits_rating = np.nan
                

            jobs.append({"company_name": company_name,
                        "job_title": job_title, 
                        "location": location,
                        "job_desc": job_description, 
                        "salary_est": salary_range,
                        "avg_salary": salary_avg,
                        "size": size,
                        "industry": industry,
                        "sector": sector,
                        "function_id": function_id,
                        "revenue": revenue,
                        "rating": rating,
                        "recommend": recommend,
                        "ceo": ceo,
                        "benefits": benefits_rating,
                        "opportunities": opportunities,
                        "company_benefits": comp_benefits,
                        "culture": culture,
                        "management": management,
                        "worklife": worklife,
                        "pros": pros,
                        "cons": cons,
                        "num_listings": total_listings,
                        "reviews_url": reviews_url,
                        "benefits_url": benefits_url})
            
        
            jobs_count = len(jobs)
            
            if not verbose:
                print("Scraped {} out of {} job listings".format(jobs_count, num_jobs), end='\r')
            
            # print for debugging purposes
            if verbose:
                print("Company Name: {}".format(company_name))
                print("Job Title: {}".format(job_title))
                print("Location: {}".format(location))
                print("Job Description: {}".format(job_description[:500]))
                print("Salary Estimate: {}".format(salary_range))
                print("Avg Salary: {}".format(salary_avg))
                print("Size: {}".format(size))
                print("Industry: {}".format(industry))
                print("Sector: {}".format(sector))
                print("Job Function ID: {}".format(function_id))
                print("Revenue: {}".format(revenue))
                print("Rating: {}".format(rating))
                print("Recommend To Friend: {}".format(recommend))
                print("Approve of CEO: {}".format(ceo))
                print("Benefits Rating: {}".format(benefits_rating))
                print("Career Opportunities: {}".format(opportunities))
                print("Comp & Benefits: {}".format(comp_benefits))
                print("Culture & Values: {}".format(culture))
                print("Senior Managment: {}".format(management))
                print("Work Life Balance: {}".format(worklife))
                print("Pros: ", pros)
                print("Cons: ", cons)
                print("Number of Listings: {}".format(total_listings))
                print("Link to Company Reviews: {}".format(reviews_url))
                print("Link to Company Benefits: {}".format(benefits_url))
                print("")
                print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
                print("")

        # clicking on the "next page button"

        try:
            driver.find_element(By.XPATH, '//button[@data-test="pagination-next"]').click()

        except NoSuchElementException:
            print("Scraping completed, scraped {}, out of {} job listings.".format(jobs_count, num_jobs))
            break
 
    
    jobs = pd.DataFrame(jobs)
    
    if verbose:
        display(jobs.head())
    
    save_file(jobs, filename=filename, directory_path='../data/listings')
    
    print("")
    
    pbar.close()
    driver.close()
    return jobs

In [8]:
df = get_jobs(function_id='1003', num_jobs=400, verbose=False)

Total number of job listings: 1045, number of jobs to be scraped: 400



  0%|          | 0/400 [00:00<?, ?it/s]

Scraping completed, scraped 400 of 400 jobs
Saving File...
File Saved: 1003_listings_2022-11-30.csv
Saved In: ../data/1003_listings_2022-11-30.csv



In [22]:
df.head()

Unnamed: 0,company_name,job_title,location,job_desc,salary_est,avg_salary,size,industry,sector,function_id,...,opportunities,company_benefits,culture,management,worklife,pros,cons,num_listings,reviews_url,benefits_url
0,XCMG North America Corporation\n4.1,Field Service Representative,Remote,Job Title: Field Service Representatives\nThe ...,Employer Provided Salary:$31.00 Per Hour,$31.00,1001 to 5000 Employees,Manufacturing,Unknown / Non-Applicable,1001,...,3.7,3.4,3.0,3.2,3.1,"[""Good admissions amd good people"" (in 5 revie...","[""Good Morning Smarty the link below"" (in 3 re...",313,https://www.glassdoor.com/Reviews/XCMG-Reviews...,https://www.glassdoor.com/Benefits/XCMG-Benefi...
1,ApolloMD\n3.5,Practice Coordinator- Remote,Remote,Job Description: Works collaboratively with pr...,Employer Provided Salary:$18.00 - $20.00 Per Hour,$19.00,1001 to 5000 Employees,Health Care Services & Hospitals,Healthcare,1001,...,3.1,2.8,3.3,3.3,3.4,[No Pros have been reported by the Glassdoor c...,"[""No benefits for this position."" (in 1 reviews)]",313,https://www.glassdoor.com/Reviews/ApolloMD-Rev...,https://www.glassdoor.com/Benefits/ApolloMD-Be...
2,Ascensus\n3.9,Account Services Representative (West Coast) -...,California,"Newport, an Ascensus company, helps employers ...",,,1001 to 5000 Employees,Financial Transaction Processing,Financial Services,1001,...,3.8,3.7,4.2,4.0,4.2,"[""Good training and decent benefits."" (in 3 re...","[""Management on some teams is horrible"" (in 1 ...",313,https://www.glassdoor.com/Reviews/Ascensus-Rev...,https://www.glassdoor.com/Benefits/Ascensus-Be...
3,GovCIO\n3.1,VistA Business Coordinator (Remote),"Fairfax, VA",Company Overview\n\nGovCIO is a team of transf...,$52K - $75K (Glassdoor est.),"$62,563",1001 to 5000 Employees,Information Technology,Unknown / Non-Applicable,1001,...,2.9,2.9,2.9,2.6,3.4,"[""• Salary and Benefits are competitive"" (in 1...","[""Benefits, some of the executives are not com...",313,https://www.glassdoor.com/Reviews/GovCIO-Revie...,https://www.glassdoor.com/Benefits/GovCIO-Bene...
4,Symetra\n4.0,Enterprise Data Steward - Remote,"Bellevue, WA",Symetra has an exciting opportunity to join ou...,Employer Provided Salary:$87K - $145K,"$115,750",1001 to 5000 Employees,Insurance Carriers,Insurance,1001,...,3.8,3.9,4.2,3.7,4.3,"[""Good work life balance"" (in 3 reviews), ""Gre...","[""Company culture may not apply to all manager...",313,https://www.glassdoor.com/Reviews/Symetra-Revi...,https://www.glassdoor.com/Benefits/Symetra-Ben...


### Scrape Job Listings

In [3]:
ids = ['1001','1002','1003','1004','1005','1006',
       '1007', '1008', '1009', '1010', '1011', '1012', 
       '1013', '1014', '1015', '1016', '1017', '1018', 
       '1019','1020','1021','1022']

In [7]:
for id in ids:
    get_jobs(function_id=id, num_jobs=500, verbose=False)



[WDM] - Downloading:   0%|                          | 0.00/8.61M [00:00<?, ?B/s][A[A

[WDM] - Downloading:   4%|▋                 | 344k/8.61M [00:00<00:02, 3.51MB/s][A[A

[WDM] - Downloading:  29%|████▉            | 2.53M/8.61M [00:00<00:00, 15.0MB/s][A[A

[WDM] - Downloading: 100%|█████████████████| 8.61M/8.61M [00:00<00:00, 29.6MB/s][A[A


Total number of job listings: 791, number of jobs to be scraped: 500



  0%|          | 0/500 [00:00<?, ?it/s]

Scraping completed, scraped 500 of 500 jobs
Saving File...
File Saved: 1019_listings_2022-11-30.csv
Saved In: ../data/1019_listings_2022-11-30.csv

The number of jobs to be scrapped: 500 exceeds the number of listings: 50
The number of jobs has been updated to reflect the number of listings

Total number of job listings: 50, number of jobs to be scraped: 50



  0%|          | 0/50 [00:00<?, ?it/s]

Scraping completed, scraped 50 of 50 jobs
Saving File...
File Saved: 1020_listings_2022-11-30.csv
Saved In: ../data/1020_listings_2022-11-30.csv

Total number of job listings: 1192, number of jobs to be scraped: 500



  0%|          | 0/500 [00:00<?, ?it/s]

Scraping completed, scraped 500 of 500 jobs
Saving File...
File Saved: 1021_listings_2022-11-30.csv
Saved In: ../data/1021_listings_2022-11-30.csv

The number of jobs to be scrapped: 500 exceeds the number of listings: 196
The number of jobs has been updated to reflect the number of listings

Total number of job listings: 196, number of jobs to be scraped: 196



  0%|          | 0/196 [00:00<?, ?it/s]

Scraping completed, scraped 196 of 196 jobs
Saving File...
File Saved: 1022_listings_2022-11-30.csv
Saved In: ../data/1022_listings_2022-11-30.csv



In [9]:
df.head()

Unnamed: 0,company_name,job_title,location,job_desc,salary_est,avg_salary,size,industry,sector,function_id,...,opportunities,company_benefits,culture,management,worklife,pros,cons,num_listings,reviews_url,benefits_url
0,WCG\n3.4,Director Human Capital Management (Remote),Township of Hamilton,Description and Requirements\nJOB SUMMARY: As ...,$124K - $200K (Glassdoor est.),"$157,452",1001 to 5000 Employees,Pharmaceutical & Biotechnology,$100 to $500 million (USD),1003,...,3.0,3.6,3.2,2.9,2.9,"[""Decent pay."" (in 3 reviews), ""Good amount of...","[""No work life balance whatsoever."" (in 5 revi...",1045,https://www.glassdoor.com/Reviews/WCG-WIRB-Cop...,https://www.glassdoor.com/Benefits/WCG-WIRB-Co...
1,Ingevity\n4.2,Business Systems Analyst Supply Chain Logistic...,"North Charleston, SC",Job Family Group:\nInformation Technology\nAre...,$67K - $100K (Glassdoor est.),"$81,968",1001 to 5000 Employees,Chemical Manufacturing,Manufacturing,1003,...,3.7,4.1,4.0,4.1,4.2,"[""Money is kind of good"" (in 1 reviews)]",[No Cons have been reported by the Glassdoor c...,1045,https://www.glassdoor.com/Reviews/Ingevity-Rev...,https://www.glassdoor.com/Benefits/Ingevity-Be...
2,"National Debt Relief, LLC.\n4.2",Debt Negotiator,"New York, NY",About National Debt Relief:\nNational Debt Rel...,Employer Provided Salary:$19.81 Per Hour,$19.81,1001 to 5000 Employees,Banking & Lending,Financial Services,1003,...,3.8,4.0,3.8,3.7,3.8,"[""good pay."" (in 18 reviews), ""Management is f...","[""Management constantly plays the blame game f...",1045,https://www.glassdoor.com/Reviews/National-Deb...,https://www.glassdoor.com/Benefits/National-De...
3,GovCIO\n3.0,Subcontracts Administrator II (Remote),"Fairfax, VA",GovCIO is a team of transformers-people who ar...,$46K - $65K (Glassdoor est.),"$54,354",1001 to 5000 Employees,Information Technology,Unknown / Non-Applicable,1003,...,2.9,2.8,2.8,2.7,3.2,"[""Strong leadership, decent benefits, growing ...","[""The senior management team, unprofessional b...",1045,https://www.glassdoor.com/Reviews/GovCIO-Revie...,https://www.glassdoor.com/Benefits/GovCIO-Bene...
4,KAR Global\n3.5,"Account Coordinator, Allied Team (Remote | M-F...",Remote,"Account Coordinator, Allied Team\nRemote | WFH...",Employer Provided Salary:$17.00 Per Hour,$17.00,1001 to 5000 Employees,Enterprise Software & Network Solutions,Information Technology,1003,...,3.3,3.5,3.4,3.4,3.8,"[""Decent Pay &amp; benefits, etc"" (in 4 review...",[No Cons have been reported by the Glassdoor c...,1045,https://www.glassdoor.com/Reviews/KAR-Global-R...,https://www.glassdoor.com/Benefits/KAR-Global-...


### Create and Save Final Dataset

#### Combine all CSV files in "data" folder

In [6]:
all_cvs = glob.glob(os.path.join('../data/listings', "*.csv"))

df = pd.concat((pd.read_csv(f) for f in all_cvs), ignore_index=True)
df

Unnamed: 0,company_name,job_title,location,job_desc,salary_est,avg_salary,size,industry,sector,function_id,...,opportunities,company_benefits,culture,management,worklife,pros,cons,num_listings,reviews_url,benefits_url
0,WCG\n3.4,Director Human Capital Management (Remote),Township of Hamilton,Description and Requirements\nJOB SUMMARY: As ...,$124K - $200K (Glassdoor est.),"$157,452",1001 to 5000 Employees,Pharmaceutical & Biotechnology,$100 to $500 million (USD),1003,...,3.0,3.6,3.2,2.9,2.9,"['""Decent pay."" (in 3 reviews)', '""Good amount...","['""No work life balance whatsoever."" (in 5 rev...",1045,https://www.glassdoor.com/Reviews/WCG-WIRB-Cop...,https://www.glassdoor.com/Benefits/WCG-WIRB-Co...
1,Ingevity\n4.2,Business Systems Analyst Supply Chain Logistic...,"North Charleston, SC",Job Family Group:\nInformation Technology\nAre...,$67K - $100K (Glassdoor est.),"$81,968",1001 to 5000 Employees,Chemical Manufacturing,Manufacturing,1003,...,3.7,4.1,4.0,4.1,4.2,"['""Money is kind of good"" (in 1 reviews)']",['No Cons have been reported by the Glassdoor ...,1045,https://www.glassdoor.com/Reviews/Ingevity-Rev...,https://www.glassdoor.com/Benefits/Ingevity-Be...
2,"National Debt Relief, LLC.\n4.2",Debt Negotiator,"New York, NY",About National Debt Relief:\nNational Debt Rel...,Employer Provided Salary:$19.81 Per Hour,$19.81,1001 to 5000 Employees,Banking & Lending,Financial Services,1003,...,3.8,4.0,3.8,3.7,3.8,"['""good pay."" (in 18 reviews)', '""Management i...","['""Management constantly plays the blame game ...",1045,https://www.glassdoor.com/Reviews/National-Deb...,https://www.glassdoor.com/Benefits/National-De...
3,GovCIO\n3.0,Subcontracts Administrator II (Remote),"Fairfax, VA",GovCIO is a team of transformers-people who ar...,$46K - $65K (Glassdoor est.),"$54,354",1001 to 5000 Employees,Information Technology,Unknown / Non-Applicable,1003,...,2.9,2.8,2.8,2.7,3.2,"['""Strong leadership, decent benefits, growing...","['""The senior management team, unprofessional ...",1045,https://www.glassdoor.com/Reviews/GovCIO-Revie...,https://www.glassdoor.com/Benefits/GovCIO-Bene...
4,KAR Global\n3.5,"Account Coordinator, Allied Team (Remote | M-F...",Remote,"Account Coordinator, Allied Team\nRemote | WFH...",Employer Provided Salary:$17.00 Per Hour,$17.00,1001 to 5000 Employees,Enterprise Software & Network Solutions,Information Technology,1003,...,3.3,3.5,3.4,3.4,3.8,"['""Decent Pay &amp; benefits, etc"" (in 4 revie...",['No Cons have been reported by the Glassdoor ...,1045,https://www.glassdoor.com/Reviews/KAR-Global-R...,https://www.glassdoor.com/Benefits/KAR-Global-...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7842,Holland America Line\n3.8,"Manager, Supply Chain Center of Excellence - R...","Santa Clarita, CA","Job Description\n\nPrincess Cruises, Holland A...",$71K - $119K (Glassdoor est.),"$92,074",1001 to 5000 Employees,Travel Agencies,Hotels & Travel Accommodation,1016,...,3.5,3.4,3.5,3.1,3.3,"['""Great travel benefits with a lot of room fo...","['""the food in crewmess is horrible"" (in 4 rev...",610,https://www.glassdoor.com/Reviews/Holland-Amer...,https://www.glassdoor.com/Benefits/Holland-Ame...
7843,Holland America Line Inc\n3.8,Inventory Management Specialist (Remote),"Seattle, WA","Princess Cruises, Holland America Line, Seabou...",$42K - $62K (Glassdoor est.),"$51,337",1001 to 5000 Employees,Travel Agencies,Hotels & Travel Accommodation,1016,...,3.5,3.4,3.5,3.1,3.3,"['""Great travel benefits with a lot of room fo...","['""the food in crewmess is horrible"" (in 4 rev...",610,https://www.glassdoor.com/Reviews/Holland-Amer...,https://www.glassdoor.com/Benefits/Holland-Ame...
7844,Hinge Health\n3.5,"VP, Strategic Partnerships (Remote)",United States,One in two people experience debilitating back...,Employer Provided Salary:$192K - $348K,"$269,650",1001 to 5000 Employees,Software Development,Information Technology,1016,...,3.5,3.6,3.7,3.5,3.8,"['""Decent pay and benefits."" (in 3 reviews)', ...",['No Cons have been reported by the Glassdoor ...,610,https://www.glassdoor.com/Reviews/Hinge-Health...,https://www.glassdoor.com/Benefits/Hinge-Healt...
7845,American Specialty Health Incorporated\n3.6,State Compliance Analyst I (Remote),Remote,American Specialty Health is seeking a State C...,,,1001 to 5000 Employees,Health Care Services & Hospitals,Healthcare,1016,...,3.5,2.9,4.0,3.6,3.8,"['""It was a great opportunity to learn."" (in 2...","['""Management was just okay no real issues."" (...",610,https://www.glassdoor.com/Reviews/American-Spe...,https://www.glassdoor.com/Benefits/American-Sp...


In [8]:
df.to_csv('../data/final.csv', index=False)