In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time
import pathlib

In [2]:
# DEFAULT_CITIES = {'New York', 'Chicago'}

DEFAULT_CITIES = {'New York', 'Chicago', 'San Francisco', 'Austin', 'Seattle',
                  'Los Angeles', 'Philadelphia', 'Atlanta', 'Dallas',
                  'Pittsburgh', 'Portland', 'Phoenix', 'Denver', 'Houston',
                  'Miami'}

YOUR_CITIES = {'Boston', 'Washington DC', 'St Louis', 'San Diego',
               'San Antonio', 'Columbus', 'Sacramento', 'Charlotte', 'Memphis',
               'Detroit', 'Nashville', 'Jacksonville', 'Indianapolis',
               'Fort Worth', 'Charlotte', 'El Paso', 'Oklahoma City',
               'Las Vegas', 'Louisville', 'Milwaukee', 'Albuquerque', 'Tucson',
               'Kansas City', 'Mesa', 'Colorado Springs', 'Raleigh', 'Omaha',
               'Virginia Beach', 'Minneapolis', 'New Orleans', 'Tampa',
               'San Jose', 'Baltimore', 'Fresno', 'Oakland', 'Tulsa', 'Madison',
               'Arlington', 'Wichita', 'Cleveland', 'Aurora', 'Honolulu',
               'Orlando', 'Anchorage', 'Des Moines', 'Salt Lake City',
               'Lexington', 'Cincinnati', 'Newark', 'Durham', 'Buffalo',
               'Baton Rouge', 'Richmond', 'Boise', 'Birmingham', 'Little Rock',
               'Grand Rapids', 'Worcester', 'Providence', 'Sioux Falls',
               'Jackson', 'Hartford', 'Bridgeport', 'Jersey City', 'Charleston',
               'Billings', 'Fargo', 'Augusta'}

In [3]:
def extract_location(result):
    """extract job location"""
    try:
        location = result.find('span', class_='location').get_text().strip()
        return location
    except:
        return None

In [4]:
def extract_company(result):
    """extract the name of the company"""
    try:
        company = result.find('span', class_='company').get_text().strip()
        return company
    except:
        return None

In [5]:
def extract_title(result):
    """extract the job title"""
    try:
        title = result.find('a', attrs={'data-tn-element': "jobTitle"}).get('title')
        return title
    except:
        return None

In [6]:
def extract_star(result):
    """extract a number (width) that is proportional to the number of stars
    shown for the company"""
    try:
        # the 'style' attribute dictates how many stars are filled with color
        star = result.find('span', class_='ratingsContent').get_text()
        # extract only the number
        star = star.replace('""', '').replace('\n', '')
        return star
    except:
        return None

In [7]:
def extract_salary(result):
    """extract the salary"""
    try:
        salary = result.find('span', class_='salaryText').get_text().strip()
        return salary
    except:
        return None

In [8]:
url = "http://www.indeed.com/jobs"
# params = {'q': 'data scientist', 'radius': '100'}
params = {'radius': '100'}
max_results = 100

In [9]:

company_info_df = pd.DataFrame()
company_result_df = pd.DataFrame()

for city in DEFAULT_CITIES | YOUR_CITIES:
# for city in DEFAULT_CITIES:
    for start in range(0, max_results, 10):
        url_params = params.copy()
        url_params.update({'l': city, 'start': start})
        scraped_data = {
                    'location': [],
                    'company': [],
                    'title': [],
                    'salary' :[],
                    'star': []}
        response = requests.get(url, params=url_params)
        soup = BeautifulSoup(response.text, 'lxml')
        results = soup.find_all('div', class_='result')
        
        time.sleep(0.05)
        print(response.url)
        print('------------------------------------------------------------------------')
        
        for result in results:
            scraped_data['location'].append(extract_location(result))
            scraped_data['company'].append(extract_company(result))
            scraped_data['title'].append(extract_title(result))
            scraped_data['salary'].append(extract_salary(result))
            scraped_data['star'].append(extract_star(result))
        
        result_df = pd.DataFrame(scraped_data)
        
        company_result_df = company_result_df.append(result_df, ignore_index=True)
        
        company_info_df = pd.DataFrame(scraped_data)
        
        company_info_df.to_csv('../csv/company_info.csv', mode='a', header=False, index=False)

https://www.indeed.com/jobs?radius=100&l=Honolulu&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Honolulu&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Honolulu&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Honolulu&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Honolulu&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Honolulu&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Honolulu&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Honolulu&start=70
------------------

https://www.indeed.com/jobs?radius=100&l=Durham&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Durham&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Durham&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Durham&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Durham&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Durham&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Durham&start=80
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Durham&start=90
---------------------------------

https://www.indeed.com/jobs?radius=100&l=Tampa&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Tampa&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Tampa&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Tampa&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Tampa&start=80
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Tampa&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Sioux+Falls&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Sioux+Falls&start=10
------------------------------

https://www.indeed.com/jobs?radius=100&l=Miami&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Miami&start=80
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Miami&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Washington+DC&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Washington+DC&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Washington+DC&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Washington+DC&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Washington+DC&start=40
--

https://www.indeed.com/jobs?radius=100&l=Grand+Rapids&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Charleston&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Charleston&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Charleston&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Charleston&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Charleston&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Charleston&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Charleston&start=60


https://www.indeed.com/jobs?radius=100&l=Oakland&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Oakland&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Oakland&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Oakland&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Oakland&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Oakland&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Oakland&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Oakland&start=80
-------------------------

https://www.indeed.com/jobs?radius=100&l=Austin&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Austin&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Austin&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Austin&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Austin&start=80
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Austin&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Jacksonville&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Jacksonville&start=10
----------------------

https://www.indeed.com/jobs?radius=100&l=Boise&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Boise&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Boise&start=80
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Boise&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=St+Louis&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=St+Louis&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=St+Louis&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=St+Louis&start=30
------------------------------

https://www.indeed.com/jobs?radius=100&l=Worcester&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Nashville&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Nashville&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Nashville&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Nashville&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Nashville&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Nashville&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Nashville&start=60
----------

https://www.indeed.com/jobs?radius=100&l=Milwaukee&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Milwaukee&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Milwaukee&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Milwaukee&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Milwaukee&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Milwaukee&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Milwaukee&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Milwaukee&start=80
---------

https://www.indeed.com/jobs?radius=100&l=Fort+Worth&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Fort+Worth&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Fort+Worth&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Fort+Worth&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Fort+Worth&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Fort+Worth&start=80
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Fort+Worth&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Cleveland&start=0
---

https://www.indeed.com/jobs?radius=100&l=Cincinnati&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Cincinnati&start=70
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Cincinnati&start=80
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Cincinnati&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Portland&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Portland&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Portland&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Portland&start=30
----------

https://www.indeed.com/jobs?radius=100&l=Columbus&start=80
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Columbus&start=90
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Baltimore&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Baltimore&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Baltimore&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Baltimore&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Baltimore&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=Baltimore&start=50
------------

https://www.indeed.com/jobs?radius=100&l=El+Paso&start=0
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=El+Paso&start=10
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=El+Paso&start=20
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=El+Paso&start=30
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=El+Paso&start=40
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=El+Paso&start=50
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=El+Paso&start=60
------------------------------------------------------------------------
https://www.indeed.com/jobs?radius=100&l=El+Paso&start=70
--------------------------

In [10]:
company_result_df.head()


Unnamed: 0,location,company,title,salary,star
0,,U.S. Customs and Border Protection,Border Patrol Agent,,4.1
1,,Ala Moana Dental Care,3 DAYS ON/4 DAYS OFF FT Dental Treatment Coord...,$13 - $18 an hour,
2,,Tardus Wealth Strategies,Personal Finance Coach,$25 - $125 an hour,
3,,Wilson Care Group,Finance Specialist,,
4,"Waimānalo, HI 96795",Palace Entertainment,Sea Life Park - Wildlife Keeper II (HI060),,3.5


In [11]:
company_result_df['star'] = company_result_df['star'].astype(float)
company_rating_df = company_result_df.groupby('company').mean()
company_rating_df = company_rating_df.reset_index()
company_rating_df

Unnamed: 0,company,star
0,#A DELIVERY COMPANY,
1,1-800-GOT-JUNK? Baltimore,
2,1-888-OHIOCOMP,
3,10 Fitness,4.4
4,17th Street Automotive/Exxon Fuels,
...,...,...
5197,sharkies snow,
5198,signature consultants,3.9
5199,south windsor veterinary clinic,
5200,stayAPT Suites,


In [12]:
company_rating_df.to_csv('../csv/company_rating.csv', mode='w', header=True, index=False)