In [1]:
from bs4 import BeautifulSoup  # For HTML parsing
import datetime
from tinydb import TinyDB, Query
import urllib3   # Website connections
import xlsxwriter
import requests
import pandas as pd  # For converting results to a dataframe and bar chart plots
import time
import lxml
import re

In [2]:
url = 'https://au.indeed.com/jobs?q=data+scientist&l=Sydney+NSW'
job = requests.get(url)

soup = BeautifulSoup(job.content)
print(soup.prettify())

<!DOCTYPE html>
<html dir="ltr" lang="en">
 <head>
  <meta content="text/html;charset=utf-8" http-equiv="content-type"/>
  <script src="/s/ab2a39a/en_AU.js" type="text/javascript">
  </script>
  <link href="/s/970d98c/jobsearch_all.css" rel="stylesheet" type="text/css"/>
  <link href="http://au.indeed.com/rss?q=data+scientist&amp;l=Sydney+NSW" rel="alternate" title="Data Scientist Jobs in Sydney NSW" type="application/rss+xml"/>
  <link href="/m/jobs?q=data+scientist&amp;l=Sydney+NSW" media="only screen and (max-width: 640px)" rel="alternate"/>
  <link href="/m/jobs?q=data+scientist&amp;l=Sydney+NSW" media="handheld" rel="alternate"/>
  <script type="text/javascript">
   if (typeof window['closureReadyCallbacks'] == 'undefined') {
        window['closureReadyCallbacks'] = [];
    }

    function call_when_jsall_loaded(cb) {
        if (window['closureReady']) {
            cb();
        } else {
            window['closureReadyCallbacks'].push(cb);
        }
    }
  </script>
  <meta c



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


 ### Function to get job title information 
 - pulling out all <div> tags with class including 'row'
 - identifying <a> tags with attribute 'data-tn-element':'jobTitle'
 - $for each <a> tags, find the value of attribute 'title'

In [3]:
def extract_job_title_from_result(soup):
    jobs = []
    for div in soup.findAll(name='div', attrs={'class':'row'}):
            for a in div.findAll(name='a', attrs={'data-tn-element':'jobTitle'}):
                jobs.append(a['title'])
    return(jobs)

extract_job_title_from_result(soup)

['Data Scientist - Analytics',
 'Data Scientist',
 'Lead Data Scientist',
 'Junior Data Scientist/ Machine Learning Engineer',
 'Junior Data Scientist',
 'Data Scientist - Complaint Services, Professional Standards Command, Redfern - NSW Police Force',
 'Junior Data Scientist',
 'Customer Facing Data Scientist',
 'Data Scientist - Award-winning FinTech',
 'Data Scientist',
 'Data Scientist',
 'Data Scientist',
 'Ikon | Data Scientist',
 'Data Scientist',
 'Team Assistant with AI company']

### Function to get Company Name information
- pulling all <span> tags with 'class:'company' and 'class':'result-link-source'

In [4]:
def extract_company_from_result(soup):
    companies = []
    for div in soup.findAll(name='div', attrs={'class':'row'}):
        company = div.findAll(name='span', attrs={'class':'company'})
        if len(company) > 0:
            for b in company:
                companies.append(b.text.strip())
        else:
            sec_try = div.findAll(name='span', attrs={'class':'result-link-source'})
            for span in sec_try:
                companies.append(span.text.strip())
    return(companies)
extract_company_from_result(soup)

['Domain Group',
 'ANZ Banking Group',
 'Domain Group',
 'Intellify',
 'The Eclair Group',
 'Investigations & Counter Terrorism',
 'Sirius People',
 'DataRobot',
 'Datasii',
 'Freelancer.com',
 'Freshwater Group',
 'CPT Global',
 'Ikon',
 'News Corp Australia',
 'HIVERY']

### Function to get Location information

- pulling all <span> tags with 'class:'location' use forloop on all span tags for text

In [5]:
def extract_location_from_result(soup):
    locations = []
    spans = soup.findAll('span', attrs={'class':'location'})
    for span in spans:
        locations.append(span.text)
    return(locations)

extract_location_from_result(soup)

['Sydney NSW',
 'Sydney NSW',
 'Sydney NSW',
 'Sydney Central Business District NSW',
 'Sydney NSW',
 'Sydney NSW',
 'Sydney NSW',
 'Sydney NSW',
 'Sydney NSW',
 'Sydney NSW']

### Function to get Salary information

- pulling span tags with class : no-wrap to get the salary

In [6]:
def extract_salary_from_result(soup):
    salary = ['none']
    spans = soup.findAll('span', attrs={'class':'no-wrap'})
    for span in spans:
        salary.append(span.text.strip())
    return salary

extract_salary_from_result(soup)

['none',
 'relevance -\n            date',
 '$80,000 - $100,000 a year',
 '$70,000 - $90,000 a year',
 '$100,000 - $115,000 a year',
 '$65,000 - $110,000 a year',
 '$60,000 - $85,000 a year']

### Function to get Job Summary Description

In [7]:
def extract_summary_from_result(soup):
    summaries = []
    spans = soup.findAll('span', attrs={'class':'summary'})
    for span in spans:
        summaries.append(span.text.strip())
    return(summaries)

extract_summary_from_result(soup)

['As a Data Scientist in our team, you will leverage your deep experience in statistics, machine learning and data analysis to drive significant improvements to...',
 'As the Data Scientist, you are to source data from multiple data stores, and build advanced data models to solve complex problems and generate sophisticated...',
 'We are looking for a lead data scientist to help sales, product and business leadership make decisions with data, give consumers certainty and control when...',
 'We also believe great data science comes from great teams, so invest continuously in upskilling and cross-training our data scientists, with lunch & learn...',
 'Industry experience as a Data Analyst or Junior Data Scientist with a track record of manipulating, processing, and extracting value from large datasets....',
 'Data Scientist - Complaint Services, Professional Standards Command, Redfern - NSW Police Force. Experience with common data science toolkits, particularly R....',
 'Opportunity to s

### Putting it together to Scrape Data Science Job listing

 #### Create Empty Dataframe

In [8]:
max_results_per_city = 150
city_set = ["sydney+NSW","Ultimo+NSW","sydney+Western+Suburbs","Darlinghurst+NSW","Barangaroo+NSW",
            "Mascot+NSW","Parramatta+NSW","Surry+Hills+NSW","Macquarie+Park+NSW",
            "sydney+Inner+Suburbs+NSW","Hills+District+NSW","sydney+Central+Business+District+NSW",
            'New+South+Wales', 'Melbourne+VIC']
columns = ['city', 'job_title', 'company_name', 'location', 'summary', 'salary']
dsjob_df = pd.DataFrame(columns = columns)

In [9]:
a=[1,2,3,4,5,6]
a[0]=5
job_coll={}

In [10]:

for city in city_set:
    for start in range(0, max_results_per_city,1000):
        page = requests.get('https://au.indeed.com/jobs?q=data+scientist&l=' + '&start=' + str(start))
        time.sleep(1)
        for div in soup.findAll(name='div', attrs={'class':'row'}):
            # specify row num for index of job posting in dataframe
            num = (len(job_coll) + 1)
            
            # Create an empty list to hold data for each posting
            #job_post = ['None','None','None','None','None','None']
            job_post = ['city', 'job_title', 'company_name', 'location', 'summary', 'salary']
            # append city name
            job_post[0]=city
           
            # grab job title
            for a in div.findAll(name='a', attrs={'data-tn-element':'jobTitle'}):
                job_post[1]=a['title']
            
            # grap company name
            company = div.findAll(name='span', attrs={'class':'company'})
            if len(company) > 0:
                for b in company:
                    job_post[2]=(b.text.strip())
            else:
                sec_try = div.findAll(name='span', attrs= {'class': 'result-link-source'})
                for span in sec_try:
                    job_post[2]=(span.text)
                
            # grab location name
            c = div.findAll('span', attrs={'class':'location'})
            for span in c:
                job_post[3]=(span.text)
                    
            # grab summary text
            d = div.findAll('span', attrs={'class':'summary'})
            for span in d:
                job_post[4]=(span.text.strip())
                
            # grab salary
            e = div.findAll('span', attrs={'class':'no-wrap'})
            for span in e:
                job_post[5]=(span.text.strip())
                
            
            print ((job_post))
            
            job_coll[num]=job_post
            
           

['sydney+NSW', 'Data Scientist - Analytics', 'Domain Group', 'location', 'As a Data Scientist in our team, you will leverage your deep experience in statistics, machine learning and data analysis to drive significant improvements to...', 'salary']
['sydney+NSW', 'Data Scientist', 'ANZ Banking Group', 'location', 'As the Data Scientist, you are to source data from multiple data stores, and build advanced data models to solve complex problems and generate sophisticated...', 'salary']
['sydney+NSW', 'Lead Data Scientist', 'Domain Group', 'location', 'We are looking for a lead data scientist to help sales, product and business leadership make decisions with data, give consumers certainty and control when...', 'salary']
['sydney+NSW', 'Junior Data Scientist/ Machine Learning Engineer', 'Intellify', 'Sydney NSW', 'We also believe great data science comes from great teams, so invest continuously in upskilling and cross-training our data scientists, with lunch & learn...', '$80,000 - $100,000 

In [11]:
len(job_coll)

210

In [12]:
job_coll

{1: ['sydney+NSW',
  'Data Scientist - Analytics',
  'Domain Group',
  'location',
  'As a Data Scientist in our team, you will leverage your deep experience in statistics, machine learning and data analysis to drive significant improvements to...',
  'salary'],
 2: ['sydney+NSW',
  'Data Scientist',
  'ANZ Banking Group',
  'location',
  'As the Data Scientist, you are to source data from multiple data stores, and build advanced data models to solve complex problems and generate sophisticated...',
  'salary'],
 3: ['sydney+NSW',
  'Lead Data Scientist',
  'Domain Group',
  'location',
  'We are looking for a lead data scientist to help sales, product and business leadership make decisions with data, give consumers certainty and control when...',
  'salary'],
 4: ['sydney+NSW',
  'Junior Data Scientist/ Machine Learning Engineer',
  'Intellify',
  'Sydney NSW',
  'We also believe great data science comes from great teams, so invest continuously in upskilling and cross-training our data

### Convert to Dataframe

In [13]:
ds_df = pd.DataFrame.from_dict(job_coll)


In [14]:
dsjob_df = ds_df.T

In [15]:
dsjob_df.head(3)

Unnamed: 0,0,1,2,3,4,5
1,sydney+NSW,Data Scientist - Analytics,Domain Group,location,"As a Data Scientist in our team, you will leve...",salary
2,sydney+NSW,Data Scientist,ANZ Banking Group,location,"As the Data Scientist, you are to source data ...",salary
3,sydney+NSW,Lead Data Scientist,Domain Group,location,We are looking for a lead data scientist to he...,salary


In [16]:
dsjob_df.columns = ['city','job_title','company_name','location','summary','salary']


In [17]:
dsjob_df

Unnamed: 0,city,job_title,company_name,location,summary,salary
1,sydney+NSW,Data Scientist - Analytics,Domain Group,location,"As a Data Scientist in our team, you will leve...",salary
2,sydney+NSW,Data Scientist,ANZ Banking Group,location,"As the Data Scientist, you are to source data ...",salary
3,sydney+NSW,Lead Data Scientist,Domain Group,location,We are looking for a lead data scientist to he...,salary
4,sydney+NSW,Junior Data Scientist/ Machine Learning Engineer,Intellify,Sydney NSW,We also believe great data science comes from ...,"$80,000 - $100,000 a year"
5,sydney+NSW,Junior Data Scientist,The Eclair Group,Sydney NSW,Industry experience as a Data Analyst or Junio...,"$70,000 - $90,000 a year"
6,sydney+NSW,"Data Scientist - Complaint Services, Professio...",Investigations & Counter Terrorism,Sydney NSW,"Data Scientist - Complaint Services, Professio...",salary
7,sydney+NSW,Junior Data Scientist,Sirius People,Sydney Central Business District NSW,Opportunity to start your career in Data Scien...,salary
8,sydney+NSW,Customer Facing Data Scientist,DataRobot,Sydney NSW,Customer Facing Data Scientists work with the ...,salary
9,sydney+NSW,Data Scientist - Award-winning FinTech,Datasii,Sydney NSW,Experience as a Data Scientist (or a Data Anal...,"$100,000 - $115,000 a year"
10,sydney+NSW,Data Scientist,Freelancer.com,Sydney NSW,Proven experience as a data scientist or analy...,"$65,000 - $110,000 a year"


### City and Location Column basically the same, Drop Location Column as City are more complete

In [18]:
dsjob_df = dsjob_df.drop(columns={'location'})

In [75]:
dsjob_df.head()

Unnamed: 0,city,job_title,company_name,summary,salary
1,sydney+NSW,Data Scientist - Analytics,Domain Group,"As a Data Scientist in our team, you will leve...",salary
2,sydney+NSW,Data Scientist,ANZ Banking Group,"As the Data Scientist, you are to source data ...",salary
3,sydney+NSW,Lead Data Scientist,Domain Group,We are looking for a lead data scientist to he...,salary
4,sydney+NSW,Junior Data Scientist/ Machine Learning Engineer,Intellify,We also believe great data science comes from ...,"$80,000 - $100,000 a year"
5,sydney+NSW,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,"$70,000 - $90,000 a year"


### Clean City loacation

In [20]:
dsjob_df.city.unique()

array(['sydney+NSW', 'Ultimo+NSW', 'sydney+Western+Suburbs',
       'Darlinghurst+NSW', 'Barangaroo+NSW', 'Mascot+NSW',
       'Parramatta+NSW', 'Surry+Hills+NSW', 'Macquarie+Park+NSW',
       'sydney+Inner+Suburbs+NSW', 'Hills+District+NSW',
       'sydney+Central+Business+District+NSW', 'New+South+Wales',
       'Melbourne+VIC'], dtype=object)

In [24]:
d = dsjob_df.replace('Sydney+W.+','Sydney', regex=True)

In [25]:
e = d.replace('Ulti.+','Ultimo', regex=True)

In [26]:
e = e.replace('syd.+','sydney', regex=True)

In [27]:
e = e.replace('Baran.+','Barangaroo', regex=True)

In [28]:
e = e.replace('Masc.+','Mascot', regex=True)

In [29]:
e = e.replace('Parr.+','Parramatta', regex=True)

In [30]:
e = e.replace('Surry .+','Surry Hills', regex=True)

In [31]:
e = e.replace('Macq.+','Macquarie Park', regex=True)

In [32]:
e = e.replace('Hill.+','Hills District', regex=True)

In [33]:
e = e.replace('New.+','Sydney', regex=True)

In [36]:
e = e.replace('Melb.+','Melbourne', regex=True)

In [39]:
e = e.replace('Darl.+','Darlinghurst', regex=True)

In [43]:
e = e.replace('Surry H.+','Surry Hills', regex=True)

In [44]:
e = e.replace('syd.+','Sydney', regex=True)

In [74]:
e.city.unique()

array(['Sydney', 'Ultimo', 'Darlinghurst', 'Barangaroo', 'Mascot',
       'Parramatta', 'Surry Hills', 'Macquarie Park', 'Hills District',
       'Melbourne'], dtype=object)

### Clean Job Title

In [73]:
e.job_title.unique()

array(['Data Scientist - Analytics', 'Data Scientist',
       'Lead Data Scientist',
       'Junior Data Scientist/ Machine Learning Engineer',
       'Junior Data Scientist',
       'Data Scientist - Complaint Services, Professional Standards Command, Redfern - NSW Police Force',
       'Customer Facing Data Scientist',
       'Data Scientist - Award-winning FinTech', 'Ikon | Data Scientist',
       'Team Assistant with AI company'], dtype=object)

In [77]:
e = e.replace('data.+','Data Scientist', regex=True)

In [78]:
e = e.replace('Lead.+', 'Senior Data Scientist', regex=True)

In [79]:
e = e.replace('Junior da.+', 'Junior Data Scientist', regex=True) 

In [80]:
e = e.replace('Junior Data Analyst/Sci.+', 'Junior Data Scientist', regex=True)

In [81]:
e.job_title.unique()

array(['Data Scientist - Analytics', 'Data Scientist',
       'Senior Data Scientist',
       'Junior Data Scientist/ Machine Learning Engineer',
       'Junior Data Scientist',
       'Data Scientist - Complaint Services, Professional Standards Command, Redfern - NSW Police Force',
       'Customer Facing Data Scientist',
       'Data Scientist - Award-winning FinTech', 'Ikon | Data Scientist',
       'Team Assistant with AI company'], dtype=object)

In [83]:
e = e.replace('Data Sc.+', 'Data Scientist', regex=True)

In [84]:
e = e.replace('Junior.+', 'Junior Data Scientist', regex=True)

In [85]:
e = e.replace('Custom.+', 'Data Scientist', regex=True)

In [89]:
e = e.replace('Team.+', 'Junior Data Scientist', regex=True)

In [90]:
e = e.replace('Ikon.+', 'Data Scientist', regex=True)

In [91]:
e.job_title.unique()

array(['Data Scientist', 'Senior Data Scientist', 'Junior Data Scientist'],
      dtype=object)

In [92]:
e.head()

Unnamed: 0,city,job_title,company_name,summary,salary
1,Sydney,Data Scientist,Domain Group,As a Data Scientist,salary
2,Sydney,Data Scientist,ANZ Banking Group,As the Data Scientist,salary
3,Sydney,Senior Data Scientist,Domain Group,We are looking for a lead Data Scientist,salary
4,Sydney,Junior Data Scientist,Intellify,We also believe great Data Scientist,"$80,000 - $100,000 a year"
5,Sydney,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,"$70,000 - $90,000 a year"


In [94]:
e = e.fillna(0)

In [95]:
e.head()

Unnamed: 0,city,job_title,company_name,summary,salary
1,Sydney,Data Scientist,Domain Group,As a Data Scientist,salary
2,Sydney,Data Scientist,ANZ Banking Group,As the Data Scientist,salary
3,Sydney,Senior Data Scientist,Domain Group,We are looking for a lead Data Scientist,salary
4,Sydney,Junior Data Scientist,Intellify,We also believe great Data Scientist,"$80,000 - $100,000 a year"
5,Sydney,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,"$70,000 - $90,000 a year"


In [97]:
e.columns

Index(['city', 'job_title', 'company_name', 'summary', 'salary'], dtype='object')

In [98]:
e.to_csv('./data/dsjobx1.csv')

In [100]:
h_df = pd.read_csv('./data/dsjobx1.1.csv')

In [101]:
h_df.head()

Unnamed: 0,city,job_title,company_name,summary,salary_lower,salary_upper
0,Sydney,Data Scientist,Domain Group,As a Data Scientist,salary,
1,Sydney,Data Scientist,ANZ Banking Group,As the Data Scientist,salary,
2,Sydney,Senior Data Scientist,Domain Group,We are looking for a lead Data Scientist,salary,
3,Sydney,Junior Data Scientist,Intellify,We also believe great Data Scientist,"$80,000","$100,000"
4,Sydney,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,"$70,000","$90,000"


In [102]:
h_df.salary_lower.replace('salary', 0, inplace=True)

In [103]:
h_df.salary_lower.replace('[$,]', '', regex=True, inplace=True)

In [104]:
h_df.salary_upper.replace('[A-z$,]', '', regex=True, inplace=True)

In [105]:
h_df.head()

Unnamed: 0,city,job_title,company_name,summary,salary_lower,salary_upper
0,Sydney,Data Scientist,Domain Group,As a Data Scientist,0,
1,Sydney,Data Scientist,ANZ Banking Group,As the Data Scientist,0,
2,Sydney,Senior Data Scientist,Domain Group,We are looking for a lead Data Scientist,0,
3,Sydney,Junior Data Scientist,Intellify,We also believe great Data Scientist,80000,100000.0
4,Sydney,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,70000,90000.0


In [106]:
h_df[['salary_lower', 'salary_upper']] = h_df[['salary_lower', 'salary_upper']].astype('float64')

In [107]:
h_df.dtypes

city             object
job_title        object
company_name     object
summary          object
salary_lower    float64
salary_upper    float64
dtype: object

Replace 0 back to NAN as it cannot calculate the Median.

In [108]:
j_df = h_df.replace(0, np.nan)

In [109]:
j_df.head()

Unnamed: 0,city,job_title,company_name,summary,salary_lower,salary_upper
0,Sydney,Data Scientist,Domain Group,As a Data Scientist,,
1,Sydney,Data Scientist,ANZ Banking Group,As the Data Scientist,,
2,Sydney,Senior Data Scientist,Domain Group,We are looking for a lead Data Scientist,,
3,Sydney,Junior Data Scientist,Intellify,We also believe great Data Scientist,80000.0,100000.0
4,Sydney,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,70000.0,90000.0


In [110]:
j_df['median_salary_lower'] = j_df.groupby('job_title')['salary_lower'].transform('median')

In [111]:
j_df['median_salary_upper'] = j_df.groupby('job_title')['salary_upper'].transform('median')

In [112]:
j_df.head()

Unnamed: 0,city,job_title,company_name,summary,salary_lower,salary_upper,median_salary_lower,median_salary_upper
0,Sydney,Data Scientist,Domain Group,As a Data Scientist,,,82500.0,112500.0
1,Sydney,Data Scientist,ANZ Banking Group,As the Data Scientist,,,82500.0,112500.0
2,Sydney,Senior Data Scientist,Domain Group,We are looking for a lead Data Scientist,,,,
3,Sydney,Junior Data Scientist,Intellify,We also believe great Data Scientist,80000.0,100000.0,70000.0,90000.0
4,Sydney,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,70000.0,90000.0,70000.0,90000.0


In [113]:
j_df.salary_lower.fillna(j_df.median_salary_lower, inplace=True)

In [114]:
j_df.salary_upper.fillna(j_df.median_salary_upper, inplace=True)

In [116]:
j_df.dropna()

Unnamed: 0,city,job_title,company_name,summary,salary_lower,salary_upper,median_salary_lower,median_salary_upper
0,Sydney,Data Scientist,Domain Group,As a Data Scientist,82500.0,112500.0,82500.0,112500.0
1,Sydney,Data Scientist,ANZ Banking Group,As the Data Scientist,82500.0,112500.0,82500.0,112500.0
3,Sydney,Junior Data Scientist,Intellify,We also believe great Data Scientist,80000.0,100000.0,70000.0,90000.0
4,Sydney,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,70000.0,90000.0,70000.0,90000.0
5,Sydney,Data Scientist,Investigations & Counter Terrorism,Data Scientist,82500.0,112500.0,82500.0,112500.0
6,Sydney,Junior Data Scientist,Sirius People,Opportunity to start your career in Data Scien...,70000.0,90000.0,70000.0,90000.0
7,Sydney,Data Scientist,DataRobot,Data Scientist,82500.0,112500.0,82500.0,112500.0
8,Sydney,Data Scientist,Datasii,Experience as a Data Scientist,100000.0,115000.0,82500.0,112500.0
9,Sydney,Data Scientist,Freelancer.com,Proven experience as a Data Scientist,65000.0,110000.0,82500.0,112500.0
10,Sydney,Data Scientist,Freshwater Group,The Data Scientist,82500.0,112500.0,82500.0,112500.0


Drop the last 2 Column

In [119]:
j_df.to_csv('./data/dsfinal.csv')

In [121]:
h_df = pd.read_csv('./data/dsfinal.csv')

In [122]:
h_df['mean_salary'] = (j_df.salary_lower + j_df.salary_upper) / 2

In [124]:
h_df.dropna()

Unnamed: 0.1,Unnamed: 0,city,job_title,company_name,summary,salary_lower,salary_upper,mean_salary
0,0,Sydney,Data Scientist,Domain Group,As a Data Scientist,82500.0,112500.0,97500.0
1,1,Sydney,Data Scientist,ANZ Banking Group,As the Data Scientist,82500.0,112500.0,97500.0
3,3,Sydney,Junior Data Scientist,Intellify,We also believe great Data Scientist,80000.0,100000.0,90000.0
4,4,Sydney,Junior Data Scientist,The Eclair Group,Industry experience as a Data Analyst or Junio...,70000.0,90000.0,80000.0
5,5,Sydney,Data Scientist,Investigations & Counter Terrorism,Data Scientist,82500.0,112500.0,97500.0
6,6,Sydney,Junior Data Scientist,Sirius People,Opportunity to start your career in Data Scien...,70000.0,90000.0,80000.0
7,7,Sydney,Data Scientist,DataRobot,Data Scientist,82500.0,112500.0,97500.0
8,8,Sydney,Data Scientist,Datasii,Experience as a Data Scientist,100000.0,115000.0,107500.0
9,9,Sydney,Data Scientist,Freelancer.com,Proven experience as a Data Scientist,65000.0,110000.0,87500.0
10,10,Sydney,Data Scientist,Freshwater Group,The Data Scientist,82500.0,112500.0,97500.0


In [125]:
h_df.to_csv('./data/dsfinal2.csv')