# Data Analysis & Data Scientist Canadian Salaries

In [53]:
import pandas as pd, re 
pd.set_option('display.max_rows',900)

## Collecting Data Sources & formatting them


### Starting with Statcan 2021 census information
https://www.jobbank.gc.ca/marketreport/wages-occupation/17882/ca

In [54]:
file = r'Data\StatCan_salary.csv'
dfWagesOG = pd.read_csv(file,low_memory=False)


In [55]:
dfWagesOG.columns

Index(['NOC_CNP', 'NOC_TITLE_ENG', 'NOC_TITLE_FRA', 'prov', 'ER_Code_Code_RE',
       'ER_Name', 'Nom_RE', 'Low_Wage_Salaire_Minium',
       'Median_Wage_Salaire_Median', 'High_Wage_Salaire_Maximal',
       'Average_Wage_Salaire_Moyen', 'Data_Source_E', 'Data_Source_F',
       'Reference_Period', 'Revision_Date_Date_revision',
       'Annual_Wage_Flag_Salaire_annuel', 'Wage_Comment_E', 'Wage_Comment_F'],
      dtype='object')

In [56]:
dfDatastr = dfWagesOG.copy()
dfDatastr = dfDatastr[dfDatastr['NOC_TITLE_ENG'].str.contains('Data',na=False)].copy()
dfDatastr['NOC_TITLE_ENG'].value_counts()

NOC_TITLE_ENG
Data entry clerks                            86
Data scientists                              86
Database analysts and data administrators    86
Name: count, dtype: int64

In [57]:
dfDS = dfDatastr[~dfDatastr['NOC_TITLE_ENG'].str.contains('clerks',na=False)].copy()

In [58]:
dfDS.describe()

Unnamed: 0,Low_Wage_Salaire_Minium,Median_Wage_Salaire_Median,High_Wage_Salaire_Maximal,Average_Wage_Salaire_Moyen,Annual_Wage_Flag_Salaire_annuel
count,52.0,64.0,52.0,62.0,172.0
mean,25.324808,41.265313,61.095385,43.900161,0.0
std,3.5886,4.199198,11.729541,6.778051,0.0
min,15.6,32.55,42.0,33.54,0.0
25%,23.08,38.0,53.2175,39.0875,0.0
50%,25.0,41.34,57.845,42.015,0.0
75%,27.345,44.8325,67.5225,47.39,0.0
max,36.85,50.95,86.89,69.93,0.0


In [59]:
#removing irrelevant columns
dfDS = dfDS.drop(columns=['NOC_CNP','Wage_Comment_F', 'Data_Source_F', 'NOC_TITLE_FRA', 'Nom_RE'])

In [60]:
#drop only the rows where all four salary columns are missing (how='all'); rows with at least one salary value are kept
dfDS.dropna(how='all',inplace=True,subset=['Low_Wage_Salaire_Minium', 'Median_Wage_Salaire_Median','High_Wage_Salaire_Maximal', 'Average_Wage_Salaire_Moyen'])

In [61]:
dfDS.reset_index(inplace=True,drop=True)
dfDS[3:36:8] #display a small selection of the table

Unnamed: 0,NOC_TITLE_ENG,prov,ER_Code_Code_RE,ER_Name,Low_Wage_Salaire_Minium,Median_Wage_Salaire_Median,High_Wage_Salaire_Maximal,Average_Wage_Salaire_Moyen,Data_Source_E,Reference_Period,Revision_Date_Date_revision,Annual_Wage_Flag_Salaire_annuel,Wage_Comment_E
3,Data scientists,NB,ER13,New Brunswick,,37.29,,46.55,2021 Census,2021,2023-11-29,0,
11,Data scientists,ON,ER3510,Ottawa,27.3,45.99,78.43,56.16,2021 Census,2021,2023-11-29,0,
19,Data scientists,SK,ER47,Saskatchewan,,46.65,,46.65,2021 Census,2021,2023-11-29,0,
27,Database analysts and data administrators,,ER00,Canada,24.0,40.87,58.0,40.88,Labour Force Survey,2021-2022,2023-11-29,0,
35,Database analysts and data administrators,NB,ER1320,Moncton--Richibucto,21.63,34.62,45.49,35.32,Labour Force Survey,2021-2022,2023-11-29,0,


In [62]:
dfDS.groupby('prov')['Average_Wage_Salaire_Moyen'].mean().sort_values(ascending=False)

prov
BC    46.435714
AB    46.205000
ON    45.754000
QC    45.186667
SK    43.754000
NL    43.110000
MB    39.015000
NB    38.022000
NS    37.716667
PE    33.540000
Name: Average_Wage_Salaire_Moyen, dtype: float64

### Followed by a curated list from https://ai-jobs.net
This data source is survey-based; however, it seems to be the main third-party source for data-related jobs.

In [63]:
dfOG2 = pd.read_csv(r'Data\salaries.csv')

dfOG2.columns

Index(['work_year', 'experience_level', 'employment_type', 'job_title',
       'salary', 'salary_currency', 'salary_in_usd', 'employee_residence',
       'remote_ratio', 'company_location', 'company_size'],
      dtype='object')

In [64]:
dfOG2['employment_type'].value_counts()

employment_type
FT    13680
CT       26
PT       22
FL       12
Name: count, dtype: int64

In [65]:
#filtering for work years 2021 onward (work_year>=2021) and full-time ('FT') employment
df2 = dfOG2[(dfOG2['work_year']>=2021) & (dfOG2['employment_type']=='FT')].copy()
df2.describe()

Unnamed: 0,work_year,salary,salary_in_usd,remote_ratio
count,13611.0,13611.0,13611.0,13611.0
mean,2023.088825,165120.5,150324.481596,33.234149
std,0.642918,355622.9,68121.666495,46.697734
min,2021.0,15000.0,15000.0,0.0
25%,2023.0,104300.0,104000.0,0.0
50%,2023.0,142400.0,142200.0,0.0
75%,2023.0,188000.0,186000.0,100.0
max,2024.0,30400000.0,800000.0,100.0


In [66]:
#convert salaries to CAD using the mean 2023 USD/CAD daily close
#the rate is cached in Data\usdcad.txt so MetaTrader5 is only needed the first time
try:
    with open(r'Data\usdcad.txt','r') as usdcadfile:
        usdcad = float(usdcadfile.read())
except (FileNotFoundError, ValueError):
    #cache file is missing or does not hold a number: rebuild it from MetaTrader5
    #(imported lazily because it is only needed on this fallback path)
    import MetaTrader5 as mt5
    if not mt5.initialize():
        raise RuntimeError(f"MetaTrader5 could not be initialised: {mt5.last_error()}")
    try:
        mt5.symbol_select('USDCAD.p')
        rates = mt5.copy_rates_range('USDCAD.p',mt5.TIMEFRAME_D1,pd.to_datetime('2023-01-01'),pd.to_datetime('2023-12-31'))
    finally:
        #always release the terminal connection, even if the request failed
        mt5.shutdown()
    if rates is None or len(rates) == 0:
        raise RuntimeError("MetaTrader5 returned no USDCAD.p rates for 2023")
    prices = pd.DataFrame(rates)
    usdcad = prices['close'].mean()
    with open(r'Data\usdcad.txt','w') as usdcadfile:
        usdcadfile.write(str(usdcad))
#the same single rate is applied to every row of df2
df2['salary_in_cad'] = df2['salary_in_usd']*usdcad

In [67]:
#filtering for Canadian employees
df2cad = df2[df2['employee_residence'] == 'CA'].copy()

In [68]:
df2cad.drop(columns=['salary','salary_in_usd','salary_currency'],inplace=True)

In [69]:
df2cad['job_title'].value_counts()

job_title
Data Scientist                        89
Data Engineer                         56
Machine Learning Engineer             42
Data Analyst                          41
Analytics Engineer                    18
Business Intelligence Analyst         12
Machine Learning Scientist            11
Data Architect                         8
Research Scientist                     5
Data Integration Specialist            4
Machine Learning Software Engineer     4
Data Modeler                           4
ML Engineer                            4
Data Science                           4
Business Intelligence Developer        4
Data Strategist                        2
AI Programmer                          2
Machine Learning Developer             2
Director of Data Science               2
Data Developer                         2
Business Intelligence Engineer         2
Deep Learning Engineer                 2
Data Lead                              2
Research Analyst                       2
AI Eng

In [70]:
df2cad[df2cad['job_title']=='Data Analyst'].describe()

Unnamed: 0,work_year,remote_ratio,salary_in_cad
count,41.0,41.0,41.0
mean,2023.0,41.463415,144419.160009
std,0.806226,49.8779,55748.474795
min,2021.0,0.0,54991.27805
25%,2022.0,0.0,101218.392857
50%,2023.0,0.0,141705.75
75%,2024.0,100.0,175445.214286
max,2024.0,100.0,279767.637857


### Another dataset from Statistics Canada (StatCan), but broader in scope
https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=9810041201

In [71]:
dfinc = pd.read_csv(r'Data\income_field_of_study.csv',index_col=None)
dfinc1 = dfinc[dfinc.iloc[:,0].str.contains('Data')].copy()
dfinc1

Unnamed: 0,Occupation,Total Employment ppls,With Income,Med Income,Mean Income,Med Inc + coms,Mean Inc + coms,Average wages salaries and commissions
93,14111 Data entry clerks,46895,43300,31200,33080,41595,32400,33760
141,21211 Data scientists,15420,14450,83000,89500,14080,83000,89600
146,21223 Database analysts and data administrators,26225,25025,72000,74400,24305,72500,75200


### Here we are defining functions for web scraping, and accessing the Canadian JobBank, as well as Indeed

In [110]:
def convCol(x:str):
    """
    Convert one free-text salary string into an hourly rate.

    The first amount found in the text is used, so for a range such as
    '$25.00 to $30.00 hourly' or '$60,000–$80,000 a year' the low end is taken,
    and for 'Up to $30 an hour' the single stated amount is taken.
    The pay period is detected from keywords and the amount is scaled assuming
    8 hours a day, 40 hours a week and 52 weeks a year. Text with no recognised
    period keyword is returned unscaled.

    x(str) : a single salary string (one element of a salary column, e.g. via Series.apply)

    Returns: the hourly rate as a float
    Raises: ValueError if the text contains no number
    """
    text = x.lower()
    # first numeric token: digits with optional thousands commas and decimals, or a bare '.50'
    amount = re.search(r'\d[\d,]*(?:\.\d+)?|\.\d+', text)
    if amount is None:
        raise ValueError(f"could not find a salary amount in {x!r}")
    value = float(amount.group(0).replace(',', ''))
    # (keywords, hours worked per pay period); tested in this order, first match wins
    hours_per_period = (
        (('daily', 'day'), 8),
        (('hour',), 1),
        (('month',), (52/12)*40),
        (('week',), 40),
        (('annual', 'year'), 40*52),
    )
    for keywords, hours in hours_per_period:
        if any(keyword in text for keyword in keywords):
            return value/hours
    return value

In [105]:
#cleaning functions
def jobbank_cleaning(data:str,save:bool=False):
    """
    Parse Job Bank search-result html into salary tables.

    Every <article> element is treated as one posting. Postings without a
    'Salary:' entry are skipped. A posting that has a salary but lacks a title,
    location or company keeps None in that field instead of aborting the parse.

    data(str): the html text itself, or (when save=True) the path of an html file saved by jobbank_salaries()
    save(bool): True when `data` is a file path to read, False when it is the html text

    Returns (dfjobbank, dfjobbankOG):
        dfjobbank   - columns Title, Salary (hourly float from convCol), City, Province, Company
        dfjobbankOG - the unformatted rows: Title, Salary (original text), Location, Company
    """
    if save:
        with open(data, 'r',encoding='utf-8') as html_file:
            rawhtml = html_file.read()
    else:
        rawhtml = data
    articles = re.findall(r'<article.*?</article>',rawhtml,re.DOTALL)
    print(f"Amount of items found: {len(articles)}")

    def _grab(pattern, text, flags=0):
        # first capture group with surrounding whitespace removed, or None when absent
        found = re.search(pattern, text, flags)
        return found.group(1).strip() if found else None

    jobs = {'Title':[],'Salary':[],'Location':[],'Company':[]}
    for article in articles:
        salary = _grab(r'salary.+?Salary:\n(.+?)<',article,re.DOTALL)
        if salary is None:
            # posting does not advertise a salary
            continue
        jobs['Title'].append(_grab(r'noctitle">(.+?)\n',article))
        jobs['Company'].append(_grab(r'business">(.+?)<',article))
        jobs['Location'].append(_grab(r'Location</span>(.+?)<',article,re.DOTALL))
        jobs['Salary'].append(salary)
    dfjobbankOG = pd.DataFrame(jobs)
    dfjobbank =dfjobbankOG.copy()
    dfjobbank['Salary']=dfjobbank['Salary'].apply(convCol)
    # Location looks like 'Gatineau (QC)': the last four characters hold the province code
    dfjobbank['City'] = dfjobbank['Location'].str[:-4].str.strip()
    dfjobbank['Province'] = dfjobbank['Location'].str[-4:].str.replace(r'[()\s]','',regex=True)
    dfjobbank = dfjobbank[['Title','Salary','City','Province','Company']]
    print(f"Amount with Salaries: {len(dfjobbank)}")
    return (dfjobbank,dfjobbankOG)

def indeed_cleaning(data:str, save:bool=False):
    """
    Parse Indeed search-result html into salary tables.

    Every <tbody> element is treated as one posting. The patterns rely on the
    html having one tag or text node per line, as produced by
    BeautifulSoup.prettify() in indeed_salaries().

    data(str): the html text itself, or (when save=True) the path of an html file saved by indeed_salaries()
    save(bool): True when `data` is a file path to read, False when it is the html text

    Returns (dfindeed, dfindeedOG):
        dfindeed   - postings with every field present; Salary converted to an hourly float by convCol
        dfindeedOG - postings that have a salary, with the original salary text (other fields may be None)
    """
    if save:
        with open(data, 'r',encoding='utf-8') as html_file:
            rawhtml = html_file.read()
    else:
        rawhtml = data
    tbodies = re.findall(r'<tbody.*?</tbody>',rawhtml,re.DOTALL)
    print(f"Amount of items found: {len(tbodies)}")
    indeedjobs={'Title':[],'Salary':[],'City':[],'Province':[],'Company':[]}
    for tbody in tbodies:
        salary = re.search(r'(\$.+?)\n',tbody,re.DOTALL)
        indeedjobs['Salary'].append(salary.group(1) if salary else float('NaN'))

        title = re.search(r'title="(.+?)"',tbody)
        indeedjobs['Title'].append(title.group(1) if title else None)

        company = re.search(r'company-name.+?\n(.+?)\n',tbody,re.DOTALL)
        indeedjobs['Company'].append(company.group(1) if company else None)

        location = re.search(r'text-location.*?\n(.*?)\n',tbody,re.DOTALL)
        parts = location.group(1).split(',') if location else []
        if len(parts) == 2:
            # exactly 'City, Province'
            indeedjobs['City'].append(parts[0].strip())
            indeedjobs['Province'].append(parts[1].strip())
        else:
            # missing, or not in 'City, Province' form (e.g. 'Remote' or extra commas)
            indeedjobs['City'].append(None)
            indeedjobs['Province'].append(None)

    dfindeedOG = pd.DataFrame(indeedjobs).dropna(subset=['Salary']).reset_index(drop=True)
    dfindeed = dfindeedOG.dropna(how='any').reset_index(drop=True)
    dfindeed['Salary'] = dfindeed['Salary'].apply(convCol)
    print(f"Amount with Salaries {len(dfindeed)}")
    return (dfindeed,dfindeedOG)

In [74]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException, TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import bs4
def jobbank_salaries(searchterms:str,save:bool=False,file_path:str=None,add_type:str='w'):
    """
    Search the Job Bank website with Selenium, expand every result, and either
    save the page html or parse it with jobbank_cleaning().

    Params:
    searchterms (str) : The job search you want to make Ex. Data Analyst
    save (bool) : If you want to save the html file instead of parsing it
    file_path (str) : where to save the html file; required when save=True
    add_type (str)[a|w] : file mode used for saving - 'a' to append to an existing html file
        (for example to add a 'Data Scientist' search to a saved 'Data Analyst' search),
        or 'w' to write a new one

    Returns: 'Html created' when save=True, otherwise the two DataFrames
        (cleaned, original) returned by jobbank_cleaning()
    Raises: ValueError if save=True and no file_path is given
    """
    # local import so this function does not depend on the notebook's import cell
    from urllib.parse import quote_plus
    if save and file_path is None:
        # fail before opening a browser and scraping
        raise ValueError("file_path is required when save=True")
    # quote_plus turns spaces into '+' and escapes characters such as '&' or '+'
    search_url = ('https://www.jobbank.gc.ca/jobsearch/jobsearch?searchstring='
                  + quote_plus(searchterms.strip())
                  + '&locationstring=Canada')
    driver = webdriver.Chrome()
    try:
        driver.get(search_url)
        time.sleep(5)
        try:
            # keep loading results; the wait raises TimeoutException once the button is gone
            while True:
                see_more_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//button[@type="button"][@id="moreresultbutton"]')))
                see_more_button.click()
                time.sleep(3)
        except (NoSuchElementException, ElementNotVisibleException, TimeoutException):
            print("The 'See More' button is no longer available.")
        html_source = driver.page_source
    finally:
        # close the browser even if the scrape fails part-way
        driver.quit()
    if save:
        with open(file_path, add_type, encoding='utf-8') as file:
            file.write(html_source)
        return 'Html created'
    return jobbank_cleaning(html_source)

def indeed_salaries(searchterms:str,pages:int,file_path:str=None,save:bool =False):
    """
    Search the Indeed website with Selenium, collect the html of up to `pages`
    result pages, and either save it or parse it with indeed_cleaning().

    Params:
    searchterms (str) : The job search you want to make Ex. Data Analyst
    pages (int) : Number of result pages you want to collect from Indeed
    file_path (str) : where to save the html file; required when save=True
    save (bool) : If you want to save the html file instead of parsing it

    Returns: 'Html created' when save=True, otherwise the two DataFrames
        (cleaned, original) returned by indeed_cleaning()
    Raises: ValueError if save=True and no file_path is given
    """
    # local import so this function does not depend on the notebook's import cell
    from urllib.parse import quote_plus
    if save and file_path is None:
        # fail before opening a browser and scraping
        raise ValueError("file_path is required when save=True")
    # quote_plus turns spaces into '+' and escapes characters such as '&' or '+'
    search_url = ('https://ca.indeed.com/jobs?q='
                  + quote_plus(searchterms.strip())
                  + '&l=Canada&vjk=d604f2f27a7640cf')
    driver = webdriver.Chrome()
    html_source=''
    try:
        driver.get(search_url)
        time.sleep(5)
        try:
            for page in range(pages):
                html_source=html_source+driver.page_source
                if page==0: #sometimes the first page asks to accept cookies, check and click yes if so
                    try:
                        cookie = driver.find_element(By.XPATH,'//button[@class="gnav-CookiePrivacyNoticeButton"]')
                        cookie.click()
                    except Exception:
                        # best effort: no cookie banner, or it could not be clicked
                        pass
                if page==1: #a popup asking to sign up for emails may appear on the second page, click 'X' to clear it
                    time.sleep(3)
                    try:
                        x_button = driver.find_element(By.XPATH,'//h3[@class="DesktopJobAlertPopup-heading"]//following::button[1]')
                        x_button.click()
                    except NoSuchElementException:
                        # no popup this time; carry on paging instead of stopping
                        pass
                if page==pages-1:
                    # last requested page is already captured, no need to navigate further
                    break
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//a[@aria-label="Next Page"]')))
                next_button.click()
                time.sleep(2)
        except (NoSuchElementException, ElementNotVisibleException, TimeoutException) as e:
            print(f"Stopped after page {page+1} of {pages}.",e)
    finally:
        # close the browser even if the scrape fails part-way
        driver.quit()
    #format html with bs4 to make easier to parse with regex
    pretty_html = bs4.BeautifulSoup(html_source,'html.parser').prettify()
    if save:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(pretty_html)
        return 'Html created'
    return indeed_cleaning(pretty_html)

### Now we are getting the Job Bank Information from https://www.jobbank.gc.ca
Using Selenium, we will access websites like the Canadian Job Bank, or Indeed, and scrape their postings to get salary data around the country

In [75]:
filepathJobBank = r'Data\cadjobbank_14-3-24.html'
dfjobbank,dfjobbankOG=jobbank_cleaning(filepathJobBank,True)

Amount of items found: 309
Amount with Salaries: 190


In [76]:
dfjobbank['Province'].value_counts()

Province
ON    84
AB    46
BC    41
QC    11
SK     3
MB     2
NS     2
NB     1
Name: count, dtype: int64

In [77]:
dfjobbank.groupby('Province')['Salary'].mean().sort_values(ascending=False)

Province
QC    4456.208514
AB      43.262609
ON      42.215082
SK      40.146667
BC      39.788743
NS      36.826923
NB      35.500000
MB      30.665000
Name: Salary, dtype: float64

There is clearly an issue with the salaries in QC, so let's investigate which rows are causing it by filtering with

`dfjobbank['Salary']>150`


In [78]:
dfjobbank[dfjobbank['Salary']>150].index

Index([7], dtype='int64')

In [79]:
dfjobbankOG.loc[dfjobbank[dfjobbank['Salary']>150].index]

Unnamed: 0,Title,Salary,Location,Company
7,DBA (database analyst),"$48,689.00 to $83,462.00 hourly",Gatineau (QC),Cégep Héritage College


So here we can see that: 
> Salary      $48,689.00 to $83,462.00 hourly

We can assume this posting was incorrectly labelled as an hourly wage in the source data, and therefore it needs to be treated as an annual salary instead.

In [80]:
dfjobbank.at[7,'Salary'] = 48689/(52*40)
dfjobbank.groupby('Province')['Salary'].mean().sort_values(ascending=False)

Province
AB    43.262609
ON    42.215082
SK    40.146667
BC    39.788743
NS    36.826923
NB    35.500000
QC    32.063802
MB    30.665000
Name: Salary, dtype: float64

In [81]:
dfjobbank.groupby('Province')['Salary'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AB,46.0,43.262609,5.867471,31.25,42.0,42.125,43.3675,80.0
BC,41.0,39.788743,3.609453,22.0,38.0,40.2,41.0,47.0
MB,2.0,30.665,15.082588,20.0,25.3325,30.665,35.9975,41.33
NB,1.0,35.5,,35.5,35.5,35.5,35.5,35.5
NS,2.0,36.826923,4.487408,33.653846,35.240385,36.826923,38.413462,40.0
ON,84.0,42.215082,8.289294,17.94,41.9375,43.0,43.455,91.346154
QC,11.0,32.063802,12.089692,19.5,23.551442,31.25,35.548077,57.692308
SK,3.0,40.146667,0.790274,39.44,39.72,40.0,40.5,41.0


In [82]:
dfjobbank['Title'].value_counts()

Title
database analyst                            63
data administrator                          18
business data analyst                       17
database administrator (DBA)                16
data analyst - informatics and systems      16
analyst, database                           10
big data analyst                             8
administrator, database                      8
administrator, data                          6
database analyst (DBA)                       4
DBA (database analyst)                       4
information technology (IT) data analyst     3
data warehouse analyst                       3
database manager                             3
DBA (database administrator)                 3
database architect                           2
database designer                            1
database architect (DBA)                     1
data processing specialist                   1
data management specialist                   1
data quality analyst                         1
archite

### Indeed job search 

In [84]:
filepathIndeed = r'Data\indeed_14-3-2024.html'
dfindeed,dfindeedOG = indeed_cleaning(filepathIndeed,True)
dfindeed['Province'].value_counts()

Amount of items found: 1050
Amount with Salaries 166


Province
ON    94
BC    35
QC    16
AB    14
NS     2
MB     2
SK     2
NL     1
Name: count, dtype: int64

In [85]:
dfindeed.groupby('Province')['Salary'].mean().sort_values(ascending=False)

Province
BC    44.108997
AB    42.464574
ON    39.663298
NS    38.978173
QC    38.020031
SK    36.800000
MB    24.855769
NL    24.038462
Name: Salary, dtype: float64

### Combining jobbank and indeed 

In [86]:
dfjobbank.columns

Index(['Title', 'Salary', 'City', 'Province', 'Company'], dtype='object')

In [87]:
dfindeed.columns

Index(['Title', 'Salary', 'City', 'Province', 'Company'], dtype='object')

In [88]:
dfallJobs = pd.concat([dfindeed,dfjobbank])

In [89]:
dfallJobs.groupby('Province')['Salary'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AB,60.0,43.076401,9.33403,26.442308,41.435,42.05,43.27,80.0
BC,76.0,41.778334,10.49863,22.0,37.0,40.1,42.317308,76.923077
MB,4.0,27.760385,9.387165,20.0,22.704327,24.855769,29.911827,41.33
NB,1.0,35.5,,35.5,35.5,35.5,35.5,35.5
NL,1.0,24.038462,,24.038462,24.038462,24.038462,24.038462,24.038462
NS,4.0,37.902548,5.000051,33.653846,33.888221,36.983173,40.9975,43.99
ON,178.0,40.867511,11.564055,17.0,35.0,42.25,45.0,115.384615
QC,27.0,35.593419,16.058077,7.211538,23.551442,34.273558,47.52224,86.538462
SK,5.0,38.808,3.339215,32.932692,39.44,40.0,40.667308,41.0


In [111]:
a,b = indeed_salaries('Tree Planting',4)

Amount of items found: 120
Amount with Salaries 28


In [112]:
a

Unnamed: 0,Title,Salary,City,Province,Company
0,Climbing Arborist,25.0,Plympton-Wyoming,ON,Wyoming Tree Service
1,Tree Planter,22.5,Bear Island,NB,East Winds Silviculture
2,Landscaper hardscapes,25.0,Abbotsford,BC,Heidelberg Landscaping
3,Landscape Foreman/lead hand,35.0,Greater Toronto Area,ON,Sublime Landscapes Li...
4,Landscape Technician,24.0,Caledon East,ON,Glen Echo Nurseries
5,"Hardscaper/Landscaper-Full Time , Year Round p...",26.0,Saskatoon,SK,City Gardens Landscap...
6,seasonal tree planter,16.0,Lac-au-saumon,QC,Société d'exploitatio...
7,Tree Planting Head Cook,59.375,Quesnel,BC,Blue Collar Silvicult...
8,Tree Planter,18.0,Exeter,ON,Ausable Bayfield Cons...
9,Tree Nursery Supervisor,25.0,Medicine Hat,AB,Any Tree Ranch
