In [1]:
import re
from nltk.corpus import stopwords
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
import time
import requests
import random
import pandas as pd
import matplotlib.pyplot as plt
from goose3 import Goose

In [2]:
program_languages=['bash','r','python','java','c++','c#', 'f#', 'ruby','perl','matlab','javascript','scala','php']
analysis_software=['alteryx', 'excel','tableau', 'power bi', 'powerbi', 'qlikview', 'qlik','d3.js','sas','sap','spss','d3','saas','pandas','numpy','scipy','sps','spotfire','scikits.learn','splunk','powerpoint','h2o']
bigdata_tool=['hadoop','mapreduce','spark','pig','hive','shark','oozie','zookeeper','flume','mahout']
databases=['sql','nosql','hbase','cassandra','mongodb','mysql','mssql','postgresql','oracle db','rdbms']
machine_learning = ['keras', 'scikit-learn', 'tensorflow', 'caffe', 'datarobot', 'theano', 'torch']
overall_dict = program_languages + analysis_software + bigdata_tool + databases + machine_learning

In [3]:
def keywords_extract(url):
    g = Goose()
    article = g.extract(url=url)
    text = article.cleaned_text
    text = re.sub("[^a-zA-Z+3]"," ", text) #get rid of things that aren't words; 3 for d3 and + for c++
    text = text.lower().split()
    stops = set(stopwords.words("english")) #filter out stop words in english language
    text = [w for w in text if not w in stops]
    text = list(set(text))
    keywords = [str(word) for word in text if word in overall_dict]
    return keywords

In [4]:
def keywords_f(soup_obj):
    for script in soup_obj(["script", "style"]):
        script.extract() # Remove these two elements from the BS4 object
    text = soup_obj.get_text() 
    lines = (line.strip() for line in text.splitlines()) # break into line
    chunks = (phrase.strip() for line in lines for phrase in line.split("  ")) # break multi-headlines into a line each
    text = ''.join(chunk for chunk in chunks if chunk).encode('utf-8') # Get rid of all blank lines and ends of line
    try:
        text = text.decode('utf-8') # Need this as some websites aren't formatted
    except:                                                          
        return                                                       
    text = re.sub("[^a-zA-Z+3]"," ", text)  
    text = re.sub(r"([a-z])([A-Z])", r"\1 \2", text) # Fix spacing issue from merged words
    text = text.lower()
    text = re.sub(('power bi'), 'powerbi', text) # catch power bi before tokenization
    text = text.split()  
    stop_words = set(stopwords.words("english")) # Filter out any stop words
    text = [w for w in text if not w in stop_words]
    text = list(set(text)) #only care about if a word appears, don't care about the frequency
    keywords = [str(word) for word in text if word in overall_dict] #if a skill keyword is found, return it.
    return keywords

In [5]:
base_url = "http://www.indeed.nl"    
#change the start_url can scrape different cities.
start_url = "http://www.indeed.nl/jobs?q=BI+consultant&l="
resp = requests.get(start_url)
start_soup = BeautifulSoup(resp.content, 'lxml', from_encoding='utf-8')
urls=start_soup.find_all('a',{'rel':'nofollow','target':'_blank'})
urls = [link['href'] for link in urls] 
num_found = start_soup.find(id = 'searchCount').string.encode('utf-8').split() #this returns the total number of results
num_jobs = num_found[-1].decode('utf-8').split(',')
num_jobs = [x.replace('.', '') for x in num_jobs]

if len(num_jobs)>=2:
    num_jobs = int(num_jobs[0]) * 1000 + int(num_jobs[1])
else:
    num_jobs = int(num_jobs[0])
num_pages = int(num_jobs/10) #calculates how many pages needed to do the scraping

job_keywords=[]
print('There are %d jobs found and we need to extract %d pages.'%(num_jobs,num_pages))
print('extracting first page of job searching results')

There are 796 jobs found and we need to extract 79 pages.
extracting first page of job searching results


In [6]:
# prevent the driver stopping due to the unexpectedAlertBehaviour.
webdriver.DesiredCapabilities.FIREFOX["unexpectedAlertBehaviour"] = "accept"
get_info = True
driver = webdriver.Firefox(executable_path=r'C:\Users\emily\Documents\ML course\geckodriver.exe')
# set a page load time limit so that don't have to wait forever if the links are broken.
driver.set_page_load_timeout(15)
for i in range(len(urls)):
    get_info = True
    try:
        driver.get(base_url+urls[i])
    except TimeoutException:
        get_info = False
        continue
    j = random.randint(1000,2200)/1000.0
    time.sleep(j) #waits for a random time so that the website don't consider you as a bot
    if get_info:
        soup=BeautifulSoup(driver.page_source, 'lxml')
        print('extracting %d job keywords...' % i)
        soup_string = str(soup)
        single_job = keywords_f(soup)
        print(single_job,len(soup))
        print(driver.current_url)
        job_keywords.append([driver.current_url,single_job])

extracting 0 job keywords...
['powerbi', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=0a8fa72b3f5e0492&tk=1ckp7ld6314f20m5&from=serp&alid=3&advn=2180790811884654
extracting 1 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckp7lgbn14f201h&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp7lim814f22eg&from=serp&alid=3&advn=2421485629829485
extracting 3 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckp7lke514f27q3&from=serp&alid=3&advn=4016656594538944
extracting 4 job keywords...
['sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=80161431fdfda2e7&from=serp&vjs=3
extracting 5 job keywords...
['tableau', 'power

In [7]:
for k in range(1,num_pages + 1):
#this 5 pages reopen the browser is to prevent connection refused error.
    if k%5==0.0:
        driver.quit()
        driver=webdriver.Firefox(executable_path=r'C:\Users\emily\Documents\ML course\geckodriver.exe')
        driver.set_page_load_timeout(15)

    current_url = start_url + "&start=" + str(k*10)
    print('extracting %d page of job searching results...' % k)
    resp = requests.get(current_url)
    current_soup = BeautifulSoup(resp.content, 'lxml')
    current_urls = current_soup.findAll('a',{'rel':'nofollow','target':'_blank'})
    current_urls = [link['href'] for link in current_urls]
    
    for i in range(len(current_urls)):
        get_info = True
        try:
            driver.get(base_url + current_urls[i])
        except TimeoutException:
            get_info = False
            continue 
        j = random.randint(1500,3200)/1000.0
        time.sleep(j) #waits for a random time
        if get_info:
            soup=BeautifulSoup(driver.page_source, 'lxml')
            print('extracting %d job keywords...' % i)
            single_job = keywords_f(soup)
            print(single_job,len(soup))
            print(driver.current_url)
            job_keywords.append([driver.current_url,single_job])
            
# use driver.quit() not driver.close() can get rid of the opening too many files error.
driver.quit()
skills_dict = [w[1] for w in job_keywords]
dict={}
for words in skills_dict:
    for word in words:
        if not word in dict:
            dict[word]=1
        else:
            dict[word]+=1
Result = pd.DataFrame()
Result['Skill'] = dict.keys()
Result['Count'] = dict.values()
Result['Ranking'] = Result['Count']/float(len(job_keywords))

extracting 1 page of job searching results...
extracting 0 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckp7mjef14f250i&from=serp&alid=3&advn=2421485629829485
extracting 1 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckp7mn7714f27q7&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp7mq6i14f26rp&from=serp&alid=3&advn=2421485629829485
extracting 3 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=39a4a955833d121d&from=serp&vjs=3
extracting 4 job keywords...
['powerbi', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=0e5d26889264c639&from=serp&vjs=3
extracting 5 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=0759d

extracting 1 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp7rj1614f212k&from=serp&alid=3&advn=2421485629829485
extracting 2 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckp7rm2814f20jg&from=serp&alid=3&advn=4016656594538944
extracting 3 job keywords...
['hadoop'] 1
https://www.indeed.nl/vacature-bekijken?jk=f4d29a2daf1cc2f2&from=serp&vjs=3
extracting 4 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=0d0c2dfa922daf3a&from=serp&vjs=3
extracting 5 job keywords...
['tableau', 'powerbi', 'alteryx', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=d0d256a314eb346d&from=serp&vjs=3
extracting 6 job keywords...
['sql', 'java'] 1
https://www.indeed.nl/vacature-bekijken?jk=89e52e24e1dd5c3c&from=serp&vjs=3
extracting 7 job keywords...
['tableau', 'sql', 'java', 'powerbi', 'spss'] 1
https://www.indeed.nl/vacature-bekijken?jk=9c33ec392d1c87b5&from=serp&vjs=3
e

extracting 0 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckp80ssi14ko3gl&from=serp&alid=3&advn=2421485629829485
extracting 1 job keywords...
['powerbi', 'sql', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=6d028a6a0c87e41c&q=BI+consultant&from=web&advn=2180790811884654&sjdu=XGcMNTSedHgn69ge8Rv0-QczTG3nlC0EU2f2klO71b63VlKMcsXkzpKWOnuUTRA28Y-JNPk6g2f94QTYfUqzrRFHKK5gkv3k4Jv2v0R8T9NZ3fASCkljWXRWvynqtS9y2vQaw46jDDkaMfs5GwNu36PqQA1LV3jhFCCDzCMd938&acatk=1ckp8109714ko1u9&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 2 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckp814f914ko68h&from=serp&alid=3&advn=4016656594538944
extracting 3 job keywords...
['excel'] 1
https://www.indeed.nl/vacature-bekijken?jk=48f90d2de7b1c71d&from=serp&vjs=3
extracting 4 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=3ca743ea67c14ff8&from=serp&vjs=3
extracting 5 job keywo

extracting 10 page of job searching results...
extracting 0 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckp86as514a80o7&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 1 job keywords...
['powerbi', 'sql', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=6d028a6a0c87e41c&q=BI+consultant&from=web&advn=2180790811884654&sjdu=XGcMNTSedHgn69ge8Rv0-QczTG3nlC0EU2f2klO71b63VlKMcsXkzpKWOnuUTRA28Y-JNPk6g2f94QTYfUqzrRFHKK5gkv3k4Jv2v0R8T9NZ3fASCkljWXRWvynqtS9y2vQaw46jDDkaMfs5GwNu36PqQA1LV3jhFCCDzCMd938&acatk=1ckp86ehm14a84k0&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp86h9r14a852q&from=serp&alid=3&advn=2421485629829485
extracting 3 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https:

extracting 1 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckp8bdss14a8652&from=serp&alid=3&advn=2421485629829485
extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckp8bhee14a85va&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 3 job keywords...
['powerbi', 'sql', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=6d028a6a0c87e41c&q=BI+consultant&from=web&advn=2180790811884654&sjdu=XGcMNTSedHgn69ge8Rv0-QczTG3nlC0EU2f2klO71b63VlKMcsXkzpKWOnuUTRA28Y-JNPk6g2f94QTYfUqzrRFHKK5gkv3k4Jv2v0R8T9NZ3fASCkljWXRWvynqtS9y2vQaw46jDDkaMfs5GwNu36PqQA1LV3jhFCCDzCMd938&acatk=1ckp8blbh14a84cd&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 4 job keywords...
['tableau', 'powerbi', 'qlik', 'hadoop'] 1
https://www.indeed.nl/vacature-bekijken?jk=fda7b89

extracting 2 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckp8gpl714l13kl&from=serp&alid=3&advn=4016656594538944
extracting 3 job keywords...
['sap', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=4621b506cb9d9f35&from=serp&vjs=3
extracting 4 job keywords...
['tableau', 'sql', 'r', 'powerbi', 'spss', 'python'] 1
https://www.indeed.nl/vacature-bekijken?jk=6034b3a0399934c6&from=serp&vjs=3
extracting 5 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=484f1137409b00b1&from=serp&vjs=3
extracting 6 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=ca111457457de5ff&from=serp&vjs=3
extracting 7 job keywords...
['powerbi', 'sql', 'java'] 1
https://www.indeed.nl/vacature-bekijken?jk=979b5b6b7fc5fb86&from=serp&vjs=3
extracting 8 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=b29d8e45b3a00ac7&from=serp&vjs=3
extracting 9 job keywords...
['tableau', 'sas'] 1
https://www

extracting 2 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckp8lm1614l12ue&from=serp&alid=3&advn=4016656594538944
extracting 3 job keywords...
['sql', 'sas', 'qlikview', 'hadoop', 'spark', 'python'] 1
https://www.indeed.nl/vacature-bekijken?jk=1e79d8d6c5142e51&from=serp&vjs=3
extracting 4 job keywords...
['qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=62927bfdc0ce985a&from=serp&vjs=3
extracting 5 job keywords...
['sql', 'hadoop', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=e1b22a4874ae8f6f&from=serp&vjs=3
extracting 6 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=bf8d8715adb6c35c&from=serp&vjs=3
extracting 7 job keywords...
['sap', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=c5ac405c7c28f013&from=serp&vjs=3
extracting 8 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=88fc72c96ed32d53&from=serp&vjs=3
extracting 9 job keywords...
['sap'] 1
https://www.indee

extracting 3 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=8a3f088e50ca681f&from=serp&vjs=3
extracting 4 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=ab8877ee1c813311&from=serp&vjs=3
extracting 5 job keywords...
['sap', 'scala'] 1
https://www.indeed.nl/vacature-bekijken?jk=82de68a845eb6ffe&from=serp&vjs=3
extracting 6 job keywords...
['tableau', 'powerbi', 'python'] 1
https://www.indeed.nl/vacature-bekijken?jk=ffe19a8801b562b5&from=serp&vjs=3
extracting 7 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=b12fe97db4783756&from=serp&vjs=3
extracting 8 job keywords...
['sql', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=37558756b4f03a83&from=serp&vjs=3
extracting 9 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=ca111457457de5ff&from=serp&vjs=3
extracting 10 job keywords...
['sap', 'powerbi'] 1
https://www.indeed.nl/vacature-bekijken?jk=6fb97f075920ed3f&from=serp&vjs=3
extracting 11 job keywords...
['tableau

extracting 6 job keywords...
['sql', 'sas'] 1
https://www.indeed.nl/vacature-bekijken?jk=1788fca96fb00887&from=serp&vjs=3
extracting 7 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=12726558bc737da2&from=serp&vjs=3
extracting 8 job keywords...
['powerbi', 'sap', 'excel', 'saas'] 1
https://www.indeed.nl/vacature-bekijken?jk=e0b357d0d9918505&from=serp&vjs=3
extracting 9 job keywords...
['powerbi', 'sql', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=6a1c52cb3f9feada&from=serp&vjs=3
extracting 10 job keywords...
['sql', 'sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=5ab590fbe641022d&from=serp&vjs=3
extracting 11 job keywords...
['sql', 'powerbi'] 1
https://www.indeed.nl/vacature-bekijken?jk=b1e17fe790d580b9&from=serp&vjs=3
extracting 12 job keywords...
['sap'] 1
https://www.indeed.nl/cmp/Matchfirst/jobs/SAP-Hana-Consultant-Cloud-99426e363a39fe37?vjs=3
extracting 13 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp91eh21

extracting 6 job keywords...
['powerbi'] 1
https://www.indeed.nl/vacature-bekijken?jk=33dded5562e4a5dc&from=serp&vjs=3
extracting 7 job keywords...
['tableau', 'powerbi', 'sql', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=089e0bb7821b04fb&from=serp&vjs=3
extracting 8 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=ca111457457de5ff&from=serp&vjs=3
extracting 9 job keywords...
['sql', 'sas'] 1
https://www.indeed.nl/vacature-bekijken?jk=4fe7f175a0375e61&from=serp&vjs=3
extracting 10 job keywords...
['sap', 'qlikview'] 1
https://www.indeed.nl/vacature-bekijken?jk=1ee2f181353ce1da&from=serp&vjs=3
extracting 11 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=4ed1a58e3479e12a&from=serp&vjs=3
extracting 12 job keywords...
[] 1
https://www.indeed.nl/cmp/Zendkracht---Recruiter/jobs/Business-Consultant-acbe493d367e6826?vjs=3
extracting 13 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp96ms816vp0cl&from=serp&alid=3&a

extracting 7 job keywords...
['sql', 'cassandra', 'sap', 'r', 'hadoop', 'spark', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=485f94b05636fe33&from=serp&vjs=3
extracting 8 job keywords...
['sap', 'excel'] 1
https://www.indeed.nl/vacature-bekijken?jk=5eb9823d0d1f60a4&from=serp&vjs=3
extracting 9 job keywords...
['excel'] 1
https://www.indeed.nl/vacature-bekijken?jk=b7d2daae9456f9c0&from=serp&vjs=3
extracting 10 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=b59f6efba3822f7b&from=serp&vjs=3
extracting 11 job keywords...
['sap', 'qlikview'] 1
https://www.indeed.nl/vacature-bekijken?jk=5224b28454e3a08c&from=serp&vjs=3
extracting 12 job keywords...
['sql', 'scala', 'java'] 1
https://www.indeed.nl/vacature-bekijken?jk=b4781be97bc6b8e4&from=serp&vjs=3
extracting 13 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckp9bsue9tomchc&from=serp&alid=3&advn=2421485629829485
extracting 14 job keywords...
['powerbi', 'sql'] 1
https://ww

extracting 13 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckp9gl8n9tom8r7&from=serp&alid=3&advn=2421485629829485
extracting 14 job keywords...
['powerbi', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=0a8fa72b3f5e0492&tk=1ckp9gpao9tome2l&from=serp&alid=3&advn=2180790811884654
extracting 35 page of job searching results...
extracting 0 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckp9h6j1948gbet&from=serp&alid=3&advn=2421485629829485
extracting 1 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckp9hagf948ge6c&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp9hdnc948gee6&from=serp&alid=3&advn=2421485629829485
ext

extracting 1 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckp9lvpf948g8kn&from=serp&alid=3&advn=4016656594538944
extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp9m2pa948g98f&from=serp&alid=3&advn=2421485629829485
extracting 3 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=c4bd112c4981ab85&from=serp&vjs=3
extracting 4 job keywords...
['sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=ec166d85b63074ce&from=serp&vjs=3
extracting 5 job keywords...
['sql', 'sas'] 1
https://www.indeed.nl/vacature-bekijken?jk=1d920086dd4fbbbc&from=serp&vjs=3
extracting 6 job keywords...
['sap', 'qlikview'] 1
https://www.indeed.nl/vacature-bekijken?jk=1ee2f181353ce1da&from=serp&vjs=3
extracting 7 job keywords...
['sql', 'r', 'hadoop', 'spark', 'python', 'nosql'] 1
https://www.indeed.nl/vacature-bekijken?jk=f4c5d24a7eeab127&from=serp&vjs=3
extracting 8 job ke

extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckp9rb269noi8fk&from=serp&alid=3&advn=2421485629829485
extracting 3 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=6d70a7073b045c04&from=serp&vjs=3
extracting 4 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=cefa9bf9dcb2eb56&from=serp&vjs=3
extracting 5 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=e81d97d8eafeb119&from=serp&vjs=3
extracting 6 job keywords...
['powerbi', 'saas'] 1
https://www.indeed.nl/vacature-bekijken?jk=44fd93a18d76bfd0&from=serp&vjs=3
extracting 7 job keywords...
['powerbi'] 1
https://www.indeed.nl/vacature-bekijken?jk=f2f8954f3db51d4a&from=serp&vjs=3
extracting 8 job keywords...
['sap', 'qlikview'] 1
https://www.indeed.nl/vacature-bekijken?jk=649dc8de233c9299&from=serp&vjs=3
extracting 9 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=2efb7fa0d769fba0&from=serp&vjs=3
extracting 10 job k

extracting 5 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=7e3eead62d12fabb&from=serp&vjs=3
extracting 6 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=1368ba8cb3453f93&from=serp&vjs=3
extracting 7 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=26d5ace02e1aba98&from=serp&vjs=3
extracting 8 job keywords...
['sql', 'sas'] 1
https://www.indeed.nl/vacature-bekijken?jk=a4accfa44f3629d5&from=serp&vjs=3
extracting 9 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=1f2c8335af76e950&from=serp&vjs=3
extracting 10 job keywords...
['powerbi', 'sap', 'sas', 'qlikview'] 1
https://www.indeed.nl/vacature-bekijken?jk=b281302340ad3e23&from=serp&vjs=3
extracting 11 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=54c3d10dbce15dcf&from=serp&vjs=3
extracting 12 job keywords...
['powerbi', 'sql', 'python'] 1
https://www.indeed.nl/vacature-bekijken?jk=b563b6db590cdfaa&from=serp&vjs=3
extracting 13 job keywor

extracting 6 job keywords...
['tableau', 'sql', 'powerbi'] 1
https://www.indeed.nl/cmp/ItaQ/jobs/Dwh-Developer-6e563232a2051426?vjs=3
extracting 7 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=f05c77a72ec2db76&from=serp&vjs=3
extracting 8 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=127839d03fcb3041&from=serp&vjs=3
extracting 9 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=5c09f60518ae7f9a&from=serp&vjs=3
extracting 10 job keywords...
['powerbi'] 1
https://www.indeed.nl/vacature-bekijken?jk=197cbe52676ac53f&from=serp&vjs=3
extracting 11 job keywords...
['tableau', 'powerbi', 'sql', 'spark', 'python', 'qlik', 'hadoop'] 1
https://www.indeed.nl/vacature-bekijken?jk=3b8f0928331376d2&from=serp&vjs=3
extracting 12 job keywords...
['sas'] 1
https://www.indeed.nl/vacature-bekijken?jk=90d32e377fa0154d&from=serp&vjs=3
extracting 13 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=d1de895432f7739e&from=serp&vjs=3
extractin

extracting 6 job keywords...
['spss', 'excel', 'powerpoint', 'r'] 1
https://www.indeed.nl/vacature-bekijken?t=hbo+stage+marketing+intelligence&jk=eef4dac2c86e216e&vjs=3
extracting 7 job keywords...
['sql', 'sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=31798d97168694e3&from=serp&vjs=3
extracting 8 job keywords...
['scala', 'c++', 'java', 'pig', 'spark', 'python', 'hadoop'] 1
https://www.indeed.nl/vacature-bekijken?jk=735e4e8fb261b3c8&from=serp&vjs=3
extracting 9 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=fdb30f611867116b&from=serp&vjs=3
extracting 10 job keywords...
['powerbi', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=7b0224477517ca6c&from=serp&vjs=3
extracting 11 job keywords...
['sql', 'java'] 1
https://www.indeed.nl/vacature-bekijken?jk=589c52679946e70b&from=serp&vjs=3
extracting 12 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=6371d78b1ab29691&from=serp&vjs=3
extracting 13 job keywords...
['sql'] 1
https://www.indeed.nl/va

extracting 6 job keywords...
['sql', 'java', 'spark', 'python', 'hadoop', 'qlikview', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=daa9c3231cb52f94&from=serp&vjs=3
extracting 7 job keywords...
['tableau', 'spotfire', 'sap', 'sql', 'java', 'qlikview'] 1
https://www.indeed.nl/vacature-bekijken?jk=cf72fea2f439baaf&from=serp&vjs=3
extracting 8 job keywords...
['powerbi', 'sql', 'python'] 1
https://www.indeed.nl/vacature-bekijken?jk=1b31b6d8ca9f1d7a&from=serp&vjs=3
extracting 9 job keywords...
['sap'] 1
https://www.indeed.nl/cmp/Pearson-Carter/jobs/Senior-Dynamics-Nav-Systems-Analyst-c9034d75677e6b54?vjs=3
extracting 10 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=5340251d01aee06a&from=serp&vjs=3
extracting 11 job keywords...
['powerbi', 'java'] 1
https://www.indeed.nl/vacature-bekijken?jk=9a80bcded3c3d22e&from=serp&vjs=3
extracting 12 job keywords...
['java', 'excel'] 1
https://www.indeed.nl/vacature-bekijken?jk=f8c6d7d1f50f6c45&from=serp&vjs=3
extracting 13 jo

extracting 8 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=514a0db0a218f42a&from=serp&vjs=3
extracting 9 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=67f3b5b1a971d3b0&from=serp&vjs=3
extracting 10 job keywords...
['sql', 'java', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=8c93816931bcf441&from=serp&vjs=3
extracting 11 job keywords...
['tableau', 'sap', 'sql', 'sas', 'qlikview'] 1
https://www.indeed.nl/vacature-bekijken?jk=92f70084ac01f730&from=serp&vjs=3
extracting 12 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=94243a1ee9e8d4cc&from=serp&vjs=3
extracting 13 job keywords...
['powerbi'] 1
https://www.indeed.nl/vacature-bekijken?jk=afe584bb8e66c8ee&from=serp&vjs=3
extracting 14 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckpamr0v14g20v5&from=serp&alid=3&advn=4016656594538944
extracting 15 job keywords...
['powerbi', 'sql'] 1
https://www.indee

extracting 10 job keywords...
['qlikview', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=ce3affee6d600da2&from=serp&vjs=3
extracting 11 job keywords...
['sas', 'qlikview'] 1
https://www.indeed.nl/vacature-bekijken?jk=aa6ef9f84b0cb7c1&from=serp&vjs=3
extracting 12 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=6e15072d6fc875c1&from=serp&vjs=3
extracting 13 job keywords...
['r'] 1
https://www.indeed.nl/vacature-bekijken?jk=e320cc4f82fae4a6&from=serp&vjs=3
extracting 14 job keywords...
['powerbi', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=0a8fa72b3f5e0492&tk=1ckparr9u14g263l&from=serp&alid=3&advn=2180790811884654
extracting 15 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckparu5c14g27li&from=serp&alid=3&advn=2421485629829485
extracting 60 page of job searching results...
extracting 0 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu

extracting 14 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckpb18bp14is6nv&from=serp&alid=3&advn=4016656594538944
extracting 15 job keywords...
['powerbi', 'sql', 'r'] 1
https://www.indeed.nl/vacature-bekijken?jk=6d028a6a0c87e41c&q=BI+consultant&from=web&advn=2180790811884654&sjdu=XGcMNTSedHgn69ge8Rv0-QczTG3nlC0EU2f2klO71b63VlKMcsXkzpKWOnuUTRA28Y-JNPk6g2f94QTYfUqzrRFHKK5gkv3k4Jv2v0R8T9NZ3fASCkljWXRWvynqtS9y2vQaw46jDDkaMfs5GwNu36PqQA1LV3jhFCCDzCMd938&acatk=1ckpb1bo314is5u9&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 63 page of job searching results...
extracting 0 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckpb1fj214is7ni&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 1 job keywords...
[] 1
http

extracting 15 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckpb6i6m9mkvcil&from=serp&alid=3&advn=4016656594538944
extracting 66 page of job searching results...
extracting 0 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckpb6mco9mkvb2i&from=serp&alid=3&advn=2421485629829485
extracting 1 job keywords...
['powerbi', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=0a8fa72b3f5e0492&tk=1ckpb6q9s9mkv92u&from=serp&alid=3&advn=2180790811884654
extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckpb6t1f9mkvfkf&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 3 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckpb70id9mkv9ij&from=ser

extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckpbbt5s9mkvetq&from=serp&alid=3&advn=2421485629829485
extracting 3 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckpbc0589mkvemr&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 4 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=1e384df09e56da7d&from=serp&vjs=3
extracting 5 job keywords...
['qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=64b8ba1a38ca2f11&from=serp&vjs=3
extracting 6 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=8bbfa99b5ee7ed5f&from=serp&vjs=3
extracting 7 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=bd109fa36586d460&from=serp&vjs=3
extracting 8 job keywords...
['d3'] 1
https://www.indeed.nl/vacature-bekijke

extracting 4 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=1e384df09e56da7d&from=serp&vjs=3
extracting 5 job keywords...
['qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=64b8ba1a38ca2f11&from=serp&vjs=3
extracting 6 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=8bbfa99b5ee7ed5f&from=serp&vjs=3
extracting 7 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=bd109fa36586d460&from=serp&vjs=3
extracting 8 job keywords...
['d3'] 1
https://www.indeed.nl/vacature-bekijken?jk=2d975a53d2a1d1c7&from=serp&vjs=3
extracting 9 job keywords...
['sap'] 1
https://www.indeed.nl/vacature-bekijken?jk=95522cc7a7f7da01&from=serp&vjs=3
extracting 10 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=1684513730eda297&from=serp&vjs=3
extracting 11 job keywords...
['sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=a702aa1e060fe28a&from=serp&vjs=3
extracting 12 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=d1f05d42d965b68

extracting 10 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=1684513730eda297&from=serp&vjs=3
extracting 11 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=6eabe4165a87eae8&from=serp&vjs=3
extracting 12 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=8070b0b598c14014&from=serp&vjs=3
extracting 13 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=d1f05d42d965b684&from=serp&vjs=3
extracting 14 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckpbnlj99p12d12&from=serp&alid=3&advn=4016656594538944
extracting 15 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckpbnp7s9p12c6n&pub=4a1b367933fd867b19b072952f68dceb&vjs=3
extracting 76 page of job searching results...

extracting 15 job keywords...
['powerbi', 'sql'] 1
https://www.indeed.nl/vacature-bekijken?jk=0a8fa72b3f5e0492&tk=1ckpbsrdm9p12bq0&from=serp&alid=3&advn=2180790811884654
extracting 79 page of job searching results...
extracting 0 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=7593cc50f485ec4f&tk=1ckpbsv4b9p12fc0&from=serp&alid=3&advn=2421485629829485
extracting 1 job keywords...
['tableau', 'alteryx', 'sap', 'qlik'] 1
https://www.indeed.nl/vacature-bekijken?jk=b0205cf54b42b821&tk=1ckpbt1ra9p12c4j&from=serp&alid=3&advn=4016656594538944
extracting 2 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=209af06ed43c644b&tk=1ckpbt51f9p12bab&from=serp&alid=3&advn=2421485629829485
extracting 3 job keywords...
[] 1
https://www.indeed.nl/vacature-bekijken?jk=9482038fe93b422b&q=BI+consultant&from=web&advn=6797582934259388&sjdu=jxdfeuh3wd52_PYAqFwGBk4EOSonXrGJz8uaxN9dKP9WYR_7-G4n0TR2YRcFFhQyZDWJrhZwqUk2lnwWOErFXSFN46R8cXKbc_0LrZM1ZWQ&acatk=1ckpbt8fd9p12fas&pub=4a1b3679

In [8]:
Result.to_csv("skills_for_BIconsultant.csv")