# LinkedIn Webscraper

## Part I
Getting Data from LinkedIn

In [1]:
from bs4 import BeautifulSoup
from datetime import datetime 
import pandas
import requests
from sqlalchemy import create_engine

In [2]:
# The two fixed halves of the search URL; a job title is inserted between them.
base_url = "https://de.linkedin.com/jobs/"
extended_url = "-stellen?position=1&pageNum=0"

# Job titles to search for, one request per entry.
positions = [
    "SAP-Entwickler/SAP-Berater",
    "IT-Projektmanager",
    "Softwareentwickler",
    "Business Intelligence Analyst",
    "IT-Controller",
    "IT-Berater",
    "Produktmanager",
    "App-Entwickler",
    "Anwendungsentwickler",
    "Datenbankspezialist",
    "ERP Manager",
]

In [3]:
# Without a timeout, requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10
# Fixed column order, so the frame keeps its schema even when nothing was scraped.
JOB_COLUMNS = ['title', 'company', 'location', 'link', 'Date', 'Timestamp']


def _text_or_default(post, tag, css_class, default='unknown'):
    """Return the stripped text of the first matching tag inside `post`.

    `find` returns None when the tag is absent; in that case `default` is
    returned instead of raising AttributeError.
    """
    element = post.find(tag, class_=css_class)
    return element.text.strip() if element is not None else default


def _parse_post(post):
    """Convert one job card into a flat record stamped with the parse time.

    Missing text fields become 'unknown'; a missing link becomes None.
    """
    anchor = post.find('a', class_='base-card__full-link')
    # Read the clock once so Date and Timestamp describe the same instant
    # (two separate now() calls can straddle midnight).
    scraped_at = datetime.now()
    return {
        'title': _text_or_default(post, 'h3', 'base-search-card__title'),
        'company': _text_or_default(post, 'h4', 'base-search-card__subtitle'),
        'location': _text_or_default(post, 'span', 'job-search-card__location'),
        'link': anchor.get('href') if anchor is not None else None,
        'Date': scraped_at.date(),
        'Timestamp': scraped_at.time(),
    }


joblistings = []

for position in positions:
    url = f'{base_url}{position}{extended_url}'
    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        # Treat HTTP error responses as failures rather than parsing the error page.
        page.raise_for_status()
    except requests.RequestException as error:
        # Best effort: one failing search term must not discard the others.
        print(f'Skipping {position!r}: {error}')
        continue
    soup = BeautifulSoup(page.content, 'html.parser')
    posts = soup.find_all('div', class_='base-card')
    joblistings.extend(_parse_post(post) for post in posts)

df_joblistings = pandas.DataFrame(joblistings, columns=JOB_COLUMNS)
df_joblistings

Unnamed: 0,title,company,location,link,Date,Timestamp
0,Digitalisierungsmanager SAP (m/w/d),hanfried Personaldienstleistungen GmbH,Hamburg,https://de.linkedin.com/jobs/view/digitalisier...,2023-02-22,10:08:18.130752
1,SAP Consultant/Developer (m/w/d) PLM und Workflow,Kärcher,Winnenden,https://de.linkedin.com/jobs/view/sap-consulta...,2023-02-22,10:08:18.130752
2,IT Solution Architect SAP CO (w/m/d),STIHL,Fellbach,https://de.linkedin.com/jobs/view/it-solution-...,2023-02-22,10:08:18.130752
3,Junior SAP Consultant ABAP - IT Beratung / Sof...,amiconsult GmbH,Karlsruhe,https://de.linkedin.com/jobs/view/junior-sap-c...,2023-02-22,10:08:18.131753
4,SAP UI5/Fiori Entwickler/Berater (m/w/d) - Ham...,Washington Frank International,Oststeinbek,https://de.linkedin.com/jobs/view/sap-ui5-fior...,2023-02-22,10:08:18.131856
...,...,...,...,...,...,...
256,Manager* Demands IT Business Applications,BioNTech SE,Mainz,https://de.linkedin.com/jobs/view/manager-dema...,2023-02-22,10:08:31.109738
257,D365 ERP Specialist (f/m/d),EIGENSONNE,Berlin,https://de.linkedin.com/jobs/view/d365-erp-spe...,2023-02-22,10:08:31.109738
258,ERP-Projektmanager,Brunel,Bremerhaven,https://de.linkedin.com/jobs/view/erp-projektm...,2023-02-22,10:08:31.109738
259,"Senior Manager, Business Applications, Technology",South Pole,München,https://de.linkedin.com/jobs/view/senior-manag...,2023-02-22,10:08:31.109738


## Part II

Saving Data to a Database

In [4]:
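# Disabled: uncomment to append the scraped listings to the `job` table
# (the connection URL expects a local MySQL server and the pymysql driver).
# connection = create_engine('mysql+pymysql://root:@localhost/businessintelligence')
# df_joblistings.to_sql('job', con=connection, index=False, if_exists='append')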