In [1]:
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd
import requests, re

In [2]:
jobs = []
for pag in range(1, 43):
    res = requests.get('https://stackoverflow.com/jobs?med=site-ui&ref=jobs-tab&sort=i&pg=%d'%pag)
    soup = BeautifulSoup(res.text, 'lxml')
    
    # Jobs list html
    jobs_html = soup.find('div', class_='listResults').findAll('div', class_='-job')
    for job_html in jobs_html:
        # job title
        title_html = job_html.find('h2').find('a', class_='s-link')
        job = {'title': title_html.text if title_html else None}
        
        # job company and location
        company, location = tuple(job_html.find('div', class_='-company').select('span'))
        job['company'], job['location'] = company.text, location.text
        
        # job posted
        job['posted'] = job_html.find('div', class_='-title').find('span', class_='pt2').text
        
        # request date
        job['request_date'] = datetime.now()
        
        # job salary and perks
        perks_html = job_html.find('div', class_='-perks')
        if perks_html:
            perks = []
            for p in perks_html.select('span'):

                if '-salary' in str(p):
                    job['salary'] = p.text.strip()
                else:
                    perks.append(p.text.strip())

            job['perks'] = ', '.join(perks)
            
        
        # job tags
        tags_html = job_html.find('div', class_='-tags')
        if tags_html:
            tags = [a.text for a in tags_html.select('a')]
            job['tags'] = ', '.join(tags)  
        
        jobs.append(job)
    

In [3]:
df = pd.DataFrame(jobs)

In [4]:
df.shape

(1050, 8)

In [5]:
df.head(10)

Unnamed: 0,company,location,perks,posted,request_date,salary,tags,title
0,Kindred AI,"\r\n - \r\nToronto, ON, Canada ...",,< 1h ago,2018-11-15 13:42:44.056522,C$90k - 110k,"scrum, agile",Technical Program Manager
1,Articulate Inc.,\r\n - \r\nNo office location,Remote,< 1h ago,2018-11-15 13:42:44.058503,,"node.js, amazon-web-services, terraform, docke...",Platform Engineer
2,Challengermode,"\r\n - \r\nStockholm, Sweden",Visa sponsor,< 1h ago,2018-11-15 13:42:44.059503,SEK 420k - 624k\r\n\r\n ...,"c#, asp.net, angularjs, javascript, .net",Fullstack Web Developer at Esports Tech Startup
3,Compliance Solutions Strategies,"\r\n - \r\nNew York, NY",,< 1h ago,2018-11-15 13:42:44.061502,,"c#, .net, javascript, node.js",Front to back Developer
4,Celonis SE,"\r\n - \r\nMünchen, Germany",,< 1h ago,2018-11-15 13:42:44.063500,,"java, spring, java-ee, rest, springboot",Senior Java Developer
5,Boyle Software,"\r\n - \r\nNew York, NY",,< 1h ago,2018-11-15 13:42:44.064499,$90k - 120k,"javascript, reactjs, angular, node.js, vue.js",Front End Developer
6,Velocity Resource Group,"\r\n - \r\nFranklin Lakes, NJ",Paid relocation,< 1h ago,2018-11-15 13:42:44.066498,,"java, spring, user-interface, jira, jenkins",Sr. Full Stack Software Development Engineer
7,Curotec,\r\n - \r\nNo office location,Remote,< 1h ago,2018-11-15 13:42:44.067498,,"laravel, php, javascript, html5, vue.js",Laravel and VueJS development team leader
8,Grid Dynamics,"\r\n - \r\nAtlanta, GA",Paid relocation,< 1h ago,2018-11-15 13:42:44.069496,,"java, cassandra",Sr. Java Engineer
9,Pulsify Inc,"\r\n - \r\nBoston, MA",,< 1h ago,2018-11-15 13:42:44.070496,$90k - 125k\r\n\r\n | ...,"reactjs, javascript, node.js, graphql, angular",Front-end/Full Stack Javascript/React Developer


In [6]:
df.to_csv('Stack_Overflow_Jobs.csv', index=False)