# Imports

In [48]:
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS

# 1) Start by performing a GET request on the url above and convert the response into a BeautifulSoup object.  

In [4]:
# Listing page of the fake job board that this notebook scrapes.
URL = 'https://realpython.github.io/fake-jobs/'

In [5]:
# Fetch the listing page. requests waits indefinitely by default, so an
# explicit timeout keeps this cell from hanging if the server never answers.
response = requests.get(URL, timeout=10)

In [80]:
# Sanity check: show the HTTP status code of the listing-page response.
response.status_code

200

In [8]:
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and emits a warning), so the parsed tree could
# differ from one machine to another.
soup = BS(response.text, 'html.parser')

## 1) a. Use the .find method to find the tag containing the first job title ("Senior Python Developer"). Hint: can you find a tag type and/or a class that could be helpful for extracting this information? Extract the text from this title. 

In [13]:
# The first <h2> carrying the "title" class holds the first job's title.
soup.find('h2', class_='title').get_text()

'Senior Python Developer'

## 1) b. Now, use what you did for the first title, but extract the job title for all jobs on this page. Store the results in a list.  

In [27]:
# Collect every title tag on the page. find_all is the current method name;
# findAll is the deprecated BeautifulSoup 3 spelling.
job_title_tags = soup.find_all('h2', class_='title')

In [28]:
# Pull the visible text out of each title tag, keeping page order.
job_titles = [title_tag.get_text() for title_tag in job_title_tags]

## 1) c. Finally, extract the companies, locations, and posting dates for each job. For example, the first job has a company of "Payne, Roberts and Davis", a location of "Stewartbury, AA", and a posting date of "2021-04-08". Ensure that the text that you extract is clean, meaning no extra spaces or other characters at the beginning or end.

In [35]:
# Exploration: look inside the first <div class="card-content"> and confirm
# that the job title can be found by searching within that card.
soup.find('div', class_='card-content').find('h2', class_='title')

<h2 class="title is-5">Senior Python Developer</h2>

In [37]:
# One <div class="card-content"> per job posting. find_all replaces the
# deprecated findAll alias.
jobs_elements = soup.find_all('div', class_='card-content')

In [83]:
def get_job_description(url, timeout=10):
    """Fetch a job's detail page and return its description paragraph.

    Parameters
    ----------
    url : str
        Address of the job detail page.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout, requests would wait indefinitely.

    Returns
    -------
    str or None
        Text of the first ``<p>`` inside the first ``<div class="content">``.
        ``None`` when the response status is not 200 or when the page does
        not contain that structure.

    Raises
    ------
    requests.exceptions.RequestException
        If the request itself fails (connection error, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    # Explicit parser so the result does not depend on which parsers are installed.
    page = BS(response.text, 'html.parser')

    # Guard each lookup: .find returns None when nothing matches, and calling
    # .find / .text on None would raise AttributeError.
    content = page.find('div', class_='content')
    paragraph = content.find('p') if content is not None else None
    return paragraph.text if paragraph is not None else None

In [84]:
# Build one record per job card. Every text field is stripped so that no
# stray whitespace or newlines from the HTML layout end up in the data.
jobs = []
for job in jobs_elements:
    # The second footer link (index 1) is the one used as the "Apply" url.
    # Look it up once and reuse it for both fields that need it.
    apply_url = job.find_all('a', class_='card-footer-item', href=True)[1]['href']
    job_dict = {
        'title': job.find('h2', class_='title').text.strip(),
        'company': job.find(class_='company').text.strip(),
        'location': job.find(class_='location').text.replace('\n', '').strip(),
        'posting_date': job.find('time').text.strip(),
        'apply_from_dict': apply_url,
        # One HTTP request per job: this is the slow part of the loop.
        'description': get_job_description(apply_url),
    }
    jobs.append(job_dict)

In [85]:
# Preview only the first few records; displaying the whole list floods the
# notebook with one long description per job.
jobs[:3]

[{'title': 'Senior Python Developer',
  'company': 'Payne, Roberts and Davis',
  'location': 'Stewartbury, AA',
  'posting_date': '2021-04-08',
  'apply_from_dict': 'https://realpython.github.io/fake-jobs/jobs/senior-python-developer-0.html',
  'description': 'Professional asset web application environmentally friendly detail-oriented asset. Coordinate educational dashboard agile employ growth opportunity. Company programs CSS explore role. Html educational grit web application. Oversea SCRUM talented support. Web Application fast-growing communities inclusive programs job CSS. Css discussions growth opportunity explore open-minded oversee. Css Python environmentally friendly collaborate inclusive role. Django no experience oversee dashboard environmentally friendly willing to learn programs. Programs open-minded programs asset.'},
 {'title': 'Energy engineer',
  'company': 'Vasquez-Davidson',
  'location': 'Christopherville, AA',
  'posting_date': '2021-04-08',
  'apply_from_dict': 'h

## 1) d. Take the lists that you have created and combine them into a pandas DataFrame. 

In [86]:
# Each dict in `jobs` becomes one row; the dict keys become the column names.
jobs_df = pd.DataFrame(jobs)

In [87]:
# Preview the first rows of the assembled table.
jobs_df.head()

Unnamed: 0,title,company,location,posting_date,apply_from_dict,description
0,Senior Python Developer,"Payne, Roberts and Davis","Stewartbury, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/se...,Professional asset web application environment...
1,Energy engineer,Vasquez-Davidson,"Christopherville, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/en...,Party prevent live. Quickly candidate change a...
2,Legal executive,"Jackson, Chambers and Levy","Port Ericaburgh, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/le...,Administration even relate head color. Staff b...
3,Fitness centre manager,Savage-Bradley,"East Seanview, AP",2021-04-08,https://realpython.github.io/fake-jobs/jobs/fi...,Tv program actually race tonight themselves tr...
4,Product manager,Ramirez Inc,"North Jamieview, AP",2021-04-08,https://realpython.github.io/fake-jobs/jobs/pr...,Traditional page a although for study anyone. ...


# 2) Next, add a column that contains the url for the "Apply" button. Try this in two ways.

#### Done above in 1.c: the "Apply" url of each job is stored under the `apply_from_dict` key of its dictionary.

## 2) a. First, use the BeautifulSoup find_all method to extract the urls. 

In [59]:
# Every element carrying the "card-footer" class, in page order.
# find_all replaces the deprecated findAll alias.
all_card_footers = soup.find_all(class_='card-footer')

In [61]:
# Index 1 selects the second link of each footer, which is the one used as
# the "Apply" url. find_all replaces the deprecated findAll alias.
apply_urls = [footer.find_all('a')[1]['href'] for footer in all_card_footers]

In [63]:
# NOTE(review): this cell's execution count (63) is older than the cell that
# creates jobs_df (86), and the final jobs_df.head() output has no
# 'apply_bs_findall' column. Re-run this cell after jobs_df is built
# (Restart Kernel -> Run All) so the column is actually present.
jobs_df['apply_bs_findall'] = apply_urls

## 2) b. Next, get those same urls in a different way. Examine the urls and see if you can spot the pattern of how they are constructed. Then, build the url using the elements you have already extracted. Ensure that the urls that you created match those that you extracted using BeautifulSoup. Warning: You will need to do some string cleaning and prep in constructing the urls this way. For example, look carefully at the urls for the "Software Engineer (Python)" job and the "Scientist, research (maths)" job.

In [91]:
url_base = 'https://realpython.github.io/fake-jobs/jobs/'

# Turn each title into a url slug: lower-case it, collapse every run of
# characters that are not letters or digits (spaces, commas, parentheses,
# slashes, ...) into a single hyphen, and drop hyphens left at either end.
# Replacing only spaces and parentheses leaves the comma in titles such as
# "Scientist, research (maths)" and builds a url that does not exist.
title_slugs = (
    pd.Series(job_titles)
    .str.lower()
    .str.replace(r'[^a-z0-9]+', '-', regex=True)
    .str.strip('-')
)

# The url ends with the job's position on the listing page.
jobs_df['apply_build'] = [
    url_base + slug + '-' + str(position) + '.html'
    for position, slug in enumerate(title_slugs)
]

# Check against the urls scraped from the page: the number of rows where the
# built url differs from the extracted one should be 0.
(jobs_df['apply_build'] != jobs_df['apply_from_dict']).sum()

# 3) Finally, we want to get the job description text for each job.

## 3) a. Start by looking at the page for the first job, https://realpython.github.io/fake-jobs/jobs/senior-python-developer-0.html. Using BeautifulSoup, extract the job description paragraph. 

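#### Done above: `get_job_description` (defined in 1.c) fetches a job page and extracts the description paragraph; it is called while building the dictionaries in 1.c.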

In [89]:
# Second route to the descriptions: apply the helper to the stored urls.
# This issues one HTTP request per row, so the cell takes a while to run.
jobs_df['description_from_column'] = jobs_df['apply_from_dict'].apply(get_job_description)

In [92]:
# Preview the table again with the newly added columns.
jobs_df.head()

Unnamed: 0,title,company,location,posting_date,apply_from_dict,description,description_from_column,apply_build
0,Senior Python Developer,"Payne, Roberts and Davis","Stewartbury, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/se...,Professional asset web application environment...,Professional asset web application environment...,https://realpython.github.io/fake-jobs/jobs/se...
1,Energy engineer,Vasquez-Davidson,"Christopherville, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/en...,Party prevent live. Quickly candidate change a...,Party prevent live. Quickly candidate change a...,https://realpython.github.io/fake-jobs/jobs/en...
2,Legal executive,"Jackson, Chambers and Levy","Port Ericaburgh, AA",2021-04-08,https://realpython.github.io/fake-jobs/jobs/le...,Administration even relate head color. Staff b...,Administration even relate head color. Staff b...,https://realpython.github.io/fake-jobs/jobs/le...
3,Fitness centre manager,Savage-Bradley,"East Seanview, AP",2021-04-08,https://realpython.github.io/fake-jobs/jobs/fi...,Tv program actually race tonight themselves tr...,Tv program actually race tonight themselves tr...,https://realpython.github.io/fake-jobs/jobs/fi...
4,Product manager,Ramirez Inc,"North Jamieview, AP",2021-04-08,https://realpython.github.io/fake-jobs/jobs/pr...,Traditional page a although for study anyone. ...,Traditional page a although for study anyone. ...,https://realpython.github.io/fake-jobs/jobs/pr...
