## **Web Scraping Using Functions**

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# Function to download HTML content
def download_html(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The decoded response body.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or an HTTP error status (via ``raise_for_status``).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text

# Function to parse HTML content and extract data
def _extract_text(parent, tag, css_class):
    """Return the stripped text of the first matching child, or '' if absent."""
    node = parent.find(tag, class_=css_class)
    return node.text.strip() if node is not None else ''


def parse_html(html_content):
    """Extract job listings from the HTML of the jobs page.

    Args:
        html_content: HTML source of the page as a string.

    Returns:
        A list of dicts with the keys 'Title', 'Company', 'Location' and
        'Date', one per "card-content" div inside the element whose id is
        'ResultsContainer'. The list is empty when that container is not
        present. A field whose element is missing from a card is stored as
        an empty string instead of aborting the whole parse.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    results_container = soup.find(id='ResultsContainer')

    # find() returns None when nothing matches; treat that as "no jobs".
    if results_container is None:
        return []

    return [
        {
            'Title': _extract_text(job_element, "h2", "title is-5"),
            'Company': _extract_text(job_element, "h3", "subtitle is-6 company"),
            'Location': _extract_text(job_element, "p", "location"),
            'Date': _extract_text(job_element, "p", "is-small has-text-grey"),
        }
        for job_element in results_container.find_all("div", class_="card-content")
    ]

# Function to save data to a CSV file
def save_to_csv(data, filename):
    """Write ``data`` to ``filename`` as CSV, without the DataFrame index column.

    Args:
        data: Anything ``pd.DataFrame`` accepts, e.g. a list of row dicts.
        filename: Destination path handed to ``DataFrame.to_csv``.
    """
    pd.DataFrame(data=data).to_csv(path_or_buf=filename, index=False)

# Main function
def main():
    """Download the jobs page, parse it, and save the listings to Products.csv.

    A download failure is reported on stdout and ends the run. When parsing
    yields no jobs, a notice is printed and no file is written.
    """
    page_url = 'https://realpython.github.io/fake-jobs/'  # URL of the jobs page

    try:
        page_source = download_html(page_url)
    except requests.exceptions.RequestException as err:
        print(f'Error downloading HTML content: {err}')
        return

    listings = parse_html(page_source)

    # Guard clause: nothing to write when the parse came back empty.
    if not listings:
        print('No jobs found.')
        return

    save_to_csv(listings, 'Products.csv')
    print(f'Successfully saved {len(listings)} jobs to Products.csv')

# Call main() only when __name__ is '__main__', i.e. not when imported as a module.
if __name__ == '__main__':
    main()

Successfully saved 100 jobs to Products.csv


## **Web Scraping Using a Class**

In [None]:
class JobScraper:
    """Download a jobs page, extract its listings, and export them as JSON.

    Typical use is ``JobScraper(url).run(output_filename)``; the individual
    steps are also available as separate methods.
    """

    def __init__(self, url, timeout=10):
        """Store the target URL and request timeout.

        Args:
            url: Address of the jobs page.
            timeout: Seconds to wait for the server before giving up.
        """
        self.url = url
        # Without a timeout, requests waits indefinitely on a stalled server.
        self.timeout = timeout
        # Parsed listings: dicts with 'Title', 'Company', 'Location', 'Date'.
        self.jobs = []

    def download_html(self):
        """Fetch ``self.url`` and return the response body as text.

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts, or an HTTP error status.
        """
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(self.url, headers=headers, timeout=self.timeout)
        response.raise_for_status()
        return response.text

    @staticmethod
    def _text_of(parent, tag, css_class):
        """Return the stripped text of the first matching child, or '' if absent."""
        node = parent.find(tag, class_=css_class)
        return node.text.strip() if node is not None else ''

    def parse_html(self, html_content):
        """Append every job card found in ``html_content`` to ``self.jobs``.

        Cards are the "card-content" divs inside the element whose id is
        'ResultsContainer'. Nothing is appended when that container is
        missing. A field whose element is absent from a card is stored as an
        empty string instead of aborting the parse.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        results_container = soup.find(id='ResultsContainer')

        # find() returns None when nothing matches; treat that as "no jobs".
        if results_container is None:
            return

        for job_element in results_container.find_all("div", class_="card-content"):
            self.jobs.append({
                'Title': self._text_of(job_element, "h2", "title is-5"),
                'Company': self._text_of(job_element, "h3", "subtitle is-6 company"),
                'Location': self._text_of(job_element, "p", "location"),
                'Date': self._text_of(job_element, "p", "is-small has-text-grey"),
            })

    def save_to_json(self, filename):
        """Write ``self.jobs`` to ``filename`` as a JSON array of job records.

        ``orient='records'`` emits one object per job with no index. The
        previous ``index=False`` with the default orient is not a supported
        combination: older pandas versions raise ValueError for it.
        """
        df = pd.DataFrame(self.jobs)
        df.to_json(filename, orient='records')

    def run(self, output_filename):
        """Download, parse, and save the listings to ``output_filename``.

        A download failure is reported on stdout and ends the run. When no
        jobs are found, a notice is printed and no file is written.
        """
        # Start from a clean list so calling run() again does not duplicate rows.
        self.jobs = []

        try:
            html_content = self.download_html()
        except requests.exceptions.RequestException as e:
            print(f'Error downloading HTML content: {e}')
            return

        self.parse_html(html_content)

        if self.jobs:
            self.save_to_json(output_filename)
            print(f'Successfully saved {len(self.jobs)} jobs to {output_filename}')
        else:
            print('No jobs found.')

# Script entry: build a JobScraper for the jobs page and run it, writing to Products.json.
if __name__ == '__main__':
    url = 'https://realpython.github.io/fake-jobs/'  # URL of the jobs page
    output_filename = 'Products.json'  # path passed to JobScraper.run
    
    scraper = JobScraper(url)
    scraper.run(output_filename)

Successfully saved 100 jobs to Products.json
