In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [14]:
url = "https://remoteok.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}


response = requests.get(url, headers=headers)

if response.status_code == 200:
    print("Page fetched successfully!")
else:
    print("An error occurred while fetching the page")

Page fetched successfully!


In [15]:
# Parse the HTML content
soup = BeautifulSoup(response.text, "html.parser")

# Print the page title to confirm it worked
print(soup.title.text)
# print(soup.prettify())

Remote Jobs in Programming, Design, Sales and more #OpenSalaries


In [29]:
searchOptions = soup.find_all("div", class_="suggested-filter")
options = []
cleanedOptions = []

for option in searchOptions:
    original_text = option.text.strip()
    options.append(original_text)
    cleanedOptions.append(original_text[2:].strip().lower())

print("Original Options:", options)
print("Cleaned Options:", cleanedOptions)

Original Options: ['🤓 Engineer', '💼 Executive', '👵 Senior', '🤓 Developer', '💰 Finance', '♾️ Sys Admin']
Cleaned Options: ['engineer', 'executive', 'senior', 'developer', 'finance', 'sys admin']


In [31]:
for option in cleanedOptions:
    print(f"Searching for {option} jobs...")

    try:
        response = requests.get(f"{url}/remote-{option}-jobs", headers=headers)
        response.raise_for_status()  # Raises HTTPError for bad responses (4xx and 5xx)
        soup = BeautifulSoup(response.text, "html.parser")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching jobs for {option}: {e}")
        continue  # Skip this option and move to the next one

    try:
        jobCards = soup.find_all("tr", class_="job")
        if not jobCards:
            print(f"No job cards found for {option}. Skipping...")
            continue

        csv_data = []

        # Loop through each job element
        for job in jobCards:
            try:
                # Extract job title
                title = (
                    job.find("td", class_="company_and_position")
                    .find("h2")
                    .text.strip()
                )

                # Extract company name
                company = (
                    job.find("td", class_="company_and_position")
                    .find("h3")
                    .text.strip()
                )

                # Extract location
                location = (
                    job.find("td", class_="company_and_position")
                    .find("div", class_="location")
                    .text.strip()
                )

                # Extract salary
                salary = (
                    job.find("td", class_="company_and_position")
                    .find("div", class_="location")
                    .find_next_sibling("div", class_="location tooltip")
                    .text.strip()
                )

                # Extract tags
                tags = job.find("td", class_="tags").find_all("h3")
                tagList = [tag.text.strip() for tag in tags]

                csv_data.append(
                    {
                        "job_title": title,
                        "company": company,
                        "location": location,
                        "salary": salary,
                        "tags": ",".join(list(set(tagList))),
                    }
                )
            except AttributeError as e:
                print(f"Error parsing a job card for {option}: {e}")
                continue  # Skip this job card and move to the next one

        # Save to CSV if data exists
        if csv_data:
            df = pd.DataFrame(csv_data)
            try:
                df.to_csv(
                    rf"F:\amad-study\scraping-study\task-2\{option}_jobs.csv",
                    index=False,
                )
                print(f"Saved {option} jobs to CSV.")
            except Exception as e:
                print(f"Error saving {option} jobs to CSV: {e}")
        else:
            print(f"No valid data found for {option}. Skipping CSV save...")

    except Exception as e:
        print(f"An unexpected error occurred while processing {option}: {e}")

Searching for engineer jobs...
Saved engineer jobs to CSV.
Searching for executive jobs...
Error parsing a job card for executive: 'NoneType' object has no attribute 'text'
Error parsing a job card for executive: 'NoneType' object has no attribute 'text'
Saved executive jobs to CSV.
Searching for senior jobs...
Error parsing a job card for senior: 'NoneType' object has no attribute 'text'
Saved senior jobs to CSV.
Searching for developer jobs...
Error parsing a job card for developer: 'NoneType' object has no attribute 'text'
Error parsing a job card for developer: 'NoneType' object has no attribute 'text'
Error parsing a job card for developer: 'NoneType' object has no attribute 'text'
Error parsing a job card for developer: 'NoneType' object has no attribute 'text'
Error parsing a job card for developer: 'NoneType' object has no attribute 'text'
Error parsing a job card for developer: 'NoneType' object has no attribute 'text'
Error parsing a job card for developer: 'NoneType' object 