# In [12]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd
import re
import time

def clean_text(text):
    """Return *text* with non-ASCII characters removed and outer whitespace trimmed.

    Every run of characters outside the 7-bit ASCII range (code points above
    0x7F) is deleted, then leading and trailing whitespace is stripped from
    what remains. Interior whitespace is left untouched.
    """
    ascii_only = re.sub(r'[^\x00-\x7F]+', '', text)
    return ascii_only.strip()

def _tag_text(tag, default=''):
    """Return the stripped text of a BeautifulSoup tag, or *default* if *tag* is None."""
    return tag.get_text(strip=True) if tag is not None else default


def _parse_job_row(job, timestamp):
    """Build the output record for one ``<tr class="job">`` row.

    Elements or attributes that are absent from the row fall back to an
    empty string (or 'Remote' for the location) rather than raising.
    """
    location_div = job.find("div", class_="location")
    if location_div is not None:
        location = clean_text(location_div.get_text(" ", strip=True))
    else:
        location = 'Remote'

    time_tag = job.find("time")
    date_posted = time_tag.get("datetime", '') if time_tag is not None else ''

    # The row carries a site-relative path in its data-href attribute.
    href = job.get("data-href")
    job_link = "https://remoteok.com" + href if href is not None else ''

    return {
        "Title": _tag_text(job.find("h2", itemprop="title")),
        "Company": _tag_text(job.find("h3", itemprop="name")),
        "Location": location,
        "Date Posted": date_posted,
        "Job Link": job_link,
        "Timestamp": timestamp,
    }


def scrape_remoteok(keyword="software engineer", wait_time=30, timeout=30):
    """Scrape RemoteOK job rows for *keyword* and save them to CSV and Excel.

    The listing page ``https://remoteok.com/remote-<keyword>-jobs`` (spaces in
    *keyword* replaced by hyphens) is fetched once, every ``<tr class="job">``
    row is turned into a record, and the records are written to
    ``<keyword>_remote_jobs.csv`` and ``<keyword>_remote_jobs.xlsx`` (spaces
    replaced by underscores) in the current working directory.

    Args:
        keyword: Search phrase used to build the URL and the output filenames.
        wait_time: Seconds to sleep after the response has been received.
        timeout: Seconds ``requests`` may wait on the server before giving up,
            so an unresponsive host cannot hang the call forever.

    Returns:
        None. Progress and results are reported via ``print``.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection failure, timeout, ...).
    """
    url = f"https://remoteok.com/remote-{keyword.replace(' ', '-')}-jobs"
    headers = {
        "User-Agent": "Mozilla/5.0"
    }

    print(f"🌐 Loading page for '{keyword}'... waiting {wait_time} seconds for content to load")
    response = requests.get(url, headers=headers, timeout=timeout)

    # An error page contains no job rows; report the real cause instead of
    # silently falling through to "no jobs".
    if not response.ok:
        print(f"❌ Request failed with HTTP status {response.status_code}.")
        return

    # NOTE: the response body is already fully downloaded at this point, so
    # this pause cannot make additional content appear in it. It is kept only
    # to preserve the existing wait_time behaviour.
    time.sleep(wait_time)

    soup = BeautifulSoup(response.text, "html.parser")
    listings = soup.find_all("tr", class_="job")

    print(f"🔍 Found {len(listings)} job cards")

    # One timestamp for the whole batch: it records when the scrape ran.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    jobs = [_parse_job_row(job, timestamp) for job in listings]

    if jobs:
        df = pd.DataFrame(jobs)
        filename_base = keyword.replace(" ", "_")
        # utf-8-sig writes a BOM at the start of the CSV file.
        df.to_csv(f"{filename_base}_remote_jobs.csv", index=False, encoding='utf-8-sig')
        df.to_excel(f"{filename_base}_remote_jobs.xlsx", index=False)
        print(f"✅ {len(jobs)} jobs saved to CSV and Excel.")
    else:
        print("❌ No jobs extracted.")

# Kick off a scrape for software-engineer roles, pausing 30 seconds after the fetch.
scrape_remoteok(keyword="software engineer", wait_time=30)


# Output:
# 🌐 Loading page for 'software engineer'... waiting 30 seconds for content to load
# 🔍 Found 20 job cards
# ✅ 20 jobs saved to CSV and Excel.
