In [None]:
# ==============================================
# Cell 1: Install dependencies (uncomment if needed)
# ==============================================
# !pip install requests pandas sqlalchemy psycopg2-binary

In [None]:
# ==============================================
# Cell 2: Imports
# ==============================================
import requests
import pandas as pd
from sqlalchemy import create_engine, text
from datetime import datetime
import time

In [None]:
# ==============================================
# Cell 3: PostgreSQL setup
# ==============================================
DB_USER = 'postgres'
DB_PASSWORD = 'admin'
DB_HOST = 'localhost'
DB_NAME = 'TestDB'

engine = create_engine(f'postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}/{DB_NAME}')
print("Connected to PostgreSQL")

In [None]:
# ==============================================
# Cell 4: Create jobs table (drop first if necessary)
# ==============================================
# Uncomment this if you want a fresh table
# with engine.connect() as conn:
#     conn.execute(text("DROP TABLE IF EXISTS jobs"))
#     conn.commit()

create_table_query = """
CREATE TABLE IF NOT EXISTS jobs (
    job_id SERIAL PRIMARY KEY,
    remote_job_id VARCHAR(100) UNIQUE,
    title VARCHAR(500),
    company VARCHAR(255),
    location_geo VARCHAR(100),
    job_type VARCHAR(100),
    salary_min VARCHAR(100),
    salary_max VARCHAR(100),
    salary_currency VARCHAR(20),
    description TEXT,
    pub_date TIMESTAMP,
    job_url VARCHAR(500),
    source VARCHAR(100)
);
"""
with engine.connect() as conn:
    conn.execute(text(create_table_query))
    conn.commit()
print("Table jobs ready")


In [31]:







# ==============================================
# Cell 5: Function to fetch jobs from Jobicy API
# ==============================================
API_URL = "https://jobicy.com/api/v2/remote-jobs"

def fetch_jobs(max_pages=100, per_page=100, geo="Singapore", delay=1):
    all_jobs = []
    for page in range(1, max_pages+1):
        params = {
            "geo": geo,
            "count": per_page,
            "page": page
        }
        resp = requests.get(API_URL, params=params)
        if resp.status_code != 200:
            print(f"Failed to fetch page {page}: {resp.status_code}")
            break
        data = resp.json()
        jobs = data.get("jobs", [])
        if not jobs:
            print(f"No jobs on page {page}, stopping.")
            break
        all_jobs.extend(jobs)
        print(f"Fetched page {page}, total jobs so far: {len(all_jobs)}")
        time.sleep(delay)
    return all_jobs

# ==============================================
# Cell 6: Fetch jobs
# ==============================================
jobs = fetch_jobs(max_pages=100, per_page=1000, geo="Singapore", delay=1)
print(f"Total jobs fetched: {len(jobs)}")

# ==============================================
# Cell 7: Insert jobs into PostgreSQL
# ==============================================
with engine.connect() as conn:
    for job in jobs:
        job_data = {
            "remote_job_id": job.get("id"),
            "title": job.get("jobTitle"),
            "company": job.get("companyName"),
            "location_geo": job.get("jobGeo"),
            "job_type": ", ".join(job.get("jobType")) if job.get("jobType") else None,
            "salary_min": job.get("annualSalaryMin"),
            "salary_max": job.get("annualSalaryMax"),
            "salary_currency": job.get("salaryCurrency"),
            "description": job.get("jobDescription"),
            "pub_date": job.get("pubDate"),
            "job_url": job.get("url"),
            "source": "Jobicy"
        }
        insert = text("""
            INSERT INTO jobs (
                remote_job_id, title, company, location_geo,
                job_type, salary_min, salary_max, salary_currency,
                description, pub_date, job_url, source
            ) VALUES (
                :remote_job_id, :title, :company, :location_geo,
                :job_type, :salary_min, :salary_max, :salary_currency,
                :description, :pub_date, :job_url, :source
            )
            ON CONFLICT (remote_job_id) DO NOTHING
        """)
        conn.execute(insert, job_data)
    conn.commit()
print("Jobs inserted into DB")

# ==============================================
# Cell 8: Load into Pandas and preview
# ==============================================
df = pd.read_sql("SELECT * FROM jobs ORDER BY pub_date DESC NULLS LAST", engine)
df.head()



Connected to PostgreSQL
Table jobs ready
Failed to fetch page 1: 400
Total jobs fetched: 0
Jobs inserted into DB


Unnamed: 0,job_id,remote_job_id,title,company,location_geo,job_type,salary_min,salary_max,salary_currency,description,pub_date,job_url,source
0,1,137268,"Executive Assistant, Sales",CommandLink,"Argentina, Brazil, Costa Rica, Mexico, Phi...",Full-Time,,,,<p><u><b><strong>About Command|Link</strong></...,2025-12-02 12:07:18,https://jobicy.com/jobs/137268-executive-assis...,Jobicy
1,2,137267,US Client Bookkeeper &amp;#8211; Philippine-based,The Back Room,Philippines,Full-Time,,,,<p>Step into a role where accuracy and attenti...,2025-12-02 12:07:16,https://jobicy.com/jobs/137267-us-client-bookk...,Jobicy
2,3,137266,Solutions Consultant,Education Perfect,Australia,Full-Time,,,,<p>Education Perfect is an EdTech platform des...,2025-12-02 12:07:15,https://jobicy.com/jobs/137266-solutions-consu...,Jobicy
3,4,137265,Staff Asset Control Delivery Manager,GE Vernova,Australia,Full-Time,,,,<h2><b><b><b>Job Description Summary</b></b></...,2025-12-02 12:07:13,https://jobicy.com/jobs/137265-staff-asset-con...,Jobicy
4,5,137264,Executive Assistant,The Back Room,Philippines,Full-Time,,,,<p>Want to be part of a community that focuses...,2025-12-02 12:07:11,https://jobicy.com/jobs/137264-executive-assis...,Jobicy
