In [6]:
import csv
import json
from datetime import datetime
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# ---- Search settings (edit these two lines) ----
keyword = "data analyst"
location = "bangkok"

keyword_slug = "-".join(keyword.lower().split())
location_slug = "-".join(location.lower().split())
url = f"https://th.jobsdb.com/th/{keyword_slug}-jobs"

headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/121.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9,th;q=0.8",
}

response = requests.get(url, headers=headers, timeout=20)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

records = []
seen = set()

def add_record(title, company, location_name, posted_date, job_url):
    row = {
        "title": (title or "N/A").strip(),
        "company": (company or "N/A").strip(),
        "location": (location_name or "N/A").strip(),
        "posted_date": (posted_date or "N/A").strip(),
        "url": (job_url or "N/A").strip(),
    }
    key = (row["title"], row["company"], row["url"])
    if key not in seen and row["title"] != "N/A":
        seen.add(key)
        records.append(row)

# 1) Try JSON-LD first (often contains clean structured fields)
for script in soup.select('script[type="application/ld+json"]'):
    text = script.get_text(strip=True)
    if not text:
        continue
    try:
        payload = json.loads(text)
    except Exception:
        continue

    entries = payload if isinstance(payload, list) else [payload]
    for item in entries:
        if not isinstance(item, dict):
            continue
        item_type = item.get("@type", "")
        is_job = item_type == "JobPosting" or (isinstance(item_type, list) and "JobPosting" in item_type)
        if not is_job:
            continue

        org = item.get("hiringOrganization") or {}
        org_name = org.get("name") if isinstance(org, dict) else "N/A"

        loc = item.get("jobLocation")
        location_name = "N/A"
        if isinstance(loc, list) and loc:
            loc = loc[0]
        if isinstance(loc, dict):
            address = loc.get("address")
            if isinstance(address, dict):
                location_name = address.get("addressLocality") or address.get("addressRegion") or "N/A"

        add_record(
            title=item.get("title"),
            company=org_name,
            location_name=location_name,
            posted_date=item.get("datePosted"),
            job_url=item.get("url"),
        )

# 2) Fallback parsing from job cards
for card in soup.select("article"):
    title_el = card.select_one("h1, h2, [data-automation='jobTitle']")
    company_el = card.select_one("[data-automation='jobCompany'], a[data-automation='jobCompany']")
    location_el = card.select_one("[data-automation='jobLocation']")
    date_el = card.select_one("time, [data-automation='jobListingDate']")
    link_el = card.select_one("a[href]")

    title = title_el.get_text(" ", strip=True) if title_el else "N/A"
    company = company_el.get_text(" ", strip=True) if company_el else "N/A"
    location_name = location_el.get_text(" ", strip=True) if location_el else "N/A"
    posted_date = date_el.get_text(" ", strip=True) if date_el else "N/A"
    job_url = urljoin("https://th.jobsdb.com", link_el["href"]) if link_el else "N/A"

    add_record(title, company, location_name, posted_date, job_url)

print(f"Target URL: {url}")
print(f"Found jobs: {len(records)}")

if records:
    try:
        import pandas as pd

        df = pd.DataFrame(records)
        display(df.head(20))
    except Exception:
        for idx, item in enumerate(records[:10], start=1):
            print(f"{idx}. {item['title']} | {item['company']} | {item['location']} | {item['posted_date']}")
            print(item["url"])
            print("-" * 100)

    output_file = f"jobsdb_{keyword_slug}_{datetime.now().strftime('%Y%m%d')}.csv"
    with open(output_file, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.DictWriter(f, fieldnames=["title", "company", "location", "posted_date", "url"])
        writer.writeheader()
        writer.writerows(records)

    print(f"Saved to: {output_file}")
else:
    print("No jobs found. Try another keyword/location or run again later.")

Target URL: https://th.jobsdb.com/th/data-analyst-jobs
Found jobs: 32


Unnamed: 0,title,company,location,posted_date,url
0,Data Analyst (Fraud Management),Triple T Broadband Public Company Limited,ปากเกร็ด นนทบุรี,1 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90337958?type=sta...
1,Data Analyst,"Focus Media (Thailand) Co., Ltd.",กรุงเทพมหานคร,3 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90278961?type=sta...
2,Data Analyst,GMM Grammy Public Company Limited,วัฒนา กรุงเทพมหานคร,2 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90297020?type=sta...
3,Data Analyst – Business Decision & Data Quality,Maybank Securities (Thailand) Public Company L...,กรุงเทพมหานคร,18 วันที่ผ่านมา,https://th.jobsdb.com/th/job/89949693?type=sta...
4,Data Analyst (Executive Support),Master Style Public Company Limited,ดุสิต กรุงเทพมหานคร,2 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90300290?type=sta...
5,Business Analyst (Regulatory Data),Bank of Ayudhya Public Company Limited,ยานนาวา กรุงเทพมหานคร,1 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90334018?type=sta...
6,Product Data and Reporting Officer,SCG CERAMICS PUBLIC COMPANY LIMITED,ดอนเมือง กรุงเทพมหานคร,3 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90272928?type=sta...
7,Data Analyst / นักวิเคราะห์ข้อมูล (ธุรกิจหลักท...,"SBI Thai Online Securities Co., Ltd.",ห้วยขวาง กรุงเทพมหานคร,2 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90309715?type=sta...
8,Data Analyst (Food Business),Central Group (Central Pattana Public Company ...,ปทุมวัน กรุงเทพมหานคร,16 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90012046?type=sta...
9,เจ้าหน้าที่วิเคราะห์ข้อมูล (MIS/Data Analyst),Muang Thai Life Assurance Public Company Limited,ห้วยขวาง กรุงเทพมหานคร,9 วันที่ผ่านมา,https://th.jobsdb.com/th/job/90163043?type=sta...


Saved to: jobsdb_data-analyst_20260214.csv
