In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
from tqdm import tqdm

# Scrape Saramin job-search result pages for a keyword and export the unique
# postings to a CSV file.
#
# For each result page the script requests the search URL, parses every
# "div.item_recruit" card, extracts company / title / apply method / sectors /
# job conditions, skips postings whose (company, title) pair was already seen,
# and finally writes all rows to "job_info_saramin.csv".

collected_jobs = set()  # (company, title) pairs already seen; used to skip duplicates
data_list = []  # one row per unique posting, in the column order of the final DataFrame
total_pages = 10  # number of result pages to fetch

base_url = "https://www.saramin.co.kr/zf_user/search/recruit"

# Request settings that never change between pages are built once, outside the loop.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"
}
request_timeout = 10  # seconds; without a timeout a stalled connection would hang the crawl forever


def _text_or_na(node):
    """Return the stripped text of a parsed element, or "N/A" when it is missing."""
    return node.get_text(strip=True) if node is not None else "N/A"


# A single Session reuses the underlying connection across all page requests.
with requests.Session() as session:
    for page in tqdm(range(1, total_pages + 1), desc="크롤링 진행중", unit="페이지"):
        params = {
            "searchType": "search",
            "company_cd": "0,1,2,3,4,5,6,7,9,10",
            "searchword": "데이터분석가",
            "recruitPage": page,
            "recruitSort": "relation",
            "recruitPageCount": 40,
        }

        try:
            response = session.get(
                base_url, params=params, headers=headers, timeout=request_timeout
            )
            # Fail loudly on 4xx/5xx instead of parsing an error/block page as results.
            response.raise_for_status()
        except requests.RequestException as exc:
            # Skip only this page so rows collected so far still reach the CSV.
            print(f"⚠️ {page}페이지 요청 실패, 건너뜁니다: {exc}")
        else:
            soup = BeautifulSoup(response.text, "html.parser")

            for tag in soup.select("div.item_recruit"):
                corp_name = _text_or_na(tag.select_one("div.area_corp .corp_name a"))
                job_title = _text_or_na(tag.select_one("div.area_job .job_tit"))

                # The same posting can show up on several pages; keep the first one only.
                job_key = (corp_name, job_title)
                if job_key in collected_jobs:
                    continue
                collected_jobs.add(job_key)

                how_apply = _text_or_na(tag.select_one(".sri_btn_xs"))

                job_sector_list = tag.select(".job_sector a")
                job_sector = (
                    ", ".join(a.get_text(strip=True) for a in job_sector_list)
                    if job_sector_list
                    else "N/A"
                )

                # The condition spans are read positionally (location, experience,
                # education, employment type); pad with "N/A" when fewer than four exist.
                job_condition = [
                    span.get_text(strip=True)
                    for span in tag.select(".job_condition span")[:4]
                ]
                job_condition += ["N/A"] * (4 - len(job_condition))
                loc, p_h, degree, work_division = job_condition

                data_list.append(
                    [corp_name, job_title, how_apply, job_sector, loc, p_h, degree, work_division]
                )

        # Random 5-10 second pause between pages to avoid being blocked by the site.
        # There is nothing left to fetch after the last page, so no wait is needed there.
        if page < total_pages:
            sleep_time = random.uniform(5, 10)
            print(f"💤 {sleep_time:.2f}초 대기 중...")
            time.sleep(sleep_time)

df = pd.DataFrame(data_list, columns=["기업명", "기업공고", "지원방법", "직업 관련", "지역", "경력", "학위", "정규직 유무"])
# "utf-8-sig" writes a BOM so that Excel opens the Korean text correctly.
df.to_csv("job_info_saramin.csv", index=False, encoding="utf-8-sig")

print(f"✅ 크롤링 완료! {len(df)}개의 공고를 저장했습니다.")
