In [None]:
import time
import pandas as pd
from apify_client import ApifyClient
from dotenv import load_dotenv
import os

# Load environment variables from the .env file.
load_dotenv()
API_TOKEN = os.getenv("APIFY_TOKEN")

# NOTE(review): a hard-coded Apify API token used to be commented out here.
# It has been removed from the source; treat it as leaked — revoke and rotate
# it in the Apify console, and keep the token only in the .env file.


# Fail fast when the token is missing or empty.
if not API_TOKEN:
    raise ValueError("APIFY_TOKEN не найден. Добавь его в .env файл.")

# NOTE(review): not referenced anywhere in the visible code — confirm it is
# still needed.
Apify_user_ID="kp2mogw2lAhASokqy"
# Identifier of the Apify actor started by run_stepstone_scraper.
ACTOR_ID = "easyapi/stepstone-jobs-scraper"

# Job-title queries (English and German). scrape_all starts one scraper run
# per entry, and each resulting row is tagged with the entry that produced it.
SEARCH_TERMS = [
    "Business Transformation Analyst",
    "Digital Process Analyst",
    "AI Project Manager",
    "AI Product Manager",
    "AI Governance Analyst",
    "AI Automation Specialist",
    "Prompt Engineer",
    "Junior Automation Specialist",
    "Prozessmanager/in",
    "KI-Manager/in",
    "Prozessmanager/in – RPA",
    "KI-Prompter",
    "Prozessmanager/in – RPA (Junior)"
]

# Keys that extract_fields copies from every scraped job record; a key that is
# missing from a record yields None in the output row.
# NOTE(review): presumably these match the actor's output schema — verify
# against a real dataset item.
FIELDS = [
    "title",
    "company",
    "company_url",
    "location",
    "employmentType",
    "seniority",
    "job_url",
    "skills",
    "languages",
    "education",
    "experience",
]

# Shared Apify API client, constructed with API_TOKEN when the module is loaded.
client = ApifyClient(API_TOKEN)


def run_stepstone_scraper(search_term: str, location: str = "Germany", max_items: int = 150):
    """Run the StepStone actor for one search term and return its dataset items.

    Args:
        search_term: Query passed to the actor as the ``search`` input.
        location: Value of the actor's ``location`` input. Defaults to
            "Germany", which was previously hard-coded.
        max_items: Value of the actor's ``maxItems`` input. Defaults to 150,
            which was previously hard-coded.

    Returns:
        The list of items read from the run's default dataset.

    Raises:
        RuntimeError: If the client returns no run object.
    """
    print(f"▶ Запуск скрапинга для: {search_term}")

    run = client.actor(ACTOR_ID).call(
        run_input={
            "search": search_term,
            "location": location,
            "maxItems": max_items,
            "includeDescription": True,
            "includeSkills": True,
        }
    )

    # call() may hand back no run object; report that clearly instead of
    # failing with "'NoneType' object is not subscriptable" below.
    if run is None:
        raise RuntimeError(f"Actor не вернул данные о запуске для: {search_term}")

    # A run can finish without succeeding (failed, timed out, aborted). Warn,
    # but still return whatever reached the dataset so partial results are kept.
    status = run.get("status")
    if status is not None and status != "SUCCEEDED":
        print(f"⚠ Запуск для '{search_term}' завершился со статусом: {status}")

    items = client.dataset(run["defaultDatasetId"]).list_items().items
    print(f"✔ Получено {len(items)} вакансий для: {search_term}")

    return items


def extract_fields(job: dict, search_term: str, fields=None):
    """Build one flat output row from a scraped job record.

    Args:
        job: Job record as returned by the scraper.
        search_term: Query that produced this record; stored under the
            ``search_term`` key of the row.
        fields: Optional iterable of keys to copy from ``job``. When omitted
            (or None) the module-level ``FIELDS`` list is used.

    Returns:
        A dict with one entry per requested key (None where ``job`` lacks the
        key), followed by ``search_term``.
    """
    selected = FIELDS if fields is None else fields
    # dict.get keeps the row shape stable: absent keys become None.
    row = {key: job.get(key) for key in selected}
    row["search_term"] = search_term
    return row


def scrape_all(search_terms=None):
    """Scrape every search term and collect the results in one DataFrame.

    Args:
        search_terms: Optional iterable of queries. When omitted (or None) the
            module-level ``SEARCH_TERMS`` list is used.

    Returns:
        A pandas DataFrame with one row per scraped job. The columns are the
        entries of ``FIELDS`` followed by ``search_term``; they are present
        even when no rows were collected.
    """
    terms = SEARCH_TERMS if search_terms is None else search_terms
    all_rows = []

    for term in terms:
        try:
            jobs = run_stepstone_scraper(term)
            all_rows.extend(extract_fields(job, term) for job in jobs)
        except Exception as e:
            # Best-effort: report the failure, pause briefly, and move on to
            # the next term instead of aborting the whole scrape.
            print(f"⚠ Ошибка при обработке '{term}': {e}")
            time.sleep(3)

    # Explicit columns keep the schema stable when every term failed or
    # returned nothing; for non-empty results the frame is unchanged.
    return pd.DataFrame(all_rows, columns=[*FIELDS, "search_term"])


# Script entry point: scrape all search terms, then print the first rows of
# the result and the total row count.
if __name__ == "__main__":
    df = scrape_all()
    print(df.head())
    print(f"Всего строк: {len(df)}")
