In [None]:
# Sample input shaped like the ``variables`` argument of ``first_pass_pipeline``.
# ``doc_type``, ``style`` and ``page_count`` are parallel lists: the entries at
# the same index together describe one document to generate.
variables_fp = dict(
    url="https://aixplain.com/careers/senior-backend-engineer-48/",
    doc_type=["resume", "cover_letter"],
    style=["professional", "professional"],
    page_count=["2", "1"],
)

In [None]:
from custom_aixplain.pipelines import scrape_job_info, document_format, document_write
from custom_aixplain.utils import (
    process_JSON_api_response,
    process_standard_api_output,
    process_standard_agent_api_output,
)
from custom_aixplain.jd_search import (
    search_background_first_pass,
)
import json
from custom_aixplain.utils import unwrap_triple_backticks
from utils.logger_utils import log_anything
from utils.utils import save_object_to_pickle
from custom_aixplain.agents import writer_agent
from my_validators.md_check import clean_and_validate
from parsers.md_to_context import md_to_context
from my_validators.patch_guard import ensure_valid
from builders import build_docx, persist_new_doc
from typing import Any
import os

# Root of the API used to build document view links; taken from the
# ``API_BASE_URL`` environment variable, falling back to a local server.
BASE_URL = os.environ.get("API_BASE_URL", "http://localhost:8000")


def _collect_doc_requests(variables: dict[str, Any]) -> list[tuple[Any, Any, Any]]:
    """Pair every requested doc type with its style and page count.

    ``variables["doc_type"]``, ``variables["style"]`` and
    ``variables["page_count"]`` are parallel sequences: the entries at index
    ``i`` together describe one document.

    Args:
        variables: Pipeline input mapping.

    Returns:
        A list of ``(doc_type, style, page_count)`` tuples, one per entry of
        ``variables["doc_type"]``. Surplus ``style`` / ``page_count`` entries
        are ignored.

    Raises:
        KeyError: If ``doc_type`` is missing, or ``style`` / ``page_count`` is
            missing while at least one doc type is requested.
        IndexError: If ``style`` or ``page_count`` has fewer entries than
            ``doc_type``.
    """
    doc_types = list(variables["doc_type"])
    if not doc_types:
        # Nothing to generate, so the companion lists are never consulted.
        return []

    for key in ("style", "page_count"):
        available = len(variables[key])
        if available < len(doc_types):
            raise IndexError(
                f"variables[{key!r}] has {available} entries but "
                f"variables['doc_type'] has {len(doc_types)}"
            )
    return list(zip(doc_types, variables["style"], variables["page_count"]))


def _generate_document(
    doc_type: Any,
    style: Any,
    page_count: Any,
    job_info: Any,
    background_info: Any,
) -> dict[str, Any]:
    """Run the writer agent for one document and persist the resulting DOCX.

    Returns:
        A dict with the persisted document's ``doc_id`` and its ``view_url``
        (built from ``BASE_URL``).
    """
    query = (
        f"Generate a {doc_type} that fits {page_count} A-4 pages. with '{style}' style "
        f"Job: {job_info} Background: {background_info} "
        "Return only Markdown with the YAML front-matter specified in the system prompt—no code fences, no commentary."
    )
    agent_response = writer_agent.run(query=query)
    # NOTE(review): both dump paths are constant, so presumably each document
    # overwrites the previous one's dump — confirm that is intended.
    save_object_to_pickle(agent_response, file_path="pkl/raw_agent_response.pkl")
    raw_text = process_standard_agent_api_output(agent_response)
    save_object_to_pickle(raw_text, file_path="pkl/processed_agent_output.pkl")
    log_anything(raw_text, label="processed raw text")

    # Markdown -> cleaned Markdown -> context object -> validated document.
    raw_text_clean = clean_and_validate(raw_text)
    log_anything(raw_text_clean, label="raw_text_clean")
    raw_text_obj = md_to_context(raw_text_clean)
    log_anything(raw_text_obj, label="raw_text_obj")
    # The log label says "resume_doc" but the value may be any doc type.
    validated_doc = ensure_valid(raw_text_obj, allow_llm_patch=False)
    log_anything(validated_doc, label="resume_doc")

    doc_bytes = build_docx(validated_doc.model_dump())
    log_anything(doc_bytes, label="doc_bytes")
    doc_id = persist_new_doc(doc_bytes)
    log_anything(doc_id, label="doc_id")
    return {
        "doc_id": doc_id,
        "view_url": f"{BASE_URL}/api/docs/{doc_id}/view",
    }


def first_pass_pipeline(variables: dict[str, Any]) -> dict[str, Any]:
    """Build one document per requested doc type from a job posting URL.

    The input is validated first, so a malformed request fails before any
    scraping, searching or document generation is started.

    Args:
        variables: Mapping with the keys ``url`` (passed to
            ``scrape_job_info``) and the parallel sequences ``doc_type``,
            ``style`` and ``page_count``.

    Returns:
        A dict holding ``job_info``, ``background_info`` and, keyed by each
        doc type, a ``{"doc_id": ..., "view_url": ...}`` entry.

    Raises:
        KeyError: If a required key is missing from ``variables``.
        IndexError: If ``style`` or ``page_count`` has fewer entries than
            ``doc_type``.
    """
    job_url = variables["url"]
    doc_requests = _collect_doc_requests(variables)

    return_dict: dict[str, Any] = {}

    job_info = process_JSON_api_response(scrape_job_info(job_url))
    log_anything(job_info, label="job_info")
    return_dict["job_info"] = job_info

    background_info = search_background_first_pass(job_info)
    log_anything(background_info, label="background_info")
    return_dict["background_info"] = background_info

    for doc_type, style, page_count in doc_requests:
        return_dict[doc_type] = _generate_document(
            doc_type, style, page_count, job_info, background_info
        )

    log_anything(return_dict, label="return dict")
    return return_dict