In [None]:

# Before running: point ENHANCED_TAXONOMY_PATH and OUTPUT_DIR below at locations on your own system. You will also need an Anthropic or OpenAI API key (whichever route you pick); the script prompts for it at run time.
# Requires Python 3.10+ (the annotations use `str | None`) with the `requests` library installed.
# ──────────────────────────────────────────────────────────────────────────────
#  Air‑Force “Mega‑Skills”  →  Hierarchical Taxonomy Generator  (v 2.1)
# ──────────────────────────────────────────────────────────────────────────────
import json, os, pathlib, time, requests, textwrap

# ╭──────────────────────────  PATHS  ─────────────────────────╮
# Placeholder paths — replace both with real locations before running.
# Input: the enhanced-taxonomy JSON file handed to load_taxonomy().
ENHANCED_TAXONOMY_PATH = pathlib.Path(r"C:")
# Output: directory that receives the saved prompt and generated taxonomy files.
OUTPUT_DIR = pathlib.Path(r"C:")
# Created up front (parents included) so later writes find the directory.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# ╭──────────────────────  CONFIG KNOBS  ──────────────────────╮
# Number of AFSC entries kept per category when building the prompt sample.
DEFAULT_SAMPLE_SIZE = 24
# Category names skipped entirely (Warrant officers are left out of the pilot).
EXCLUDE_CATEGORIES = ["Warrant"]
# Model identifiers sent to the Anthropic and OpenAI endpoints respectively.
CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
OPENAI_DEFAULT_MODEL = "gpt-4-turbo"
# Completion-length cap passed as `max_tokens` in both API payloads.
MAX_TOKENS = 4000

# ╭────────────────────────  HELPERS  ─────────────────────────╮
def load_taxonomy(path: pathlib.Path) -> dict:
    """Parse the UTF-8 JSON file at *path* and return the decoded object.

    A progress line is printed before the read and a confirmation after it.
    """
    print(f"📂  Loading enhanced taxonomy → {path}")
    taxonomy = json.loads(path.read_text(encoding="utf-8"))
    print("✅  Loaded")
    return taxonomy


def create_sample(data: dict,
                  sample_size: int = DEFAULT_SAMPLE_SIZE,
                  exclude: list[str] = EXCLUDE_CATEGORIES) -> dict:
    """Build a trimmed copy of the taxonomy for use in the prompt.

    The source structure is ``data["llm_optimized_structure"]`` when that key
    exists, otherwise ``data`` itself. Its ``afscCategories`` may be either a
    ``{name: category}`` dict or a list of categories carrying ``categoryName``.

    Args:
        data: Parsed taxonomy object.
        sample_size: Upper bound on the AFSC entries kept per category
            (applied as a slice of each category's ``afscs``).
        exclude: Category names to drop from the sample.

    Returns:
        ``{"metadata": ..., "afscCategories": [{"categoryName", "afscs"}, ...]}``
    """
    root = data.get("llm_optimized_structure", data)
    metadata = root["metadata"]

    # Normalise both accepted layouts to a stream of (name, category) pairs.
    if isinstance(root["afscCategories"], dict):
        named_categories = root["afscCategories"].items()
    else:
        named_categories = (
            (entry["categoryName"], entry) for entry in root["afscCategories"]
        )

    trimmed = [
        {"categoryName": label, "afscs": category["afscs"][:sample_size]}
        for label, category in named_categories
        if label not in exclude
    ]
    return {"metadata": metadata, "afscCategories": trimmed}


# ── 3.  Build LLM prompt ────────────────────────────────────
def build_prompt(sample: dict) -> str:
    """Render the taxonomy-generation prompt around a JSON dump of *sample*.

    The instruction template is dedented BEFORE the JSON is inserted.
    Pretty-printed multi-line JSON has lines that start at column 0, so
    dedenting after interpolation finds no common margin and would leave
    every template line indented by four spaces.

    Args:
        sample: JSON-serialisable object, normally the result of
            create_sample().

    Returns:
        The prompt text: template lines flush-left (relative indentation
        inside the outline preserved) with the JSON embedded unchanged in
        the fenced ``json`` block.
    """
    sample_json = json.dumps(sample, indent=2)
    template = textwrap.dedent("""
    I have a dataset containing Air‑Force Specialty Codes (AFSCs) and their
    associated skills. Create a comprehensive hierarchical taxonomy of
    *military* skills from this data.

    ▼ Representative sample (JSON)
    ```json
    {sample_json}
    ```

    REQUIREMENTS
    1. STRUCTURE
       • Level 1 – Major Domains (6‑8)            • Level 3 – Specific Skills
       • Level 2 – Skill Categories               • Level 4 – Sub‑skills (if useful)

    2. FORMAT
       • Roman I., A., 1., a. outline
       • Show relevant AFSCs in parentheses        • 8‑10 skills per category

    3. CONTENT
       • Verb‑based, action‑oriented skills
       • Prefer high‑relevance / high‑confidence (≥ 70)
       • Group synonyms; keep military context

    4. EXTRAS
       • Brief description + main verb for each skill
       • 3‑4 visualisation ideas (bullet list)
       • ≤ 120‑word methodology paragraph

    OUTPUT EXAMPLE
    I.  MAJOR DOMAIN
        A. Skill Category
           1. Specific skill (AFSCs: 1A111, 1A211)
              – sub‑skill / application
           2. …

    Keep the outline concise; max depth = 4 levels.
    """)
    # Plain replace (not str.format): the inserted JSON is full of braces and
    # must never be interpreted as format fields.
    return template.replace("{sample_json}", sample_json)


# ── 4.  API CALLS ───────────────────────────────────────────
def call_claude(prompt: str, api_key: str, retries: int = 3) -> str | None:
    if not api_key:
        print("🔑  No Anthropic key provided."); return None
    headers = {
        "x-api-key": api_key,
        "content-type": "application/json",
        "anthropic-version": "2023-06-01"
    }
    payload = {"model": CLAUDE_MODEL, "max_tokens": MAX_TOKENS,
               "messages": [{"role":"user","content": prompt}]}
    for i in range(retries):
        r = requests.post("https://api.anthropic.com/v1/messages",
                          headers=headers, json=payload, timeout=60)
        if r.status_code == 200:
            return r.json()["content"][0]["text"]
        print(f"Claude error {r.status_code}; retry {i+1}/{retries}")
        time.sleep(5*(i+1))
    return None


def call_openai(prompt: str, api_key: str, model: str) -> str | None:
    if not api_key:
        print("🔑  No OpenAI key provided."); return None
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {"model": model, "messages": [{"role":"user","content": prompt}],
               "max_tokens": MAX_TOKENS}
    r = requests.post("https://api.openai.com/v1/chat/completions",
                      headers=headers, json=payload, timeout=60)
    if r.status_code == 200:
        return r.json()["choices"][0]["message"]["content"]
    print("OpenAI error", r.status_code, r.text[:300])
    return None


def save_output(txt: str, tag: str) -> pathlib.Path:
    """Write *txt* to a timestamped file in OUTPUT_DIR and return its path.

    The file is named ``military_skills_taxonomy_<tag>_<YYYYmmdd-HHMMSS>.txt``
    (local time) and is written as UTF-8.
    """
    stamp = time.strftime("%Y%m%d-%H%M%S")
    destination = OUTPUT_DIR / f"military_skills_taxonomy_{tag}_{stamp}.txt"
    destination.write_text(txt, encoding="utf-8")
    print("💾  Saved →", destination)
    return destination


# ── 5.  DRIVER ──────────────────────────────────────────────
def main():
    """Interactive driver: build the prompt, then optionally call an LLM.

    Steps: load the taxonomy, sample it, render the prompt and save it to
    ``prompt_for_reference.txt`` in OUTPUT_DIR, then ask which route to use
    (1 = Claude, 2 = OpenAI, 3 = manual). For routes 1 and 2 the reply, if
    any, is written via save_output().
    """
    # Local import keeps this block self-contained. getpass reads the key
    # without echoing it, so it does not linger in the terminal scroll-back
    # or in saved notebook output the way an input() prompt can.
    from getpass import getpass

    data = load_taxonomy(ENHANCED_TAXONOMY_PATH)
    sample = create_sample(data)
    prompt = build_prompt(sample)
    (OUTPUT_DIR / "prompt_for_reference.txt").write_text(prompt, encoding="utf-8")

    print("\nGeneration route:")
    print("1) Claude 3‑5 Sonnet   2) OpenAI GPT‑4‑Turbo / 4o   3) Manual only")
    choice = input("Select (1/2/3): ").strip()

    if choice == "3":
        print("Prompt saved; paste into web UI as required.")
        return

    if choice == "1":
        claude_key = getpass("Paste Anthropic API key: ").strip()
        out = call_claude(prompt, claude_key)
        tag = "claude_3_5_sonnet_pilot"
    elif choice == "2":
        openai_key = getpass("Paste OpenAI key: ").strip()
        mdl = input("Model (gpt-4-turbo / gpt-4o) [default turbo]: ").strip() or OPENAI_DEFAULT_MODEL
        out = call_openai(prompt, openai_key, mdl)
        tag = mdl.replace("-", "_") + "_pilot"
    else:
        # Previously any other answer fell through every branch silently.
        print(f"Unrecognised choice {choice!r}; nothing generated (the prompt file was still saved).")
        return

    if out:
        save_output(out, tag)
    else:
        # The call helpers print the specific error; make the end state explicit.
        print("No taxonomy generated; see the messages above.")


# Script entry point: start the interactive driver only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


📂  Loading enhanced taxonomy → C:\Users\Kyle\Desktop\Grad School\IS Demo\Phase 2 Rebuild\enhanced_military_skills_taxonomy.json
✅  Loaded

Generation route:
1) Claude 3‑5 Sonnet   2) OpenAI GPT‑4‑Turbo / 4o   3) Manual only
💾  Saved → C:\Users\Kyle\Desktop\Grad School\IS Demo\Phase 2 Rebuild\military_skills_taxonomy_gpt_4_turbo_pilot_20250422-084024.txt
