In [None]:
# Finalized Military Skills Taxonomy Generator
# Uses claude-3-5-sonnet-20240620 model and a sample size of 15 AFSCs per category
# If you're reading this you'll have to change the paths to the files for your system. Additionally, you will need to add your API keys for Anthropic and OpenAI, but the code will prompt you for them.
# This code is designed to be run in a Python 3 environment with the requests library installed.
# Additionally, there is a V2 of this code that runs over a larger sample size of 25-30 AFSCs per category. I would recommend using that version if you have the time and resources to do so. It is located in the same directory as this code.

import json
import os
import requests
import time

# Define file paths
# NOTE(review): both values are bare "C:" placeholders; per the header comment they
# must be edited to point at the real taxonomy JSON file and output folder.
ENHANCED_TAXONOMY_PATH = r"C:"  # input: enhanced taxonomy JSON read by load_taxonomy_data
OUTPUT_DIR = r"C:"  # folder that receives the saved prompts and generated taxonomies
# Runs at import time: create the output folder up front (no error if it already exists).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# 1. Load and examine the enhanced taxonomy file
def load_taxonomy_data(file_path):
    """Read the taxonomy JSON file at ``file_path`` and return the parsed object.

    Prints one progress line before reading and one confirmation line after
    the contents have been parsed. The file is decoded as UTF-8.
    """
    print(f"Loading enhanced taxonomy data from {file_path}...")

    # Read the whole document as text, then parse it in one step
    with open(file_path, mode='r', encoding='utf-8') as handle:
        taxonomy = json.loads(handle.read())

    print("Data loaded successfully!")
    return taxonomy

# 2. Create a representative sample of the data without Warrant category
def create_representative_sample(data, sample_size=15, exclude_categories=None):
    """Build a reduced copy of the taxonomy with the first AFSCs of each kept category.

    Args:
        data: Parsed taxonomy dict. If it contains an "llm_optimized_structure"
            key, that nested dict is used as the source; otherwise ``data``
            itself is. The chosen source must provide "metadata" and
            "afscCategories". Categories may be either a mapping of
            ``name -> {"afscs": [...]}`` or a list of
            ``{"categoryName": ..., "afscs": [...]}`` entries.
        sample_size: Number of AFSCs taken from the front of each category
            (applied as a slice, so shorter categories are kept whole).
        exclude_categories: Category names to leave out. ``None`` (the default)
            means ``["Warrant"]``.

    Returns:
        dict: ``{"metadata": ..., "afscCategories": [{"categoryName", "afscs"}, ...]}``
        with categories in their original order.

    Raises:
        KeyError: If the chosen source lacks "metadata" or "afscCategories", or
            a kept category lacks "afscs" (or "categoryName" in list form).
    """
    # A None default avoids sharing one mutable list object across calls.
    if exclude_categories is None:
        exclude_categories = ("Warrant",)

    # If llm_optimized_structure exists, use that instead
    if "llm_optimized_structure" in data:
        print("Using the LLM-optimized structure as the base data")
        source = data["llm_optimized_structure"]
    else:
        source = data

    # Normalize both layouts to (name, category_body) pairs so a single
    # sampling path serves them; bodies are only inspected for kept categories.
    categories = source["afscCategories"]
    if isinstance(categories, dict):
        named_categories = categories.items()
    else:
        named_categories = ((entry["categoryName"], entry) for entry in categories)

    return {
        # Metadata is read only from the structure actually used, so a missing
        # top-level "metadata" no longer breaks the optimized path.
        "metadata": source["metadata"],
        "afscCategories": [
            {"categoryName": name, "afscs": body["afscs"][:sample_size]}
            for name, body in named_categories
            if name not in exclude_categories
        ],
    }

# 3. Create a structured prompt for the LLM
def create_taxonomy_prompt(sample_data):
    """Assemble the taxonomy-generation prompt around a JSON dump of ``sample_data``.

    The prompt consists of a fixed introduction, the sample serialized as
    2-space-indented JSON inside a json code fence, and a fixed set of
    structure/format/content instructions. Returns the prompt as one string.
    """

    # Fixed introduction, ending with the opening json code fence
    prompt_head = """
I have a dataset containing Air Force Specialty Codes (AFSCs) and their associated skills. I need you to create a comprehensive hierarchical taxonomy of military skills based on this data.

Here is a representative sample of the dataset:

```json
"""

    # Closing fence followed by the fixed requirements for the model
    prompt_tail = """
```

This is a portion of the dataset. The full dataset contains information for all Air Force Specialty Codes, organized by category (Officer and Enlisted), with each AFSC having associated skills and relevance scores.

Please create a structured military skills taxonomy with the following requirements:

1. STRUCTURE:
   - Level 1: Major Skill Domains (6-8 broad categories like "Technical Operations", "Leadership & Management", etc.)
   - Level 2: Skill Categories (subcategories within each domain)
   - Level 3: Specific Skills (individual skills or competencies)
   - Level 4: Specific Applications or Subskills (where appropriate)

2. FORMAT:
   - Present the taxonomy as a hierarchical outline
   - Include clear headers for each level
   - Cross-reference where appropriate to show relationships between skills across domains
   - Include at least 8-10 specific skills per skill category for comprehensive coverage

3. CONTENT REQUIREMENTS:
   - Focus on action-oriented skills (verbs/capabilities)
   - Prioritize skills with high relevance scores (70+) and "high" confidence
   - Group similar skills together even if they use different terminology
   - Emphasize military context and Air Force-specific applications
   - Include appropriate skill level indicators (basic, intermediate, advanced) where appropriate
   - Ensure coverage of both Officer and Enlisted skill requirements

4. ADDITIONAL CONSIDERATIONS:
   - This taxonomy will serve as a baseline for defining operational staff needs
   - It should enhance resource allocation and training program development
   - The taxonomy should be useful across different AFSCs and be scalable to other military services
   - Include examples of how the taxonomy could be applied in practice (e.g., for training development)

5. VISUALIZATION SUGGESTIONS:
   - Provide suggestions for 3-4 visualization types that would effectively represent this taxonomy
   - Include brief descriptions of how each visualization would help understand the taxonomy
   - Suggest specific tools or approaches for implementing these visualizations

Please format the taxonomy as follows:

I. MAJOR SKILL DOMAIN
   A. Skill Category
      1. Specific Skill (relevant AFSCs: XXX, XXX)
         - Subskill/Application
         - Additional applications
      2. Specific Skill
   B. Skill Category

II. MAJOR SKILL DOMAIN
    ...

For each skill, include:
- Brief description
- Primary associated verb(s)
- Key AFSCs where this skill is most relevant
- Confidence level (derived from the data)

Please also provide a detailed explanation of your methodology and organizing principles for creating this taxonomy, including how this taxonomy could be further developed and applied in military contexts.
"""

    # The pretty-printed sample sits between the two fixed parts
    return prompt_head + json.dumps(sample_data, indent=2) + prompt_tail

# 4. Function to call Claude API (claude-3-5-sonnet-20240620)
def call_claude_api(prompt, api_key, max_retries=3, timeout=300):
    """Send ``prompt`` to the Anthropic Messages API (claude-3-5-sonnet-20240620).

    Args:
        prompt: User message text sent as the single message of the request.
        api_key: Anthropic API key; a falsy value aborts before any request.
        max_retries: Maximum number of attempts made before giving up.
        timeout: Seconds passed to ``requests.post`` as its timeout, so a
            stalled connection fails instead of blocking forever.

    Returns:
        The text of the first content block of the response, or ``None`` when
        no key was given or every attempt failed.

    Retry policy: HTTP 529 (overloaded) waits ``10 * attempt`` seconds; other
    retryable failures (network errors, malformed bodies, 429, 5xx) wait
    ``5 * attempt`` seconds. Other 4xx responses (e.g. an invalid key) stop
    immediately because resending the same request cannot succeed. No wait is
    performed after the final attempt.
    """

    if not api_key:
        print("No Anthropic API key provided")
        return None

    # Use the verified working model
    model = "claude-3-5-sonnet-20240620"

    headers = {
        "x-api-key": api_key,
        "content-type": "application/json",
        "anthropic-version": "2023-06-01"  # Required header
    }

    data = {
        "model": model,
        "max_tokens": 4000,
        "messages": [{"role": "user", "content": prompt}]
    }

    for attempt in range(max_retries):
        retries_left = attempt < max_retries - 1
        try:
            print(f"Calling Claude API with {model} (attempt {attempt+1}/{max_retries})...")
            response = requests.post(
                "https://api.anthropic.com/v1/messages",
                headers=headers,
                json=data,
                timeout=timeout
            )

            if response.status_code == 200:
                return response.json()["content"][0]["text"]
        except (requests.exceptions.RequestException,
                ValueError, KeyError, IndexError, TypeError) as e:
            # Network failure/timeout, or a 200 body that is not the expected shape
            print(f"API call error: {str(e)}")
        else:
            status = response.status_code
            if status == 529:  # Overloaded
                if retries_left:
                    print("Claude API is currently overloaded. Waiting before retry...")
                    time.sleep(10 * (attempt + 1))  # Increasing backoff
                else:
                    print("Claude API is currently overloaded.")
                continue

            print(f"Error: {status}")
            print(response.text)
            if 400 <= status < 500 and status != 429:
                # Client-side rejection (bad key, bad request, ...): not retryable
                break

        if retries_left:
            print(f"Retrying in {5 * (attempt + 1)} seconds...")
            time.sleep(5 * (attempt + 1))

    print(f"Failed to call Claude API with model {model}. Check your API key and access.")
    return None

# 5. Function to call OpenAI's models
def call_openai_api(prompt, api_key, model="gpt-4-turbo", timeout=300):
    """Send ``prompt`` to the OpenAI chat completions endpoint and return the reply.

    Args:
        prompt: User message text sent as the single message of the request.
        api_key: OpenAI API key; a falsy value aborts before any request.
        model: Model identifier placed in the request body.
        timeout: Seconds passed to ``requests.post`` as its timeout, so a
            stalled connection fails instead of blocking forever.

    Returns:
        The content of the first choice's message, or ``None`` when no key was
        given, the server answered with a non-200 status, the request failed,
        or the response body did not have the expected shape. A single attempt
        is made; failures are printed rather than raised.
    """

    if not api_key:
        print("No OpenAI API key provided")
        return None

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 4000  # Reduced token limit
    }

    try:
        print(f"Calling OpenAI API using {model}...")
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=timeout
        )

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            print(response.text)
            return None

        return response.json()["choices"][0]["message"]["content"]
    except (requests.exceptions.RequestException,
            ValueError, KeyError, IndexError, TypeError) as e:
        # Network failure/timeout, or a 200 body that is not the expected shape
        print(f"API call error: {str(e)}")
        return None

# 6. Save the generated taxonomy
def save_taxonomy_output(output, model_name, output_dir):
    """Write the generated taxonomy text to a timestamped file.

    Args:
        output: Taxonomy text to write (encoded as UTF-8).
        model_name: Label embedded in the file name.
        output_dir: Destination folder; created (with parents) if missing.

    Returns:
        Path of the written file, named
        ``military_skills_taxonomy_<model_name>_<YYYYmmdd-HHMMSS>.txt``.
    """

    # Do not rely on the caller having created the folder beforehand.
    os.makedirs(output_dir, exist_ok=True)

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    output_path = os.path.join(output_dir, f"military_skills_taxonomy_{model_name}_{timestamp}.txt")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(output)

    print(f"Taxonomy saved to {output_path}")
    return output_path

# 7. Extract taxonomy data for manual submission
def extract_data_for_manual_submission():
    """Write a ready-to-paste prompt file for use in a chat web interface.

    Loads the taxonomy from ENHANCED_TAXONOMY_PATH, samples 15 AFSCs per
    category with "Warrant" excluded, builds the prompt, and saves it as
    "manual_submission_prompt.txt" inside OUTPUT_DIR. Returns the saved path.
    """

    # Load -> sample -> prompt, in one pass
    taxonomy = load_taxonomy_data(ENHANCED_TAXONOMY_PATH)
    prompt_text = create_taxonomy_prompt(
        create_representative_sample(taxonomy, sample_size=15, exclude_categories=["Warrant"])
    )

    # Persist the prompt so it can be copied into a browser session
    manual_path = os.path.join(OUTPUT_DIR, "manual_submission_prompt.txt")
    with open(manual_path, 'w', encoding='utf-8') as out_file:
        out_file.write(prompt_text)

    print(f"\nPrompt for manual submission created and saved to: {manual_path}")
    print("You can copy this prompt and paste it directly into the Claude or ChatGPT web interface.")

    return manual_path

# Main execution - finalized version
def generate_finalized_taxonomy():
    """Run the interactive end-to-end taxonomy generation workflow.

    Steps: load the taxonomy from ENHANCED_TAXONOMY_PATH, sample 15 AFSCs per
    category (excluding "Warrant"), build the prompt and save it to
    "enhanced_taxonomy_prompt.txt" in OUTPUT_DIR, then ask the user whether to
    call the APIs directly or only produce a prompt file for manual use.
    In API mode the user picks Claude and/or OpenAI, enters the matching key,
    and each successful response is saved via save_taxonomy_output.

    Returns:
        str: A status message; a different message is returned for the manual
        option than for the API option.
    """
    # Imported locally so this function carries its own dependency.
    from getpass import getpass

    # Step 1: Load the data
    data = load_taxonomy_data(ENHANCED_TAXONOMY_PATH)
    print(f"Loaded data with {len(data['afscCategories'])} AFSC categories")

    # Step 2: Create a sample with a more reasonable size (15 AFSCs per category), excluding Warrant
    exclude_categories = ["Warrant"]
    sample_size = 15  # Reduced from 25 to 15
    sample_data = create_representative_sample(data, sample_size=sample_size, exclude_categories=exclude_categories)

    print(f"Created enhanced sample without Warrant category ({len(sample_data['afscCategories'])} categories)")
    for category in sample_data['afscCategories']:
        print(f"  - {category['categoryName']}: {len(category['afscs'])} AFSCs")

    # Step 3: Create the prompt
    prompt = create_taxonomy_prompt(sample_data)
    print(f"Created prompt with {len(prompt)} characters")

    # Optional: Save the prompt for reference
    prompt_path = os.path.join(OUTPUT_DIR, "enhanced_taxonomy_prompt.txt")
    with open(prompt_path, 'w', encoding='utf-8') as f:
        f.write(prompt)
    print(f"Prompt saved to {prompt_path}")

    # Step 4: Select API or manual approach
    print("\n===== GENERATION OPTIONS =====")
    print("1: Use API directly (requires API keys)")
    print("2: Create prompt for manual submission to web interfaces")
    approach = input("Select option (1 or 2): ").strip()

    if approach == "2":
        # Manual approach - create the prompt file only
        extract_data_for_manual_submission()
        return "Enhanced manual submission prompt generated. Please paste it into Claude or ChatGPT web interface."

    # API approach - get API keys directly (any answer other than "2" lands here)
    print("\n===== API KEY INPUT =====")
    use_claude = input("Do you want to use Claude? (y/n): ").strip().lower() == 'y'
    use_openai = input("Do you want to use OpenAI? (y/n): ").strip().lower() == 'y'

    results = {}

    # Step 5: Call the selected APIs
    if use_claude:
        # getpass keeps the secret from being echoed to the screen or captured output
        claude_key = getpass("Paste your Anthropic API key: ").strip()
        claude_output = call_claude_api(prompt, claude_key)
        if claude_output:
            claude_path = save_taxonomy_output(claude_output, "claude_3_5_sonnet", OUTPUT_DIR)
            print(f"Claude taxonomy generated and saved to {claude_path}")
            results["claude"] = claude_path

    if use_openai:
        openai_key = getpass("Paste your OpenAI API key: ").strip()
        model_options = {
            "1": "gpt-4-turbo",
            "2": "gpt-4o",
            "3": "gpt-3.5-turbo"
        }
        # Empty input selects "1"; an unrecognized choice also falls back to gpt-4-turbo
        model_choice = input("Which model? 1: gpt-4-turbo (recommended), 2: gpt-4o, 3: gpt-3.5-turbo (default: 1): ").strip() or "1"
        model = model_options.get(model_choice, "gpt-4-turbo")

        openai_output = call_openai_api(prompt, openai_key, model)
        if openai_output:
            openai_path = save_taxonomy_output(openai_output, f"{model.replace('-', '_')}_enhanced", OUTPUT_DIR)
            print(f"Enhanced {model} taxonomy generated and saved to {openai_path}")
            results["openai"] = openai_path

    # Step 6: Summary
    print("\nTaxonomy Generation Summary:")
    print("===========================")
    for provider in results:
        print(f"✓ {provider.upper()} taxonomy generated")

    if not results:
        print("No taxonomies were generated. Please check API keys and try again.")
        print("Consider using the manual submission option instead.")

    return "Taxonomy generation process completed!"

# Run the finalized version directly
# The guard starts the interactive workflow only when this file is executed as the
# main program, not when it is imported as a module.
if __name__ == "__main__":
    generate_finalized_taxonomy()

Loading enhanced taxonomy data from C:\Users\Kyle\Desktop\Grad School\IS Demo\Phase 2 Rebuild\enhanced_military_skills_taxonomy.json...
Data loaded successfully!
Loaded data with 3 AFSC categories
Using the LLM-optimized structure as the base data
Created enhanced sample without Warrant category (2 categories)
  - Enlisted: 15 AFSCs
  - Officer: 15 AFSCs
Created prompt with 76429 characters
Prompt saved to C:\Users\Kyle\Desktop\Grad School\IS Demo\Phase 2 Rebuild\enhanced_taxonomy_prompt.txt

===== GENERATION OPTIONS =====
1: Use API directly (requires API keys)
2: Create prompt for manual submission to web interfaces

===== API KEY INPUT =====
Calling Claude API with claude-3-5-sonnet-20240620 (attempt 1/3)...
Taxonomy saved to C:\Users\Kyle\Desktop\Grad School\IS Demo\Phase 2 Rebuild\military_skills_taxonomy_claude_3_5_sonnet_20250403-001643.txt
Claude taxonomy generated and saved to C:\Users\Kyle\Desktop\Grad School\IS Demo\Phase 2 Rebuild\military_skills_taxonomy_claude_3_5_sonnet_