In [1]:
import json
from typing import List, Dict, Any

In [2]:
# ============================================================================
# 1. WHAT YOU SEND vs WHAT THE MODEL SEES
# ============================================================================

def show_what_you_send_vs_what_model_sees():
    """Print a sample API request next to an illustrative model-side view.

    First prints the ``messages`` and ``tools`` of a sample chat request as
    indented JSON. Then prints a hand-written mock-up of a context in which
    the tool schema has been folded into a system message. The mock-up is
    hard-coded in this function; nothing here calls a real API.

    Each message body in the second section is shown as a preview of at most
    200 characters, with a trailing "..." only when text was actually cut.

    Returns:
        None. All output goes to stdout.
    """
    preview_limit = 200  # maximum characters of each message body to show

    print("WHAT YOU SEND vs WHAT MODEL SEES")
    print("="*60)

    # What you send
    your_request = {
        "model": "gpt-4",
        "messages": [
            {"role": "user", "content": "What's the weather in Paris?"}
        ],
        "tools": [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get weather for a city",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "city": {"type": "string", "description": "City name"}
                        },
                        "required": ["city"]
                    }
                }
            }
        ]
    }

    print("1. WHAT YOU SEND:")
    print("-" * 30)
    print("Messages:", json.dumps(your_request["messages"], indent=2))
    print("Tools:", json.dumps(your_request["tools"], indent=2))

    # Illustrative mock-up of a model-side context. It is written by hand
    # here, not derived from `your_request` and not obtained from any API.
    actual_model_context = [
        {
            "role": "system",
            "content": """You are a helpful assistant with access to functions. Use them when appropriate.

# Available Functions

## get_weather
**Description:** Get weather for a city
**Parameters:**
- city (string, required): City name

When you need to call a function, respond with:
<function_calls>
<invoke name="get_weather">
<parameter name="city">value</parameter>
</invoke>
</function_calls>"""
        },
        {
            "role": "user", 
            "content": "What's the weather in Paris?"
        }
    ]

    print("\n2. WHAT MODEL ACTUALLY SEES:")
    print("-" * 30)
    for msg in actual_model_context:
        content = msg["content"]
        # Only mark the preview as truncated when characters were dropped;
        # an unconditional "..." made short messages look cut off.
        suffix = "..." if len(content) > preview_limit else ""
        print(f"Role: {msg['role']}")
        print(f"Content: {content[:preview_limit]}{suffix}")
        print()

# ============================================================================
# 2. OPENAI'S INTERNAL TOOL INJECTION PROCESS
# ============================================================================

def inject_tools_into_system_prompt(messages: List[Dict], tools: List[Dict]) -> List[Dict]:
    """Simulate folding tool definitions into the conversation's system message.

    Every entry of ``tools`` whose ``"type"`` is ``"function"`` is rendered as
    a Markdown section (name, description, parameters). The rendered sections
    plus calling-format instructions form a system prompt that is either
    inserted as a new first message or prepended to the content of an existing
    leading system message.

    Args:
        messages: Chat messages as dicts with ``"role"`` and ``"content"`` keys.
        tools: Tool definitions shaped like
            ``{"type": "function", "function": {...}}``. Entries of any other
            type are ignored.

    Returns:
        ``messages`` itself when ``tools`` is empty; otherwise a new list.
        The input list is never modified, but message dicts other than the
        system message are shared with the input rather than copied.

    Raises:
        KeyError: If a function tool lacks ``"function"`` or ``"name"``.
    """
    if not tools:
        return messages

    # Render one Markdown section per function tool.
    tool_descriptions = []
    for tool in tools:
        if tool.get("type") != "function":
            continue
        func = tool["function"]

        lines = [
            f"## {func['name']}",
            # A missing description is rendered as empty text instead of
            # raising KeyError.
            f"**Description:** {func.get('description', '')}",
        ]

        parameters = func.get("parameters") or {}
        if "properties" in parameters:
            lines.append("**Parameters:**")
            required = parameters.get("required", [])

            for param_name, param_info in parameters["properties"].items():
                param_type = param_info.get("type", "string")
                param_desc = param_info.get("description", "")
                req_text = ", required" if param_name in required else ""
                lines.append(f"- {param_name} ({param_type}{req_text}): {param_desc}")

                if "enum" in param_info:
                    lines.append(f"  Options: {param_info['enum']}")

        # Each section ends with a newline so joined sections are separated
        # by a blank line.
        tool_descriptions.append("\n".join(lines) + "\n")

    # Build the format example with json.dumps so the text the model is told
    # to imitate is guaranteed to be valid JSON, with "arguments" carried as a
    # properly escaped JSON string. A hand-written literal with \" escapes in
    # a normal string loses the backslashes and yields unparseable JSON.
    example_call = json.dumps({
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {
                    "name": "function_name",
                    "arguments": json.dumps({"param": "value"}),
                },
            }
        ]
    })

    system_content = (
        "You are a helpful assistant with access to functions. "
        "Use them when appropriate.\n"
        "\n"
        "# Available Functions\n"
        "\n"
        + "\n".join(tool_descriptions)
        + "\n"
        "\n"
        "# Function Calling Format\n"
        "When you need to call a function, respond with JSON in this exact format:\n"
        + example_call
        + "\n"
        "\n"
        "Important:\n"
        "- Generate unique call IDs (call_1, call_2, etc.)\n"
        "- Arguments must be a JSON string, not an object\n"
        "- You can call multiple functions in one response\n"
        "- Only call functions when the user's request requires it"
    )

    # Only a system message in first position is merged with the tool prompt.
    has_system = bool(messages) and messages[0].get("role") == "system"

    if has_system:
        # Tool prompt goes first, the caller's own system text after it.
        merged = {
            "role": "system",
            "content": system_content + "\n\n" + messages[0]["content"],
        }
        return [merged, *messages[1:]]

    return [{"role": "system", "content": system_content}, *messages]

# ============================================================================
# 3. SPECIAL TOKENS AND FORMATTING
# ============================================================================

def show_special_token_injection():
    """Print a hypothetical special-token encoding of a tool definition.

    The token names listed here are hard-coded examples that the printed text
    itself labels as a theory. Output goes to stdout.

    Returns:
        None.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("SPECIAL TOKEN INJECTION")
    print(rule)

    # Speculative: these markers illustrate one way a schema could be encoded.
    print("THEORY: OpenAI uses special tokens like:")
    print("-" * 40)

    special_tokens = (
        "<|function_start|>",
        "<|function_name|>get_weather<|/function_name|>",
        "<|function_description|>Get weather for a city<|/function_description|>",
        "<|function_parameters|>",
        "<|param_name|>city<|/param_name|>",
        "<|param_type|>string<|/param_type|>",
        "<|param_required|>true<|/param_required|>",
        "<|/function_parameters|>",
        "<|function_end|>",
    )
    for token in special_tokens:
        print(f"  {token}")

    print("\nThese tokens:")
    for claim in (
        "- Don't count toward your token limit",
        "- Are invisible in the API",
        "- Help the model understand function structure",
        "- Enable precise function calling behavior",
    ):
        print(claim)

# ============================================================================
# 4. DEMONSTRATE THE INJECTION
# ============================================================================

def demonstrate_tool_injection():
    """Run the simulated tool injection on a sample request and print it.

    Builds a one-message conversation and two function tools, prints a
    preview of the messages before injection, passes them through
    ``inject_tools_into_system_prompt``, then prints a preview of the result
    followed by the full content of the first resulting message.

    Previews end in "..." only when the message was actually shortened.

    Returns:
        None. All output goes to stdout.
    """

    def preview(text, limit):
        """Return *text* cut to *limit* characters, marking cuts with '...'."""
        # The sample user message is exactly 50 characters long, so an
        # unconditional "..." would wrongly suggest it had been cut.
        return text[:limit] + "..." if len(text) > limit else text

    print("\n" + "="*60)
    print("TOOL INJECTION DEMONSTRATION")
    print("="*60)

    # Original messages (what you send)
    original_messages = [
        {"role": "user", "content": "What's the weather in Tokyo and calculate 15 + 25?"}
    ]

    # Tools (what you send)
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "city": {"type": "string", "description": "City name"},
                        "units": {"type": "string", "enum": ["celsius", "fahrenheit"], "default": "fahrenheit"}
                    },
                    "required": ["city"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "calculate",
                "description": "Perform mathematical calculations",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "expression": {"type": "string", "description": "Math expression to evaluate"}
                    },
                    "required": ["expression"]
                }
            }
        }
    ]

    print("BEFORE INJECTION:")
    print("-" * 20)
    print(f"Messages: {len(original_messages)}")
    for i, msg in enumerate(original_messages, start=1):
        print(f"  {i}. {msg['role']}: {preview(msg['content'], 50)}")

    # Apply injection
    injected_messages = inject_tools_into_system_prompt(original_messages, tools)

    print("\nAFTER INJECTION:")
    print("-" * 20)
    print(f"Messages: {len(injected_messages)}")
    for i, msg in enumerate(injected_messages, start=1):
        # Flatten newlines so each message preview stays on one output line.
        content_preview = preview(msg['content'], 100).replace('\n', ' ')
        print(f"  {i}. {msg['role']}: {content_preview}")

    print("\nFULL INJECTED SYSTEM MESSAGE:")
    print("-" * 30)
    print(injected_messages[0]['content'])

# ============================================================================
# 5. TOKEN COUNT IMPACT
# ============================================================================

def show_token_impact():
    """Print token counts for a prompt with and without injected tools.

    Encodes a sample question with the tiktoken encoding for "gpt-4", runs
    the same question through ``inject_tools_into_system_prompt`` with one
    tool, and prints per-message counts, the totals, and the overhead.
    If an ImportError is raised along the way, an install hint is printed
    instead.

    Returns:
        None. All output goes to stdout.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TOKEN COUNT IMPACT")
    print(rule)

    try:
        import tiktoken
        tokenizer = tiktoken.encoding_for_model("gpt-4")

        # Baseline: the bare user question.
        original = "What's the weather in Tokyo?"
        original_tokens = len(tokenizer.encode(original))

        # Same question after a single tool has been injected.
        weather_tool = {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
        injected = inject_tools_into_system_prompt(
            [{"role": "user", "content": original}],
            [weather_tool],
        )

        # Only message contents are counted here.
        total_tokens = 0
        for msg in injected:
            tokens = len(tokenizer.encode(msg['content']))
            total_tokens += tokens
            print(f"{msg['role']} message: {tokens} tokens")

        print(f"\nOriginal: {original_tokens} tokens")
        print(f"With tools: {total_tokens} tokens")
        print(f"Overhead: {total_tokens - original_tokens} tokens ({((total_tokens - original_tokens) / original_tokens * 100):.1f}% increase)")

    except ImportError:
        print("Install tiktoken to see token counts: pip install tiktoken")

# ============================================================================
# 6. THE COMPLETE PICTURE
# ============================================================================

def show_complete_picture():
    """Print a five-step text summary of the tool-injection flow.

    The summary is a fixed block of text describing the flow this file
    presents. Output goes to stdout.

    Returns:
        None.
    """
    rule = "=" * 60
    overview = """
HOW OPENAI INJECTS TOOLS:

1. YOU SEND:
   ├── messages: [{"role": "user", "content": "..."}]
   └── tools: [{"type": "function", "function": {...}}]

2. OPENAI PROCESSES:
   ├── Parses tool JSON schemas
   ├── Converts to natural language descriptions  
   ├── Adds function calling instructions
   ├── Injects into system message
   └── May add special tokens (invisible to you)

3. MODEL SEES:
   ├── System message with tool descriptions
   ├── Instructions on how to call functions
   ├── Your original user message
   └── Possibly special tokens for structure

4. MODEL RESPONDS:
   ├── Understands available functions
   ├── Decides when to call them
   ├── Generates structured JSON responses
   └── Follows the injected format instructions

5. OPENAI PARSES:
   ├── Extracts function calls from response
   ├── Converts to tool_calls format
   └── Returns to you

KEY INSIGHT: Your tools become part of the model's context
through sophisticated prompt engineering and possibly special tokens.
    """

    # Banner first, then the summary text exactly as written above.
    for line in ("\n" + rule, "THE COMPLETE PICTURE", rule, overview):
        print(line)

if __name__ == "__main__":
    # Run every demo section in presentation order: request comparison,
    # special-token theory, injection walkthrough, token impact, summary.
    for demo in (
        show_what_you_send_vs_what_model_sees,
        show_special_token_injection,
        demonstrate_tool_injection,
        show_token_impact,
        show_complete_picture,
    ):
        demo()


WHAT YOU SEND vs WHAT MODEL SEES
1. WHAT YOU SEND:
------------------------------
Messages: [
  {
    "role": "user",
    "content": "What's the weather in Paris?"
  }
]
Tools: [
  {
    "type": "function",
    "function": {
      "name": "get_weather",
      "description": "Get weather for a city",
      "parameters": {
        "type": "object",
        "properties": {
          "city": {
            "type": "string",
            "description": "City name"
          }
        },
        "required": [
          "city"
        ]
      }
    }
  }
]

2. WHAT MODEL ACTUALLY SEES:
------------------------------
Role: system
Content: You are a helpful assistant with access to functions. Use them when appropriate.

# Available Functions

## get_weather
**Description:** Get weather for a city
**Parameters:**
- city (string, required...

Role: user
Content: What's the weather in Paris?...


SPECIAL TOKEN INJECTION
THEORY: OpenAI uses special tokens like:
---------------------------------------