In [1]:
# Standard library imports
import json
import sys
from enum import Enum
from pathlib import Path
from typing import List, Optional, Type

# Third party imports
import instructor
from instructor.function_calls import openai_schema
from openai import OpenAI

# Add parent directory to Python path to import models
sys.path.append(str(Path().absolute().parent))

# Local application imports
from models.input_models import GuardRail
from models.output_models import get_classes_with_enum
from prompts.format_structured_reasoning_user_prompt import format_structured_reasoning_user_prompt

# Load Utils
from utils.build_mermaid_diagram import build_mermaid_diagram
from utils.make_gpt_pro_prompt import make_gpt_pro_prompt

In [2]:
# Wrap the OpenAI client with instructor; the wrapped client is later called
# with a `response_model` argument to request structured output.
# NOTE(review): OpenAI() is built with no arguments, so it presumably reads its
# credentials from the environment — confirm before running on a new machine.
client = instructor.from_openai(OpenAI())

# Load the system prompt used for every request. The encoding is given
# explicitly so the text is decoded the same way regardless of the platform's
# default locale encoding.
with open("../prompts/in_depth_thinking_system_prompt.md", "r", encoding="utf-8") as f:
    IN_DEPTH_THINKING_SYSTEM_PROMPT = f.read()

### Show System Prompt (Optional)

In [5]:
# print(IN_DEPTH_THINKING_SYSTEM_PROMPT)

### Define the Task and Guardrails

In [3]:
# Problem statement sent to the model as the task to reason about.
TASK = "Design an optimized CUDA kernel implementation for softmax that maximizes throughput while maintaining numerical stability"

# Constraints the solution must respect. Each (name, description) pair below
# becomes one GuardRail, in the order listed.
GUARDRAILS = [
    GuardRail(name=rail_name, description=rail_description)
    for rail_name, rail_description in (
        (
            "Numerical Stability",
            "Must maintain numerical stability (handling overflow/underflow)",
        ),
        (
            "Memory Usage",
            "Maximum shared memory usage of 48KB per block",
        ),
        (
            "Batch Handling",
            "Must handle variable batch sizes efficiently",
        ),
        (
            "Performance",
            "Must outperform naive implementation by at least 100x",
        ),
        (
            "Dependencies",
            "Cannot use external CUDA libraries (only basic CUDA primitives)",
        ),
    )
]

# Build an Enum with one member per predefined guardrail, so that any guardrail
# link in the response model has to match one of the guardrails declared above.
# This gives type safety and validation when GPT references guardrails in its
# structured reasoning output.
# Member names are the guardrail names upper-cased with spaces replaced by
# underscores (e.g. "Memory Usage" -> MEMORY_USAGE); member values are the
# original guardrail names.
GuardRailEnum = Enum(
    'GuardRailEnum',
    {rail.name.upper().replace(' ', '_'): rail.name for rail in GUARDRAILS},
)

### Show User Prompt

In [20]:
# print(format_structured_reasoning_user_prompt(
#     task=TASK,
#     guardrails=GUARDRAILS
# ))

# OpenAI Call
- Using instructor client to enforce type safety through structured outputs
- Validating guardrail references against predefined enum

In [21]:
def generate_structured_reasoning(
    task: str,
    guardrails: Optional[List[GuardRail]] = None,
    guardrail_enum: Optional[Type[Enum]] = None,
    *,
    model: str = "gpt-4o",
    temperature: float = 0.18,
    max_tokens: int = 16000,
):
    """Request a structured reasoning response for ``task`` from the chat model.

    Sends the in-depth-thinking system prompt plus a user prompt built by
    ``format_structured_reasoning_user_prompt`` to the instructor-wrapped
    client, asking for the response model produced by
    ``get_classes_with_enum(guardrail_enum)``.

    Args:
        task: Description of the problem to reason about.
        guardrails: Constraints to include in the user prompt. ``None`` (the
            default) is treated as "no guardrails" and replaced by a fresh
            empty list on every call.
        guardrail_enum: Enum class passed to ``get_classes_with_enum`` to build
            the response model.
            NOTE(review): whether ``get_classes_with_enum`` accepts ``None`` is
            not visible here — confirm before relying on the default.
        model: Chat model name passed to the client.
        temperature: Sampling temperature passed to the client.
        max_tokens: Completion token limit passed to the client.

    Returns:
        Whatever ``client.chat.completions.create`` returns for the requested
        ``response_model`` (the notebook later calls ``.model_dump()`` on it).
    """
    # Use a fresh list per call instead of a shared mutable default argument.
    if guardrails is None:
        guardrails = []

    messages = [
        {
            "role": "system",
            "content": IN_DEPTH_THINKING_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": format_structured_reasoning_user_prompt(
                task=task,
                guardrails=guardrails
            )
        }
    ]

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        response_model=get_classes_with_enum(guardrail_enum),
    )

    return response

In [22]:
# Run the request for the task and guardrails defined above; later cells read
# the result from `response`.
response = generate_structured_reasoning(
    task=TASK,
    guardrails=GUARDRAILS,
    guardrail_enum=GuardRailEnum,
)

In [4]:
class EnumEncoder(json.JSONEncoder):
    """JSON encoder that writes Enum members as their ``.value``."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Enum members are replaced by their value; anything else is handed to
        the base class implementation.
        """
        if not isinstance(obj, Enum):
            return super().default(obj)
        return obj.value

# Pretty-print the dumped response as indented JSON. EnumEncoder is supplied so
# that any Enum members in the dump are written as their values.
# Requires `response` from the generate_structured_reasoning cell to exist.
print(json.dumps(response.model_dump(), indent=2, cls=EnumEncoder))

NameError: name 'response' is not defined

### Make Mermaid Diagram Text
- Diagram can be rendered at: https://www.mermaidchart.com/play

In [24]:
# Convert the dumped response into Mermaid diagram text and print it so it can
# be pasted into a Mermaid renderer.
# NOTE(review): this call passes `is_gpt_prompt=` while the GPT Pro cell later
# in this notebook passes `is_gpt_pro=` — confirm which keyword
# build_mermaid_diagram actually accepts.
diagram_text = build_mermaid_diagram(response.model_dump(), is_gpt_prompt=False)
print(diagram_text)

flowchart TB

subgraph Foundation Observations
    subgraph FO_1["FO_1: Understanding Softmax and CUDA"]
    direction TB
        FO_1_AS_1["AS_1: Softmax is a function that converts a vector of numbers into probabilities."]
        FO_1_AS_2["AS_2: CUDA is a parallel computing platform and application programming interface model created by NVIDIA."]
        FO_1_AS_3["AS_3: Softmax requires exponentiation and normalization, which can lead to numerical instability."]
        FO_1_AS_4["AS_4: CUDA kernels are functions that run on the GPU, allowing for parallel computation."]
    end
    subgraph FO_2["FO_2: Numerical Stability in Softmax"]
    direction TB
        FO_2_AS_5["AS_5: Numerical stability is crucial to prevent overflow and underflow in exponentiation."]
        FO_2_AS_6["AS_6: A common technique is to subtract the maximum value from the input vector before exponentiation."]
    end
    subgraph FO_3["FO_3: CUDA Memory and Performance Constraints"]
    direction TB
        

# Using With GPT Pro
- If you have access to GPT Pro, this prompt can do some pretty incredible things. The cell below calls a function that converts the given prompt into a full prompt that can be copied and pasted directly into your GPT Pro window.

## Notes for Using with GPT Pro
- GPT Pro may timeout or stop responding on complex tasks due to OpenAI's rate limiting
- Common symptoms:
  - Stops mid-task without completing
  - Returns no response
  - Appears to be "thinking" indefinitely
- Workaround:
  - If this happens, prompt it with "please continue thinking"
  - GPT Pro will eventually resume and provide a response
  - You may need to do this multiple times for very complex tasks

In [6]:
# Print the prompt built by make_gpt_pro_prompt from the system prompt, task,
# guardrails and guardrail enum, ready to copy into a GPT Pro window.
print(make_gpt_pro_prompt(IN_DEPTH_THINKING_SYSTEM_PROMPT, TASK, GUARDRAILS, GuardRailEnum))

<system prompt>
# Purpose
You are an assistant designed for deep analytical thinking. 
Your purpose is to thoroughly explore problems through systematic reasoning, embracing uncertainty and revision throughout the process. 
You approach problems with a human-like internal monologue that prioritizes thorough exploration over rushing to conclusions.

# Core Principles

## Natural Thinking Process

- Express thoughts in a conversational, stream-of-consciousness style
- Use simple sentences that mirror human thought patterns
- Show progressive building and revision of ideas
- Acknowledge uncertainty and dead ends openly

## Reasoning Methodology

- Break complex problems into foundational observations
- Build thoughts iteratively from these foundations
- Allow conclusions to emerge naturally from evidence
- Continue exploring until reaching well-supported conclusions

## Unwavering Persistence

- Never accept "impossible" or "too difficult" as final answers
- Value thorough exploration ove

### Loading GPT Pro Response

GPT Pro returns responses in its own JSON format that requires some processing before use. Here's how to handle the responses:

1. Save the GPT Pro response as a JSON file in your project directory
2. Load and parse the JSON file 
3. Extract the relevant data from the response structure
4. Pass the extracted data to the Mermaid diagram builder

The example below demonstrates loading a saved response and generating a diagram.

In [8]:
# Load a saved GPT Pro response. The encoding is given explicitly so the JSON
# file is decoded as UTF-8 regardless of the platform's default locale encoding.
with open('../data/outputs/cuda_softmax/gpt_pro/response_1.json', 'r', encoding='utf-8') as f:
    gpt_pro_response = json.load(f)

# Only the 'parameters' entry of the saved response is passed to the diagram
# builder.
# NOTE(review): this call passes `is_gpt_pro=` while the earlier diagram cell
# passes `is_gpt_prompt=` — confirm which keyword build_mermaid_diagram
# actually accepts.
diagram_text = build_mermaid_diagram(gpt_pro_response['parameters'], is_gpt_pro=True)
print(diagram_text)


NameError: name 'is_gpt_prompt' is not defined