In [6]:
import os
import openai
import yaml


# Load the API key from a local YAML configuration file.
# The file is expected to be a mapping with a top-level `token` entry.
# NOTE(review): yaml.load with FullLoader is not recommended for untrusted input;
# if config.yaml can ever come from outside this machine, switch to yaml.safe_load.
# An explicit encoding avoids depending on the platform's default text encoding.
with open("config.yaml", encoding="utf-8") as f:
    config_yaml = yaml.load(f, Loader=yaml.FullLoader)

# Fail early with an actionable message instead of an opaque TypeError (empty file)
# or bare KeyError (missing key) on the line that builds the client.
if not isinstance(config_yaml, dict) or not config_yaml.get('token'):
    raise KeyError("config.yaml must define a non-empty 'token' entry holding the OpenAI API key")

# Initialize the OpenAI client with the API key
client = openai.OpenAI(api_key=config_yaml['token'])

# Model name used by every request issued through `llm`
MODEL = "gpt-4o-mini"


def llm(prompt, stop=["\n"]):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the one-and-only user message of the request.
        stop: Stop sequence(s) forwarded unchanged to the API. The default
            list is shared between calls; it is only read here, never mutated.

    Returns:
        The `message.content` of the first choice in the completion.
    """
    # Request parameters, gathered in one place so they are easy to scan.
    request = dict(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,          # no sampling randomness requested
        max_tokens=100,         # hard cap on the reply length, in tokens
        top_p=1,                # nucleus sampling over the full distribution
        frequency_penalty=0.0,  # no penalty for frequently repeated tokens
        presence_penalty=0.0,   # no penalty for tokens that already appeared
        stop=stop,              # generation halts when a stop sequence is produced
    )

    response = client.chat.completions.create(**request)

    # Only the first (and, with default `n`, only) choice is used.
    first_choice = response.choices[0]
    return first_choice.message.content


In [8]:
# Assuming the LLM client and llm function are already defined as per the provided code.

# Define the instruction to be analyzed
instruction = "足球运动员滑跪"

# Template of the 'think' prompt. `{instruction}` is filled in per call so that the
# prompt always talks about the instruction actually being analyzed. Written as
# explicit literals so the significant trailing spaces stay visible.
THINK_PROMPT_TEMPLATE = (
    "\n"
    "Determine whether the instruction '{instruction}' needs to be split into multiple consecutive actions. \n"
    "If splitting is required, identify the specific actions that should be created.\n"
    "\n"
    "You should think first before providing the response.\n"
    "When your thinking is done, say '<DONE>'\n"
    "Now think: \n"
)

# Prompt rendered for the module-level instruction (kept so existing references
# to `think_prompt` continue to work and see the same text as before).
think_prompt = THINK_PROMPT_TEMPLATE.format(instruction=instruction)

# Function to interact with LLM for the 'think' step
def analyze_instruction(instruction):
    """Ask the model to reason about `instruction` and return its first line of thought.

    The prompt is the think template rendered for `instruction`, followed by the
    instruction itself; generation stops at the '<DONE>' marker.

    Args:
        instruction: The instruction text to analyze.

    Returns:
        The first line of the stripped model reply, or an empty string when the
        model returned no text. The same value is printed as "Thought: ...".
    """
    # Render the template from the argument rather than reusing the module-level
    # `think_prompt`, which is bound to the global instruction.
    prompt = THINK_PROMPT_TEMPLATE.format(instruction=instruction) + instruction
    thought_action = llm(prompt, stop=["<DONE>"])

    # Keep only the first line of the reply; a missing reply (None) is treated
    # as empty text instead of raising AttributeError on `.strip()`.
    thought = (thought_action or "").strip().split("\n")[0]

    # Display the thought process
    print(f"Thought: {thought}")

    return thought

# Function to determine if decomposition is needed
def determine_decomposition(thought):
    """Ask the model for a 'True'/'False' verdict on whether to decompose.

    The question combines `thought` with the module-level `instruction`, and
    generation stops at the first newline.

    Args:
        thought: Reasoning text to base the verdict on.

    Returns:
        The model's reply with surrounding whitespace removed (not yet parsed
        into a bool).
    """
    question = (
        f"Based on the thought '{thought}', does the instruction '{instruction}' "
        "need to be decomposed into multiple actions?\n"
        "Answer with 'True' or 'False':"
    )
    verdict = llm(question, stop=["\n"])
    return verdict.strip()

# Function to convert the response to a boolean
def convert_to_bool(response):
    """Parse a textual 'true'/'false' answer into a bool.

    Matching is case-insensitive and tolerant of the decoration chat models
    commonly add around a one-word answer: surrounding whitespace, quotes,
    backticks, markdown emphasis and trailing punctuation (e.g. "True.",
    " false\\n", "'True'").

    Args:
        response: The answer text to parse.

    Returns:
        True for a 'true' answer, False for a 'false' answer.

    Raises:
        ValueError: If the normalized text is neither 'true' nor 'false'.
    """
    # Strip whitespace, then wrapping quotes/emphasis/punctuation, then any
    # whitespace that decoration was hiding.
    normalized = response.strip().strip("'\"`*.!,;:").strip().lower()

    if normalized == 'true':
        return True
    if normalized == 'false':
        return False
    raise ValueError("The response is not a valid boolean value.")

# Main execution: run the three-step pipeline for the module-level `instruction`.
# Each step issues its own request through `llm`, so this cell needs the client
# set up in the earlier cell.
# 1) Ask the model for a short line of reasoning (analyze_instruction also prints it).
thought = analyze_instruction(instruction)
# 2) Ask for a textual 'True'/'False' verdict based on that reasoning.
decomposition_needed = determine_decomposition(thought)
# 3) Parse the verdict into a bool; raises ValueError when it cannot be parsed.
decomposition_needed_bool = convert_to_bool(decomposition_needed)

print(f"Decomposition Needed: {decomposition_needed_bool}")


Thought: The instruction '足球运动员滑跪' translates to 'football player slides on knees'. This action can be interpreted as a single action, but it may involve multiple components: the football player preparing to slide, the actual sliding motion, and then coming to a stop on the knees.
Decomposition Needed: True
