In [1]:
# prompt: get the openai api key from secrets

import os
from google.colab import userdata

# Look up the Colab secret stored under the name 'OPENAI_API_KEY'.
openai_api_key = userdata.get('OPENAI_API_KEY')

# Export the key through the process environment when one was returned;
# otherwise tell the user that nothing was found.
if not openai_api_key:
  print("OpenAI API key not found in secrets.")
else:
  os.environ['OPENAI_API_KEY'] = openai_api_key


In [2]:
# prompt: install latest openai

!pip install --upgrade openai


Collecting openai
  Downloading openai-1.67.0-py3-none-any.whl.metadata (24 kB)
Downloading openai-1.67.0-py3-none-any.whl (580 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m580.2/580.2 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.61.1
    Uninstalling openai-1.61.1:
      Successfully uninstalled openai-1.61.1
Successfully installed openai-1.67.0


In [3]:
from openai import OpenAI
# Module-level OpenAI client used by run_llm() below; the key loaded from the
# Colab secrets in the first cell is passed in explicitly.
client = OpenAI(api_key=openai_api_key)

def run_llm(user_prompt: str, model: str = 'gpt-4o', system_prompt: str = None):
    """Send one chat-completion request and return the text of the first choice.

    Args:
        user_prompt: Content of the "user" message.
        model: Model name forwarded to the chat completions endpoint.
        system_prompt: Optional instructions; when truthy they are sent as a
            "developer" message placed before the user message.

    Returns:
        The ``content`` of the first choice's message.
    """
    # Start from the mandatory user turn, then put the instructions in front
    # of it when they were supplied.
    conversation = [{"role": "user", "content": user_prompt}]
    if system_prompt:
        conversation.insert(0, {"role": "developer", "content": system_prompt})

    reply = client.chat.completions.create(model=model, messages=conversation)
    first_choice = reply.choices[0]
    return first_choice.message.content

# Quick check of the helper using its default model; as the last expression in
# the cell, the returned text is displayed as the cell output.
run_llm('what is the meaning of life?')

'"The meaning of life" is a question that has intrigued humans for centuries, and there is no single answer that satisfies everyone. Different people and cultures have interpreted it in various ways. Here are a few perspectives:\n\n1. **Philosophical Perspective**: Philosophers have offered myriad interpretations. Existentialists like Jean-Paul Sartre suggest that life has no inherent meaning and it is up to individuals to create their own purpose. Albert Camus argues for embracing the absurdity of life.\n\n2. **Religious Perspective**: Many religious traditions provide their own answers to this question. For instance, Christianity often views the purpose of life as knowing, loving, and serving God. Buddhism sees life as a cycle of suffering and rebirth, with the ultimate goal of reaching enlightenment.\n\n3. **Scientific Perspective**: From a scientific standpoint, life can be described as a complex set of chemical reactions and processes. Evolutionarily, the "purpose" of life could b

In [5]:
import re

# The task text. It is passed to loop_workflow() at the bottom of this cell,
# which forwards it to both generate() and evaluate().
task = """
Write a one-sentence bedtime story about a unicorn, for a five year old girl
"""

# Instructions placed at the start of every generator prompt (see generate()).
# They ask the model to reflect on earlier feedback and to answer in a
# "Thoughts:" / "Response:" layout.
GENERATOR_PROMPT = """
Your goal is to complete the task based on <user input>. If there are feedback
from your previous generations, you should reflect on them to improve your solution

Output your answer concisely in the following format:

Thoughts:
[Your understanding of the task and feedback and how you plan to improve]

Response:
[Your response here]
"""

def generate(task: str, generator_prompt: str, context: str = "") -> str:
    """Generate (or regenerate) a solution for ``task`` with the LLM.

    Args:
        task: The task text, appended to the prompt after a ``Task:`` label.
        generator_prompt: Instructions placed at the start of the prompt.
        context: Optional extra text (e.g. previous attempts and evaluator
            feedback) inserted between the instructions and the task. It is
            left out entirely when empty.

    Returns:
        The raw model reply as a single string. The reply is also printed.
    """
    # Skip empty context so the prompt does not contain a stray blank section.
    prompt_parts = [generator_prompt]
    if context:
        prompt_parts.append(context)
    prompt_parts.append(f"Task: {task}")
    full_prompt = "\n".join(prompt_parts)

    response = run_llm(full_prompt)

    print("\n## Generation start")
    print(f"Output:\n{response}\n")

    return response


# Rubric placed at the start of every evaluator prompt (see evaluate()).
# The "Status:" / "Feedback:" reply layout requested at the end is what
# evaluate() looks for when it parses the model's reply.
EVALUATOR_PROMPT = """
Evaluate this following response for:
1. age appropriateness
2. is only ten words or fewer
3. style and best practices

You should be evaluating only and not attempting to solve the task.

Only output "PASS" if all criteria are met and you have no further suggestions for improvements.

Provide detailed feedback if there are areas that need improvement. You should specify what needs improvement and why.

Return in this format:
Status: [PASS/FAIL]
Feedback: [Your feedback here]
"""

def evaluate(task: str, evaluator_prompt: str, generated_content: str) -> tuple[str, str]:
    """Ask the LLM to grade a generated answer and parse its verdict.

    Args:
        task: The original task text, shown to the evaluator for reference.
        evaluator_prompt: Rubric/instructions placed at the start of the prompt.
        generated_content: The candidate answer to be graded.

    Returns:
        A ``(status, feedback)`` pair. ``status`` is the text following
        ``Status:`` on that line, with surrounding whitespace and decoration
        such as ``[PASS]``, ``**PASS**`` or ``PASS.`` stripped. ``feedback`` is
        everything after ``Feedback:``, or ``""`` when the reply has no such
        section.

    Raises:
        ValueError: If the reply contains no ``Status:`` line.
    """
    full_prompt = f"{evaluator_prompt}\nOriginal task: {task}\nContent to evaluate: {generated_content}"

    response = run_llm(full_prompt)

    status_match = re.search(r"Status:\s*(.*?)(?:\n|$)", response, re.IGNORECASE)
    feedback_match = re.search(r"Feedback:\s*([\s\S]*)", response, re.IGNORECASE)

    # Only the status is required: a passing reply may legitimately carry no
    # feedback, and that must not be reported as a parse failure.
    if status_match is None:
        raise ValueError("Could not parse evaluation response. Expected format: 'Status: [PASS/FAIL]\\nFeedback: [feedback]'")

    # The requested format shows the status inside brackets, and models also
    # tend to add markdown emphasis or a trailing period; strip that so callers
    # can compare the result against plain "PASS" / "FAIL".
    evaluation = status_match.group(1).strip().strip("*_`[]\"'. ")
    feedback = feedback_match.group(1).strip() if feedback_match else ""

    print("## Evaluation start")
    print(f"Status: {evaluation}")
    print(f"Feedback: {feedback}")

    return evaluation, feedback

def loop_workflow(task: str, generator_prompt: str, evaluator_prompt: str, context: str = "", max_iterations: int = 5) -> str:
    """Alternate generation and evaluation until the evaluator returns PASS.

    Args:
        task: The task text given to both the generator and the evaluator.
        generator_prompt: Instructions for the generator.
        evaluator_prompt: Rubric for the evaluator.
        context: Optional initial context for the first generation. Later
            rounds replace it with the list of previous attempts plus the
            latest feedback.
        max_iterations: Maximum number of evaluate-then-regenerate rounds.

    Returns:
        The first response the evaluator marks as PASS. If no response passes
        within ``max_iterations`` rounds, the most recent response is returned
        (it has not been evaluated) and a notice is printed.
    """
    memory = []

    # Forward the caller's context to the first attempt; with the default ""
    # this is identical to calling generate() without context.
    response = generate(task, generator_prompt, context)
    memory.append(response)

    for _ in range(max_iterations):
        evaluation, feedback = evaluate(task, evaluator_prompt, response)

        if evaluation.upper() == "PASS":
            return response

        # Show the generator every earlier attempt plus the newest critique.
        context = "\n".join([
            "Previous attempts:",
            *[f"- {m}" for m in memory],
            f"\nFeedback: {feedback}"
        ])
        response = generate(task, generator_prompt, context)
        memory.append(response)

    # Budget exhausted: return the latest attempt rather than falling off the
    # end of the function and handing the caller None.
    print(f"## No PASS after {max_iterations} evaluation rounds; returning the latest (unevaluated) attempt.")
    return response

# Run the generate/evaluate loop on the task defined above; as the last
# expression in the cell, the returned response is displayed as the cell output.
loop_workflow(task, GENERATOR_PROMPT, EVALUATOR_PROMPT)


## Generation start
Output:
Thoughts:
The task is to create a simple and imaginative bedtime story suitable for a five-year-old. I will focus on making it whimsical and positive, as that suits the age group. Ensuring it's concise and magical will capture her interest.

Response:
Once upon a time, a sparkling unicorn named Luna used her magical horn to light up the night sky with a rainbow path, guiding lost stars back home.

## Evaluation start
Status: FAIL
Feedback: 1. **Age Appropriateness:** The content is appropriate for a five-year-old, as it features whimsical and magical elements like a unicorn and a rainbow, which are typically appealing to young children.

2. **Is Only Ten Words or Fewer:** The response is not ten words or fewer; it is instead a complex sentence that exceeds this limit. The task specifically required a response of ten words at most.

3. **Style and Best Practices:** 
   - The style is imaginative and engaging, aligning well with what captures a young child’s 

"Thoughts:\nThe task requires crafting a simple, one-sentence bedtime story about a unicorn for a five-year-old, focusing on making it whimsical and age-appropriate. Based on feedback, I'll use straightforward and vivid imagery to make the story immediately understandable and magical within ten words or fewer.\n\nResponse:\nLuna the unicorn soared, leaving sparkling rainbows in her wake."