# Experiment 0604 - estimation problem solving step by step

In [None]:
# dependency
%pip install python-dotenv
%pip install openai
%pip install pandas

In [1]:
import os
from dotenv import load_dotenv, find_dotenv # load keys from .env file
import openai # use OpenAI API
import random
import re
import json
import pandas as pd
import numpy as np
import datetime

# Pull variables from the nearest .env file into the environment, then give the key to the client
_ = load_dotenv(dotenv_path=find_dotenv())
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [47]:
# search for the json object at the end using regular expressions
def get_json(text):
    '''Extract the outermost {...} span from the text using regular expressions.

    The match runs from the first "{" to the last "}" in the text and may span
    several lines, so a pretty-printed JSON object is captured as a whole.

    Args:
        text: the string to search (typically a model reply).

    Returns:
        The matched substring, still as a string (it is NOT parsed with
        json.loads), or None when the text is empty/None or has no {...} span.
    '''
    if not text:
        return None
    # re.DOTALL lets "." cross newlines; without it a multi-line object never matches.
    match = re.search(r'\{.*\}', text, re.DOTALL)
    if match is None:
        return None
    return match.group()

def get_python(text):
    '''Return the first snippet fenced as """python ... """ in the text.

    The snippet is returned with surrounding whitespace stripped; None is
    returned when no complete fenced snippet is present.
    '''
    found = re.search(r'"""python(.*?)"""', text, flags=re.DOTALL)
    return found.group(1).strip() if found else None

def get_completion(prompt, model="gpt-3.5-turbo"):
    '''Send one user prompt to the OpenAI chat API, echo the reply, and return it.

    Args:
        prompt: text sent as a single "user" message.
        model: chat model name passed to the API.

    Returns:
        The content of the first choice in the response.
    '''
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # degree of randomness of the model's output
        max_tokens=500,  # upper bound on returned tokens
    )
    reply = response.choices[0].message["content"]
    # Echo the reply between markers so it is easy to spot in the cell output.
    print(">>>>RESPONSE>>>>")
    print(str(reply))
    print("<<<<END<<<<")
    return reply

def get_completion_from_messages(messages, model="gpt-3.5-turbo", temperature=0):
    '''Query the OpenAI chat API with a ready-made list of role/content messages.

    Args:
        messages: list of {"role": ..., "content": ...} dicts.
        model: chat model name passed to the API.
        temperature: sampling temperature passed to the API.

    Returns:
        The content of the first choice in the response.
    '''
    request = dict(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=2500,
    )
    completion = openai.ChatCompletion.create(**request)
    return completion.choices[0].message["content"]

# write function to combine two messages
# turn the above process into a function
def get_answer_step_by_step(question, model="gpt-4"):
    '''Ask the chat model to work through a question in four fixed steps.

    Args:
        question: the problem statement, sent as the user message.
        model: chat model name forwarded to get_completion_from_messages.

    Returns:
        The model's reply as a string.
    '''
    # System prompt: define the problem, pick a methodology, calculate, state assumptions.
    four_step_instructions = """A user will give you a problem. Follow these steps.

Step 1: Reason step-by-step to clearly define the problem and what additional information is desired.

Step 2: Reason step-by-step a methodology that can be used to answer the problem.

Step 3: Calculate the results based on step 2.

Step 4: Stating assumptions used in the methodology and the results.

    """
    conversation = [
        {"role": "system", "content": four_step_instructions},
        {"role": "user", "content": question},
    ]
    return get_completion_from_messages(conversation, model=model)

def text_to_json(message, model="gpt-3.5"):
    '''Query OpenAI API to convert a four-step answer into a parsable JSON object.

    Args:
        message: the step-by-step answer text to convert.
        model: chat model used for the conversion. The legacy default "gpt-3.5"
            is mapped to "gpt-3.5-turbo", the model this function has always
            effectively used; any other value is forwarded unchanged.

    Returns:
        The raw model reply as a string; use get_json() to pull the {...} span
        out of it.
    '''
    JSON_Parser_system_message = '''\
A user will give you a text contain 4 steps. Your task is to convert the text to make it more readable. Each step will be the key, and the details will be the value.
Your output will be a JSON object in deliminator (""") looks like this: 
JSON={"Step 1": "details in step 1", "Step 2": "details in step 2", "Step 3": "details in step 3", "Step 4": "details in step 4"}
Here's an example:
INPUT:
======
Step 1: 
To answer this problem, we need to know the following information:
1. How much power is required to jump start a car?
2. How much power can be generated by one lemon battery?
3. How many lemon batteries are required to generate the power needed to jump start a car?

Step 2:
To answer this problem, we can follow these steps:
1. Determine the power required to jump start a car.
2. Determine the power generated by one lemon battery.
3. Calculate the number of lemon batteries required to generate the power needed to jump start a car.

Step 3:
1. According to AAA, the power required to jump start a car is around 300-400 amps for 5-10 seconds. Let's assume that we need 350 amps for 7 seconds to jump start a car.
2. One lemon battery can generate around 0.7 volts and 0.0003 amps of current. Using Ohm's law, we can calculate the power generated by one lemon battery as follows: Power = Voltage x Current = 0.7 x 0.0003 = 0.00021 watts or 0.21 milliwatts.
3. To generate 350 amps for 7 seconds, we need a total power of 350 x 7 = 2450 watts or 2.45 kilowatts. To generate this much power using lemon batteries, we need 2.45 / 0.00021 = 11,666.67 lemon batteries.

Step 4:
Assumptions:
1. The power required to jump start a car is assumed to be 350 amps for 7 seconds.
2. The power generated by one lemon battery is assumed to be 0.21 milliwatts.
3. The lemon batteries are assumed to be connected in series to generate the required voltage and current.

======
OUTPUT:"""
JSON={"problem definition": "If a lemon can be made into a battery, how many lemons can jump start a car?", "methodology": "To answer this problem, we need to know the power required to jump start a car, the power generated by one lemon battery, and the number of lemon batteries required to generate the power needed to jump start a car. We can calculate the number of lemon batteries required by dividing the required power by the power generated by one lemon battery.", "results": "To jump start a car, we need 11,666.67 lemon batteries.", "assumptions": "The power required to jump start a car is assumed to be 350 amps for 7 seconds. The power generated by one lemon battery is assumed to be 0.21 milliwatts. The lemon batteries are assumed to be connected in series to generate the required voltage and current."}
"""
'''
    message_wrapper = f'''Convert message into JSON object.\nInput:\n==={message}\n===\n OUTPUT:\n """[JSON=]"""'''
    get_json_messages = [{"role": "system", "content": JSON_Parser_system_message},
    {"role": "user", "content": message_wrapper}]

    # Forward the caller's model choice. "gpt-3.5" is translated to the helper's
    # own default so existing callers keep running on the same model as before.
    chat_model = "gpt-3.5-turbo" if model == "gpt-3.5" else model
    JSON_result = get_completion_from_messages(get_json_messages, model=chat_model) # returns a string
    return JSON_result

def text_to_python(message, model="gpt-4"):
    '''Query OpenAI API to convert a four-step answer into a Python script.

    Args:
        message: the step-by-step answer text to convert, sent as the user message.
        model: chat model name forwarded to get_completion_from_messages.

    Returns:
        The raw model reply as a string; use get_python() to extract the code
        fenced between """python and """.
    '''
    # The example code is deliberately flush-left: get_python() only strips the
    # two ends of the extracted snippet, so indented lines would not run as-is.
    Python_generator_system_message = '''A user will give you a text contain 4 steps. Your task is to convert the calculation described in the text into a runnable Python script.
Output the python code after deliminator """ as shown in the example below.
One Example is as follows:
INPUT TEXT:
Step 1: 
To answer this problem, we need to know the following information:
1. How much power is required to jump start a car?
2. How much power can be generated by one lemon battery?
3. How many lemon batteries are required to generate the power needed to jump start a car?

Step 2:
To answer this problem, we can follow these steps:
1. Determine the power required to jump start a car.
2. Determine the power generated by one lemon battery.
3. Calculate the number of lemon batteries required to generate the power needed to jump start a car.

Step 3:
1. According to AAA, the power required to jump start a car is around 300-400 amps for 5-10 seconds. Let's assume that we need 350 amps for 7 seconds to jump start a car.
2. One lemon battery can generate around 0.7 volts and 0.0003 amps of current. Using Ohm's law, we can calculate the power generated by one lemon battery as follows: Power = Voltage x Current = 0.7 x 0.0003 = 0.00021 watts or 0.21 milliwatts.
3. To generate 350 amps for 7 seconds, we need a total power of 350 x 7 = 2450 watts or 2.45 kilowatts. To generate this much power using lemon batteries, we need 2.45 / 0.00021 = 11,666.67 lemon batteries.

Step 4:
Assumptions:
1. The power required to jump start a car is assumed to be 350 amps for 7 seconds.
2. The power generated by one lemon battery is assumed to be 0.21 milliwatts.
3. The lemon batteries are assumed to be connected in series to generate the required voltage and current.

OUTPUT PYTHON:
"""python
# Step 1: Define the variables
power_car_jump_start_amps = 350    # Amps
time_car_jump_start_seconds = 7    # Seconds

lemon_battery_volts = 0.7          # Volts
lemon_battery_amps = 0.0003        # Amps

# Step 2: Calculate the power required to jump start a car and the power generated by one lemon battery

# Power = Voltage x Current
power_car_jump_start_watts = power_car_jump_start_amps * time_car_jump_start_seconds  # Watts
power_lemon_battery_watts = lemon_battery_volts * lemon_battery_amps  # Watts

# Step 3: Calculate the number of lemon batteries required to generate the power needed to jump start a car
number_of_lemon_batteries = power_car_jump_start_watts / power_lemon_battery_watts

# Step 4: Print the result
print(f"You need approximately {number_of_lemon_batteries:.0f} lemon batteries to jump start a car.")
"""
Your will only output the python code after """ in the following format:
"""python
# Step 1: Define the variables
# ...
# Last step: Print the result
print(f"...")
"""
'''
    get_python_messages = [{"role": "system", "content": Python_generator_system_message},
    {"role": "user", "content": message}]
    result = get_completion_from_messages(get_python_messages, model=model) # returns a string
    return result
# # Example of an OpenAI ChatCompletion request with stream=True
# # https://platform.openai.com/docs/guides/chat

# # a ChatCompletion request

In [None]:
# Uncomment and Run this code block to get an example output

# prompt_example= f"""What is Fermi Estimation and how does it work?"""
# response = get_completion(prompt_example)


## Question inference (with GPT4 model)

In [51]:
question_bank = ["How many people in the US are currently interacting with a chatbot?"]

# One record per question, so results from earlier questions are not lost when
# the bank holds more than one entry.
results = []

for index, question in enumerate(question_bank):
    answer = get_answer_step_by_step(question)
    print(f"<<<<Completed Question {index} in question bank<<<<")
    answer_json = text_to_json(answer)
    print(f"<<<<Received Response to Question {index} <<<<")
    python_message = text_to_python(answer)
    print(f"<<<<Received Python Code to Question {index} <<<<")
    results.append({
        "question": question,
        "answer": answer,
        "answer_json": answer_json,
        "python_message": python_message,
    })

# Show every question's answer, extracted JSON and extracted Python code.
# `answer`, `answer_json` and `python_message` still hold the last question's
# values for the cells below; an empty bank simply prints nothing.
for record in results:
    print(record["answer"])
    print(get_json(record["answer_json"]))
    print(get_python(record["python_message"]))

## Rough timings per question, as noted by the original author:
## get_answer_step_by_step is about 30s
## the JSON conversion is about 10s
## the Python conversion is about 20s

<<<<Completed Question 0 in question bank<<<<
<<<<Received Response to Question 0 <<<<
<<<<Received Python Code to Question 0 <<<<
Step 1: Define the problem and desired information
The problem is to determine the number of people in the United States who are currently interacting with a chatbot.

Step 2: Develop a methodology to answer the problem
To estimate the number of people interacting with a chatbot, we can use the following approach:
1. Estimate the total number of internet users in the United States.
2. Estimate the percentage of internet users who interact with chatbots.
3. Multiply the total number of internet users by the percentage of users who interact with chatbots.

Step 3: Calculate the results based on the methodology
1. According to Datareportal's "Digital 2021: United States of America" report, there are approximately 312.32 million internet users in the United States.
2. According to a survey by Drift, 15% of American consumers have used chatbots to interact with 

In [48]:
# Show the current step-by-step answer and regenerate its JSON summary.
# NOTE(review): relies on `answer` already being defined in the kernel by the
# question-inference cell; run that cell first on a fresh kernel.
print(answer)
answer_json = text_to_json(answer)


Step 1: Define the problem and additional information needed
The problem is to find the approximate weight of a 150lb person if they were able to stand on the surface of the sun. To do this, we need to know the mass of the person, the mass of the sun, and the radius of the sun.

Step 2: Methodology to answer the problem
We can use the formula for gravitational force to calculate the weight of the person on the sun's surface. The formula is:

F = (G * m1 * m2) / r^2

where F is the gravitational force (weight), G is the gravitational constant (6.67430 x 10^-11 m^3 kg^-1 s^-2), m1 is the mass of the person, m2 is the mass of the sun, and r is the distance between the centers of the person and the sun (which is approximately equal to the sun's radius).

First, we need to convert the person's weight (150lb) to mass (in kg). We can do this using the conversion factor 1lb = 0.453592kg:

mass_person = 150lb * 0.453592kg/lb

Next, we need the mass of the sun (m2) and the radius of the sun (r).

{"problem definition": "Find the approximate weight of a 150lb person if they were able to stand on the surface of the sun.", "methodology": "To answer this problem, we can use the formula for gravitational force to calculate the weight of the person on the sun's surface. We need to know the mass of the person, the mass of the sun, and the radius of the sun. First, we convert the person's weight to mass. Then, we calculate the gravitational force using the formula F = (G * m1 * m2) / r^2.", "results": "The approximate weight of a 150lb person if they were able to stand on the surface of the sun is 1,328.5 N (Newtons).", "assumptions": "The person can stand on the surface of the sun. The sun's mass and radius are constant. The gravitational constant (G) is accurate."}


In [32]:
# exec() runs the model-generated code with full access to this kernel -- it is NOT a sandbox, so review the code before uncommenting
#exec(get_python(python_message))
# just print the message will do : )



The approximate weight of a 150lb person if they were able to stand on the surface of the sun would be around 18627.5 Newtons.


The approximate weight of a 150lb person if they were able to stand on the surface of the sun would be around 18627.5 Newtons.


In [None]:
# Best practices:
# https://platform.openai.com/docs/guides/gpt-best-practices/strategy-split-complex-tasks-into-simpler-subtasks

# Strategy: Split complex tasks into simpler subtasks

In [None]:
# Strategy: Use a persona

In [None]:
# Strategy: Use inner monologue
# instruct the model to put parts of the output that are meant to be hidden from the user into a structured format
# makes parsing them easy. 

# before presenting the output to the user, the output is parsed and only part of the output is made visible.

# Example system prompt for the inner-monologue strategy: the model is told to wrap
# its private working (steps 1-3) in triple quotes and to give only the hint outside them.
system_message_IM_example = '''Follow these steps to answer the user queries.

Step 1 - First work out your own solution to the problem. Don't rely on the student's solution since it may be incorrect. Enclose all your work for this step within triple quotes (""").

Step 2 - Compare your solution to the student's solution and evaluate if the student's solution is correct or not. Enclose all your work for this step within triple quotes (""").

Step 3 - If the student made a mistake, determine what hint you could give the student without giving away the answer. Enclose all your work for this step within triple quotes (""").

Step 4 - If the student made a mistake, provide the hint from the previous step to the student (outside of triple quotes). Instead of writing "Step 4 - ..." write "Hint:".'''