In [7]:
import argparse
from typing import Optional
from dotenv import load_dotenv
import os
from tqdm import tqdm
import numpy as np

# torch
import torch

# OpenAI client and LangChain prompt/agent utilities
from openai import OpenAI
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.chains import LLMChain, LLMMathChain
from langchain.utilities import SearchApiAPIWrapper


from unidecode import unidecode

from utils import compile_fp, convert_units, accuracy_metric
import json

load_dotenv()

True

In [8]:

# Shared OpenAI client. The key is looked up in the process environment
# (explicitly passing OPENAI_API_KEY mirrors the client's default lookup).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def chat(messages, model_name="gpt-4", temperature=None):
    """Send a conversation to the OpenAI chat completions API and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts, oldest first.
        model_name: Name of the chat model to query.
        temperature: Optional sampling temperature. When left as ``None`` the
            argument is omitted from the request, so the request is identical
            to the one sent before this parameter existed.

    Returns:
        The ``content`` of the first choice in the completion.
    """
    request = {"messages": messages, "model": model_name}
    if temperature is not None:
        request["temperature"] = temperature

    chat_completion = client.chat.completions.create(**request)
    return chat_completion.choices[0].message.content

# Smoke test: one round trip to confirm the API key and client are working.
chat([dict(role="user", content="Say Hello World")])

'Hello World'

In [9]:
## PROMPT CELL

# Templates for the individual chat turns exchanged with the model.

# User turn 1: the estimation question, passed through verbatim.
QUERY_PROMPT = PromptTemplate(
    template="{question}",
    input_variables=["question"],
)

# User turn 2: the bullet list of contextual facts.
CONTEXT_PROMPT = PromptTemplate(
    template="CONTEXT:=\n{context}",
    input_variables=["context"],
)

# Assistant turn 1: summary plus initial program, replayed into the history
# before the context is supplied.
ASSISTANT_FIRST_RESP_PROMPT = PromptTemplate(
    template="\nSUMMARY:= {summary}\nPROGRAM:=\n{program}\n",
    input_variables=["summary", "program"],
)

# Assistant turn 2: the revised program.
# NOTE(review): the label here is "REVISED_PROGRAM:=" (underscore) whereas
# SYSTEM_PROMPT_STRING and the few-shot exemplars spell it "REVISED PROGRAM:=";
# confirm which spelling is intended before this template is put to use.
ASSISTANT_SECOND_RESP_PROMPT = PromptTemplate(
    template="\nREVISED_PROGRAM:=\n{revised_program}\n",
    input_variables=["revised_program"],
)


# System message placed first in every conversation built by SamplePredictor.ask.
# It defines the reply format the rest of the notebook depends on: an initial
# answer made of "SUMMARY:=" and "PROGRAM:=" sections (SamplePredictor.eval splits
# the reply on exactly those markers and executes the program to read `A0`),
# followed, once the user sends "CONTEXT:=", by a "REVISED PROGRAM:=" reply.
SYSTEM_PROMPT_STRING = """
You are a helpful assistant tasked with answering estimation questions.
The user will first ask an estimation question to answer and at the end of the question the user will indicate the units that they want the answer to be in or dimensionless if the answer has no units.

Your initial answer will contain two parts: a summary and a program.
SUMMARY:= First you should write a concise summary of any hypothetical scenarios which the question sets up as well as what is the exact quantity that the question is asking you to estimate.
PROGRAM:=
Next you must write a Python program that answers the estimation question and stores that answer in the variable `A0`.
Your program will have the following structure:
# Q_: Your program should ask sub-questions to help you answer the question.
# R_: After asking each sub-question, you must provide your reasoning for why the sub-question is relevant to our goal of answering the main question.
... You are then encouraged to decompose the sub-question even further unless you are confident that you know the numerical answer to the sub-question ...
# F_: Before answer each sub-question, you will provide a fact that you are referencing to answer the sub-question. The fact must be verifiable by a third party.
A_= You will then answer these sub-questions by assigning the answer to the variable `A_` where the number after the `A` corresponds to the number after the `Q` of the sub-question. # then put the units in a comment after the variable assignment
Please decompose all sub-questions as much as possible to ensure that all quantities are estimated correctly and the math to recompose the quantities is as simple as possible.
Ensure that all quantities use units which are consistent with the units for the final answer.
Additionally, make sure all numerical quantities used in your Python code are written in scientific notation.

After you provide this initial answer, the user will then provide a list of contextual information which they want you to integrate into your answer.
CONTEXT:=
The user will provide this contextual information as a bullet point list of facts.

Your response to the user will be a revision of your initial program.
REVISED PROGRAM:=
This program will have the exact same structure as your initial program with questions (Q_), reasoning (R_), facts (F_), and answers (A_) but you will revise the program to integrate the new contextual information provided by the user.
For ever question, reasoning and fact used in your revised program, you must cite whether it is from the original program (Same as original) or from the new contextual information provided by the user (Revised based on new context).
If you are reusing a question, reasoning or fact from the original program, it does not have to be the same location that it appears the original program but you must cite where it is located in the original program.
Note that not all of the contextual information provided by the user will be relevant so you do not need to integrate all of it into your revised program.
You are encouraged to keep as much of your original program as much as possible but you are allowed to introduce new sub-questions and answer them if they are necessary to answer the main question given the new contextual information.
Additionally, you do not need to keep all sub-questions from your original program if they become unnecessary to the estimation question given the new contextual information.

In the following messages, you are provided with verified examplar conversations between the user and the assissant.
Lastly, you will be provided with a user question and you must continue the conversation in the same format as the previous examplar conversations.
"""

FEW_SHOT_PROMPTS = [
"""User:If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be? (units: in**3)

Assistant:
SUMMARY:= We take a penny, melt it down, and stretch it out into a hollow sphere 1 atom thick. We want to compute the volume of this sphere.
PROGRAM:=
# Q0: If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be?
# R0: This is the main question we want to answer.
# Q1: What is the volume of a single penny?
# R1: To compute the volume of the hollow sphere, we need the original volume of the penny since it is what gets melted down and turned into the sphere.
# F1: The volume of a penny is 0.84 in**3.
A1=8.4e-1 # (in**3)
# Q2: What is the surface area of a hollow sphere 1 atom thick which is constructed from a melted penny?
# R2: We only know the volume of the penny and we need to translate that information into information about the sphere.
# F2: The surface area of a hollow sphere 1 atom thick constructed from a melted penny is the same as the volume of the penny.
A2=8.4e-1 # (in**2)
# Q3: What is the radius of the sphere given its surface area?
# R3: To compute the volume of the sphere given its surface area, we first need to compute the radius of the sphere.
# F3: The radius of a sphere is the square root of the surface area of the sphere divided by 4*pi.
A3=(A2/(4*np.pi))**(1/2) # (in)
# F0: The volume of a sphere is 4/3*pi*r**3 where r is the radius of the sphere.
A0=(4/3)*np.pi*(A3 ** 3) # (in**3)

User:CONTEXT:=
- The volume of a penny is 12 cm**3.

Assistant:
REVISED PROGRAM:=
# Q0: If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be? (Same as original question Q0)
# R0: This is the main question we want to answer. (Same as original reasoning R0)
# Q1: What is the volume of a single penny? (Same as original question Q1)
# R1: To compute the volume of the hollow sphere, we need the original volume of the penny since it is what gets melted down and turned into the sphere. (Same as original reasoning R1)
# F1: The volume of a penny is 12 cm**3. We need to convert this into in**3 by multiplying by 0.0610237. (Revised fact based on new context)
A1=0.0610237 * 1.2e+1 # (in**3)
# Q2: What is the surface area of a hollow sphere 1 atom thick which is constructed from a melted penny? (Same as original question Q2)
# R2: We only know the volume of the penny and we need to translate that information into information about the sphere. (Same as original reasoning R2)
# F2: The surface area of a hollow sphere 1 atom thick constructed from a melted penny is the same as the volume of the penny. (Same as original fact F2)
A2=A1 # (in**2)
# Q3: What is the radius of the sphere given its surface area? (Same as original question Q3)
# R3: To compute the volume of the sphere given its surface area, we first need to compute the radius of the sphere. (Same as original reasoning R3)
# F3: The radius of a sphere is the square root of the surface area of the sphere divided by 4*pi. (Same as original fact F3)
A3=(A2/(4*np.pi))**(1/2) # (in)
# F0: The volume of a sphere is 4/3*pi*r**3 where r is the radius of the sphere. (Same as original fact F0)
A0=(4/3)*np.pi*(A3 ** 3) # (in**3)
""",
"""User:How much would the sea level rise if all the ice melted? (units: mi)

Assistant:
SUMMARY:= All of the ice on Earth has melted and flown into the ocean. We want to compute how much the sea level will rise.
PROGRAM:=
# Q0: How much would the sea level rise if all the ice melted?
# R0: This is the main question we want to answer.
# Q1: What is the volume of water that will be added to the ocean?
# R1: If we know the volume of water that will be added to the ocean, we can compute the sea level rise by dividing the volume of water by the surface area of the ocean.
# Q2: What is the volume of ice on Earth that is on land?
# R2: The volume of water that will be added to the ocean is the same as the volume of ice on Earth that is on land since that ice will melt and flow into the ocean.
# Q3: What is the volume of ice in Antarctica?
# R3: The majority of the ice on land is in Antarctica so it is a good approximation to use the volume of ice in Antarctica as the volume of ice on Earth that is on land.
# Q4: What is the thickness of the ice in Antarctica?
# R4: To compute the volume of ice in Antarctica, we need to know the thickness of the ice because we can multiply that by its surface area to get its volume.
# F4: The thickness of the ice in Antarctica is 4.8 km. We must convert this into mi by multiplying by 0.621371.
A4=0.621371 * 4.8e+0 # (mi)
# Q5: What is the surface area of Antarctica?
# R5: To compute the volume of ice in Antarctica, we need to know the surface area of Antarctica because we can multiply that by its thickness to get its volume.
# Q6: What fraction of the Earth's surface is Antarctica?
# R6: To compute the surface area of Antarctica, we need to know what fraction of the Earth's surface is Antarctica because we can multiply that by the surface area of the Earth to get the surface area of Antarctica.
# F6: Antarctica is 1/30th of the Earth's surface.
A6=1/30 # (dimensionless)
# Q7: What is the surface area of the Earth?
# R7: To compute the surface area of Antarctica, we need to know the surface area of the Earth because we can multiply that by the fraction of the Earth's surface that is Antarctica to get the surface area of Antarctica.
# F7: The surface area of the Earth is 1.97e+8 mi**2.
A7=1.97e+8 # (mi**2)
# F5: The surface area of Antarctica is the fraction of the Earth's surface that is Antarctica multiplied by the surface area of the Earth.
A5=A6*A7 # (mi**2)
# F3: The volume of ice in Antarctica is the surface area of Antarctica multiplied by the thickness of the ice in Antarctica.
A3=A5*A4 # (mi**3)
# F2: The volume of ice on Earth is approximated by the volume of ice in Antarctica.
A2=A3 # (mi**3)
# F1: The volume of water that will be added to the ocean is the same as the volume of ice on Earth.
A1=A2 # (mi**3)
# Q8: What is the surface area of the ocean?
# R8: To compute the sea level rise, we need to know the surface area of the ocean because we can divide the volume of water that will be added to the ocean by the surface area of the ocean to get the sea level rise.
# F8: The surface area of the ocean is 361 million km**2. We must convert this into mi**2 by multiplying by 0.386102.
A8=0.386102 * 361e+8 # (mi**2)
# F0: The sea level rise is the volume of water that will be added to the ocean divided by the surface area of the ocean.
A0=A1/A8 # (mi)

User:CONTEXT:=
- A basketball has a volume of 0.01 km**3.
- The mass of ice on land is 2.6e+19 kg.
- Antarctica is the largest content on Earth.
- A basketball has a volume of 0.01 km**3.
- The surface area of Antarctica is 5.4 million mi**2.
- The density of ice is 0.92 g/cm**3.
- The total volume of paint available to be painted on the wall is 0.13 cubic feet.
- An average human inhales 6 liters of air every minute

Assistant:
REVISED PROGRAM:=
# Q0: How much would the sea level rise if all the ice melted? (Same as original question Q0)
# R0: This is the main question we want to answer. (Same as original reasoning R0)
# Q1: What is the volume of water that will be added to the ocean? (Same as original question Q1)
# R1: If we know the volume of water that will be added to the ocean, we can compute the sea level rise by dividing the volume of water by the surface area of the ocean. (Same as original reasoning R1)
# Q2: What is the volume of ice on Earth that is on land? (Same as original question Q2)
# R2: The volume of water that will be added to the ocean is the same as the volume of ice on Earth that is on land since that ice will melt and flow into the ocean. (Same as original reasoning R2)
# Q3: What is the mass of the ice on Earth that is on land? (Revised question based on new context)
# R3: To compute the volume of ice on Earth that is on land, we need to know the mass of the ice because we can divide the mass of the ice by the density of ice to get the volume of the ice. (Revised reasoning based on new context)
# F3: The mass of ice on land is 2.6e+19 kg. (Revised fact based on new context)
A3=2.6e+19 # (kg)
# Q4: What is the density of ice? (Revised question based on new context)
# R4: To compute the volume of ice on Earth that is on land, we need to know the density of ice because we can divide the mass of the ice by the density of ice to get the volume of the ice. (Revised reasoning based on new context)
# F4: The density of ice is 0.92 g/cm**3. We must convert this into kg/(m**3) by multiplying by 1e+3. We must then convert this further into kg/(mi**3) by dividing by (6.2e-4 ** 3). (Revised fact based on new context)
A4=(1e+3)/(6.2e-4 ** 3) * 9.2e-1 # (kg/(mi**3))
# F2: The volume of ice on Earth that is on land is the mass of the ice on Earth that is on land divided by the density of ice. (Revised fact based on new context)
A2=A3/A4 # (mi**3)
# F1: The volume of water that will be added to the ocean is the same as the volume of ice on Earth that is on land. (Same as previous fact F1)
A1=A2 # (mi**3)
# Q5: What is the surface area of the ocean? (Same as original question Q8)
# R5: To compute the sea level rise, we need to know the surface area of the ocean because we can divide the volume of water that will be added to the ocean by the surface area of the ocean to get the sea level rise. (Same as original reasoning R8)
# F5: The surface area of the ocean is 361 million km**2. We must convert this into mi**2 by multiplying by 0.386102. (Same as original fact F8)
A5=0.386102 * 361e+8 # (mi**2)
# F0: The sea level rise is the volume of water that will be added to the ocean divided by the surface area of the ocean. (Same as original fact F0)
A0=A1/A5 # (mi)
"""
]
# FEW_SHOT_PROMPT_STRING = "\n\n".join(FEW_SHOT_PROMPTS)

# Older exemplars in a single-turn "Input: / QUESTION:= / PROGRAM:=" layout.
# Nothing in this notebook references this list; it differs from FEW_SHOT_PROMPTS
# in having no "User:" / "Assistant:" markers, so SamplePredictor.ask could not
# slice these entries as written.
UNUSED_FEW_SHOT_PROMPTS = [
"""Input:
QUESTION:=How long would it take to pluck each hair out of your body one at a time?

CONTEXT:=
F1: It takes around 5 seconds to pluck a single strand of hair.
F2: The entire human body has 5e+6 hair follicles.
PROGRAM:=
# Q0: How long would it take to pluck each hair out of your body one at a time?
# Q1: How long does it take to pluck a single strand of hair?
A1=5 # time to pluck single strand of hair (s) R: F1 tells us how long it takes to pluck a single strand of hair.
# Q2: How many hairs do we have on our body?
A2=5e+6 # number of hairs on body (hairs) R: F2 tells us how many hairs we have on our body.
A0=A1*A2 # time to pluck all hairs on body (s) R: The time to pluck all hairs on our body is the time to pluck a single strand of hair multiplied by the number of hairs on our body.
""",
"""Input:
QUESTION:=A cure for all cancers would increase the average American lifespan by how many years?

PROGRAM:=
# Q0: A cure for all cancers would increase the average American lifespan by how many years?
# Q1: What is the average American lifespan?
# R1: The average American lifespan is 78 years.
A1=78 # (years)
# Q2: How much does cancer reduce the average lifespan by?
# R2: Cancer reduces the average lifespan by 5 years.
A2=5 # (years)
# R0: To compute the average lifespan with a cure for cancer, take the average American lifespan plus the amount that cancer reduces the average lifespan by.
A0=A1+A2 # (years)
""",
"""Input:
QUESTION:=What is the number of queen-size mattresses it would take to fill the star Betelgeuse?

PROGRAM:=
# Q0: What is the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun?
# Q1: What is the mass of a queen size mattress?
# R1: The mass of a queen size mattress is 140 pounds.
A1=140 # (pounds)
# Q2: What is the mass of Betelgeuse?
# Q3: What is the mass of the sun?
# R3: The mass of the sun is 4.3e+30 pounds.
A3=4.3e+30 # (pounds)
# Q4: What is the ratio of the mass of Betelgeuse to the mass of the sun?
# The ratio of mass of Betelgeuse to mass of sun is 18
A4=18 # (dimensionless)
# R2: The mass of Betelgeuse is the mass of the sun multiplied by the ratio of the mass of Betelgeuse to the mass of the sun.
A2=A3*A4 # (pounds)
# R0: The number of queen size mattresses it would take to fill Betelgeuse is the mass of Betelgeuse divided by the mass of a queen size mattress.
A0=A2/A1 # (mattresses)
""",
"""Input:
QUESTION:=How many air molecules are in this lecture hall?

SUMMARY:= We have a lecture hall and we want to compute the number of air molecules inside of it.
PROGRAM:=```python
# Q0: How many air molecules are in this lecture hall?
# R0: This is the main question we want to answer.
# Q1: What is the volume of an average lecture hall?
# R1: To get a sense of how many air molecules we can fit in this lecture hall, we first need to know its volume which can be approximated by the volume of an average lecture hall.
# F1: The volume of an average lecture hall is 1000 cubic meters.
A1=1000 # (m**3)
# Q2: What is the volume of a single air molecule?
# R2: To compute the number of air molecules in some space with a known volume, we need to know the volume of a single air molecule.
# F2: The volume of a single air molecule is 3e-24 cubic meter.
A2=3e-24 # (m**3)
# F0: The number of air molecules in the lecture hall is the volume of the lecture hall divided by the volume of a single air molecule.
A0=A1/A2 # (molecules)
```
""",
]

# COMBINED_PROMPT = PromptTemplate(
#     template=f"{SYSTEM_PROMPT_STRING}\n\n{FEW_SHOT_PROMPT_STRING}\n\n{INPUT_PROMPT.template}",
#     input_variables=INPUT_PROMPT.input_variables,
# )

In [10]:
class SamplePredictor:
    """Ask the chat model an estimation question, optionally revise it with context, and score it.

    A conversation is seeded with ``SYSTEM_PROMPT_STRING`` and the exemplars in
    ``FEW_SHOT_PROMPTS``. The model's reply is expected to contain a Python
    program after a ``PROGRAM:=`` marker; that program is executed and the
    value it leaves in ``A0`` is scored against the reference answer with
    ``accuracy_metric``.
    """

    def __init__(self, temperature=0):
        """Bind the module-level ``chat`` helper and remember the requested temperature."""
        # Stored on the instance so the backend can be swapped out.
        self.chat = chat
        # NOTE(review): kept for reference only -- `self.chat` is invoked with the
        # message list alone, so this value does not currently affect sampling.
        self.temperature = temperature

    @staticmethod
    def _normalize_context(context):
        """Return ``context`` as a list of fact strings, or ``None`` if there is nothing to add.

        Accepts ``None``, an iterable of facts, or a packed string in the
        "CONTEXT:=F1: ...=F2: ..." form used by the notebook's sample entry.
        """
        if context is None:
            return None
        if isinstance(context, str):
            import re

            body = context.split("CONTEXT:=")[-1]
            # Split only where "=" is followed by the next "Fk: " label, so a
            # fact that itself contains "=" or ": " stays in one piece.
            pieces = re.split(r"=(?=F\d+: )", body)
            facts = [re.sub(r"^\s*F\d+:\s*", "", piece).strip() for piece in pieces]
        else:
            facts = [str(fact) for fact in context]
        facts = [fact for fact in facts if fact]
        return facts or None

    @staticmethod
    def eval(question, preds, answer, context=None, num_tokens=None, verbose=True):
        """Parse one model reply, execute its program and score the result.

        Args:
            question: Question text (recorded in the prediction only).
            preds: Raw reply text from the model.
            answer: Reference answer handed to ``accuracy_metric``.
            context: Optional list of fact strings (recorded as a bullet list).
            num_tokens: Optional token count, printed when ``verbose``.
            verbose: Print the question, answers and program (or the raw reply).

        Returns:
            ``(compiled_acc, parsable, prediction)``. ``parsable`` is 1 only when
            the program ran, defined ``A0`` and could be scored; otherwise both
            numbers are 0. ``prediction`` always holds ``question``,
            ``correct_answer``, ``raw_outputs`` and ``context``, and gains
            ``summary``, ``program`` and ``compiled_answer`` once the program
            has produced ``A0``.
        """
        prediction = {
            "question": question,
            "correct_answer": answer,
            "raw_outputs": preds,
            "context": "- " + "\n- ".join(context) if context is not None else "",
        }

        try:
            summary = preds.split("SUMMARY:=")[-1].split("PROGRAM:=")[0].strip('\n')
            program = preds.split("PROGRAM:=")[-1].strip('\n')

            program_lines = []
            for line in program.split("\n"):
                # Markdown code fences around the program are not Python; drop them.
                if line.strip().startswith("```"):
                    continue
                # Q_/R_/F_ lines emitted without the leading "#" become comments.
                # `line[:1]` (rather than `line[0]`) keeps blank lines from raising.
                if line[:1] in ("Q", "R", "F"):
                    line = "# " + line
                program_lines.append(line)
            program = "\n".join(program_lines)

            loc = {}
            # SECURITY: this executes model-generated code with the notebook's
            # globals. Only run it in an environment where that is acceptable.
            exec(program, globals(), loc)
            compiled_out = loc["A0"]

            prediction = {
                "summary": summary,
                "program": program,
                "compiled_answer": compiled_out,
                **prediction,
            }

            compiled_acc = accuracy_metric(answer, compiled_out)
            parsable = 1
        except Exception:
            # Best effort: any failure while parsing, running or scoring the
            # reply counts as unparsable. KeyboardInterrupt still propagates.
            compiled_acc = 0
            parsable = 0

        if verbose:
            print("Question: {}; \nContext: {}\nCorrect Answer: {}".format(prediction["question"], prediction["context"], prediction["correct_answer"]))
            if num_tokens is not None:
                print("Number of Tokens in Response: ", num_tokens)

            if "compiled_answer" in prediction:
                print("Compiled Answer is: {} \nSummarized Problem is: {}\nProgram:\n```python\n{}\n```".format(prediction['compiled_answer'], prediction['summary'], prediction['program']))
            else:
                print("Unable to parse output; Outputting Raw Output:\n{}".format(prediction["raw_outputs"]))
        return compiled_acc, parsable, prediction

    def ask(self, question, answer, context=None, verbose=True):
        """Run the one- or two-turn conversation for a single question.

        Args:
            question: Question text, including the "(units: ...)" suffix.
            answer: Reference answer used for scoring.
            context: ``None`` (or empty) for a single turn; otherwise a list of
                facts or a packed "CONTEXT:=..." string, which triggers a second
                turn asking the model to revise its program.
            verbose: Print banners and per-turn details.

        Returns:
            ``(noctxt_acc, noctxt_parsable, noctxt_pred, ctxt_acc, ctxt_parsable,
            ctxt_pred)``. The last three are ``0, 0, {}`` when no context round
            was run.
        """
        context = self._normalize_context(context)

        messages = [{"role": "system", "content": SYSTEM_PROMPT_STRING}]
        for few_shot_prompt in FEW_SHOT_PROMPTS:
            # Exemplar layout: User question / Assistant program / User context /
            # Assistant revised program.
            user_turns = few_shot_prompt.split("User:")
            assistant_turns = few_shot_prompt.split("Assistant:")
            turns = [
                ("user", user_turns[1].split("Assistant:")[0].strip('\n')),
                ("assistant", "\n" + assistant_turns[1].split("User:")[0].strip('\n')),
                ("user", user_turns[2].split("Assistant:")[0].strip('\n')),
                ("assistant", "\n" + assistant_turns[2].strip('\n')),
            ]
            messages.extend({"role": role, "content": content} for role, content in turns)
        messages.append({"role": "user", "content": QUERY_PROMPT.format(question=question)})

        if verbose:
            print(f"\n{'-' * 105}\n{'-' * 45} FIRST MESSAGE {'-' * 45}\n{'-' * 105}\n\n")

        preds = self.chat(messages)
        noctxt_acc, noctxt_parsable, noctxt_pred = self.eval(question, preds, answer, verbose=verbose)

        ctxt_acc, ctxt_parsable, ctxt_pred = 0, 0, {}
        if context is not None:
            if verbose:
                print(f"\n{'-' * 106}\n{'-' * 45} SECOND MESSAGE {'-' * 45}\n{'-' * 106}\n\n")

            if "program" in noctxt_pred:
                first_reply = ASSISTANT_FIRST_RESP_PROMPT.format(summary=noctxt_pred["summary"], program=noctxt_pred["program"])
            else:
                # The first reply could not be parsed, so there is no summary or
                # program to re-render; replay the raw reply instead.
                first_reply = preds
            messages.append({"role": "assistant", "content": first_reply})
            messages.append({"role": "user", "content": CONTEXT_PROMPT.format(context="- " + "\n- ".join(context))})

            preds = self.chat(messages)
            ctxt_acc, ctxt_parsable, ctxt_pred = self.eval(question, preds, answer, context=context, verbose=verbose)

        return noctxt_acc, noctxt_parsable, noctxt_pred, ctxt_acc, ctxt_parsable, ctxt_pred

    def predict(self, dataset, N, include_context=True, verbose=False, seed=0):
        """Evaluate up to ``N`` randomly chosen dataset entries.

        Args:
            dataset: Indexable collection of dicts with "question", "answer"
                and, when ``include_context`` is true, "context" keys.
            N: Number of entries to evaluate; capped at ``len(dataset)``.
            include_context: Run the context round and report its scores.
            verbose: Forwarded to ``ask``.
            seed: Seed for the sampling order. Uses a local generator, so
                NumPy's global random state is left untouched.

        Returns:
            ``(mean accuracy, parsable fraction, predictions)``. Scores come from
            the context round when one was run for an entry, otherwise from the
            first reply. With nothing to evaluate the result is ``(0.0, 0.0, [])``.
        """
        num_examples = min(N, len(dataset))
        shuffled_idxs = np.random.default_rng(seed).permutation(len(dataset))
        print("Shuffled Indices w/ fixed seed: ", shuffled_idxs[:10])
        if num_examples <= 0:
            return 0.0, 0.0, []

        compiled_acc = 0
        parsable = 0
        predictions = []
        for idx in tqdm(shuffled_idxs[:num_examples]):
            entry = dataset[int(idx)]
            question = entry["question"]
            answer = entry["answer"]
            # None (not "") is what tells `ask` to skip the context round.
            context = entry["context"] if include_context else None

            (noctxt_acc, noctxt_parsable, noctxt_pred,
             ctxt_acc, ctxt_parsable, ctxt_pred) = self.ask(question, answer, context, verbose=verbose)

            # `ctxt_pred` stays empty when `ask` skipped the second round.
            if include_context and ctxt_pred:
                acc_i, parsable_i, prediction = ctxt_acc, ctxt_parsable, ctxt_pred
            else:
                acc_i, parsable_i, prediction = noctxt_acc, noctxt_parsable, noctxt_pred

            compiled_acc += acc_i
            parsable += parsable_i
            predictions.append(prediction)

        return compiled_acc / num_examples, parsable / num_examples, predictions


# Shared predictor instance used by the cells below.
predictor = SamplePredictor(temperature=0.0)


In [18]:
# Sample record used by the next cell. Fields as they appear here:
#   question       - the estimation question (without units)
#   program        - reference decomposition, "="-separated Q/A/operation steps
#   answer         - "<value> <units>" (the next cell reads the units from it)
#   context        - "CONTEXT:=" followed by "="-separated "Fk: fact" items
#   fact_transform - string-index to integer mapping; not read anywhere in this notebook
entry = {
    "question": "How much space would be required to fit 1 individual of each species of plants and animals?",
    "program": "PROGRAM:=Q1: What is the volume occupied by a single species of all the animals in the world?=Q2: What is the volume occupied by single species of all the plants in the world?=Q3: What is the volume of an average animal?=Q4: How many different species of animals present?=Q5: What is the volume of a single plant?=Q6: How many different species of plant present?=A1: 7200 cm**3=A2: 7.7e+6=A3: 1570 cm**3=A4: 391000=Q6 -> A4 | F16=Q5 -> A3 | F15=Q4 -> A2 | F19=Q3 -> A1 | F11=Q2 -> Mul ( Q5, Q6)=Q1 -> Mul (Q3, Q4)=P: Add (Q1, Q2)",
    "answer": "56053870000 cm**3",
    "context": "CONTEXT:=F1: An average person's annual consumption of food is 1996 pounds=F2: The height is 1.73 m=F3: It takes around 1 second to count a number=F4: The circumference of the Earth is 1577756575 inches=F5: The length of a nerd rope is 12 inches=F6: The limit of a planet size is 10x that of jupiter.=F7: There are 60 minutes in an hour.=F8: The size of Jupiter is 1.4e+15 km cube.=F9: There are 60 seconds in a minute.=F10: There are 24 hours in a day.=F11: Assuming the average volume of a single animal to be 7200 cc (90x40x2)=F12: An average farm produces around 52000 pounds of food each year.=F13: Around 360 sq. yards is required to build a house with a yard.=F14: The average thickness of a sheet of paper is 0.00008 m=F15: The average volume of a plant is 1570 cc (20cm height and 5cm radius)=F16: The number of plant species in the world are 391000=F17: There are 1000000 numbers in a million=F18: The total human population is 7.2e+9=F19: The number of animal species in the world are 7.7e+6=F20: the average family size is 4",
    "fact_transform": {
        "10": 0,
        "18": 1,
        "14": 2,
        "15": 3
    }
}

# entry = {
#     "question": "How many golf balls put into the worlds oceans would it take to submerge all of the land on earth from the displaced water?",
#     "program": "PROGRAM:=Q1: What is the volume of the oceans?=Q2: What is the volume of a golf ball?=A1: 1.3e+9 km**3=A2: 4e-5 km**3=Q2 -> A2 | F10=Q1 -> A1 | F11=P: Div (Q1, Q2)",
#     "answer": "3.20E+13",
#     "context": "CONTEXT:=F1: A single nuke contains around 4e+15 J=F2: Earth is to be moved 1% out of its orbit.=F3: The gravitational constant of earth is 6.6e-11 m**3*kg**-1*s**-2=F4: The volume of single SO bacterium is 2e-12 cc=F5: The surface area of oceans is 361e+6 km**2=F6: The average volume of a living thing in an ocean is 5e-3 km**3=F7: The radius of earth is 6.3e+6 m=F8: The average weight of normal person is 63 kg=F9: The height of the water in ocean is 3.7 km=F10: The volume of a golf ball is 4e-5 km**3=F11: The volume of the oceans is 1.3e+9 km**3=F12: The number of living things in an ocean is 1e+9=F13: The weight of a megalodon is 66138 pounds.=F14: Even Though dead bodies float, dead people weighing upwards of 125 kg sink in water.=F15: The numerical coefficient to calculate the square of the radius is 2.=F16: The towing capacity of a Model X is 6700 pounds=F17: The original volume of the oceans is 1335700000 km**3=F18: Around 4e+35 J is required to knock earth from its orbit.=F19: The mass of earth is 6e+24 kg=F20: The volume of ping pong ball is 33.5 cc",
#     "fact_transform": {
#         "10": 0,
#         "9": 1
#     }
# }

# entry = {
#     "question": "The density of water is just about 1000 kg/m3. The density of air is just about 1000 times less. Estimate the height of the atmosphere. As a simplifying assumption, take the atmosphere to be of uniform density up to some height after which the density rapidly falls to zero.",
#     "program": "PROGRAM:=Q1: What is the value of normal air pressure?=Q2: What is the product of density of air and acceleration due to gravity?=Q3: What is the acceleration due to gravity on earth?=Q4: What is the density of air?=Q5: What is the density of water?=Q6: What is the relationship between density of air and density of water?=A1: 101325 kg*m**-1*s**-2=A2: 10 m*s**-2=A3: 1000 kg*m**-3=A4: 1000=Q6 -> A4 | F1=Q5 -> A3 | F20=Q4 -> Div (Q5, Q6)=Q3 -> A2 | F8=Q2 -> Mul (Q3, Q4)=Q1 -> A1 | F10=P: Div (Q1, Q2)",
#     "answer": "10132.5 m",
#     "context": "CONTEXT:=F1: The density of air is 1000 times less than density of water.=F2: The average volume of a living thing in an ocean is 5e-3 km**3=F3: The ratio of height of volume of water without all the animals and with the animals is 0.8.=F4: The old depth of the ocean is 3.7 km=F5: The height of the original sea level is 3.7 km.=F6: The surface area of the oceans is 510e+6 km**2=F7: The increase in depth of the ocean is 0.001 km=F8: The acceleration due to gravity is 10 m*s**-2=F9: The cooling effect provided by a single km**2 of water is 0.5 W/km**2 [assumption]=F10: The value of normal air pressure is 101325 kg*m**-1*s**-2=F11: The total heat gain of global warming every year is 69666000 W=F12: The number of living things in an ocean is 1e+9=F13: The old volume of the ocean is 1.35e+9 km**3=F14: Even Though dead bodies float, dead people weighing upwards of 125 kg sink in water.=F15: The weight of a megalodon is 66138 pounds.=F16: The height of the water in ocean is 3.7 km=F17: The surface area of oceans is 361e+6 km**2=F18: The original volume of the oceans is 1335700000 km**3=F19: The towing capacity of a Model X is 6700 pounds=F20: The density of water is 1000 kg*m**-3",
#     "fact_transform": {
#         "9": 0,
#         "7": 1,
#         "19": 2,
#         "0": 3
#     }
# }
# entry = {
#     "question": "If 100% of the energy of all lightning strikes around the world could be harnessed and used, what percentage of the world's power could be supplied by lightning strikes?",
#     "program": "PROGRAM:=Q1: What is the total energy generated by lightning in a year?=Q2: What is the total energy consumption in the world annually?=A1: 4.7e+18=A2: 575e+24=Q2 -> A2 | F2=Q1 -> A1 | F1=P: Div (Q1, Q2)",
#     "answer": "8.10E-09",
#     "context": "CONTEXT:=F1: The total energy generated by lightning in a year is 4.7e+18 J=F2: The total energy consumption in the world annually is 575e+24 J"
# }

# entry = {
#     "question": "How many balloons would it take to fill the school hall?",
#     "program": "PROGRAM:=Q1: What is the average volume of a school hall?=Q2: What is the average volume occupied by a single balloon in free space?=A1: 36000 ft**3=A2: 0.50 ft**3=Q2 -> A2 | F12=Q1 -> A1 | F16=P: Div(Q1, Q2)",
#     "answer": "72000",
#     "context": "CONTEXT:=F1: The average air present inside these balls is 0.8 atmospheres of air.=F2: The average volume of a classroom is 7680 cubic feet.=F3: There are a total of 40 balls in the school, 10 of each sport.=F4: The fraction of Earth as USA is 1.87 %=F5: The perimeter of the classroom is 0.02 miles=F6: At any given time, 1270000 people are on air.=F7: The average thickness of a sheet of paper is 0.00008 m=F8: The average body height of a person is 170 cm.=F9: The density ratio of nerd candy to water is 3 (approx)=F10: Ideal wingpsan length is 83% of the length of the flying body.=F11: The average depth one could go through a normal dive in a pool is 7 feet.=F12: The average volume occupied by a single ping pong ball in freespace is 0.0012 cubic feet.=F13: An average person occupies 4 sq.ft space.=F14: The height is 1.73 m=F15: There are 365 days in a year=F16: The average volume of a school hall is 36000 cubic feet given its ceiling height is 16 feet and surface area to be 209 sq. ft.=F17: A million rally has a total of 1e+6 people.=F18: An average person needs 4 sq.ft of space.=F19: The number of miles to be run is 1.=F20: The total population in the world is 7.2e+9",
#     "fact_transform": {
#         "15": 0,
#         "11": 1
#     }
# }

In [19]:
# The reference answer is either "<value> <unit>" or a bare value; the second
# whitespace-separated token, when present, is used as the unit label.
answer = entry["answer"]
answer_tokens = answer.split()
units = answer_tokens[1] if len(answer_tokens) > 1 else "dimensionless"
question = entry["question"] + f" (units: {units})"

# Drop everything up to the "CONTEXT:=" header, cut the remainder on "=" and keep
# only the text after the last ": " of each piece (removes the "F<n>: " labels).
raw_facts = entry["context"].split("CONTEXT:=")[-1].split("=")
context = [fact.split(": ")[-1] for fact in raw_facts]

# Manual override kept for ad-hoc experiments:
# question = "What do you think the profit of Crayola is in a year? (units: dollars)"
# answer = "1e+20 cells"
# context = None

output = predictor.ask(question, answer, context)



---------------------------------------------------------------------------------------------------------
--------------------------------------------- FIRST MESSAGE ---------------------------------------------
---------------------------------------------------------------------------------------------------------


Question: How much space would be required to fit 1 individual of each species of plants and animals? (units: cm**3); 
Context: 
Correct Answer: 56053870000 cm**3
Compiled Answer is: 17000000.0 
Summarized Problem is:  We select one individual from each species of plants and animals and place them in a space. We want to compute the volume of this space.
Program:
```python
# Q0: How much space would be required to fit 1 individual of each species of plants and animals?
# R0: This is the main question we want to answer.
# Q1: What is the volume of the space occupied by 1 individual of each species of plants and animals?
# R1: If we know the volume of the space occupied by 

In [16]:
# Base-10 logarithm of a hard-coded value, i.e. its order of magnitude (~8.45).
# NOTE(review): presumably a compiled answer pasted from an earlier run — confirm its origin.
np.log10(281250000.0)

8.449092531119419

In [None]:
# Display positions 0, 1, 3 and 4 of the last `predictor.ask` result (position 2 is skipped).
tuple(output[position] for position in (0, 1, 3, 4))

In [None]:
# Order of magnitude (base-10 logarithm) of a hard-coded value.
# Uses np.log10 directly, like the other magnitude checks in this notebook,
# instead of the hand-rolled change of base np.log(x) / np.log(10).
np.log10(2411197387842214.5)

In [None]:
# Hand-built evaluation example: the question text already has a SUMMARY/PROGRAM
# prefix appended to it, and `preds` holds a full SUMMARY + PROGRAM response.
question = "If all but 1 million people on Earth died, how far in kilometers (on average) would you have to walk to meet someone?\n\nSUMMARY:= We want to compute the average distance in kilometers that one would have to walk to meet someone if all but 1 million people on Earth died.\nPROGRAM:=\n# Q0: If all but 1 million people on Earth died, how far in kilometers (on average) would you have to walk to meet someone?"

# Candidate response in the SUMMARY:= / PROGRAM:= layout (Q = sub-question,
# R = rationale, F = fact, A = assigned value).
preds = """
SUMMARY:= We want to compute the average distance in kilometers that one would have to walk to meet someone if all but 1 million people on Earth died.
PROGRAM:=
# Q0: If all but 1 million people on Earth died, how far in kilometers (on average) would you have to walk to meet someone?
# R0: This is the main question we want to answer.
# Q1: What is the total land area of Earth?
# R1: To compute the average distance one would have to walk to meet someone, we need to know the total land area of Earth.
# F1: The total land area of Earth is 148,939,063 km**2.
A1=148939063 # (km**2)
# Q2: How many people are left on Earth if all but 1 million people died?
# R2: To compute the average distance one would have to walk to meet someone, we need to know how many people are left on Earth.
# F2: If all but 1 million people on Earth died, then there would be 1 million people left on Earth.
A2=1000000 # (people)
# F0: The average distance one would have to walk to meet someone is the total land area of Earth divided by the number of people left on Earth.
A0=A1/A2 # (km)
"""

# NOTE(review): `answer` is not defined in this cell; it is whatever an earlier
# cell left in the kernel, so it may not correspond to this question — confirm.
predictor.eval(question, preds, answer)

In [None]:
# The notebook's import cell binds `OpenAI` to the `openai` SDK client class, whose
# constructor takes no `temperature` argument and whose instances are not callable.
# This cell needs LangChain's LLM wrapper instead, so it is imported here under a
# distinct alias to avoid shadowing the SDK client used by `chat`.
from langchain.llms import OpenAI as LangChainOpenAI

# Deterministic (temperature 0) completion model, reused by the agent cells below.
llm = LangChainOpenAI(temperature=0)
llm("What is the energy needed to evaporate the oceans in calories?")

In [None]:

# Math chain on top of the shared LLM; used directly in a later cell.
llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)
search = SearchApiAPIWrapper()

# The self-ask agent expects exactly one tool, and it must be named
# "Intermediate Answer"; the calculator tool therefore stays disabled.
tools = [
    Tool(
        name="Intermediate Answer",
        func=search.run,
        description="Useful for when you need to search for information",
    ),
    # Tool(
    #     name="Calculator",
    #     func=llm_math_chain.run,
    #     description="Useful for when you need to do mathematical calculations",
    # ),
]

# `AgentType` has no SELF_ASK_WITH_TOOLS member (it raised AttributeError);
# the self-ask agent is registered as SELF_ASK_WITH_SEARCH.
self_ask_with_search = initialize_agent(
    tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True
)



In [None]:
# Try the self-ask agent on a sample estimation question.
self_ask_with_search.run("How much rice can fit in an average sized house?")


In [None]:
# Query the search wrapper directly (no agent) for a single fact.
search.run("the average volume of blood donated by a person")

In [None]:
# Query the search wrapper directly with a unit-conversion question.
search.run("1 g/ml = ? kg/m^3")

In [None]:
# Ask the bare LLM (no tools) for a formula.
llm("What is the formula for the volume of a rectangular prism?")

In [None]:
# The notebook's import cell binds `OpenAI` to the `openai` SDK client class, whose
# constructor takes no `temperature` argument and whose instances are not callable.
# This cell needs LangChain's LLM wrapper instead, so it is imported here under a
# distinct alias to avoid shadowing the SDK client used by `chat`.
from langchain.llms import OpenAI as LangChainOpenAI

# Rebinds `llm` with a higher temperature (0.5) for the prompts that follow.
llm = LangChainOpenAI(temperature=0.5)

llm("""Find a simple formula to answer the following question: How much would the sea level rise if all of the ice on Earth melted?
    You are not allowed to use any numbers in your formula. Use as few variables as possible in your formula.
    After you have found a formula, provide an in-depth explanation of what each variable in your formula represents.
    """)

In [None]:
# Numbered list of candidate variables, interpolated into the prompt below.
variables = "\n\n1. The amount of carbon dioxide exhaled per breath\n2. The amount of oxygen inhaled per breath\n3. The amount of fat in kilograms that needs to be lost\n4. The amount of carbon dioxide produced when fat is burned"
# Ask the LLM to build a formula that refers to the variables by their list index.
llm(f"You are given the following variables:{variables}\nUsing these variables, devise a mathematical formula to answer the following: How many kilograms of co2 would i have to breath out to lose 1 kg of fat? Refer to the variables by their index in the list above.")

In [None]:
# Send a unit-annotated product to the math chain; the text after "#" is part of
# the prompt string itself, not a Python comment.
llm_math_chain("(7.2e+2 m**3) * (1.452e+3 kg/m**3) # (Volume of an average house) * (the density of rice) ")

In [None]:
# A single example in the "CONTEXT:= ... QUESTION:= ... PROGRAM:= ..." layout.
# NOTE(review): "8.4e-2 in**3" on its own line looks like the reference answer — confirm.
string = """Input: CONTEXT:=F1: The volume of the penny is 0.93 in**3=QUESTION:=If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be?

8.4e-2 in**3=PROGRAM:=```python\n# Q0: If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be?\n# Q1: What is the volume of a single penny?\nA1=0.93 # volume of a single penny (in**3)\n# Q2: What is the surface area of a hollow sphere 1 atom thick which is constructed from a melted penny?\nA2=0.93 # surface area of a hollow 1 atom thick sphere constructed from a melted penny (in**2)\nA0=(4/3)*np.pi*(A2/(4*np.pi))**(3/2) # volume of a hollow 1 atom thick sphere constructed from a melted penny (in**3)```"""

# Printing turns the "\n" escapes into real line breaks so the program is readable.
print(string)

In [None]:
# A generated program in the Q/A comment layout (no R/F lines), stored with "\n" escapes.
program = "# Q0: How much farmed land does one need to survive?\n# Q1: What is the average amount of food that a person needs to consume in a day?\nA1=2e+3 # The average amount of food that a person needs to consume in a day (kcal)\n# Q2: How many days are in a year?\nA2=365.25 # The number of days in a year (days)\n# Q3: What is the average amount of food that a person needs to consume in a year?\nA3=A1*A2 # The average amount of food that a person needs to consume in a year (kcal)\n# Q4: What is the average amount of food that can be produced per unit area of farmland in a year?\nA4=1e+6 # The average amount of food that can be produced per unit area of farmland in a year (kcal/acre)\nA0=A3/A4 # The amount of farmed land that one person needs to survive (acre)"

# Printing turns the "\n" escapes into real line breaks so the program is readable.
print(program)

In [None]:
# A generated program in the Q/R/F/A comment layout. The literal used to be one
# double-quoted string broken across two physical lines (a SyntaxError); it is now
# written as adjacent literals, one per program line, which Python concatenates
# into the same single string.
program = (
    "# Q0: How much farmed land does one need to survive?\n"
    "# R0: This is the main question we want to answer.\n"
    "# Q1: How many calories does one person need to survive?\n"
    "# R1: If we know how many calories one person needs to survive, we can compute the area of farmed land that one person needs to survive by dividing the number of calories one person needs to survive by the number of calories produced per unit area of farmed land.\n"
    "# F1: One person needs 2000 calories to survive.\n"
    "A1=2e+3 # (calories)\n"
    "# Q2: How many calories are produced per unit area of farmed land?\n"
    "# R2: If we know how many calories are produced per unit area of farmed land, we can compute the area of farmed land that one person needs to survive by dividing the number of calories one person needs to survive by the number of calories produced per unit area of farmed land.\n"
    "# Q3: What is the yield of a crop?\n"
    "# R3: To compute the number of calories produced per unit area of farmed land, we need to know the yield of a crop because we can multiply the yield of a crop by the number of calories produced per unit yield of the crop to get the number of calories produced per unit area of farmed land.\n"
    "# F3: The yield of a crop is 2 tons per acre.\n"
    "A3=2 # (tons/acre)\n"
    "# Q4: How many calories are produced per unit yield of a crop?\n"
    "# R4: To compute the number of calories produced per unit area of farmed land, we need to know the number of calories produced per unit yield of a crop because we can multiply the yield of a crop by the number of calories produced per unit yield of the crop to get the number of calories produced per unit area of farmed land.\n"
    "# F4: There are 1 million calories produced per ton of a crop.\n"
    "A4=1e+6 # (calories/ton)\n"
    "# F2: The number of calories produced per unit area of farmed land is the yield of a crop multiplied by the number of calories produced per unit yield of the crop.\n"
    "A2=A3*A4 # (calories/acre)\n"
    "# F0: The area of farmed land that one person needs to survive is the number of calories one person needs to survive divided by the number of calories produced per unit area of farmed land.\n"
    "A0=A1/A2 # (acre)"
)
# Printing turns the "\n" escapes into real line breaks so the program is readable.
print(program)


In [None]:
# Root directory of the experiment result stores. The original machine-specific
# location remains the default; set CARAFE_RESULTS_DIR to run the notebook elsewhere.
RESULTS_DIR = os.environ.get(
    "CARAFE_RESULTS_DIR",
    "/Users/sidhartkrishnan/Documents/Programming/Projects/cs236/carafe/results",
)


def _load_output_store(*relative_parts):
    """Return the parsed JSON document stored at RESULTS_DIR/<relative_parts...>."""
    with open(os.path.join(RESULTS_DIR, *relative_parts), "r") as f:
        return json.load(f)


exp1 = _load_output_store("expmt1", "output_store.json")
exp2 = _load_output_store("expmt2", "output_store_nounits.json")

# The comparison cells below index both stores by the same position.
assert len(exp1) == len(exp2), f"result stores differ in length: {len(exp1)} vs {len(exp2)}"
N = len(exp1)
len(exp1), len(exp2)

In [None]:
from collections import defaultdict

idx = 0
# Accuracy values per key, gathered across all N entries of each experiment.
exp1_acc = defaultdict(list)
exp2_acc = defaultdict(list)
for i in range(N):
    print(f"\n\n{i}\n")
    exp1_scores = {k: v["accuracy"] for k, v in exp1[i].items()}
    print("Expmt1: ", exp1_scores)
    exp2_scores = {k: v["accuracy"] for k, v in exp2[i].items()}
    print("Expmt2: ", exp2_scores)
    # Experiment 1's keys drive the loop; experiment 2 is read under the same keys.
    for k in exp1[i].keys():
        exp1_acc[k].append(exp1_scores[k])
        exp2_acc[k].append(exp2_scores[k])

In [None]:
# Positions in the result stores to inspect in detail.
interesting_idxs = [1]


def print_output(output):
    """Print the prediction stored under ``output["pred"]``.

    Two blocks are written to stdout: first the question, context and correct
    answer; then the compiled answer, the summary and the program, with the
    program wrapped in a Markdown python fence.
    """
    pred = output["pred"]
    header_fields = ("question", "context", "correct_answer")
    solution_fields = ("compiled_answer", "summary", "program")
    print("Question: {}; \nContext: {}\nCorrect Answer: {}".format(*[pred[name] for name in header_fields]))
    print("Compiled Answer is: {} \nSummarized Problem is: {}\nProgram:\n```python\n{}\n```".format(*[pred[name] for name in solution_fields]))

# Key of the per-entry result to compare across the two experiments.
k = "regular-context"
for i in interesting_idxs:
    print(f"\n\n{i}\n")
    # Full report for experiment 1, then experiment 2.
    for store in (exp1, exp2):
        print_output(store[i][k])

    # Then the two programs alone, one after the other, for easy comparison.
    print("\nPROGRAM COMPARISON: ")
    first_program = exp1[i][k]["pred"]["program"]
    print(first_program)
    print("\n")
    second_program = exp2[i][k]["pred"]["program"]
    print(second_program)

In [None]:
# Integer order of magnitude (floor of the base-10 log) of two hard-coded values.
tuple(np.floor(np.log10(value)) for value in (21714090909.090908, 3844000000000.0))
