In [1]:
import argparse
from typing import Optional
from dotenv import load_dotenv
import os
from tqdm import tqdm
import numpy as np

# torch
import torch

# init hugging face
from langchain.llms import OpenAI
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.chains import LLMChain, LLMMathChain
from langchain.utilities import SearchApiAPIWrapper


from unidecode import unidecode

from utils import compile_fp, convert_units, accuracy_metric
import json

load_dotenv()

True

In [2]:
# ## PROMPTS

# INPUT_PROMPT = PromptTemplate(
#                         input_variables=["context", "question"],
#                         template="""CONTEXT:=\n{context}\nQUESTION:={question}\n"""
#                     )

# SYSTEM_PROMPT_STRING = """
# You are a helpful assistant tasked with answering estimation questions using provided information.
# You are provided with contextual information which contains facts that is relevant to answering the estimation question.
# You are also provided with a question to answer.
# You must write a program that outputs the answer to the question.
# Your program should ask sub-questions to help you answer the question.
# When you answer a sub-question you must provide your reasoning for the answer.
# You can only use the provided contextual information to answer the question and sub-questions.
# Do not use any other information to answer the question or sub-questions.
# If you are unable to answer a question or sub-question, you can set the answer to be the string 'UNK' and proceed with your program as if 'UNK' was the correct answer.
# Some example programs are provided for you below.
# """
# FEW_SHOT_PROMPTS = [
# """Input:
# CONTEXT:=
# F1: The volume of the penny is 0.93 in**3.
# F2: The surface area of a sphere is 4*pi*r**2 where r is the radius of the sphere.
# F3: The volume of a sphere is 4/3*pi*r**3 where r is the radius of the sphere.
# QUESTION:=If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be?

# ```python
# # Q0: If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be?
# # Q1: What is the volume of a single penny?
# A1=0.93 # volume of a single penny (in**3); R: F1 tells us what the volume of a penny is.
# # Q2: What is the surface area of a hollow sphere 1 atom thick which is constructed from a melted penny?
# A2=0.93 # surface area of a hollow 1 atom thick sphere constructed from a melted penny (in**2) R: If a penny is melted down and stretched out into a hollow sphere 1 atom thick, the surface area of the sphere is the same as the volume of the penny.
# # Q3: What is the radius of the sphere given its surface area?
# A3=(A2/(4*np.pi))**(1/2) # radius of the sphere (in) R: F2 implies that the radius of the sphere is the square root of the surface area of the sphere divided by 4*pi.
# A0=(4/3)*np.pi*(A3 ** 3) # volume of the sphere (in**3) R: F3 tells us the volume of the sphere is 4/3*pi*r**3 where r is the radius.
# ```""",
# """Input:
# CONTEXT:=
# F1: The volume of a single air molecule is 3e-24 cubic meter.
# QUESTION:=How many air molecules are in this lecture hall?

# ```python
# # Q0: How many air molecules are in this lecture hall?
# # Q1: What is the volume of the lecture hall?
# A1='UNK' # volume of the lecture hall (m**3) R: Unknown Quantity
# # Q2: What is the volume of a single air molecule?
# A2=3e-24 # volume of single air molecule (m**3) R: F1 tells us the volume of a single air molecule.
# A0=A1/A2 # number of air molecules in lecture hall (molecules) R: The number of air molecules in the lecture hall is the volume of the lecture hall divided by the volume of a single air molecule.
# ```""",
# """Input:
# CONTEXT:=

# QUESTION:=A cure for all cancers would increase the average American lifespan by how many years?

# ```python
# # Q0: A cure for all cancers would increase the average American lifespan by how many years?
# # Q1: What is the average American lifespan?
# A1='UNK' # average American lifespan (years) R: Unknown Quantity
# # Q2: How much does cancer reduce the average lifespan by?
# A2='UNK' # amount cancer reduces average lifespan by (years) R: Unknown Quantity
# A0=A1+A2 # average lifespan with cancer (years) R: The average lifespan with a cure for cancer is the average American lifespan plus the amount that cancer reduces the average lifespan by.
# ```""",
# """Input:
# CONTEXT:=
# F1: The mass of a queen size mattress is 140 pounds.
# F2: The mass of the sun is 4.3e+30 pounds.
# F3: The ratio of mass of Betelgeuse to mass of sun is 18
# QUESTION:=What is the number of queen-size mattresses it would take to fill the star Betelgeuse?

# ```python
# # Q0: What is the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun?
# # Q1: What is the mass of a queen size mattress?
# A1=140 # mass of queen size mattress (pounds) R: F1 tells us the mass of a queen size mattress.
# # Q2: What is the mass of Betelgeuse?
# # Q3: What is the mass of the sun?
# A3=4.3e+30 # mass of the sun (pounds) R: F2 tells us the mass of the sun.
# # Q4: What is the ratio of the mass of Betelgeuse to the mass of the sun?
# A4=18 # ratio of mass of Betelgeuse to mass of the sun (dimensionless) R: F3 tells us the ratio of the mass of Betelgeuse to the mass of the sun.
# A2=A3*A4 # mass of Betelgeuse (pounds) R: The mass of Betelgeuse is the mass of the sun multiplied by the ratio of the mass of Betelgeuse to the mass of the sun.
# A0=A2/A1 # number of queen size mattresses it would take to fill Betelgeuse (mattresses) R: The number of queen size mattresses it would take to fill Betelgeuse is the mass of Betelgeuse divided by the mass of a queen size mattress.
# ```"""
# ]
# FEW_SHOT_PROMPT_STRING = "\n\n".join(FEW_SHOT_PROMPTS)
# extra_prompts = [
# """Input:
# CONTEXT:=
# F1: It takes around 5 seconds to pluck a single strand of hair.
# F2: The entire human body has 5e+6 hair follicles.
# QUESTION:=How long would it take to pluck each hair out of your body one at a time?

# ```python
# # Q0: How long would it take to pluck each hair out of your body one at a time?
# # Q1: How long does it take to pluck a single strand of hair?
# A1=5 # time to pluck single strand of hair (s) R: F1 tells us how long it takes to pluck a single strand of hair.
# # Q2: How many hairs do we have on our body?
# A2=5e+6 # number of hairs on body (hairs) R: F2 tells us how many hairs we have on our body.
# A0=A1*A2 # time to pluck all hairs on body (s) R: The time to pluck all hairs on our body is the time to pluck a single strand of hair multiplied by the number of hairs on our body.
# ```""",
# ]

# COMBINED_PROMPT = PromptTemplate(
#     template=f"{SYSTEM_PROMPT_STRING}\n\n{FEW_SHOT_PROMPT_STRING}\n\n{INPUT_PROMPT.template}",
#     input_variables=INPUT_PROMPT.input_variables,
# )

In [7]:
## PROMPTS

INPUT_PROMPT = PromptTemplate(
                        input_variables=["question"],
                        template="""QUESTION:={question}\n"""
                    )


SYSTEM_PROMPT_STRING = """
You are a helpful assistant tasked with answering estimation questions.
You are provided with a question to answer.
You must write a program that outputs the answer to the question.
Your program should ask sub-questions to help you answer the question.
After asking each sub-question, you must provide your reasoning for why the sub-question is relevant to our goal of answering the main question.
You will then answer these sub-questions in code format.
Before answer each sub-question, you will provide a fact that you are referencing to answer the sub-question. The fact must be verifiable by a third party.
Some example programs are provided for you below.
"""
FEW_SHOT_PROMPTS = [
# """Input:
# QUESTION:=If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be?

# SUMMARY:= We take a penny, melt it down, and stretch it out into a hollow sphere 1 atom thick. We want to compute the volume of this sphere.
# PROGRAM:=
# # Q0: If I were to take an English penny, melt it down, then somehow stretch it out into a perfect hollow sphere 1 atom thick, how large would this sphere be?
# # R0: This is the main question we want to answer.
# # Q1: What is the volume of a single penny?
# # R1: To compute the volume of the hollow sphere, we need the original volume of the penny since it is what gets melted down and turned into the sphere.
# # F1: The volume of a penny is 0.93 in**3.
# A1=0.93 # (in**3)
# # Q2: What is the surface area of a hollow sphere 1 atom thick which is constructed from a melted penny?
# # R2: We only know the volume of the penny and we need to translate that information into information about the sphere.
# # F2: The surface area of a hollow sphere 1 atom thick constructed from a melted penny is the same as the volume of the penny.
# A2=0.93 # (in**2)
# # Q3: What is the radius of the sphere given its surface area?
# # R3: To compute the volume of the sphere given its surface area, we first need to compute the radius of the sphere.
# # F3: The radius of a sphere is the square root of the surface area of the sphere divided by 4*pi.
# A3=(A2/(4*np.pi))**(1/2) # (in)
# # F0: The volume of a sphere is 4/3*pi*r**3 where r is the radius of the sphere.
# A0=(4/3)*np.pi*(A3 ** 3) # (in**3)
# """,
"""Input:
QUESTION:=How many air molecules are in this lecture hall?

SUMMARY:= We have a lecture hall and we want to compute the number of air molecules inside of it.
PROGRAM:=
# Q0: How many air molecules are in this lecture hall?
# R0: This is the main question we want to answer.
# Q1: What is the volume of an average lecture hall?
# R1: To get a sense of how many air molecules we can fit in this lecture hall, we first need to know its volume which can be approximated by the volume of an average lecture hall.
# F1: The volume of an average lecture hall is 1000 cubic meters.
A1=1000 # (m**3)
# Q2: What is the volume of a single air molecule?
# R2: To compute the number of air molecules in some space with a known volume, we need to know the volume of a single air molecule.
# F2: The volume of a single air molecule is 3e-24 cubic meter.
A2=3e-24 # (m**3)
# F0: The number of air molecules in the lecture hall is the volume of the lecture hall divided by the volume of a single air molecule.
A0=A1/A2 # (molecules)
""",
# """Input:
# QUESTION:=How much would the sea level rise if all the ice melted?

# SUMMARY:= All of the ice on Earth has melted and flown into the ocean. We want to compute how much the sea level will rise.
# PROGRAM:=
# # Q0: How much would the sea level rise if all the ice melted?
# # R0: This is the main question we want to answer.
# # Q1: What is the volume of water that will be added to the ocean?
# # R1: If we know the volume of water that will be added to the ocean, we can compute the sea level rise by dividing the volume of water by the surface area of the ocean.
# # Q2: What is the volume of ice on Earth that is on land?
# # R2: The volume of water that will be added to the ocean is the same as the volume of ice on Earth that is on land since that ice will melt and flow into the ocean.
# # Q3: What is the volume of ice in Antarctica?
# # R3: The majority of the ice on land is in Antarctica so it is a good approximation to use the volume of ice in Antarctica as the volume of ice on Earth that is on land.
# # Q4: What is the thickness of the ice in Antarctica?
# # R4: To compute the volume of ice in Antarctica, we need to know the thickness of the ice because we can multiply that by its surface area to get its volume.
# # F4: The thickness of the ice in Antarctica is 3 mi.
# A4=3 # (mi)
# # Q5: What is the surface area of Antarctica?
# # R5: To compute the volume of ice in Antarctica, we need to know the surface area of Antarctica because we can multiply that by its thickness to get its volume.
# # Q6: What fraction of the Earth's surface is Antarctica?
# # R6: To compute the surface area of Antarctica, we need to know what fraction of the Earth's surface is Antarctica because we can multiply that by the surface area of the Earth to get the surface area of Antarctica.
# # F6: Antarctica is 1/30th of the Earth's surface.
# A6=1/30 # (dimensionless)
# Q7: What is the surface area of the Earth?
# # R7: To compute the surface area of Antarctica, we need to know the surface area of the Earth because we can multiply that by the fraction of the Earth's surface that is Antarctica to get the surface area of Antarctica.
# # F7: The surface area of the Earth is 1.97e+8 mi**2.
# A7=1.97e+8 # (mi**2)
# # F5: The surface area of Antarctica is the fraction of the Earth's surface that is Antarctica multiplied by the surface area of the Earth.
# A5=A6*A7 # (mi**2)
# # F3: The volume of ice in Antarctica is the surface area of Antarctica multiplied by the thickness of the ice in Antarctica.
# A3=A5*A4 # (mi**3)
# # F2: The volume of ice on Earth is approximated by the volume of ice in Antarctica.
# A2=A3 # (mi**3)
# # F1: The volume of water that will be added to the ocean is the same as the volume of ice on Earth.
# A1=A2 # (mi**3)
# # Q8: What is the surface area of the ocean?
# # R8: To compute the sea level rise, we need to know the surface area of the ocean because we can divide the volume of water that will be added to the ocean by the surface area of the ocean to get the sea level rise.
# # F8: The surface area of the ocean is 1.40e+8 mi**2.
# A8=1.40e+8 # (mi**2)
# # F0: The sea level rise is the volume of water that will be added to the ocean divided by the surface area of the ocean.
# A0=A1/A8 # (mi)
# """
]
FEW_SHOT_PROMPT_STRING = "\n\n".join(FEW_SHOT_PROMPTS)
extra_prompts = [
"""Input:
QUESTION:=How long would it take to pluck each hair out of your body one at a time?

CONTEXT:=
F1: It takes around 5 seconds to pluck a single strand of hair.
F2: The entire human body has 5e+6 hair follicles.
PROGRAM:=
# Q0: How long would it take to pluck each hair out of your body one at a time?
# Q1: How long does it take to pluck a single strand of hair?
A1=5 # time to pluck single strand of hair (s) R: F1 tells us how long it takes to pluck a single strand of hair.
# Q2: How many hairs do we have on our body?
A2=5e+6 # number of hairs on body (hairs) R: F2 tells us how many hairs we have on our body.
A0=A1*A2 # time to pluck all hairs on body (s) R: The time to pluck all hairs on our body is the time to pluck a single strand of hair multiplied by the number of hairs on our body.
""",
"""Input:
QUESTION:=A cure for all cancers would increase the average American lifespan by how many years?

PROGRAM:=
# Q0: A cure for all cancers would increase the average American lifespan by how many years?
# Q1: What is the average American lifespan?
# R1: The average American lifespan is 78 years.
A1=78 # (years)
# Q2: How much does cancer reduce the average lifespan by?
# R2: Cancer reduces the average lifespan by 5 years.
A2=5 # (years)
# R0: To compute the average lifespan with a cure for cancer, take the average American lifespan plus the amount that cancer reduces the average lifespan by.
A0=A1+A2 # (years)
""",
"""Input:
QUESTION:=What is the number of queen-size mattresses it would take to fill the star Betelgeuse?

PROGRAM:=
# Q0: What is the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun?
# Q1: What is the mass of a queen size mattress?
# R1: The mass of a queen size mattress is 140 pounds.
A1=140 # (pounds)
# Q2: What is the mass of Betelgeuse?
# Q3: What is the mass of the sun?
# R3: The mass of the sun is 4.3e+30 pounds.
A3=4.3e+30 # (pounds)
# Q4: What is the ratio of the mass of Betelgeuse to the mass of the sun?
# The ratio of mass of Betelgeuse to mass of sun is 18
A4=18 # (dimensionless)
# R2: The mass of Betelgeuse is the mass of the sun multiplied by the ratio of the mass of Betelgeuse to the mass of the sun.
A2=A3*A4 # (pounds)
# R0: The number of queen size mattresses it would take to fill Betelgeuse is the mass of Betelgeuse divided by the mass of a queen size mattress.
A0=A2/A1 # (mattresses)
""",
]

COMBINED_PROMPT = PromptTemplate(
    template=f"{SYSTEM_PROMPT_STRING}\n\n{FEW_SHOT_PROMPT_STRING}\n\n{INPUT_PROMPT.template}",
    input_variables=INPUT_PROMPT.input_variables,
)

In [8]:
class SamplePredictor:
    def __init__(self, temperature=0, prompt=COMBINED_PROMPT):

        self.prompt = prompt
        llm = OpenAI(
                temperature=temperature,
                openai_api_key=os.getenv("OPENAI_API_KEY")
            )

        self.llm_chain = LLMChain(prompt=self.prompt, llm=llm)

    def ask(self, question, answer, context="", verbose=True):
        # preds = self.llm_chain.run(context=context, question=question)
        preds = self.llm_chain.run(question=question)
        print(self.llm_chain._get_num_tokens(preds))
        try:
            # program = program[program.find("```python")+len("```python"):program.rfind("```")]
            summary = preds.split("SUMMARY:=")[-1].split("PROGRAM:=")[0].strip('\n')
            program = preds.split("PROGRAM:=")[-1].strip('\n')

            loc = {}
            # exec(program.replace("'UNK'", "100"), globals(), loc)
            exec(program, globals(), loc)
            compiled_out = loc["A0"]
            compiled_units = program.split(")")[-2].split("(")[-1]

            prediction = {
                    "question": question,
                    "correct_answer": answer,
                    "summary": summary,
                    "program": program,
                    "raw_outputs": preds,
                    "compiled_answer": compiled_out,
                    "compiled_units": compiled_units
                }

            compiled_acc = accuracy_metric(answer, compiled_out)
            parsable = 1
        except:
            prediction = {
                    "question": question,
                    "correct_answer": answer,
                    "raw_outputs": preds
                }
            compiled_acc = 0
            parsable = 0

        if verbose:
            print("Question: {}; \nCorrect Answer: {}".format(prediction["question"], prediction["correct_answer"]))

            if len(prediction) > 3:
                print("Compiled Answer is: {} ({})\nSummarized Problem is: {}\nProgram:\n{}".format(prediction['compiled_answer'], prediction['compiled_units'], prediction['summary'], prediction['program']))
            else:
                print("Unable to parse output; Outputting Raw Output:\n{}".format(prediction["raw_outputs"]))
        return compiled_acc, parsable, prediction

    def predict(self, dataset, N, include_context=True, verbose=False):
        compiled_acc = 0
        parsable = 0
        shuffled_idxs = np.arange(len(dataset))
        np.random.shuffle(shuffled_idxs)
        print("Shuffled Indices w/ fixed seed: ", shuffled_idxs[:10])
        predictions = []
        for i in tqdm(range(N)):
            entry = dataset[shuffled_idxs[i]]
            question = entry["question"]
            answer = entry["answer"]
            context = entry["context"] if include_context else ""
            compiled_acc_i, parsable_i, prediction = self.ask(question, answer, context, verbose=verbose)
            compiled_acc += compiled_acc_i
            parsable += parsable_i
            predictions.append(prediction)

        return compiled_acc/N, parsable/N, predictions


In [9]:
predictor = SamplePredictor(temperature=0.0)

In [10]:
question = "What is the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun?"
answer = "68"
context = """"""

predictor.ask(question, answer, context)


256
Question: What is the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun?; 
Correct Answer: 68
Unable to parse output; Outputting Raw Output:

SUMMARY:= We want to compute the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun.
PROGRAM:=
# Q0: What is the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun?
# R0: This is the main question we want to answer.
# Q1: What is the mass of a queen-size mattress?
# R1: To compute the number of mattresses it would take to fill the star Betelgeuse, we need to know the mass of a single queen-size mattress.
# F1: The mass of a queen-size mattress is approximately 40 kg.
A1=40 # (kg)
# Q2: What is the mass of the star Betelgeuse?
# R2: To compute the number of mattresses it would take to fill the star Betelgeuse, we need to know the mass of the star.
# F2: The ma

(0,
 0,
 {'question': 'What is the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun?',
  'correct_answer': '68',
  'raw_outputs': '\nSUMMARY:= We want to compute the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun.\nPROGRAM:=\n# Q0: What is the number of queen-size mattresses it would take to fill the star Betelgeuse which has 18 times the mass of the Sun?\n# R0: This is the main question we want to answer.\n# Q1: What is the mass of a queen-size mattress?\n# R1: To compute the number of mattresses it would take to fill the star Betelgeuse, we need to know the mass of a single queen-size mattress.\n# F1: The mass of a queen-size mattress is approximately 40 kg.\nA1=40 # (kg)\n# Q2: What is the mass of the star Betelgeuse?\n# R2: To compute the number of mattresses it would take to fill the star Betelgeuse, we need to know the mass of the star.\n# F2: The mass

In [None]:
llm = OpenAI(temperature=0)
llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)
search = SearchApiAPIWrapper()
tools = [
    Tool(
        name="Intermediate Answer",
        func=search.run,
        description="Useful for when you need to search for information",
    ),
    # Tool(
    #     name="Calculator",
    #     func=llm_math_chain.run,
    #     description="Useful for when you need to do mathematical calculations",
    # ),
]

self_ask_with_search = initialize_agent(
    tools, llm, agent=AgentType.SELF_ASK_WITH_TOOLS, verbose=True
)



In [None]:
self_ask_with_search.run("How much rice can fit in an average sized house?")


In [None]:
search.run("the average volume of blood donated by a person")

In [None]:
search.run("1 g/ml = ? kg/m^3")

In [None]:
llm("What is the formula for the volume of a rectangular prism?")

In [None]:
llm = OpenAI(temperature=0.5)

llm("""Find a simple formula to answer the following question: How much would the sea level rise if all of the ice on Earth melted?
    You are not allowed to use any numbers in your formula. Use as few variables as possible in your formula.
    After you have found a formula, provide an in-depth explanation of what each variable in your formula represents.
    """)

In [None]:
variables = "\n\n1. The amount of carbon dioxide exhaled per breath\n2. The amount of oxygen inhaled per breath\n3. The amount of fat in kilograms that needs to be lost\n4. The amount of carbon dioxide produced when fat is burned"
llm(f"You are given the following variables:{variables}\nUsing these variables, devise a mathematical formula to answer the following: How many kilograms of co2 would i have to breath out to lose 1 kg of fat? Refer to the variables by their index in the list above.")

In [None]:
llm_math_chain("(7.2e+2 m**3) * (1.452e+3 kg/m**3) # (Volume of an average house) * (the density of rice) ")