In [1]:
import os
import re
import time
import random
from groq import Groq
from dataclasses import dataclass, field
from typing import List, Tuple
from math import log, sqrt

In [6]:
from dotenv import load_dotenv
load_dotenv()

# Groq API client; the key is looked up in the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Model identifier used as the default for LLM_response.
model_name = "llama3-8b-8192"  # Replace with actual model name if different

def LLM_response(prompt, system_prompt="", temperature=0, max_tokens=16384, model_name=model_name):
    """Send one system + user message pair to the chat API and return the reply text.

    Args:
        prompt: Content of the user message (formatted into a string).
        system_prompt: Content of the system message; it is sent even when empty.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        model_name: Model identifier; the default is the value the module-level
            ``model_name`` had when this function was defined.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    conversation = [
        {"role": "system", "content": f"{system_prompt}"},
        {"role": "user", "content": f"{prompt}"},
    ]
    completion = client.chat.completions.create(
        model=model_name,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [7]:
def direct_response(question: str) -> str:
    """Ask the model to answer ``question`` in a single pass.

    The prompt states the question and ends with a "think step by step" cue.

    Args:
        question: The question text inserted into the prompt.

    Returns:
        The text returned by ``LLM_response`` for that prompt.
    """
    prompt = (
        "Answer the following question.\n"
        f"Question: {question}\n\n"
        "Let's think step by step"
    )
    return LLM_response(prompt)

In [13]:
# Sample problem used for the baseline / critique / improve / rate cells.
# NOTE(review): the saved output under this cell talks about a different
# question (Sally's brothers and sisters), so it looks stale — re-run to refresh.
question = "Kevin Kangaroo begins hopping on a number line at 0. He wants to get to 1, but he can hop only 1/3 of the distance. Each hop tires him out so that he continues to hop 1/3 of the remaining distance. How far has he hopped after five hops? Express your answer as a common fraction."
# Reference answer for the problem; no code visible in this notebook reads it.
correct_answer = "211/243"
# Baseline: one direct model answer to the question.
response = direct_response(question)
print(response)

Let's break it down step by step.

1. Sally has 3 brothers.
2. Each brother has 2 sisters. This means that Sally's 3 brothers, in total, have 2 sisters.
3. Since Sally is one of the sisters, she is counted twice in the "2 sisters" mentioned in step 2.
4. Therefore, Sally has only 1 sister (herself). The other "sister" mentioned in step 2 is actually Sally herself.

So, Sally has only 1 sister, which is herself.


In [26]:
def critique(question: str, draft_answer: str) -> str:
    """Ask the model for a structured critique of a draft answer.

    The prompt shows the question and the draft, lists seven review headings
    (accuracy, verification, clarity, completeness, flaws, recommendations,
    step-by-step analysis) and tells the model not to write a revised answer.

    Args:
        question: The question the draft is answering.
        draft_answer: The draft text to be critiqued.

    Returns:
        The text returned by ``LLM_response`` for the critique prompt.
    """
    context = f"Question: {question}\nDraft answer: {draft_answer}\n"
    rubric = (
        "Please provide a comprehensive critique of the draft answer by addressing the following points:\n\n"
        "1. Accuracy and Correctness:\n"
        "   - Is the answer factually correct? Explain why or why not.\n"
        "   - Are there any logical inconsistencies or errors in reasoning?\n"
        "   - Does the answer fully address all aspects of the question?\n\n"
        "2. Verification Methods:\n"
        "   - Suggest at least three different ways to verify the correctness of the answer.\n"
        "   - Apply these verification methods and report your findings.\n\n"
        "3. Clarity and Coherence:\n"
        "   - Is the answer well-structured and easy to understand?\n"
        "   - Are there any ambiguous or vague statements that need clarification?\n\n"
        "4. Completeness:\n"
        "   - Does the answer cover all necessary information?\n"
        "   - Are there any important points or perspectives missing?\n\n"
        "5. Flaws and Weaknesses:\n"
        "   - Identify and explain every flaw in the draft answer, no matter how minor.\n"
        "   - Hold the answer to the highest possible standard in your critique.\n\n"
        "6. Improvement Recommendations:\n"
        "   - Provide specific, actionable recommendations to improve each aspect of the answer.\n"
        "   - Suggest additional information or context that could enhance the answer.\n\n"
        "7. Step-by-Step Analysis:\n"
        "   - Break down your critique into clear, logical steps.\n"
        "   - Ensure each step of your analysis is thorough and well-explained.\n\n"
    )
    closing = (
        "Remember: Do not provide a revised answer. Focus solely on critiquing and providing recommendations for improvement.\n"
        "Let's work through this carefully to ensure we have the right critique."
    )
    return LLM_response(context + rubric + closing)


In [27]:
# Alternative weak draft kept for experimentation:
# draft_answer = "I don't know"
# Critique the baseline answer stored in `response`; the result is kept in
# `critique_reponse` (sic), which the later improve cell reads by that name.
critique_reponse = critique(question, response)
print(critique_reponse)

Here is a comprehensive critique of the draft answer:

**Accuracy and Correctness:**

The answer is factually correct. The draft answer correctly simplifies the given expression by combining like terms and combining constant terms. The reasoning process is logical and accurate.

**Verification Methods:**

To verify the correctness of the answer, we can use the following methods:

1. Plug in some values of x and check that the expression evaluates to the same value as the simplified expression.
2. Use a calculator or computer algebra system to simplify the expression and compare the result with the draft answer.
3. Check the answer by re-writing the original expression in its simplified form, as done in the draft answer.

Applying these verification methods, we can confirm that the answer is correct.

**Clarity and Coherence:**

The answer is well-structured and easy to understand. The reasoning process is clear and logical, and the draft answer provides a step-by-step explanation of ho

In [31]:
def improve_answer(question: str, draft_answer: str, critique: str) -> str:
    """Ask the model to rewrite a draft answer in light of a critique.

    The prompt contains the question, the draft and the critique, then asks
    for a reply in three parts: reasoning process, verification, final answer.

    Args:
        question: The original question.
        draft_answer: The draft to be improved.
        critique: Critique text for the draft.

    Returns:
        The model's reply with leading and trailing whitespace removed.
    """
    task_and_inputs = (
        "Task: Improve the draft answer based on the given critique while ensuring relevance to the original question.\n\n"
        f"Original Question: {question}\n"
        f"Draft Answer: {draft_answer}\n"
        f"Critique: {critique}\n\n"
    )
    reply_format = (
        "Please provide an improved answer following this format:\n\n"
        "1. Reasoning Process:\n"
        "   - Analyze the critique and identify areas for improvement\n"
        "   - Provide a step-by-step explanation of how you'll address each point\n"
        "   - Ensure your reasoning directly relates to the original question\n\n"
        "2. Verification:\n"
        "   - Fact-check the information in the improved answer\n"
        "   - Cite reliable sources if applicable\n"
        "   - Ensure all statements are accurate and well-supported\n\n"
        "3. Final Answer:\n"
        "   - Provide a concise, clear, and improved answer\n"
        "   - Incorporate the insights from your reasoning and verification\n"
        "   - Ensure the answer directly addresses the original question\n\n"
    )
    closing = (
        "Remember to be concise, clear, and directly address all points raised in the critique.\n"
        "Your improved answer should be more accurate, comprehensive, and relevant to the original question."
    )
    revised = LLM_response(task_and_inputs + reply_format + closing)
    return revised.strip()

In [32]:
# Produce an improved answer from the baseline answer and its critique.
# NOTE(review): this assignment rebinds the name `improve_answer` from the
# function to the returned string, so running this cell a second time fails
# with "'str' object is not callable". The rating cell further down reads the
# string under this same name, so the two would have to be renamed together.
improve_answer = improve_answer(question, response, critique_reponse)
print(improve_answer)

Here is an improved answer based on the given critique:

**Reasoning Process:**

To simplify the given expression, we will follow a step-by-step process to combine like terms and combine constant terms. First, we will identify the terms with the same power of x and combine them correctly. We have two terms with x^2, one with 2x^2 and one with -8x^2. We can combine these terms by adding their coefficients: 2x^2 - 8x^2 = -6x^2. Next, we will combine the terms with x. We have two terms with x, one with 2x and one with -2x. We can combine these terms by adding their coefficients: 2x - 2x = 0. Finally, we will combine the constant terms. We have two constant terms, one with 9 and one with -4. We can combine these terms by adding their coefficients: 9 - 4 = 5.

**Verification:**

To verify our answer, we can re-write the original expression in its simplified form: 2x+8x^2+9-(4-2x-8x^2) = 2x+8x^2+9-4+2x+8x^2 = 16x^2+4x+5. We can also verify our answer by plugging in some values of x and check

In [37]:
def _parse_rating(rating_response) -> float:
    """Extract the number after "Rating:" and scale it to a fraction.

    Whitespace and markdown asterisks between the colon and the digits are
    skipped, so "Rating: 85", "**Rating:** 85" and "Rating: **85**" all parse.
    Only the first occurrence is used. Values above 95 are capped at 95
    before dividing by 100.

    Raises:
        ValueError: If no "Rating:" followed by digits is present.
    """
    match = re.search(r"Rating:[\s*]*(\d+)", rating_response or "")
    if match is None:
        raise ValueError("Rating not found in the response")
    return min(int(match.group(1)), 95) / 100


def rate_answer(question: str, answer: str) -> float:
    """Ask the model to critique and score an answer, returning the score.

    The prompt asks for a critique against five criteria followed by a line
    of the form "Rating: <0-100>".

    Args:
        question: The question that was asked.
        answer: The answer to be scored.

    Returns:
        The rating divided by 100, capped at 0.95. If no rating can be
        extracted from the reply, the problem is printed together with the
        raw reply and 0.0 is returned.
    """
    prompt = (
        f"Question: {question}\n"
        f"Answer: {answer}\n"
        "Please analyze the answer based on the following criteria:\n"
        "1. Accuracy of information\n"
        "2. Completeness of the response\n"
        "3. Relevance to the question\n"
        "4. Clarity and coherence\n"
        "5. Use of evidence or examples (if applicable)\n\n"
        "Provide a detailed critique addressing each of these aspects. Be specific about strengths and weaknesses, "
        "and point out any factual errors or misconceptions. Do not suggest improvements or provide a corrected answer.\n\n"
        "After your critique, rate the answer on a scale of 0 to 100, where:\n"
        "0-20: Poor (major flaws, largely incorrect or irrelevant)\n"
        "21-40: Below Average (significant issues, partially relevant or correct)\n"
        "41-60: Average (some flaws, mostly relevant and partially correct)\n"
        "61-80: Good (minor issues, relevant and mostly correct)\n"
        "81-100: Excellent (minimal to no issues, highly relevant and correct)\n\n"
        "Your response should follow this format:\n"
        "Critique: <detail critique>\n"
        "Rating: <rating>"
    )
    rating_response = LLM_response(prompt)
    try:
        return _parse_rating(rating_response)
    except (ValueError, TypeError) as e:
        # Best effort: an unparseable reply scores 0.0 instead of aborting.
        print(f"Error extracting rating: {e}")
        print(f"Rating response was {rating_response}")
        return 0.0

In [38]:
# Score the improved answer. `improve_answer` here is the answer text that the
# improve cell bound to that name.
# The score gets its own name so the `rate_answer` function is not overwritten
# and this cell can be re-run.
answer_rating = rate_answer(question, improve_answer)
print(answer_rating)

0.81


In [1]:
# Placeholder "non-answers"; the next cell picks one of these at random.
dummy_answers = [
    "I Don't Know",
    "I can't understand this question.",
    "I can't help with this question.",
    "I don't know how to solve this question.",
    "I don't know the answer to this question.",
    "I don't know the answer to this question, sorry.",
]


In [4]:
import random  # NOTE(review): already imported in the first cell; redundant here
# Draw one of the placeholder answers at random and display it.
answer = random.choice(dummy_answers)
print(answer)

I don't know the answer to this question.


In [39]:
def generate_expert_methods(question: str, other_expert_idea: str = "") -> str:
    """Ask the model, acting as an expert, to outline an approach to a question.

    The prompt asks for an analysis and a step-by-step plan without a full
    solution. When ``other_expert_idea`` is non-empty it is appended with an
    instruction to propose a different approach.

    Args:
        question: The question to plan for.
        other_expert_idea: Previously proposed approach text, or "" for none.

    Returns:
        The text returned by ``LLM_response``, called with temperature 1.0.
    """
    sections = [
        "You are an expert tasked with providing step-by-step instructions to analyze and outline methods for solving this question:\n"
        f"Question: {question}\n\n"
        "Instructions:\n"
        "1. Analyze the type of problem presented in the question.\n"
        "2. Brainstorm your thinking process and outline the steps you would take to answer the question. Create a relationships map for better visualize is encouraged\n"
        "3. Provide a clear step-by-step guide on how you would approach the problem. Do not solve the problem completely or provide any computations.\n"
        "4. Your response should be in the following format:\n"
        "   Expert:\n"
        "   Instruction: <Step-by-step instructions>\n"
    ]

    # Only mention other experts when there is an earlier proposal to avoid.
    if other_expert_idea:
        sections.append(
            "\nAdditional Context:\n"
            "You are working in a group of experts. Another expert has proposed the following approach:\n"
            f"Other Expert's Idea: {other_expert_idea}\n\n"
            "Your task is to provide a different approach from the one mentioned above, ensuring a diverse range of solutions.\n"
        )

    return LLM_response("".join(sections), temperature=1.0)

# Main execution
# Alternative test question:
# question = "There are 3 men in the room. One person is cooking. A person is playing chess. What are the other people doing?"
question = "If I hang 5 shirts outside and it takes them 5 hours to dry, how long would it take to dry 30 shirts?"

# Request three approaches one after another; each request is given the text
# of all earlier proposals so the prompt can ask for a different one.
expert_methods_list = []
other_expert_idea = ""
for _ in range(3):
    expert_methods = generate_expert_methods(question, other_expert_idea)
    expert_methods_list.append(expert_methods)
    other_expert_idea += expert_methods + " "

# Show every proposal between banner lines.
for i, method in enumerate(expert_methods_list):
    print(
        f"====================== Expert {i+1} ======================\n",
        method,
        "=========================================================\n",
        sep="\n",
    )


Expert:

**Instruction 1: Analyze the type of problem**

* Identify the problem as a classic example of a scaling problem, where we need to extrapolate the time it takes to dry a certain number of shirts to a larger quantity.
* Recognize that the problem involves a direct relationship between the number of shirts and the time it takes to dry them.

**Instruction 2: Brainstorm thinking process and outline steps**

* Relationships map:

[5 shirts] → [5 hours]
[30 shirts] → [? hours]

* Key questions to answer:
	+ Is the drying process affected by the number of shirts?
	+ Is the drying process linear, i.e., does the time it takes to dry shirts increase linearly with the number of shirts?
	+ Can we use the given information to establish a proportionality relationship between the number of shirts and the time it takes to dry them?

**Instruction 3: Step-by-step guide**

Step 1: Understand the problem and identify the key variables
* Identify the independent variable: number of shirts
* Ide

In [40]:
def generate_answer(question: str, expert_idea: str) -> str:
    """Ask the model to answer a question by following an expert's outline.

    Args:
        question: The question to answer.
        expert_idea: Outline text inserted into the prompt as the structure
            to follow.

    Returns:
        The text returned by ``LLM_response``; the prompt requests a
        "Reasoning step:" section followed by a "Final answer:" line.
    """
    task = (
        f"Question: {question}\n\n"
        "Please provide a comprehensive answer with detailed reasoning and analysis. The answer is definitely in the question. Follow this structure from the expert:\n"
        f"Expert idea: {expert_idea}\n\n"
    )
    reply_format = (
        "Let's work this out in a step by step way to be sure we have the right answer.\n"
        "Your response should be in the following format:\n"
        "Reasoning step: <Step-by-step reasoning>\n"
        "Final answer: <final answer>\n"
    )
    return LLM_response(task + reply_format)

# Answer the question once per expert outline and show each answer between
# banner lines.
for i, method in enumerate(expert_methods_list):
    answer = generate_answer(question, method)
    print(
        f"====================== Answer {i+1} ======================\n",
        answer,
        "=======================================================\n",
        sep="\n",
    )



Let's work through the problem step by step.

**Instruction 1: Analyze the type of problem**

This is a classic scaling problem, where we need to extrapolate the time it takes to dry a certain number of shirts to a larger quantity. We recognize that the problem involves a direct relationship between the number of shirts and the time it takes to dry them.

**Instruction 2: Brainstorm thinking process and outline steps**

Relationships map:

[5 shirts] → [5 hours]
[30 shirts] → [? hours]

Key questions to answer:

* Is the drying process affected by the number of shirts?
* Is the drying process linear, i.e., does the time it takes to dry shirts increase linearly with the number of shirts?
* Can we use the given information to establish a proportionality relationship between the number of shirts and the time it takes to dry them?

**Instruction 3: Step-by-step guide**

Step 1: Understand the problem and identify the key variables
* Identify the independent variable: number of shirts
* Id

In [14]:
import re

# Sample model reply used to try out the final-answer extraction below.
text = """
Final Answer: Here is the improved answer:

**Reasoning Process:**

1. Analyze the critique: The critique highlights the lack of logical reasoning, mathematical understanding, and clarity in the original draft answer.      
2. Identify areas for improvement: The critique suggests that the original answer lacks a step-by-step explanation of the thought process and mathematical 
steps taken to arrive at the answer.
3. Provide a step-by-step explanation: To address this, I will break down the solution into clear and concise steps.

**Step 1:** Identify the relationship between the number of shirts and the time it takes to dry them. From the given information, 5 shirts take 5 hours to 
dry. This means that the time it takes to dry 1 shirt is 5 hours/5 shirts = 1 hour per shirt.

**Step 2:** Apply the constant rate at which the shirts dry to the new scenario. To dry 30 shirts, we can multiply the time it takes to dry 1 shirt by 30: 
1 hour/shirt * 30 shirts = 30 hours.

**Verification:**

* The given information is accurate: 5 shirts take 5 hours to dry.
* The calculation is correct: 1 hour/shirt * 30 shirts = 30 hours.
* The assumption is valid: The environmental conditions remain the same for drying 5 shirts and 30 shirts.

**Final Answer:**

If you hang 5 shirts outside and it takes them 5 hours to dry, it would take 30 shirts the same amount of time to dry, which is 30 hours. This is because  
the time it takes to dry shirts is independent of the number of shirts, as long as the environmental conditions remain the same.
"""

# Bold "Final Answer:" heading, optionally numbered "3. "; with DOTALL the
# second group captures everything after the heading through the end of the text.
pattern = r'\*\*(3\. )?Final Answer:\*\*(.*?)$'
match = re.search(pattern, text, flags=re.DOTALL)

if match is None:
    print("Final answer not found.")
else:
    final_answer = match.group(2).strip()
    print(final_answer)

If you hang 5 shirts outside and it takes them 5 hours to dry, it would take 30 shirts the same amount of time to dry, which is 30 hours. This is because  
the time it takes to dry shirts is independent of the number of shirts, as long as the environmental conditions remain the same.


In [18]:
# Three sample child records, each holding a critique and an answer.
children = {
    f"child {n}": {"Critique": f"critique {n}", "Answer": f"answer {n}"}
    for n in range(1, 4)
}

In [19]:
# Print each child's critique and answer under a 1-based "Child N" heading.
for i, child_data in enumerate(children.values()):
    print(f"Child {i+1}", child_data["Critique"], child_data["Answer"], sep="\n")

Child 1
critique 1
answer 1
Child 2
critique 2
answer 2
Child 3
critique 3
answer 3


In [11]:
# Print each child's key, critique and answer, followed by a blank line.
for child_name, child_data in children.items():
    print(
        f"Child: {child_name}",
        f"Critique: {child_data['Critique']}",
        f"Answer: {child_data['Answer']}",
        sep="\n",
        end="\n\n",
    )

Child: child 1
Critique: critique 1
Answer: answer 1

Child: child 2
Critique: critique 2
Answer: answer 2

Child: child 3
Critique: critique 3
Answer: answer 3



In [16]:
# Print only the critique text of every child, one per line.
for critique_text in (child_data["Critique"] for child_data in children.values()):
    print(critique_text)

critique 1
critique 2
critique 3
