# Data - Counterintuitive AR

In [None]:
import json
# Load the question set; later cells read data[i]["question"].
# JSON is UTF-8 by specification, so the encoding is pinned instead of being left to the
# platform default (which is not UTF-8 on every system).
with open('CIAR.json', 'r', encoding='utf-8') as f:
  data = json.load(f)

# data[0]

# Code Infra Setup


In [None]:
%%capture --no-stderr
%pip install -U langchain langchain_experimental langsmith anthropic langgraph

import os
import getpass

# LangSmith settings exported through environment variables.
# NOTE(review): the key is blank in this copy and the tracing flag is set to "false" —
# presumably both are changed when traces are wanted; confirm before relying on them.
langsmith_key = ''
os.environ["LANGCHAIN_TRACING_V2"] = "false"
os.environ["LANGCHAIN_API_KEY"] = langsmith_key
os.environ["LANGCHAIN_PROJECT"] = "MAD_trials"

%%capture
!pip install litellm

from functools import partial
import itertools

from litellm import completion
import os

## set ENV variables
# NOTE(review): the key is blank in this copy — presumably filled in before running.
# Avoid committing a real key to the notebook.
os.environ["ANTHROPIC_API_KEY"] = ""

def get_llm_resp(chat_history, sys_msg = None):
    """Send a chat transcript to claude-2.1 through litellm and return the reply text.

    Args:
        chat_history: list of ``{"role": ..., "content": ...}`` message dicts.
        sys_msg: optional system prompt; when truthy it is placed in front of
            ``chat_history`` as a ``"system"`` message.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    system_part = [{"role": "system", "content": sys_msg}] if sys_msg else None
    outgoing = chat_history if system_part is None else system_part + chat_history

    reply = completion(model="claude-2.1", messages=outgoing, max_tokens=2028)
    # Token counts are available on reply.usage (prompt_tokens / completion_tokens)
    # should usage tracking be wanted later.
    return reply["choices"][0]["message"]["content"]

# TODO: possibly integrate this function into the previous function get_llm_resp
# Litellm does not support the input parameter n, so n samples are drawn with n separate calls.
def get_n_llm_resp(chat_history, sys_msg = None, n=1):
  """Collect ``n`` independent replies for the same transcript.

  Args:
      chat_history: list of ``{"role": ..., "content": ...}`` message dicts.
      sys_msg: optional system prompt, forwarded unchanged to every call.
      n: number of replies to request; ``0`` yields an empty list.

  Returns:
      A list with ``n`` reply strings, in the order they were generated.
  """
  # Forward the caller's system prompt; hard-coding None here would silently drop it.
  return [get_llm_resp(chat_history, sys_msg=sys_msg) for _ in range(n)]

# Wrap a plain string as a one-message, OpenAI-chat-style history.
def oai_format_single_msg(msg, role="user"):
    """Return ``[{"role": role, "content": msg}]`` for the given text."""
    single_message = dict(role=role, content=msg)
    return [single_message]

# LangGraph based Modelling - MultiAgent Debate

## Set up the model


In [None]:
from langchain_community.chat_models import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
# NOTE(review): the key is blank in this copy — presumably supplied before running.
api_key = ''

# Single chat model shared by every graph node function in this notebook
# (claude-2.1, temperature 0, max_tokens 2028).
chat_claude_model = ChatAnthropic(temperature=0, anthropic_api_key = api_key, model_name="claude-2.1", max_tokens= 2028)

  warn_deprecated(


## Define the agent state


In [None]:
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage

# This State object is passed between the nodes of the graph; nodes are executable functions (function calling, LLM calling, etc.)
class AgentState(TypedDict):
    """State shared by the nodes of the multi-agent debate graph."""
    # Message log. Annotated with operator.add; the node functions return one-element
    # lists so that their reply is added onto the existing list.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Number of turns taken so far (every node returns speakers + 1); the round is derived from it.
    speakers: int

## Define the nodes


In [None]:

# Node: the affirmative debater's turn.
def call_affirmative_debater(state):
    """Generate the affirmative side's next message.

    While fewer than three turns have been taken (round one), the latest message
    in the graph state is used as the prompt. From round two onwards the negative
    side's last reply is wrapped in ``config['debate_prompt']``.

    Reads the module-level ``affirmative_debater_chat_history``,
    ``negative_debater_chat_history``, ``affirmative_sys_msg``, ``config`` and
    ``chat_claude_model``; appends both the prompt and the reply to the
    affirmative history.

    Returns:
        A state update holding the reply (in a list) and the turn counter
        advanced by one.
    """
    in_first_round = state["speakers"] // 3 == 0
    if in_first_round:
        prompt = state["messages"][-1]
    else:
        rebuttal = negative_debater_chat_history[-1].content
        prompt = HumanMessage(content=config['debate_prompt'].replace("##oppo_ans##", rebuttal))
    affirmative_debater_chat_history.append(prompt)

    system = SystemMessage(content=affirmative_sys_msg)
    response = chat_claude_model.invoke([system] + affirmative_debater_chat_history)
    affirmative_debater_chat_history.append(response)

    print("###Affirmative Debater Response###")
    print(response)
    # The reply goes back inside a list so it is added onto the existing message list.
    return {"messages": [response], "speakers": state["speakers"] + 1}


# Node: the negative debater's turn.
def call_negative_debater(state):
    """Generate the negative side's next message.

    In round one (fewer than three turns taken) the affirmative reply found at the
    end of the graph state is wrapped in ``config['negative_prompt']`` to force a
    contrary answer. From round two onwards the affirmative side's last recorded
    reply is wrapped in ``config['debate_prompt']``.

    Reads the module-level ``negative_debater_chat_history``,
    ``affirmative_debater_chat_history``, ``negative_sys_msg``, ``config`` and
    ``chat_claude_model``; appends both the prompt and the reply to the negative
    history.

    Returns:
        A state update holding the reply (in a list) and the turn counter
        advanced by one.
    """
    affirmative_last_msg = state["messages"][-1].content

    if state["speakers"] // 3 != 0:
        # Round two onwards: respond to the opponent's latest argument.
        opposing_text = affirmative_debater_chat_history[-1].content
        prompt_text = config['debate_prompt'].replace("##oppo_ans##", opposing_text)
    else:
        # Round one: disagree with the affirmative's opening answer.
        prompt_text = config['negative_prompt'].replace("##aff_ans##", affirmative_last_msg)
    negative_debater_chat_history.append(HumanMessage(content=prompt_text))

    system = SystemMessage(content=negative_sys_msg)
    response = chat_claude_model.invoke([system] + negative_debater_chat_history)
    negative_debater_chat_history.append(response)

    print("###Negative Debater Response###")
    print(response)
    # The reply goes back inside a list so it is added onto the existing message list.
    return {"messages": [response], "speakers": state["speakers"] + 1}

# Node: the moderator's turn; it also acts as the final judge once max_rounds is used up.
def call_moderator(state):
    """Evaluate the latest affirmative/negative exchange.

    Regular rounds: fills ``config['moderator_prompt']`` with the round number and
    both sides' last replies, and asks the model for a verdict.

    Final call (at least ``max_rounds * 3`` turns taken): runs the two-step judge
    procedure — first ``config['judge_prompt_last1']`` to list the answer
    candidates, then ``config['judge_prompt_last2']`` to pick one.

    Reads the module-level ``affirmative_debater_chat_history``,
    ``negative_debater_chat_history``, ``moderator_chat_history``,
    ``judge_chat_history``, ``moderator_sys_msg``, ``config``, ``max_rounds``,
    ``debate_topic`` and ``chat_claude_model``.

    Returns:
        A state update holding the reply (in a list) and the turn counter
        advanced by one.
    """
    aff_ans = affirmative_debater_chat_history[-1].content
    neg_ans = negative_debater_chat_history[-1].content
    system = SystemMessage(content=moderator_sys_msg)

    # All rounds used up and still no definite answer: final judge call.
    if state["speakers"] >= max_rounds * 3:
      # Step:1 - Extract answer candidates
      msg1 = config["judge_prompt_last1"].replace('##aff_ans##', aff_ans).replace('##neg_ans##', neg_ans)
      judge_chat_history.append(HumanMessage(content = msg1))
      response = chat_claude_model.invoke([system] + judge_chat_history)
      judge_chat_history.append(response)

      # Step:2 - Select one from the candidates
      msg2 = config["judge_prompt_last2"].replace('##debate_topic##', debate_topic)
      judge_chat_history.append(HumanMessage(content = msg2))
      response = chat_claude_model.invoke([system] + judge_chat_history)
      print("###Final Judge Response###")
      print(response)
      judge_chat_history.append(response)

      return {"messages": [response], "speakers": state["speakers"] + 1}

    # A round is three turns (affirmative, negative, moderator), so the moderator
    # speaks at counts 2, 5, 8, ... which map to rounds 1, 2, 3, ...
    round_number = state["speakers"] // 3 + 1
    message = config['moderator_prompt'].replace("##round##", str(round_number)).replace("##aff_ans##", aff_ans).replace("##neg_ans##", neg_ans)
    # Stored as a HumanMessage, like every other prompt kept in the chat histories.
    moderator_chat_history.append(HumanMessage(content=message))

    response = chat_claude_model.invoke([system] + moderator_chat_history)
    moderator_chat_history.append(response)
    print("###Moderator Response###")
    print(response)
    # The reply goes back inside a list so it is added onto the existing message list.
    return {"messages": [response], "speakers": state["speakers"] + 1}

In [None]:
import json
import xml.etree.ElementTree as ET

def extract_xml_token(msg, tag):
    """Return the text between the first ``<tag>`` and the ``</tag>`` that follows it.

    Args:
        msg: text to search.
        tag: tag name without angle brackets.

    Returns:
        The enclosed text. An empty string when the opening tag is absent;
        everything after the opening tag when the closing tag is absent.
    """
    open_tag = "<" + tag + ">"
    close_tag = "</" + tag + ">"

    start = msg.find(open_tag)
    if start == -1:
        # str.find signals "not found" with -1; slicing with it would return garbage.
        return ""
    start += len(open_tag)

    # Only a closing tag located after the opening tag can terminate the token.
    end = msg.find(close_tag, start)
    return msg[start:] if end == -1 else msg[start:end]

# Conditional-edge router: decide what follows the moderator's turn.
def continue_debate(state):
    """Pick the next step from the moderator's (or judge's) last reply.

    The reply is expected to carry a JSON object inside ``<response></response>``
    tags. If the payload is not valid JSON it is parsed as an XML element whose
    text holds the JSON.

    Reads the module-level ``max_rounds``.

    Returns:
        ``"end"`` when a non-empty ``debate_answer`` is present, ``"continue"``
        while fewer than ``max_rounds`` rounds have been completed, otherwise
        ``"judge_final_call"``.

    Raises:
        xml.etree.ElementTree.ParseError or json.JSONDecodeError: when the payload
            can be parsed neither directly nor through the XML fallback.
    """
    last_message = state["messages"][-1].content
    payload = extract_xml_token(last_message, "response")
    try:
      result = json.loads(payload)
    except ValueError:
      # Fallback: the JSON may be wrapped in another XML element / CDATA section.
      root = ET.fromstring(payload)
      # The parsed value must land in `result`; it is what gets inspected below.
      result = json.loads((root.text or "").strip())

    print(result)

    # A missing key or a non-object payload counts as "no answer yet".
    answer = result.get("debate_answer") if isinstance(result, dict) else None

    # If there is a debate_answer, then we finish
    if answer:
        print('Final answer is', answer)
        return "end"
    # Otherwise keep debating while rounds remain (three turns make one round)
    elif state["speakers"]//3 < max_rounds:
        return "continue"
    else:
        return "judge_final_call"


## Define the graph

In [None]:
from langgraph.graph import StateGraph, END

# Define a new graph whose state is AgentState
workflow = StateGraph(AgentState)

# Register the three nodes the debate cycles through
workflow.add_node("affirmative", call_affirmative_debater)
workflow.add_node("negative", call_negative_debater)
workflow.add_node("moderator", call_moderator)

# Set the entrypoint as `affirmative`
# This means that this node is the first one called
workflow.set_entry_point("affirmative")

In [None]:

# Fixed order inside a round: affirmative -> negative -> moderator
workflow.add_edge("affirmative", "negative")
workflow.add_edge("negative", "moderator")

# We now add a conditional edge
workflow.add_conditional_edges(
    # First, we define the start node for the conditional edge. We use `moderator`.
    "moderator",
    # Next, we pass in the function that will determine which node is called next.
    continue_debate,
    # Finally we pass in a mapping.
    # The keys are strings, and the values are other nodes.
    # END is a special node marking that the graph should finish.
    # What will happen is we will call `continue_debate`, and then the output of that
    # will be matched against the keys in this mapping.
    # Based on which one it matches, that node will then be called.
    {
        # If `continue`, another round starts with the affirmative debater.
        "continue": "affirmative",
        # If `judge_final_call`, the moderator node runs again (its final-judge branch).
        "judge_final_call": "moderator",
        # Otherwise we finish.
        "end": END,
    },
)

# Finally, we compile it!
# This compiles it into a LangChain Runnable,
# meaning you can use it as you would any other runnable
app = workflow.compile()

## Multi-agent debate runs

We can now use it!
This now exposes the [same interface](https://python.langchain.com/docs/expression_language/) as all other LangChain runnables.

In [None]:
# Reference prompts from the paper
# TODO: Collect all prompts into a separate prompt store
# The ##...## markers (##debate_topic##, ##aff_ans##, ##neg_ans##, ##oppo_ans##, ##round##)
# are placeholders that the node functions and the run-setup cells fill in with str.replace.
# NOTE(review): these prompts do not ask for <response></response> tags, while
# continue_debate extracts the "response" tag from the reply — confirm before using this set.
config_ref = {
    "debate_topic": "",
    "base_answer": "",
    "debate_answer": "",
    "player_meta_prompt": "You are a debater. Hello and welcome to the debate. It's not necessary to fully agree with each other's perspectives, as our objective is to find the correct answer.\nThe debate topic is stated as follows:\n##debate_topic##",
    "moderator_meta_prompt": "You are a moderator. There will be two debaters involved in a debate. They will present their answers and discuss their perspectives on the following topic: \"##debate_topic##\"\nAt the end of each round, you will evaluate answers and decide which is correct.",
    "affirmative_prompt": "##debate_topic##",
    "negative_prompt": "##aff_ans##\n\nYou disagree with my answer. Provide your answer and reasons.",
    "moderator_prompt": "Now the ##round## round of debate for both sides has ended.\n\nAffirmative side arguing:\n##aff_ans##\n\nNegative side arguing: ##neg_ans##\n\nYou, as the moderator, will evaluate both sides' answers and determine if there is a clear preference for an answer candidate. If so, please summarize your reasons for supporting affirmative/negative side and give the final answer that you think is correct, and the debate will conclude. If not, the debate will continue to the next round. Now please output your answer in json format, with the format as follows: {\"Whether there is a preference\": \"Yes or No\", \"Supported Side\": \"Affirmative or Negative\", \"Reason\": \"\", \"debate_answer\": \"\"}. Please strictly output in JSON format, do not output irrelevant content.",
    "judge_prompt_last1": "Affirmative side arguing: ##aff_ans##\n\nNegative side arguing: ##neg_ans##\n\nNow, what answer candidates do we have? Present them without reasons.",
    "judge_prompt_last2": "Therefore, ##debate_topic##\nPlease summarize your reasons and give the final answer that you think is correct. Now please output your answer in json format, with the format as follows: {\"Reason\": \"\", \"debate_answer\": \"\"}. Please strictly output in JSON format, do not output irrelevant content.",
    "debate_prompt": "##oppo_ans##\n\nDo you agree with my perspective? Please provide your reasons and answer."
}

In [None]:
# Variant 1: the moderator prompt asks for its JSON inside <response></response> XML tags.
# The ##...## markers are placeholders filled in with str.replace.
# NOTE(review): "judge_prompt_last2" here still asks for bare JSON without the tags,
# while continue_debate extracts the "response" tag — confirm the final judge call works with this set.
config_mod1 = {
    "debate_topic": "",
    "base_answer": "",
    "debate_answer": "",
    "player_meta_prompt": "You are a debater. Hello and welcome to the debate. It's not necessary to fully agree with each other's perspectives, as our objective is to find the correct answer.\nThe debate topic is stated as follows:\n##debate_topic##",
    "moderator_meta_prompt": "You are a moderator. There will be two debaters involved in a debate. They will present their answers and discuss their perspectives on the following topic: \"##debate_topic##\"\nAt the end of each round, you will evaluate answers and decide which is correct.",
    "affirmative_prompt": "##debate_topic##",
    "negative_prompt": "##aff_ans##\n\nYou disagree with my answer. Provide your answer and reasons.",
    "moderator_prompt": "Now the round ##round## of debate for both sides has ended.\n\nAffirmative side arguing:\n##aff_ans##\n\nNegative side arguing: ##neg_ans##\n\nYou, as the moderator, will evaluate both sides' answers and determine if there is a clear preference for an answer candidate. If so, please summarize your reasons for supporting affirmative/negative side and give the final answer that you think is correct, and the debate will conclude. If not, the debate will continue to the next round. Now please output your answer in json format within <response></response> XML tags, with the format as follows: {\"Whether there is a preference\": \"Yes or No\", \"Supported Side\": \"Affirmative or Negative\", \"Reason\": \"\", \"debate_answer\": \"\"}. Please strictly output in JSON format within <response></response> XML tags , do not output irrelevant content.",
    # "moderator_prompt": "Now the ##round## round of debate for both sides has ended.\n\nAffirmative side arguing:\n##aff_ans##\n\nNegative side arguing: ##neg_ans##\n\nYou, as the moderator, will evaluate both sides' answers and determine if there is a clear preference for an answer candidate. If so, please summarize your reasons for supporting affirmative/negative side and give the final answer that you think is correct, and the debate will conclude. If not, the debate will continue to the next round. Now please output your answer in json format, with the format as follows: {\"Whether there is a preference\": \"Yes or No\", \"Supported Side\": \"Affirmative or Negative\", \"Reason\": \"\", \"debate_answer\": \"\"}. Please strictly output in JSON format, do not output irrelevant content.",
    "judge_prompt_last1": "Affirmative side arguing: ##aff_ans##\n\nNegative side arguing: ##neg_ans##\n\nNow, what answer candidates do we have? Present them without reasons.",
    "judge_prompt_last2": "Therefore, ##debate_topic##\nPlease summarize your reasons and give the final answer that you think is correct. Now please output your answer in json format, with the format as follows: {\"Reason\": \"\", \"debate_answer\": \"\"}. Please strictly output in JSON format, do not output irrelevant content.",
    "debate_prompt": "##oppo_ans##\n\nDo you agree with my perspective? Please provide your reasons and answer."
}

In [None]:
# Adjusted prompts (variant 2): opponent text is wrapped in XML tags, and both the moderator
# and the final judge prompt ask for JSON inside <response></response> tags.
# The ##...## markers are placeholders filled in with str.replace.
config_mod2 = {
    "debate_topic": "",
    "base_answer": "",
    "debate_answer": "",
    "player_meta_prompt": "You are a debater. Hello and welcome to the debate. It's not necessary to fully agree with each other's perspectives, as our objective is to find the correct answer.\nThe debate topic is stated as follows:\n##debate_topic##",
    "moderator_meta_prompt": "You are a moderator. There will be two debaters involved in a debate. They will present their answers and discuss their perspectives on the following topic: \"##debate_topic##\"\nAt the end of each round, you will evaluate answers and decide which is correct.",
    "affirmative_prompt": "##debate_topic##",
    # "negative_prompt": "##aff_ans##\n\n This answer is wrong. You have to find the mistake, explain it and rectify.",
    "negative_prompt": "<opponent_answer>##aff_ans##</opponent_answer>\n\nYou disagree with the opponents answer. Provide your answer and reasons. Analyse each of the reasoning statements made and also check if the opponent has missed any important points.",
    "moderator_prompt": "Now the round ##round## of debate for both sides has ended.\n\nAffirmative side arguing:\n##aff_ans##\n\nNegative side arguing: ##neg_ans##\n\nYou, as the moderator, will evaluate both sides' answers and determine if there is a clear preference for an answer candidate. If so, please summarize your reasons for supporting affirmative/negative side and give the final answer that you think is correct, and the debate will conclude. If not, the debate will continue to the next round. Now please output your answer in json format within <response></response> XML tags, with the format as follows: {\"Whether there is a preference\": \"Yes or No\", \"Supported Side\": \"Affirmative or Negative\", \"Reason\": \"\", \"debate_answer\": \"\"}. Please strictly output in JSON format within <response></response> XML tags , do not output irrelevant content.",
    "judge_prompt_last1": "Affirmative side arguing: ##aff_ans##\n\nNegative side arguing: ##neg_ans##\n\nNow, what answer candidates do we have? Present them without reasons.",
    "judge_prompt_last2": "Therefore, ##debate_topic##\nPlease summarize your reasons and give the final answer that you think is correct. Now please output your answer in json format within <response></response> XML tags, with the format as follows: {\"Reason\": \"\", \"debate_answer\": \"\"}. Please strictly output in JSON format within <response></response> XML tags, do not output irrelevant content.",
    "debate_prompt": "<opponent_response>##oppo_ans##</opponent_response>\n\nDo you agree with the opponents perspective? Please provide your reasons and answer."
}

In [None]:
# Set data index: pick one question from the loaded dataset for a single debate run
idx = 2
# debate_topic is a module-level name that call_moderator and the run-setup cell read
debate_topic = data[idx]["question"]
print(debate_topic)

One peach costs one cent. You can use 3 peach pits to exchange for one peach. If you have 10 cents, then what is the maximal number of peaches you can eat?


In [None]:
# Accumulates one log record per debate run (appended to in later cells)
log_data =[]

In [None]:
# Module-level settings read by the node functions and by continue_debate
max_rounds = 3
config = config_mod2

# devils advocate expert prompting to tackle bandwagoning?
# d_adv_prompt = "You are a person who always likes to play the devils advocate. You are a master of details and excel in finding flaws in reasoning. Go through each statement made by the opponent and check whether it is logically sound."

# Separate chat history for each participant.
# These lists must be recreated before every run; the node functions append to them in place.
affirmative_debater_chat_history, negative_debater_chat_history, moderator_chat_history, judge_chat_history = [], [], [], []
# System prompts with the debate topic substituted in (both debaters get the same one)
affirmative_sys_msg = config["player_meta_prompt"].replace("##debate_topic##", debate_topic)
negative_sys_msg = config["player_meta_prompt"].replace("##debate_topic##", debate_topic)
# negative_sys_msg = d_adv_prompt + config["player_meta_prompt"].replace("##debate_topic##", debate_topic)
moderator_sys_msg = config["moderator_meta_prompt"].replace("##debate_topic##", debate_topic)

In [None]:
os.environ["LANGCHAIN_TRACING_V2"] = "false"
# HumanMessage / SystemMessage are referenced by the node functions defined in earlier cells,
# so this import has to run before the graph is invoked.
from langchain_core.messages import HumanMessage,SystemMessage, AIMessage

# Start state: the debate topic as the only message, and no turns taken yet
inputs = {"messages": [HumanMessage(content=debate_topic)], "speakers":0}
final_state = app.invoke(inputs)

###Affirmative Debater Response###
content=" Okay, let's analyze this step-by-step:\n* You have 10 cents\n* Each peach costs 1 cent\n* So initially you can buy 10 peaches with your 10 cents\n* You get 3 peach pits for every 1 peach you eat\n* So after eating those initial 10 peaches, you have 10 * 3 = 30 peach pits\n* You can exchange 3 peach pits for 1 peach\n* So with those 30 peach pits, you can exchange them for 30/3 = 10 more peaches\n* After eating those 10 more peaches, you again have 10 * 3 = 30 more peach pits \n* You can continue exchanging those for 10 more peaches, and so on\n* So every time you eat 10 peaches you are left with enough peach pits to exchange for 10 more\n* Therefore, with your initial 10 cents, you can eat 10 + 10 + 10 + ... peaches, where the number of 10s is unlimited\n* So the maximal number of peaches you can eat is unlimited.\n\nThe answer is you can eat an unlimited number of peaches given the ability to continuously exchange peach pits for more peache

In [None]:
# Pull "debate_answer" out of the JSON found inside the <response> tags of the last message
final_answer = json.loads(extract_xml_token(final_state['messages'][-1].content, 'response'))['debate_answer']

'The maximum number of peaches that can be eaten is unlimited, given the ability to continuously exchange peach pits for more peaches.'

In [None]:
# Batch run: one full debate per question in `data`, logging every run to `file`.
# Relies on `config`, `app` and the node functions set up in earlier cells.
log_data = []
final_answer_list = []

# To load results from previous runs
# with open(file, 'r') as f:
#     # Load the JSON data from the file
#     log_data = json.load(f)

file = 'Multi_Agent_Debate_Runs_multi_round_trials_adjusted_prompts.json'

os.environ["LANGCHAIN_TRACING_V2"] = "false"
from langchain_core.messages import HumanMessage,SystemMessage


for i in range(0, len(data)):

  print(f'#######QUESTION-{i}')

  # Rebinds the module-level debate_topic that call_moderator reads
  debate_topic = data[i]["question"]

  max_rounds = 3

  # Separate chat history for each participant (fresh lists for every question;
  # the node functions append to these module-level lists in place)
  affirmative_debater_chat_history, negative_debater_chat_history, moderator_chat_history, judge_chat_history = [], [], [], []
  affirmative_sys_msg = config["player_meta_prompt"].replace("##debate_topic##", debate_topic)
  negative_sys_msg = config["player_meta_prompt"].replace("##debate_topic##", debate_topic)
  # negative_sys_msg = d_adv_prompt + config["player_meta_prompt"].replace("##debate_topic##", debate_topic)
  moderator_sys_msg = config["moderator_meta_prompt"].replace("##debate_topic##", debate_topic)

  inputs = {"messages": [HumanMessage(content=debate_topic)], "speakers":0}
  final_state = app.invoke(inputs)

  # "debate_answer" from the JSON inside the <response> tags of the last message
  final_answer = json.loads(extract_xml_token(final_state['messages'][-1].content, 'response'))['debate_answer']
  final_answer_list.append(final_answer)

  # trial_run_info (states and histories are stored as their str() form)
  log_data.append(
      {"id": i,
      "x": debate_topic,
      "rounds": final_state["speakers"] // 3,
      "llm_resp": str(final_state),
      "aff_history": str(affirmative_debater_chat_history),
      "neg_history": str(negative_debater_chat_history),
      "moderator_chat_history": str(moderator_chat_history),
      "judge_chat_history": str(judge_chat_history),
      "final_answer": final_answer
      }
  )

  # The whole log is rewritten after every question, so finished runs are on disk
  # even if a later question fails.
  with open(file, 'w') as f:
      json.dump(log_data, f, indent=4)

#######QUESTION-23
###Affirmative Debater Response###
content=" * I would be in second place after overtaking the person who was originally in second place. \n\nHere is the logic:\n* Initially, there is someone in 1st place and someone in 2nd place\n* When I overtake the person in 2nd place, I take their position \n* Therefore, after the overtake I am now in 2nd place while the original 2nd place person goes down to 3rd.\n\nSo after overtaking the 2nd place person, I would now be in 2nd place. I'm happy to discuss this further if you have a different perspective! Let me know your thoughts."
###Negative Debater Response###
content=" I respectfully disagree with the opponent's conclusion that overtaking the person in 2nd place means you are now in 2nd place. Here is my perspective:\n\nLet's break this down step-by-step:\n* Initially, there is a 1st place person and a 2nd place person in the race. \n* When I pass the 2nd place person, that means there is now someone ahead of them - me.\n*

## Final-Answer Extraction/ Post-processing

In [None]:
# Final Answer Extraction/ Post-Processing
# The accuracy of this final answer extraction needs to be VALIDATED !!!
# Each debate answer is reduced to its bare numerical value by a further LLM call.
# Assumes final_answer_list[i] is the answer to data[i].
trunc_answer_list = []
for i, ans in enumerate(final_answer_list):
  # The question, the answer and the instruction each sit on their own lines; without the
  # separators the answer runs straight into the instruction text (e.g. "100Extract only ...").
  # Formatting `ans` through the f-string also accepts a debate_answer that was parsed
  # from JSON as a number rather than a string.
  prompt = (
      f'QUESTION:\n{data[i]["question"]}\n'
      f'ANSWER:\n{ans}\n'
      'Extract only the final numerical answer to the question and format them within <final_answer></final_answer> XML tag.'
  )
  resp = get_llm_resp(oai_format_single_msg(prompt), sys_msg = None)
  fin_ans = extract_xml_token(resp, 'final_answer')
  trunc_answer_list.append(fin_ans)
  print(ans,'######', fin_ans)

Alice's average speed lies somewhere between 1-3 m/s over the course of walking up and down the hill. ###### 2 m/s
1/3 ###### 1/3
13 ###### 13
The percentage of milk remaining in the cup is 48% ###### 48
If 10g salt is added to 100g water, the percentage of salt is: (10g salt)/(100g water + 10g salt) x 100 = 10g/110g x 100 = 9.09% ###### 9.09%
The new weight of the apples is 1000 kg. ###### 909.091 kg
100 ###### 526
The maximum number of people is 79. ###### 79
100 machines will need 5 minutes to produce 100 items ###### 5
The probability the witnessed taxi is actually blue is 12%. ###### 12%
0.01% ###### 19.98%
23 people ###### 23
0.19% ###### 0.19%
After 10 days there will be 512 fish in the tank. ###### 512
10 ###### 10
After 4 months there will be 91 mice. ###### 91
At minimum 60% of the adult population is married. ###### 60
37 days ###### 37
2 ###### 26
You can write 175 more words with this pen. ###### 175
The probability that Bob wins the game is 0.921875 or 92.2%. ###### 0.921

In [None]:
# Print the raw (un-truncated) debate answers, one per line
for ans in final_answer_list:
  print(ans)

1.5 m/s
The probability Alice shows up in the third bar is 5/9.
The maximal number of peaches that can be eaten with 10 cents is 20 peaches.
The final percentage of milk in the cup is 48%
If 10g of salt is added to 100g of water, the percentage of salt now is 9.09%
The new weight of the apples is 500 kg.
100
31
If 5 machines produce 5 items in 5 minutes, then 100 machines need 25 minutes to produce 100 items.
The probability the taxi is actually blue is 83% based on the information provided.
The probability of at least one false fingerprint match needs to be calculated using a binomial distribution, not by using the provided 0.01% false positive rate directly. This will give a lower probability than the 18.2% calculated by the affirmative side.
23 people are needed in the room for the probability to exceed 50% that at least two share the same birthday.
0.19%
After 10 days there will be 512 fish in the tank.
After all the flips, all 100 chairs will be back in their original position.
49

In [None]:
# 1) Verify that all the LLM API calls are made properly
# 2) Calculating the number of disagreements


In [None]:
# trial_run_info for a single run: records whatever idx, debate_topic, final_state and the
# chat histories currently hold at module level.
# NOTE(review): unlike the record built in the batch loop, this one has no "final_answer" key.
log_data.append(
    {"id": idx,
    "x": debate_topic,
    "rounds": final_state["speakers"] // 3,
    "llm_resp": str(final_state),
    "aff_history": str(affirmative_debater_chat_history),
    "neg_history": str(negative_debater_chat_history),
    "moderator_chat_history": str(moderator_chat_history),
    "judge_chat_history": str(judge_chat_history),
    }
)

In [None]:
# Write the accumulated log records to disk (the file is overwritten, not appended to)
file = 'Multi_Agent_Debate_Runs_multi_round.json'
with open(file, 'w') as f:
    json.dump(log_data, f, indent=4)

#  LangGraph based Modelling - Self-Refine

In [None]:
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage

In [None]:
# Single transcript shared by all self-refine nodes; each node appends its prompt and reply here
chat_history = []

# This State object is passed between the nodes of the graph; nodes are executable functions (function calling, LLM calling, etc.)
class AgentStateRefine(TypedDict):
    """State shared by the nodes of the self-refine graph."""
    # Message log. Annotated with operator.add; the node functions return one-element
    # lists so that their reply is added onto the existing list.
    messages: Annotated[Sequence[BaseMessage], operator.add]

# Define the function that creates the initial solution
def initial_generation(state):
    """Entry node of the self-refine graph: answer the user's question.

    Takes the first message of the graph state as the question, sends it to the
    model and records both in the module-level ``chat_history``.

    Returns:
        A state update holding the model's reply in a one-element list.
    """
    # chat_history is module-level and shared by every node. Start each run from a clean
    # transcript so a previous question's conversation is not sent along with the new one.
    chat_history.clear()
    chat_history.append(state["messages"][0])
    response = chat_claude_model.invoke(chat_history)
    chat_history.append(response)
    print(response)
    # We return a list, because this will get added to the existing list
    return {"messages": [response]}

# Node: critique the most recent solution.
def feedback_generation(state):
    """Ask the model for feedback on the solution proposed so far.

    Appends the feedback request and the model's critique to the module-level
    ``chat_history``.

    Returns:
        A state update holding the critique in a one-element list.
    """
    critique_request = HumanMessage(content="You need to provide feedback for the previously proposed solution. Critically analyse and verify each of the reasoning statements made. Make sure that no important information was missed during the reasoning process.")
    chat_history.append(critique_request)

    critique = chat_claude_model.invoke(chat_history)
    chat_history.append(critique)
    print(critique)
    # Returned inside a list so it is added onto the existing message list.
    return {"messages": [critique]}

# Node: rewrite the solution in light of the feedback.
def revise_output(state):
    """Ask the model to revise its previous solution using the feedback given.

    Appends the revision request and the revised solution to the module-level
    ``chat_history``.

    Returns:
        A state update holding the revised solution in a one-element list.
    """
    revision_request = HumanMessage(content="Based on the previous feedbacks generated, revise your previous solution.")
    chat_history.append(revision_request)

    revised = chat_claude_model.invoke(chat_history)
    chat_history.append(revised)
    print(revised)
    # Returned inside a list so it is added onto the existing message list.
    return {"messages": [revised]}



In [None]:
import json

def extract_xml_token(msg, tag):
    """Return the text between the first ``<tag>`` and the ``</tag>`` that follows it.

    Args:
        msg: text to search.
        tag: tag name without angle brackets.

    Returns:
        The enclosed text. An empty string when the opening tag is absent;
        everything after the opening tag when the closing tag is absent.
    """
    open_tag = "<" + tag + ">"
    close_tag = "</" + tag + ">"

    start = msg.find(open_tag)
    if start == -1:
        # str.find signals "not found" with -1; slicing with it would return garbage.
        return ""
    start += len(open_tag)

    # Only a closing tag located after the opening tag can terminate the token.
    end = msg.find(close_tag, start)
    return msg[start:] if end == -1 else msg[start:end]

# Conditional-edge router: ask the model whether another feedback/revise cycle is needed.
def continue_refinement(state):
    """Decide whether the self-refine loop should run another cycle.

    The decision prompt is sent together with the module-level ``chat_history``
    but is not stored in it, so the transcript holds only the solution,
    feedback and revision turns. The reply is expected to carry a JSON object
    inside ``<response></response>`` tags.

    Returns:
        ``"continue"`` when the model answers yes, otherwise ``"end"``.

    Raises:
        json.JSONDecodeError: when the extracted payload is not valid JSON.
    """
    continue_refine_prompt = "Based on the latest refined solution generated for the initial user query, you have to decide whether to continue the refinement process or not. You also need to provide relevant reasons. Now please output your answer in json format within <response></response> XML tags, with the format as follows: {\"Whether to continue refinement process\": \"Yes or No\", \"Reason\": \"\"}. Please strictly output in JSON format within <response></response> XML tags , do not output irrelevant content."
    response = chat_claude_model.invoke(chat_history + [HumanMessage(content=continue_refine_prompt)])
    print(response)
    result = json.loads(extract_xml_token(response.content, "response"))
    print(result)

    # Normalise the verdict so stray whitespace or casing does not end the loop by accident;
    # a missing key is treated as "do not continue".
    decision = str(result.get("Whether to continue refinement process", "")).strip().lower()

    # Refine again only on an explicit yes
    if decision == "yes":
        return "continue"
    else:
        return "end"

In [None]:
from langgraph.graph import StateGraph, END

# Define a new graph whose state is AgentStateRefine
workflow_self_refine = StateGraph(AgentStateRefine)

# Register the three nodes: initial answer, feedback, revision
workflow_self_refine.add_node("init", initial_generation)
workflow_self_refine.add_node("feedback", feedback_generation)
workflow_self_refine.add_node("revise", revise_output)

# Set the entrypoint as `init`
# This means that this node is the first one called
workflow_self_refine.set_entry_point("init")

In [None]:

# Fixed order: init -> feedback -> revise
workflow_self_refine.add_edge("init", "feedback")
workflow_self_refine.add_edge("feedback", "revise")

# We now add a conditional edge
workflow_self_refine.add_conditional_edges(
    # First, we define the start node for the conditional edge. We use `revise`.
    "revise",
    # Next, we pass in the function that will determine which node is called next.
    continue_refinement,
    # Finally we pass in a mapping.
    # The keys are strings, and the values are other nodes.
    # END is a special node marking that the graph should finish.
    # What will happen is we will call `continue_refinement`, and then the output of that
    # will be matched against the keys in this mapping.
    # Based on which one it matches, that node will then be called.
    {
        # If `continue`, another feedback/revise cycle starts.
        "continue": "feedback",
        # Otherwise we finish.
        "end": END,
    },
)

# Finally, we compile it!
# NOTE: this rebinds `app`, the same name the multi-agent debate graph was compiled to.
app = workflow_self_refine.compile()

# Self-Refine Expt. Run

In [None]:
# Set data index: pick one question from the loaded dataset for a self-refine run
idx = 0
debate_topic = data[idx]["question"]
print(debate_topic)

When Alice walks up the hill, her speed is 1 m/s and when she goes down the hill, her speed is 3 m/s. Then when Alice walks up and down the hill, what is her average speed?


In [None]:
os.environ["LANGCHAIN_TRACING_V2"] = "false"
# HumanMessage is referenced by the self-refine node functions defined in earlier cells
from langchain_core.messages import HumanMessage,SystemMessage

# Start state: the question as the only message
inputs = {"messages": [HumanMessage(content=debate_topic)]}
final_state = app.invoke(inputs)

content=" * Alice walks up the hill at 1 m/s\n* Alice walks down the hill at 3 m/s\n* To calculate average speed:\n* Let's assume Alice walks the same distance up and down the hill \n* Distance up the hill = Distance down the hill\n* Time up the hill = Distance up / Speed up = Distance down / 1 m/s\n* Time down the hill = Distance down / Speed down = Distance up / 3 m/s\n* Total time = Time up + Time down\n* Total distance = 2 * Distance up (or 2 * Distance down)\n* Average speed = Total distance / Total time\n* Substitute the values:\n* Total distance = 2 * Distance up \n* Total time = Distance up / 1 m/s + Distance up / 3 m/s\n* Average speed = 2 * Distance up / (Distance up / 1 m/s + Distance up / 3 m/s)\n* Simplify: Average speed = 2 m/s\n\nTherefore, when Alice walks up and down the hill at 1 m/s and 3 m/s respectively, her average speed is 2 m/s."
content=" Let me analyze the previous solution step-by-step:\n\n1) Alice walks up the hill at 1 m/s \n\n- Correct. This is directly st

In [None]:
# Show the text of the last message in the final graph state. This must stay
# a bare trailing expression so the notebook renders its value.
final_state['messages'][-1].content

' Here is the revised solution:\n\n1) You have 10 cents to spend on peaches.\n\n2) It is given in the problem that one peach costs one cent.\n\n3) With 10 cents, you can buy 10 peaches initially (as each peach costs 1 cent). \n\n4) It is also given in the problem that you can exchange 3 peach pits for 1 additional peach. \n\n5) After eating the 10 initial peaches, you will have 10 peach pits remaining.\n\n6) With 10 peach pits, you can exchange them for additional peaches based on the exchange rate of 3 pits for 1 peach. \n\n7) 10 peach pits allows for 10/3 = 3 additional peach exchanges (integer division).  \n\n8) Therefore, the total number of peaches you can eat:\n- Initial peaches from 10 cents: 10 \n- Additional peaches from exchanging pits: 3\n- Total peaches = Initial (10) + Additional (3) = 13\n\nIn conclusion, the maximum number of peaches you can eat given 10 cents and the ability to exchange 3 peach pits for 1 peach is 13.\n\nI have simplified and streamlined the solution to

In [None]:
# Accumulators for the full run: one log record and one extracted answer
# per question in `data`.
log_data = []
final_answer_list = []

# To resume from a previous run, load the earlier records into `log_data`
# once `file` is defined:
# with open(file, 'r') as f:
#     log_data = json.load(f)

file = 'Multi_Agent_Debate_Runs_self_refine.json'

os.environ["LANGCHAIN_TRACING_V2"] = "true"
from langchain_core.messages import HumanMessage,SystemMessage


for i, record in enumerate(data):
    print(f'#######QUESTION-{i}')

    debate_topic = record["question"]

    # NOTE(review): `max_rounds` is not read in this cell and `chat_history`
    # is only logged (always empty) below; both are kept in case other cells
    # rely on them as globals -- confirm before removing.
    max_rounds = 3
    chat_history = []

    # Run the self-refine graph on this question.
    inputs = {"messages": [HumanMessage(content=debate_topic)]}
    final_state = app.invoke(inputs)

    # Ask the LLM to pull the final answer out of the last message, then
    # read it back from the <final_answer> tag.
    refined_solution = final_state['messages'][-1].content
    final_ext_prompt = (
        '<solution>'
        + refined_solution
        + '</solution>\n Extract only the final numerical answer from this solution and format it within <final_answer></final_answer> XML tag.'
    )
    raw_extraction = get_llm_resp(
        oai_format_single_msg(final_ext_prompt), sys_msg=None
    )
    final_answer = extract_xml_token(raw_extraction, 'final_answer')
    print(f'FINAL ANSWER-{i}', final_answer)
    final_answer_list.append(final_answer)

    # Record everything about this trial; the state and history are stored
    # as their string representations.
    run_record = {
        "id": i,
        "x": debate_topic,
        "llm_resp": str(final_state),
        "chat_history": str(chat_history),
        "final_answer": final_answer,
    }
    log_data.append(run_record)

    # Rewrite the whole log after every question so the records gathered so
    # far are on disk even if a later iteration fails.
    with open(file, 'w') as f:
        json.dump(log_data, f, indent=4)

    # break


#######QUESTION-0
content=" * Alice walks up the hill at 1 m/s\n* Alice walks down the hill at 3 m/s\n* We don't know the distance Alice walks up and down, but we can calculate her average speed without it.\n* Speed = Distance / Time\n* So Average Speed = Total Distance / Total Time\n* We don't know the total distance, but we can calculate the total time\n* Let's assume the time up is t1 and the time down is t2\n* Then: \n* Time up = Distance up / Speed up \n* t1 = d1 / 1 m/s\n* Time down = Distance down / Speed down\n* t2 = d2 / 3 m/s\n* Total Time = t1 + t2\n* Substitute the time equations:\n* Total Time = d1/1 + d2/3 \n* Since we don't know d1 and d2, we can simplify by removing them:  \n* Total Time = 1/1 + 1/3 = 4/3\n* Therefore, Alice's average speed = Total Distance / (4/3)\n\nWithout knowing the actual distances Alice walked up and down, it's not possible to calculate her exact average speed. But we know it's between her minimum speed of 1 m/s and her maximum speed of 3 m/s."
c

In [None]:
# Print every extracted answer, in question order.
for extracted_answer in final_answer_list:
    print(extracted_answer)


No precise numerical value for average speed can be calculated from the information provided.

1
13
80%
9.09%
500 kg
100
47
500
0.41
86.3%
23
0.19%
512 fish
10
133 mice after 4 months
60%
37
2
275 words

Probability Alice wins: 41/64
Probability Bob wins: 30/64

100 kg
50%
2nd place
10
1/11

a = 2
b = 8
c = 4   
d = 6

6 hours
2
1/100
2%
2/3
50%
1/100
8 minutes
3 revolutions
5
3600 km
50%
1000
1:1
100,000 miles
20
900 rupees
4
17
49

The 10 minute probability lies between 0% and 95%

30 minutes
2
