In [None]:
# README

# Multi agent review board notebook using "mmlu" data (Source code has "geography", "gsm", "math", and "mmlu" data)
# Source code: https://github.com/composable-models/llm_multiagent_debate

# CONFIGURE parameters (which models to use, agent configs, rounds, and question count) in the code cell marked CONFIGURE below.

In [None]:
# Install depencies and authenticate with hugging face to access gated models
!pip install transformers accelerate -q

# Authenticate with Hugging Face
from huggingface_hub import login
login()  # You'll need a HF token

In [None]:
# CONFIGURE rounds, questions, pipelines, and agent configs in this cell
from transformers import pipeline
import torch

# =====ROUNDS & QUESTIONS=====
rounds = 2       # number of debate rounds run for every question
questions = 100  # number of questions sampled for the run

# =====GENERATION PIPELINES=====
# NOTE: You can create pipelines from multiple models, not just one.
# But beware that you have a limited amount of RAM and VRAM to work with;
# check usage via Runtime -> View Resources.
model_id = "meta-llama/Llama-3.2-1B-Instruct"
pipe = pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# =====AGENTS=====
# One agent per temperature; currently every agent shares the same
# model/pipeline and differs only in its sampling temperature.
agent_temperatures = (0.1, 0.7, 1.5)
agent_configs = [{"pipe": pipe, "temp": temp} for temp in agent_temperatures]

In [None]:
from glob import glob
import pandas as pd
import json
import time
import random


In [None]:
# Load MMLU data and store data frames
!curl "https://people.eecs.berkeley.edu/~hendrycks/data.tar" | tar -xvf -

tasks = glob("./data/test/*.csv")
dfs = [pd.read_csv(task) for task in tasks]

len(dfs)


In [None]:
def construct_message(agents, question, idx):
    """Build the next user-turn message for one agent in the debate.

    Args:
        agents: Conversation histories (lists of chat-message dicts) of the
            other agents. When empty, a generic "double check" prompt is
            returned instead of a debate prompt.
        question: The question text. Kept for interface compatibility; the
            prompt templates contain no placeholder for it, so it does not
            appear in the returned content.
        idx: Index, within each other agent's history, of the message whose
            "content" is quoted in the prompt.

    Returns:
        A chat message dict with "role" set to "user" and the assembled
        prompt under "content".
    """
    if len(agents) == 0:
        return {"role": "user", "content": "Can you double check that your answer is correct. Put your final answer in the form (X) at the end of your response."}

    # One fenced quote per other agent, in the order the agents were given.
    quoted_solutions = [
        "\n\n One agent solution: ```{}```".format(history[idx]["content"])
        for history in agents
    ]

    opening = "These are the solutions to the problem from other agents: "
    closing = "\n\n Using the reasoning from other agents as additional advice, can you give an updated answer? Examine your solution and that other agents step by step. Put your answer in the form (X) at the end of your response."

    return {"role": "user", "content": opening + "".join(quoted_solutions) + closing}


def construct_assistant_message(completion):
    """Wrap the newest generated turn of a completion as an assistant message.

    Args:
        completion: Indexable result whose first element is a mapping with a
            "generated_text" list of chat-message dicts (presumably the
            output of the text-generation pipeline called with chat
            messages; verify against the pipeline in use).

    Returns:
        A dict with "role" set to "assistant" and "content" copied from the
        last entry of that "generated_text" list.
    """
    first_sequence = completion[0]
    newest_turn = first_sequence["generated_text"][-1]
    return {"role": "assistant", "content": newest_turn["content"]}


def generate_answer(agent_config, agent_context):
    """Sample one completion for an agent's conversation so far.

    Args:
        agent_config: Mapping with a callable under "pipe" and the sampling
            temperature under "temp".
        agent_context: The agent's chat history, passed to the callable as
            its only positional argument.

    Returns:
        Whatever the "pipe" callable returns, unmodified.
    """
    text_generator = agent_config["pipe"]
    sampling_options = {
        "max_new_tokens": 1024,
        "do_sample": True,
        "temperature": agent_config["temp"],
        "top_p": 0.9,
    }
    return text_generator(agent_context, **sampling_options)


def parse_question_answer(df, ix):
    """Turn one data-frame row into a prompt string and its answer.

    Args:
        df: Data frame read positionally: column 0 is the question, columns
            1-4 are the choices A-D and column 5 is the answer.
        ix: Positional row index of the question to format.

    Returns:
        A (prompt, answer) tuple: the formatted multiple-choice prompt and
        the value stored in column 5 of that row.
    """
    stem, choice_a, choice_b, choice_c, choice_d, answer = (
        df.iloc[ix, column] for column in range(6)
    )

    template = "Can you answer the following question as accurately as possible? {}: A) {}, B) {}, C) {}, D) {} Explain your answer, putting the answer in the form (X) at the end of your response."
    prompt = template.format(stem, choice_a, choice_b, choice_c, choice_d)

    return prompt, answer



In [None]:
# Run the multi-agent debate: sample `questions` random MMLU questions, let
# every agent answer, then let each agent revise its answer for the remaining
# rounds after reading the other agents' previous-round answers.
random.seed(0)
response_dict = {}

for question_idx in range(questions):
    # Pick a random subject, then a random row within that subject.
    df = random.choice(dfs)
    idx = random.randint(0, len(df) - 1)

    question, answer = parse_question_answer(df, idx)

    # Every agent starts from its own copy of the same single-turn conversation.
    agent_contexts = [[{"role": "user", "content": question}] for _ in range(len(agent_configs))]
    print(f"Question {question_idx + 1}/{questions}: {question}")
    print(f"Answer: {answer}")

    for round_idx in range(rounds):
        for agent_idx, agent_context in enumerate(agent_contexts):
            print(f"Round: {round_idx + 1}/{rounds} | Agent: {agent_idx + 1}/{len(agent_configs)}")
            if round_idx != 0:
                agent_contexts_other = agent_contexts[:agent_idx] + agent_contexts[agent_idx + 1:]
                # Histories alternate user/assistant starting at index 0, so the
                # assistant reply of the previous round sits at 2 * round_idx - 1.
                message = construct_message(agent_contexts_other, question, 2 * round_idx - 1)
                agent_context.append(message)

            completion = generate_answer(agent_configs[agent_idx], agent_context)

            assistant_message = construct_assistant_message(completion)
            agent_context.append(assistant_message)
            print(assistant_message)

    # NOTE: keyed by question text, so a question sampled twice keeps only
    # its latest debate.
    response_dict[question] = (agent_contexts, answer)

# Context manager so the results file is flushed and closed deterministically.
with open("mmlu_{}_{}.json".format(len(agent_configs), rounds), "w") as results_file:
    json.dump(response_dict, results_file)


Eval

In [11]:
import sklearn
import json
from collections import Counter
import re

In [12]:
# Import json result


In [15]:

# def solve_math_problems(input_str):
#     pattern = r"\d+\.?\d*"

#     matches = re.findall(pattern, input_str)
#     if matches:
#         return matches[-1]

#     return None

def parse_answer(input_str):
    """Extract the final multiple-choice letter from a model response.

    Only parenthesised choice letters A-D (either case) are considered, so
    numbered steps such as "(1)" or an echoed "(X)" placeholder from the
    prompt are not mistaken for an answer.

    Args:
        input_str: The response text to scan.

    Returns:
        The last parenthesised choice letter in the text, upper-cased, or
        None when the text contains none.
    """
    matches = re.findall(r'\(([A-Da-d])\)', input_str)
    if not matches:
        return None

    # The prompt asks for the answer at the end of the response, so the last
    # occurrence wins.
    return matches[-1].upper()


def parsed_answers(pred_solutions):
    """Parse the answer out of one response or out of each response in a list.

    Args:
        pred_solutions: Either a list of response texts or a single response.

    Returns:
        A list with one parse_answer result per response; a non-list input
        yields a one-element list.
    """
    if type(pred_solutions) != list:
        return [parse_answer(pred_solutions)]

    return [parse_answer(response_text) for response_text in pred_solutions]


def most_frequent(letters):
  """Return the most common item of a list, e.g. ["A", "A", "C"] -> "A".

  Ties go to the item that appears first in the list.

  Raises:
    ValueError: If the input is empty.
  """
  if letters:
    (winner, _count), = Counter(letters).most_common(1)
    return winner

  raise ValueError("Expected non-empty list.")




In [21]:
# Evaluate saved debate results: take every agent's final response, parse the
# chosen letter, and compare the majority vote with the ground truth.
RESULTS_PATH = "mmlu_3_2.json"
PREVIEW_LIMIT = 5  # number of questions inspected; set to None to evaluate all

# Context manager so the results file is closed after reading.
with open(RESULTS_PATH, "r") as results_file:
    response_dict = json.load(results_file)

# Separate name: reusing `questions` would overwrite the integer question
# count set in the configuration cell and break a re-run of the debate cell.
eval_questions = list(response_dict.keys())

accuracies = []

for question in eval_questions[:PREVIEW_LIMIT]:
    responses, gt = response_dict[question]

    # The last message of each agent's history is its final answer.
    pred_solutions = []
    for response in responses:
        pred_solution = response[-1]['content']
        pred_solutions.append(pred_solution)
        print(pred_solution)

    pred_answers = parsed_answers(pred_solutions)
    consensus = most_frequent(pred_answers)
    print("Predicted: ", pred_answers)
    print("Consensus: ", consensus)
    print("Actual: ", gt)
    print("=========================")

    # A consensus of None (most responses could not be parsed) counts as wrong.
    accuracies.append(float(consensus == gt))

if accuracies:
    print("Accuracy over {} question(s): {:.3f}".format(len(accuracies), sum(accuracies) / len(accuracies)))



To answer this question accurately, let's break down the key concepts and definitions related to historical materialism.

Historical materialism is a theoretical framework that emphasizes the role of material conditions in shaping social and economic structures. It is rooted in the idea that social relations and institutions are based on the economic base, which is the foundation of society.

A) This option mentions HM's focus on class and the productive basis of societies, as well as the ways in which social structures have the capacity to change and produce change. While materialism does indeed emphasize the role of class and social structures, it also incorporates the concept of productive bases and change, which can be seen as part of the broader definition of materialism.

B) This option highlights HM's roots in postmodern schools of thought, but this does not directly define materialism. Materialism can be seen as a critical approach that encompasses various theories and perspect