<a href="https://colab.research.google.com/github/JayTiptown/self-consistency-ensemble/blob/self-consistency/self_consistency_ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# imports

import asyncio
import random
from collections import defaultdict
import nest_asyncio
nest_asyncio.apply()  # Needed to run asyncio in Colab
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import pandas as pd
from google.colab import userdata

In [2]:
# model import

!pip install openai
!pip install anthropic
!pip install google-genai

import openai
import anthropic
import google.genai

openai.api_key = userdata.get("OPENAI_API_KEY")
anthropic.api_key = userdata.get("ANTHROPIC_API_KEY")
# google.genai.api_key = userdata.get("GEMINI_API_KEY")



In [3]:
# simulated chain of thought sample

def _extract_final_answer(text):
  """Pull the final answer out of a chain-of-thought completion.

  The last line that starts with "answer:" (case-insensitive, leading
  whitespace ignored) wins and the text after its first colon is returned.
  If no such line exists, the last non-empty line is returned. A missing or
  blank completion yields an empty string instead of raising.
  """
  lines = [line.strip() for line in (text or "").splitlines()]
  lines = [line for line in lines if line]
  if not lines:
    return ""
  for line in reversed(lines):
    if line.lower().startswith("answer:"):
      return line.split(":", 1)[1].strip()
  return lines[-1]


async def sample_chain(prompt, model_spec, *, temperature=0.7, max_tokens=150):
  """Draw one chain-of-thought sample and extract its final answer.

  Args:
    prompt: The question to ask; a "think step by step" suffix is appended.
    model_spec: A (provider, model) pair, e.g. ('openai', 'gpt-4'). The model
      name is forwarded to the provider API.
    temperature: Sampling temperature forwarded to the API (keyword-only).
    max_tokens: Completion length cap forwarded to the API (keyword-only).

  Returns:
    A tuple (answer, total_logprob). total_logprob is always 0 because
    log-probabilities are not requested from the API.

  Raises:
    NotImplementedError: For the 'anthropic' and 'gemini' providers, which
      are recognised but not implemented yet.
    ValueError: For any other unknown provider.
  """
  provider, model = model_spec

  # Fail fast with a clear error instead of falling through to an unbound
  # `answer` for providers that have no implementation yet.
  if provider in ('anthropic', 'gemini'):
    raise NotImplementedError(f"Provider not implemented yet: {provider}")
  if provider != 'openai':
    raise ValueError(f"Unknown provider: {provider}")

  full_prompt = prompt + "\nLet's think step by step.\nAnswer:"

  # The SDK call is blocking, so run it in a worker thread to keep the event
  # loop free for the other concurrent samples.
  response = await asyncio.to_thread(
      openai.chat.completions.create,
      model=model,  # honour the model named in model_spec
      messages=[{"role": "user", "content": full_prompt}],
      temperature=temperature,
      max_tokens=max_tokens,
  )

  trace = response.choices[0].message.content
  answer = _extract_final_answer(trace)

  total_logprob = 0  # Placeholder: log-probabilities are not requested.
  return answer, total_logprob

In [4]:
# budget controller (currently returns a fixed budget; not yet adaptive)
def controller(prompt, max_budget=5):
  """Decide how many samples to draw for a prompt.

  The prompt is not inspected: the budget is fixed, so the caller's
  max_budget is handed back unchanged.
  """
  budget = max_budget
  return budget

In [5]:
# parallel orchestrator

async def orchestrate_samples(prompt, budget, model_spec):
  """Run `budget` independent sample_chain calls concurrently.

  One task is scheduled per sample; the results are awaited together and
  returned as a list in the order the tasks were created.
  """
  pending = []
  for _ in range(budget):
    pending.append(asyncio.create_task(sample_chain(prompt, model_spec)))
  results = await asyncio.gather(*pending)
  return results

In [6]:
# aggregator

from collections import Counter

def aggregate_votes(samples):
    """
    Pick the majority-vote answer from a batch of samples.

    samples: iterable of (answer_str, log_prob_float) pairs; the log-prob is
        ignored and only exact answer strings are counted.

    Returns (winner, counts) where winner is the most frequent answer and
    counts maps each distinct answer to the number of times it appeared.
    Ties go to the answer that was seen first. When there are no samples,
    there is nothing to vote on and (None, {}) is returned.
    """
    counts = Counter(answer for answer, _ in samples)
    if not counts:
        # most_common(1) would be empty here; report "no winner" explicitly.
        return None, {}
    winner = counts.most_common(1)[0][0]
    return winner, dict(counts)

In [7]:
# self-consistency run
def run_self_consistency(prompt, model_spec, max_budget):
    """Sample a prompt several times, majority-vote the answers, and report.

    The sample count comes from controller(); the samples are gathered
    through orchestrate_samples() and tallied by aggregate_votes(). The
    prompt, every sampled answer, the per-answer counts and the winning
    answer are printed, and the winning answer is returned.
    """
    n_samples = controller(prompt, max_budget)
    drawn = asyncio.run(orchestrate_samples(prompt, n_samples, model_spec))
    winner, tally = aggregate_votes(drawn)

    print(f"Prompt:\n{prompt}\n")

    print("Sampled Answers:")
    for position, (text, _logprob) in enumerate(drawn, start=1):
        print(f"{position}: {text}")

    print("\nAnswer Frequencies:")
    for candidate, votes in tally.items():
        print(f"{candidate!r}: {votes}")

    print(f"\nFinal Answer (Majority Vote):\n{winner}")
    return winner

# Demo run: sample one question with the OpenAI provider and majority-vote
# the answers. The call is the cell's last expression, so the winning answer
# is also shown as the cell output.
model_spec = "openai", "gpt-4"
run_self_consistency(
    prompt="Who invented the atomic bomb?",
    model_spec=model_spec,
    max_budget=20,
)

Prompt:
Who invented the atomic bomb?

Sampled Answers:
1: A team of scientists during the Manhattan Project invented the atomic bomb. The team was led by J. Robert Oppenheimer.
2: The atomic bomb was developed by a team of scientists during the Manhattan Project, which was a research project during World War II that was led by the United States with the support of the United Kingdom and Canada. The project director was Major General Leslie Groves of the U.S. Army Corps of Engineers. Nuclear physicist Robert Oppenheimer was the director of the Los Alamos National Laboratory that designed the actual bomb.
3: J. Robert Oppenheimer and his team at the Manhattan Project.
4: The atomic bomb was developed during World War II by the Manhattan Project, a research team which included scientists such as Robert Oppenheimer, Enrico Fermi, and Richard Feynman. However, Robert Oppenheimer is often referred to as the "father of the atomic bomb".
5: The atomic bomb was developed by a team of scientist

'The atomic bomb was developed by a team of scientists under the Manhattan Project, led by J. Robert Oppenheimer.'