<a href="https://colab.research.google.com/github/JayTiptown/self-consistency-ensemble/blob/self-consistency/self_consistency_ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# imports

import asyncio
import random
from collections import defaultdict
import nest_asyncio
nest_asyncio.apply()  # Needed to run asyncio in Colab
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import pandas as pd
from google.colab import userdata

In [3]:
# model import

!pip install openai
!pip install anthropic
!pip install google-genai

import openai
import anthropic
import google.genai

# Pull provider API keys from google.colab's userdata store rather than hardcoding them.
openai.api_key = userdata.get("OPENAI_API_KEY")
# NOTE(review): confirm the anthropic SDK actually honours a module-level api_key
# attribute; verify before relying on this assignment.
anthropic.api_key = userdata.get("ANTHROPIC_API_KEY")
# Gemini key wiring is currently disabled.
# google.genai.api_key = userdata.get("GEMINI_API_KEY")



In [5]:
# simulated chain of thought sample

def _extract_final_answer(text):
  """Pick the line of a reasoning trace that most likely states the answer.

  Scans from the bottom for the first line that has more than three words and
  ends with a period. If no line qualifies, the last line is returned.
  Returns "" for empty or whitespace-only text.
  """
  # Strip each line up front so trailing whitespace cannot hide the period.
  lines = [line.strip() for line in text.strip().splitlines()]
  for line in reversed(lines):
    if len(line.split()) > 3 and line.endswith('.'):
      return line
  return lines[-1] if lines else ""


async def sample_chain(prompt, model_spec, temperature=0.7, max_tokens=150):
  """Draw one chain-of-thought sample for `prompt` and extract its answer.

  Args:
    prompt: Question text; "\nLet's think step by step." is appended to it.
    model_spec: (provider, model) pair. Only the 'openai' provider is
      implemented; `model` is passed to the chat completions call.
    temperature: Sampling temperature forwarded to the provider.
    max_tokens: Completion token limit forwarded to the provider.

  Returns:
    (answer, total_logprob). `total_logprob` is always 0 because
    log-probabilities are not requested from the provider.

  Raises:
    NotImplementedError: provider is 'anthropic' or 'gemini' (not wired up yet).
    ValueError: provider is not one of the known names.
  """
  provider, model = model_spec

  full_prompt = prompt + "\nLet's think step by step."

  if provider == 'openai':
    # Run the blocking SDK call in a worker thread so the event loop stays
    # free to drive the other concurrent samples.
    response = await asyncio.to_thread(
        openai.chat.completions.create,
        model=model,  # honour the requested model instead of a hard-coded one
        messages=[{"role": "user", "content": full_prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # content may be None; treat that as an empty trace.
    trace = response.choices[0].message.content or ""
    answer = _extract_final_answer(trace)
  elif provider in ('anthropic', 'gemini'):
    # Fail loudly rather than falling through to an unbound `answer`.
    raise NotImplementedError(f"Provider {provider!r} is not implemented yet")
  else:
    raise ValueError(f"Unknown provider: {provider}")

  total_logprob = 0  # Placeholder: no log-probabilities are collected.
  return answer, total_logprob

In [6]:
# budget controller (currently returns a fixed budget; not yet adaptive)
def controller(prompt, max_budget=5):
  """
  Decide how many samples to draw for `prompt`.

  The prompt is not inspected: the result is always exactly `max_budget`.
  """
  budget = max_budget
  return budget

In [7]:
# parallel orchestrator

async def orchestrate_samples(prompt, budget, model_spec):
  """Run `budget` sample_chain calls concurrently and collect their results.

  Each call is scheduled as its own asyncio task. The returned list holds one
  sample_chain result per task, in task-creation order.
  """
  pending = []
  for _ in range(budget):
    pending.append(asyncio.create_task(sample_chain(prompt, model_spec)))
  gathered = await asyncio.gather(*pending)
  return gathered

In [8]:
# aggregator

from collections import Counter

def aggregate_votes(samples):
    """
    Majority vote over sampled answers.

    samples: iterable of (answer_str, log_prob_float) pairs; the log-prob is
        ignored. Answers are compared by exact equality (no normalization).

    Returns (winner, counts): the most frequent answer and a dict mapping each
    distinct answer to its number of occurrences. Ties go to the answer that
    appeared first.

    Raises ValueError if `samples` is empty, since there is nothing to vote on.
    """
    counts = Counter(answer for answer, _ in samples)
    if not counts:
        # Without this guard most_common(1)[0] fails with a bare IndexError.
        raise ValueError("aggregate_votes() needs at least one sample")
    winner, _ = counts.most_common(1)[0]
    return winner, dict(counts)

In [9]:
# self-consistency run
def run_self_consistency(prompt, model_spec, max_budget):
    """Sample several answers for `prompt`, majority-vote them, and report.

    The budget comes from controller(), the samples from orchestrate_samples()
    (driven via asyncio.run), and the vote from aggregate_votes(). The prompt,
    each sampled answer, the answer frequencies and the winner are printed.
    Returns the winning answer.
    """
    n_samples = controller(prompt, max_budget)
    drawn = asyncio.run(orchestrate_samples(prompt, n_samples, model_spec))
    top_answer, tally = aggregate_votes(drawn)

    print(f"Prompt:\n{prompt}\n")

    print("Sampled Answers:")
    for position, sample in enumerate(drawn, start=1):
        text, _unused_logprob = sample
        print(f"{position}: {text}")

    print("\nAnswer Frequencies:")
    for candidate in tally:
        print(f"{candidate!r}: {tally[candidate]}")

    print(f"\nFinal Answer (Majority Vote):\n{top_answer}")
    return top_answer

# Example usage
# model_spec is a (provider, model) pair; sample_chain unpacks it in that order.
model_spec = ('openai', 'gpt-4')
# Live call with a sampling budget of 20; as the cell's last expression, the
# returned winner is also displayed below the printed report.
run_self_consistency("Who invented the atomic bomb?", model_spec, max_budget=20)

Prompt:
Who invented the atomic bomb?

Sampled Answers:
1: 3. However, the atomic bomb, as we know it, was developed as part of the Manhattan Project during World War II in the United States.
2: 3. The idea of using nuclear fission as a weapon was first proposed by Leo Szilard, a Hungarian-American physicist. He patented the concept of a nuclear chain reaction in 1933.
3: 5. The project resulted in the production of two types of atomic bombs - "Little Boy", a uranium weapon, and "Fat Man", a plutonium weapon.
4: 3. However, the actual development of the atomic bomb was not the work of a single individual, but a collaborative effort by several scientists. This effort was carried out under the Manhattan Project, a U.S. government research project that started in 1939.
5: 4. Among these scientists, the most notable were Enrico Fermi, J. Robert Oppenheimer, and Albert Einstein.
6: 4. The project was led by physicist J. Robert Oppenheimer. Several other prominent scientists were also involv

'3. However, the atomic bomb, as we know it, was developed as part of the Manhattan Project during World War II in the United States.'