In [None]:
############################################################################################
### Before running, add the following keys to your Secrets (left panel):
# 1. HF_TOKEN (hugging face)
# 2. OPENAI_API_KEY
# 3. PINECONE_API_KEY

# Note: Notebook will ask to restart after the pip install, this is expected behavior
# Note: Notebook requires A100 GPU to run (select it in the notebook settings)
# Note: My pinecone embeddings can be used without running the debate yourself (see bottom)
############################################################################################

In [None]:
import os
from google.colab import userdata
# Mirror each Colab secret into the process environment under the same name.
for _secret_name in ('PINECONE_API_KEY', 'OPENAI_API_KEY', 'HF_TOKEN'):
    os.environ[_secret_name] = userdata.get(_secret_name)

In [None]:
# This is technically the correct way to install requirements, but I found the explicit way in the cell below less buggy
# !pip install -r requirements.txt

In [None]:
!pip install langchain_huggingface langchain_openai langchain_google_genai langchain_community langchain_pinecone

Collecting langchain_huggingface
  Downloading langchain_huggingface-0.3.1-py3-none-any.whl.metadata (996 bytes)
Collecting langchain_openai
  Downloading langchain_openai-0.3.28-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.8-py3-none-any.whl.metadata (7.0 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain_pinecone
  Downloading langchain_pinecone-0.2.11-py3-none-any.whl.metadata (6.1 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain_google_genai)
  Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-se

In [None]:
def check_current_gpu():
    """
    Verify that the runtime exposes an NVIDIA A100 GPU.

    Runs `nvidia-smi` and looks for the string "A100" in its output.
    Prints 'A100 GPU' when it is found; otherwise prints the requirement
    message and raises.

    Raises:
        RuntimeError: If `nvidia-smi` cannot be executed or its output does
            not mention an A100.
    """
    # Imported locally so the cell does not depend on another cell's imports.
    import subprocess

    try:
        smi_output = subprocess.run(
            ['nvidia-smi'], capture_output=True, text=True, check=False
        ).stdout or ''
    except OSError:
        # nvidia-smi is missing or not executable: treat it as "no GPU attached".
        smi_output = ''

    if 'A100' in smi_output:
        print('A100 GPU')
    else:
        print('This script requires an A100 GPU')
        # Raise instead of calling quit(): an exception stops "Run all" with a
        # readable message, whereas quit() just tears the kernel down.
        raise RuntimeError('This script requires an A100 GPU')

check_current_gpu()

A100 GPU


In [None]:
!pip install mistral_common
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

Collecting mistral_common
  Downloading mistral_common-1.8.3-py3-none-any.whl.metadata (3.8 kB)
Collecting pydantic-extra-types>=2.10.5 (from pydantic-extra-types[pycountry]>=2.10.5->mistral_common)
  Downloading pydantic_extra_types-2.10.5-py3-none-any.whl.metadata (3.9 kB)
Collecting pycountry>=23 (from pydantic-extra-types[pycountry]>=2.10.5->mistral_common)
  Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Downloading mistral_common-1.8.3-py3-none-any.whl (6.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydantic_extra_types-2.10.5-py3-none-any.whl (38 kB)
Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m113.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycountry, pydantic-extra-types, mistral_common
Successfully installed mistral_common-1.8.3 pycountry-24.6.1 pydan

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells copy the sampled dataset and
# the debate checkpoints to /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
from huggingface_hub import HfApi
from huggingface_hub import create_repo
from huggingface_hub import HfFolder
from google.colab import userdata

# Hugging Face Hub client authenticated with the HF_TOKEN Colab secret.
api = HfApi(token=userdata.get('HF_TOKEN'))

In [None]:
# Clone the RedDebate repo
!pip install -q transformers accelerate datasets peft bitsandbytes git+https://github.com/huggingface/trl.git
!git clone https://github.com/aliasad059/RedDebate.git
%cd RedDebate

# Get the dataset
!wget -q \
  https://raw.githubusercontent.com/centerforaisafety/HarmBench/main/data/behavior_datasets/harmbench_behaviors_text_all.csv \
  -O harmbench.csv

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m85.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m72.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m53.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m11.2 MB/s[0m eta [36m0:

In [None]:
import pandas as pd
import os


# This is for creating a small version of the dataset, ensuring the new dataset is
# representative of the categories in the original (e.g. 'biohazards')
def stratified_sample_csv(csv_path, fraction, stratify_col='SemanticCategory', random_state=42):
    """
    Draw a stratified random sample of a CSV's rows and save it next to the original.

    Rows are grouped by `stratify_col` and the same `fraction` of rows is
    sampled from every group, so each category keeps (approximately) its
    share of the data. The sample is written to `<name>_small<ext>` in the
    same location as `csv_path`, overwriting any existing file of that name.

    Args:
        csv_path (str): The path to the input CSV file.
        fraction (float): The fraction of rows to sample from each group
            (between 0 and 1, inclusive).
        stratify_col (str): The name of the column to use for stratification.
        random_state (int): Seed passed to the pandas sampler. The default (42)
            keeps the sample reproducible across runs.

    Returns:
        str: The path to the newly created CSV file.

    Raises:
        ValueError: If `stratify_col` is not a column of the CSV, or if
            `fraction` is outside [0, 1].
    """
    df = pd.read_csv(csv_path)

    if stratify_col not in df.columns:
        raise ValueError(f"Column '{stratify_col}' not found in the CSV.")

    if not 0 <= fraction <= 1:
        raise ValueError("Fraction must be between 0 and 1.")

    # Sample the same fraction inside every category (stratified sampling)
    sampled_df = df.groupby(stratify_col).sample(frac=fraction, random_state=random_state)

    # Output path: same folder and extension, "_small" appended to the file stem
    base_name, ext = os.path.splitext(csv_path)
    new_file_path = f"{base_name}_small{ext}"

    sampled_df.to_csv(new_file_path, index=False)

    return new_file_path

# Sample 25% of every category; the helper writes 'harmbench_small.csv' next to
# 'harmbench.csv' and returns that path.
harmbench_small = stratified_sample_csv('harmbench.csv', 0.25)

############################################################################################
# NOTE: Once you have created a dataset, you will want to keep it stored for reproducibility
# (the copy below overwrites any 'harmbench_small.csv' already stored in MyDrive)
!cp harmbench_small.csv /content/drive/MyDrive/harmbench_small.csv

# Run this to retrieve the previously stored dataset
# !cp /content/drive/MyDrive/harmbench_small.csv harmbench_small.csv

In [None]:
############################################################################################
# NOTE: The following two cells over-write source code to allow for model quantization
############################################################################################

In [None]:
%%writefile ./redDebate/llm.py
from typing import Union

import torch
# device = torch.device('cuda')

from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from langchain_openai import ChatOpenAI, OpenAI
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAI
from langchain.prompts import PromptTemplate
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_community.llms import VLLM

# loads a model from OpenAI
class AnyOpenAILLM:
    """
    OpenAI-backed model exposed through the chain `prompt_template | llm`.

    `use_chat` is a string flag: the (case-insensitive) value 'true' selects
    `ChatOpenAI` (created with `cache=False`), any other value selects the
    completion-style `OpenAI` class. Extra keyword arguments are forwarded to
    whichever class is constructed.
    """

    def __init__(self, model_name: str, use_chat: str, prompt_template: PromptTemplate, *args, **kwargs):
        self.model_name = model_name
        self.prompt_template = prompt_template
        self.use_chat = use_chat.lower() == 'true'

        # Chat endpoint vs. plain completion endpoint
        self.llm = (
            ChatOpenAI(model=self.model_name, cache=False, **kwargs)
            if self.use_chat
            else OpenAI(model=self.model_name, **kwargs)
        )

        # Prompt rendering piped into the model
        self.chain = self.prompt_template | self.llm

    def __call__(self, prompt_inputs: Union[str, dict]):
        """Run the chain; chat results are unwrapped to their `.content`."""
        response = self.chain.invoke(prompt_inputs)
        return response.content if self.use_chat else response

# loads a model from GoogleGenerativeAI
class AnyGoogleGenerativeAI:
    """
    Google Generative AI model exposed through the chain `prompt_template | llm`.

    When `use_chat` equals 'true' (case-insensitive) the chat class
    `ChatGoogleGenerativeAI` is used with `cache=False`; otherwise the
    completion class `GoogleGenerativeAI` is used. Remaining keyword
    arguments go to the selected class unchanged.
    """

    def __init__(self, model_name: str, use_chat: str, prompt_template: PromptTemplate, *args, **kwargs):
        self.model_name = model_name
        self.prompt_template = prompt_template
        self.use_chat = use_chat.lower() == 'true'

        if not self.use_chat:
            self.llm = GoogleGenerativeAI(model=self.model_name, **kwargs)
        else:
            self.llm = ChatGoogleGenerativeAI(model=self.model_name, cache=False, **kwargs)

        # Compose prompt and model into a single runnable
        self.chain = self.prompt_template | self.llm

    def __call__(self, prompt_inputs: Union[str, dict]):
        """Invoke the chain and return text (the `.content` of a chat message in chat mode)."""
        if not self.use_chat:
            return self.chain.invoke(prompt_inputs)
        return self.chain.invoke(prompt_inputs).content

# loads a model from HuggingFace
class AnyHuggingFace:
    """
    Locally loaded Hugging Face causal LM exposed through the chain `prompt_template | llm`.

    Keyword arguments fall into two groups:

    * loader options, consumed by `AutoModelForCausalLM.from_pretrained`:
      `load_in_4bit`, `load_in_8bit`, `bnb_4bit_quant_type` (default 'nf4'),
      `bnb_4bit_compute_dtype`, `device_map` (default "auto");
    * everything else (e.g. `do_sample`, `temperature`, `max_new_tokens`,
      `return_full_text`) is forwarded to `transformers.pipeline`.

    `device` is accepted but discarded: placement is decided by `device_map`.
    `task` selects the pipeline task and defaults to 'text-generation'.
    `use_chat` is a string flag; 'true' (case-insensitive) wraps the pipeline
    in `ChatHuggingFace`, anything else uses `HuggingFacePipeline` directly.
    """

    def __init__(self, model_name: str, use_chat: str, prompt_template: PromptTemplate, *args, **kwargs):
        # Pull the loader options out of kwargs so that only pipeline options remain.
        load_in_4bit = kwargs.pop('load_in_4bit', False)
        load_in_8bit = kwargs.pop('load_in_8bit', False)
        bnb_4bit_quant_type = kwargs.pop('bnb_4bit_quant_type', 'nf4')
        bnb_4bit_compute_dtype = kwargs.pop('bnb_4bit_compute_dtype', None)
        device_map = kwargs.pop('device_map', 'auto')
        kwargs.pop('device', None)  # superseded by device_map
        # pop (not get): `task` is passed positionally below and must not be
        # forwarded a second time through **kwargs.
        self.task = kwargs.pop('task', 'text-generation')

        final_quant_kwargs = {}
        if load_in_4bit:
            final_quant_kwargs['load_in_4bit'] = load_in_4bit
        if load_in_8bit:
            final_quant_kwargs['load_in_8bit'] = load_in_8bit
        if load_in_4bit:
            # The bnb_4bit_* options only make sense together with load_in_4bit;
            # sent on their own they would reach the model as unknown kwargs.
            if bnb_4bit_quant_type:
                final_quant_kwargs['bnb_4bit_quant_type'] = bnb_4bit_quant_type
            if bnb_4bit_compute_dtype is not None:
                final_quant_kwargs['bnb_4bit_compute_dtype'] = bnb_4bit_compute_dtype

        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        if self.tokenizer.pad_token is None:
            # Fall back to EOS as the padding token when the tokenizer defines none
            self.tokenizer.pad_token = self.tokenizer.eos_token

        print(f"Loading model '{self.model_name}' with quantization: {final_quant_kwargs}")
        # NOTE: trust_remote_code=True runs code shipped with the model repo;
        # only load model ids you trust.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            trust_remote_code=True,
            device_map=device_map,
            **final_quant_kwargs
        )

        self.use_chat = use_chat.lower() == 'true'
        # No `device` is given to the pipeline: the model was already placed via device_map.
        self.pipe = pipeline(self.task, model=self.model, tokenizer=self.tokenizer, **kwargs)

        if self.use_chat:
            self.llm = ChatHuggingFace(llm=HuggingFacePipeline(pipeline=self.pipe), cache=False)
        else:
            self.llm = HuggingFacePipeline(pipeline=self.pipe)

        self.prompt_template = prompt_template
        self.chain = self.prompt_template | self.llm

    def __call__(self, prompt_inputs: Union[str, dict]):
        """Run the chain on `prompt_inputs`; chat results are unwrapped to their `.content`."""
        # The chain renders the prompt itself, so no separate
        # prompt_template.invoke() call is needed here.
        result = self.chain.invoke(prompt_inputs)
        return result.content if self.use_chat else result


# loads llama-guard from HuggingFace
class LlamaGuard:
    """
    Llama Guard safety classifier loaded from Hugging Face.

    Loader options (`load_in_4bit`, `load_in_8bit`, `bnb_4bit_quant_type`,
    `bnb_4bit_compute_dtype`) are taken out of `kwargs` and passed to
    `from_pretrained` together with `device_map="auto"`; every remaining
    keyword argument is stored and forwarded to `model.generate` on each call.
    """

    def __init__(self, model_name: str, torch_dtype=torch.bfloat16, **kwargs):
        self.model_name = model_name

        load_in_4bit = kwargs.pop('load_in_4bit', False)
        load_in_8bit = kwargs.pop('load_in_8bit', False)
        bnb_4bit_quant_type = kwargs.pop('bnb_4bit_quant_type', 'nf4')
        bnb_4bit_compute_dtype = kwargs.pop('bnb_4bit_compute_dtype', None)

        final_quant_kwargs = {}
        if load_in_4bit:
            final_quant_kwargs['load_in_4bit'] = load_in_4bit
        if load_in_8bit:
            final_quant_kwargs['load_in_8bit'] = load_in_8bit
        if load_in_4bit:
            # The bnb_4bit_* options are only forwarded when 4-bit loading is
            # requested; on their own they would reach the model as unknown kwargs.
            if bnb_4bit_quant_type:
                final_quant_kwargs['bnb_4bit_quant_type'] = bnb_4bit_quant_type
            if bnb_4bit_compute_dtype is not None:
                final_quant_kwargs['bnb_4bit_compute_dtype'] = bnb_4bit_compute_dtype
        final_quant_kwargs['device_map'] = "auto"

        print(f"Loading LlamaGuard model '{self.model_name}' with quantization: {final_quant_kwargs}")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch_dtype,
            **final_quant_kwargs
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        if self.tokenizer.pad_token is None:
            # Fall back to EOS as the padding token when the tokenizer defines none
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Whatever is left in kwargs is treated as generation options
        self.generate_kwargs = kwargs

    def __call__(self, text: str):
        """Classify `text` (sent as a single user turn) and return only the newly generated text."""
        chat = [{"role": "user", "content": text}]
        input_ids = self.tokenizer.apply_chat_template(chat, return_tensors="pt")
        # Inputs must live on the same device as the model before generating
        input_ids = input_ids.to(self.model.device)
        output = self.model.generate(input_ids=input_ids, **self.generate_kwargs)
        # Drop the prompt tokens so only the model's verdict is decoded
        prompt_len = input_ids.shape[-1]
        return self.tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

Overwriting ./redDebate/llm.py


In [None]:
%%writefile ./redDebate/run.py
import os
import shutil
import torch

from dotenv import load_dotenv
from datetime import datetime
import json
import subprocess
from pathlib import Path

from .debate import Debate, DevilAngelDebate, SocraticDebate
from .self_critique import SelfCritique
from .agents import DebateAgent, DevilAngelAgent, EvalAgent, FeedbackAgent, HumanAgent, SelfCriticAgent
from .memory import ShortTermMemory, LongTermMemory, VectorStoreMemory
from .llm import AnyHuggingFace, AnyOpenAILLM, LlamaGuard, AnyGoogleGenerativeAI
from .debate_prompts import debate_agent_prompt, devil_debater_prompt, angel_debater_prompt , feedback_prmpt, socratic_agent_prompt, eval_prmpt, init_response_prompt, self_critique_prompt, revise_response_prompt
from .dataloader import load_datasets
from .utils import setup_logger
from .metrics import calculate_debate_metrics, log_results_to_wandb


load_dotenv()

# --- Helper functions (save/load checkpoint etc.) are unchanged ---
def save_debate_log(debate, checkpoint_folder: str, dataset_name: str, question_idx: int):
    """Write one debate to `<dataset_name>_q<question_idx>.json` inside `checkpoint_folder` (created if missing)."""
    os.makedirs(checkpoint_folder, exist_ok=True)
    log_filename = f"{dataset_name}_q{question_idx}.json"
    debate.save_to_json(os.path.join(checkpoint_folder, log_filename))
def save_checkpoint(checkpoint_folder: str, completed_debates: dict):
    """
    Persist the progress dict to `<checkpoint_folder>/checkpoint.json`.

    The data is serialized before any file is opened and then moved into
    place with `os.replace`, so a serialization error or an interrupted write
    never leaves a truncated checkpoint behind.

    Raises:
        TypeError: If `completed_debates` contains a value that is not JSON serializable.
    """
    os.makedirs(checkpoint_folder, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_folder, "checkpoint.json")
    # Serialize first: a failure here must not touch the existing checkpoint.
    payload = json.dumps(completed_debates, indent=4)
    tmp_path = checkpoint_path + ".tmp"
    with open(tmp_path, "w") as f:
        f.write(payload)
    # Swap the finished file in; the previous checkpoint stays valid until this point.
    os.replace(tmp_path, checkpoint_path)
def load_checkpoint(checkpoint_folder: str):
    """Return the progress dict stored in `checkpoint.json`, or `{}` when that file does not exist."""
    checkpoint_path = os.path.join(checkpoint_folder, "checkpoint.json")
    if not os.path.exists(checkpoint_path):
        return {}
    with open(checkpoint_path, "r") as checkpoint_file:
        return json.load(checkpoint_file)
def load_saved_debates(checkpoint_folder: str, conversation_type: str ='debate'):
    """
    Rebuild previously saved conversations from the JSON logs in `checkpoint_folder`.

    Every `*.json` file except `checkpoint.json` is loaded with
    `Debate.load_from_json` ('debate') or `SelfCritique.load_from_json`
    ('selfcritique'). A missing folder or any other `conversation_type`
    yields an empty list.
    """
    if not os.path.exists(checkpoint_folder):
        return []
    if conversation_type not in ('debate', 'selfcritique'):
        return []

    log_files = [
        log_file for log_file in Path(checkpoint_folder).glob("*.json")
        if log_file.name != "checkpoint.json"
    ]
    if conversation_type == 'debate':
        return [Debate.load_from_json(str(log_file)) for log_file in log_files]
    return [SelfCritique.load_from_json(str(log_file)) for log_file in log_files]
# --- End of helper functions ---


def _split_model_spec(model_spec):
    """
    Split a `type:name:use_chat` model spec into its three parts.

    The first colon ends the type and the last colon starts the chat flag, so
    a model name that itself contains colons is kept intact.

    Raises:
        ValueError: If the spec does not contain at least two colons.
    """
    try:
        model_type, remainder = model_spec.split(':', 1)
        model_name, use_chat = remainder.rsplit(':', 1)
    except ValueError:
        raise ValueError(
            f"Invalid model spec '{model_spec}': expected 'type:name:use_chat'"
        ) from None
    return model_type, model_name, use_chat


def init_agents(debater_models, devil_model, angel_model, evaluator_model, feedback_generator, questioner_model, self_critique_model, logger):
    """
    Build the debate, evaluator and feedback agents from `type:name:use_chat` model specs.

    Hugging Face models ('huggingface' type) are loaded 4-bit quantized (nf4,
    bfloat16 compute). Any other type is loaded through `AnyOpenAILLM`.
    An evaluator whose name contains 'meta-llama/Llama-Guard' is loaded with
    `LlamaGuard`; an evaluator named 'moderation' gets an `EvalAgent` without
    a base LLM.

    `devil_model`, `angel_model`, `questioner_model` and `self_critique_model`
    are accepted for interface compatibility but no agent is created for
    them; a warning is logged when one is supplied.

    Returns:
        tuple: `(debate_agents, None, None, eval_agent, feedback_agent, [], None, None)`,
        where `eval_agent` / `feedback_agent` are None when the corresponding
        spec is empty.
    """
    # These roles are not built by this version: say so instead of dropping them silently.
    ignored_specs = {
        'devil_model': devil_model,
        'angel_model': angel_model,
        'questioner_model': questioner_model,
        'self_critique_model': self_critique_model,
    }
    for role, spec in ignored_specs.items():
        if spec:
            logger.warning(f"'{role}' ({spec}) is not supported by this init_agents and will be ignored")

    # Common quantization arguments for all Hugging Face models
    hf_quant_args = {
        "load_in_4bit": True,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.bfloat16
    }

    debate_agents = []
    for i, model in enumerate(debater_models):
        model_type, model_name, use_chat = _split_model_spec(model)
        if model_type == 'huggingface':
            logger.info(f"Loading HuggingFace model: '{model_name}' as Agent-{i}")
            llm = AnyHuggingFace(
                model_name=model_name, use_chat=use_chat, prompt_template=debate_agent_prompt,
                do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=512,
                return_full_text=False,  # return only the completion, not prompt + completion
                **hf_quant_args
            )
        else:
            # NOTE(review): every non-HuggingFace type is routed to OpenAI here,
            # including Google model specs — confirm this is intended.
            llm = AnyOpenAILLM(model_name=model_name, use_chat=use_chat, prompt_template=debate_agent_prompt)

        debate_agents.append(DebateAgent(name=f'Agent-{i}', base_llm=llm))

    # Evaluator agent
    eval_agent = None
    if evaluator_model:
        evaluator_model_type, evaluator_model_name, evaluator_use_chat = _split_model_spec(evaluator_model)
        if 'meta-llama/Llama-Guard' in evaluator_model_name:
            logger.info(f"Loading LlamaGuard model: '{evaluator_model_name}' as Agent-Eval")
            eval_llm = LlamaGuard(
                model_name=evaluator_model_name,
                max_new_tokens=20,
                pad_token_id=0,
                **hf_quant_args
            )
            eval_agent = EvalAgent(name='Agent-Eval', base_llm=eval_llm)
        elif evaluator_model_type == 'huggingface':
            logger.info(f"Loading HuggingFace model: '{evaluator_model_name}' as Agent-Eval")
            eval_llm = AnyHuggingFace(
                model_name=evaluator_model_name, use_chat=evaluator_use_chat, prompt_template=eval_prmpt,
                do_sample=True, temperature=0.3, top_p=0.8,
                **hf_quant_args
            )
            eval_agent = EvalAgent(name='Agent-Eval', base_llm=eval_llm)
        elif evaluator_model_name == 'moderation':
            # No base LLM is attached for the 'moderation' evaluator
            eval_agent = EvalAgent(name='Agent-Eval', base_llm=None)
        else:
            eval_llm = AnyOpenAILLM(model_name=evaluator_model_name, use_chat=evaluator_use_chat, prompt_template=eval_prmpt)
            eval_agent = EvalAgent(name='Agent-Eval', base_llm=eval_llm)

    # Feedback agent
    feedback_agent = None
    if feedback_generator:
        feedback_generator_type, feedback_generator_name, feedback_generator_use_chat = _split_model_spec(feedback_generator)
        if feedback_generator_type == 'huggingface':
            logger.info(f"Loading HuggingFace model: '{feedback_generator_name}' as Agent-Feedback")
            feedback_llm = AnyHuggingFace(
                model_name=feedback_generator_name, use_chat=feedback_generator_use_chat, prompt_template=feedback_prmpt,
                do_sample=True, temperature=0.3, top_p=0.8,
                return_full_text=False,  # return only the completion, not prompt + completion
                **hf_quant_args
            )
        else:
            feedback_llm = AnyOpenAILLM(model_name=feedback_generator_name, use_chat=feedback_generator_use_chat, prompt_template=feedback_prmpt)
        feedback_agent = FeedbackAgent(name='Agent-Feedback', base_llm=feedback_llm)

    # Positions 2, 3, 7 and 8 (devil, angel, questioner, self-critic) are always None
    # and position 6 (human debaters) is always empty in this version.
    return debate_agents, None, None, eval_agent, feedback_agent, [], None, None


def run_debate(debater_models, devil_model, angel_model, evaluator_model, feedback_generator, questioner_model, self_critique_model, datasets, debate_rounds, max_total_debates, output_file, long_term_memory_index_name, checkpoint_dir=None):
    """
    Run one debate per dataset question, checkpointing after every debate.

    Progress is stored in `checkpoint_dir` (default `checkpoints/debate_<timestamp>`):
    each finished debate is written to its own JSON file and `checkpoint.json`
    records the index of the last completed question per dataset, so a run
    pointed at an existing `checkpoint_dir` skips the questions already done.
    Long-term memory is a `VectorStoreMemory` when `long_term_memory_index_name`
    is given, otherwise a `LongTermMemory`; it is saved to
    `results/<timestamp>/long_term_memory.txt` when the function exits, whether
    or not an error occurred.

    Returns:
        list | None: The debates restored from the checkpoint plus the ones
        run in this call. The list is returned early once it holds
        `max_total_debates` entries. None is returned when an exception was
        raised (the exception is logged, not propagated).
    """
    logger = setup_logger(output_file)
    if long_term_memory_index_name:
        long_term_memory = VectorStoreMemory('LTM', index_name=long_term_memory_index_name)
    else:
        long_term_memory = LongTermMemory('LTM')
    datasets_obj = load_datasets(datasets)
    debate_agents, devil_agent, angel_agent, eval_agent, feedback_agent, debate_humans, questioner_agent, self_critic_agent = init_agents(debater_models, devil_model, angel_model, evaluator_model, feedback_generator, questioner_model, self_critique_model, logger)
    # No limit when max_total_debates is None
    max_total_debates = max_total_debates if max_total_debates is not None else float('inf')
    timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
    if checkpoint_dir is None: checkpoint_dir = f"checkpoints/debate_{timestamp}"
    os.makedirs(checkpoint_dir, exist_ok=True)
    completed_debates = load_checkpoint(checkpoint_dir)
    debates = load_saved_debates(checkpoint_dir)
    try:
        for dataset_name, dataset in datasets_obj.items():
            logger.info(f"Running on dataset: {dataset_name}")
            # Index of the last question finished in a previous run (-1: none yet)
            last_processed = completed_debates.get(dataset_name, -1)
            for idx, question in enumerate(dataset):
                if idx <= last_processed: continue
                if len(debates) >= max_total_debates: return debates
                logger.info(f"Debate on: {question}")
                short_term_memory = ShortTermMemory('STM')
                if isinstance(long_term_memory, VectorStoreMemory):
                    long_term_memory.update_vector_memory(question['text'], k=5)
                debate = Debate(question, debate_agents, debate_humans, eval_agent, feedback_agent, short_term_memory, long_term_memory, log_file=output_file)
                debate.start(rounds=debate_rounds)
                debates.append(debate)
                # Persist the debate and the progress marker before moving on
                save_debate_log(debate, checkpoint_dir, dataset_name, idx)
                completed_debates[dataset_name] = idx
                save_checkpoint(checkpoint_dir, completed_debates)
        # All datasets processed: hand the debates back. Without this return a
        # successful run was indistinguishable from a failed one (both gave None).
        return debates
    except Exception as e:
        logger.exception(f"Error in debate: {e}")
        return None
    finally:
        logger.info("Saving long term memory...")
        results_dir = f'results/{timestamp}'
        os.makedirs(results_dir, exist_ok=True)
        long_term_memory_path = f'{results_dir}/long_term_memory.txt'
        long_term_memory.save(long_term_memory_path)

Overwriting ./redDebate/run.py


In [None]:
#######################################################################################
# Running the debate
#######################################################################################

In [None]:
# Launch the debate. Model arguments use the `type:name:use_chat` format:
#   --models              three Hugging Face debaters
#   --evaluator           Llama Guard as the evaluator
#   --feedback_generator  gpt-4o-mini (OpenAI) as the feedback generator
# The run uses the 25% HarmBench sample created above, 3 debate rounds, and the
# 'red-debate-memory' index name for long-term memory.
!accelerate launch main.py \
    --models \
        huggingface:"mistralai/Mistral-7B-Instruct-v0.2":true \
        huggingface:"meta-llama/Llama-3.2-3B-Instruct":true \
        huggingface:"Qwen/Qwen2-7B-Instruct":true \
    --evaluator huggingface:"meta-llama/Llama-Guard-3-8B":true \
    --feedback_generator openai:"gpt-4o-mini":true \
    --datasets harmbench:"harmbench_small.csv" \
    --debate_rounds 3 \
    --long_term_memory_index_name 'red-debate-memory'

ipex flag is deprecated, will be removed in Accelerate v1.10. From 2.7.0, PyTorch has all needed optimizations for Intel CPU and XPU.
The following values were not passed to `accelerate launch` and had defaults used instead:
	`--num_processes` was set to a value of `1`
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--dynamo_backend` was set to a value of `'no'`
2025-07-29 23:12:12.861862: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-29 23:12:12.880117: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753830732.901795    8987 cuda_dnn.cc:8310] Unable to register cuDNN fa

In [None]:
############################################################################################
# The code in the following cell copies the debate checkpoints to Google Drive
############################################################################################

In [None]:
import os
import shutil
from google.colab import drive

def _list_checkpoint_dirs(parent_dir):
    """Return the names of the `debate_*` sub-directories located directly inside `parent_dir`."""
    return [
        d for d in os.listdir(parent_dir)
        if os.path.isdir(os.path.join(parent_dir, d)) and d.startswith('debate_')
    ]


# 1. Mount Google Drive
print("Mounting Google Drive...")
try:
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")
except Exception as e:
    print(f"Error mounting Google Drive: {e}")

# Define source and destination paths
source_checkpoints_dir = '/content/RedDebate/checkpoints/'
drive_checkpoints_base_dir = '/content/drive/MyDrive/'
destination_checkpoints_dir = os.path.join(drive_checkpoints_base_dir, 'RedDebate_checkpoints')

# Without a mounted Drive, the makedirs call below would silently create the
# destination on the runtime's local disk and the "backup" would disappear
# with the session, so stop here instead.
if not os.path.isdir(drive_checkpoints_base_dir):
    raise RuntimeError(
        f"Google Drive is not available at {drive_checkpoints_base_dir}; checkpoints were not copied."
    )

# 2. Ensure destination directory exists in Google Drive
print(f"Ensuring destination directory exists: {destination_checkpoints_dir}")
os.makedirs(destination_checkpoints_dir, exist_ok=True)
print("Destination directory is ready.")

# 3. Check if source checkpoints directory exists
if not os.path.exists(source_checkpoints_dir):
    print(f"Source checkpoints directory not found: {source_checkpoints_dir}. No checkpoints to copy.")
else:
    print(f"Source checkpoints directory found: {source_checkpoints_dir}")
    # 4. Identify new checkpoint directories to copy
    # (checkpoint folders are named debate_<timestamp>)
    source_checkpoint_dirs = _list_checkpoint_dirs(source_checkpoints_dir)
    print(f"Found {len(source_checkpoint_dirs)} potential checkpoint directories in source.")

    existing_drive_checkpoint_dirs = _list_checkpoint_dirs(destination_checkpoints_dir)
    print(f"Found {len(existing_drive_checkpoint_dirs)} existing checkpoint directories in Google Drive.")

    # Determine which checkpoint directories are new
    already_in_drive = set(existing_drive_checkpoint_dirs)
    new_checkpoint_dirs_to_copy = [
        d for d in source_checkpoint_dirs if d not in already_in_drive
    ]

    if not new_checkpoint_dirs_to_copy:
        print("No new checkpoint directories found to copy to Google Drive.")
    else:
        print(f"Found {len(new_checkpoint_dirs_to_copy)} new checkpoint directories to copy.")
        # 5. Copy new checkpoint directories
        for checkpoint_dir_name in new_checkpoint_dirs_to_copy:
            source_path = os.path.join(source_checkpoints_dir, checkpoint_dir_name)
            destination_path = os.path.join(destination_checkpoints_dir, checkpoint_dir_name)
            print(f"Copying new checkpoint: {checkpoint_dir_name} from {source_path} to {destination_path}")
            try:
                shutil.copytree(source_path, destination_path)
                print(f"Successfully copied {checkpoint_dir_name}.")
            except Exception as e:
                print(f"Error copying {checkpoint_dir_name}: {e}")
                # A half-copied folder would count as "existing" on the next run
                # and never be completed, so remove it and let the next run retry.
                shutil.rmtree(destination_path, ignore_errors=True)

        print("Finished checking and copying checkpoints.")

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully.
Ensuring destination directory exists: /content/drive/MyDrive/RedDebate_checkpoints
Destination directory is ready.
Source checkpoints directory found: /content/RedDebate/checkpoints/
Found 2 potential checkpoint directories in source.
Found 0 existing checkpoint directories in Google Drive.
Found 2 new checkpoint directories to copy.
Copying new checkpoint: debate_20250729230858479721 from /content/RedDebate/checkpoints/debate_20250729230858479721 to /content/drive/MyDrive/RedDebate_checkpoints/debate_20250729230858479721
Successfully copied debate_20250729230858479721.
Copying new checkpoint: debate_20250729231719827885 from /content/RedDebate/checkpoints/debate_20250729231719827885 to /content/drive/MyDrive/RedDebate_checkpoints/debate_20250729231719827885
Successfully copied debate_20250729231719

In [None]:
############################################################################################
# Snippet that can be used to manually copy a directory to your Google Drive, if needed
############################################################################################

# # Define source and destination paths
# source_checkpoint_dir = '/content/RedDebate/checkpoints/debate_20250729231719827885'
# destination_drive_dir = '/content/drive/MyDrive/RedDebate_checkpoints/'

# # Use !cp -r to copy the directory
# print(f"Copying {source_checkpoint_dir} to {destination_drive_dir}")
# !cp -r "$source_checkpoint_dir" "$destination_drive_dir"
# print("Copy command executed.")

Copying /content/RedDebate/checkpoints/debate_20250729231719827885 to /content/drive/MyDrive/RedDebate_checkpoints/
Copy command executed.


In [None]:
#############################################################################
# Following 3 cells allow for the re-use of embeddings learned during debate
#############################################################################

In [None]:
# #############################################################################
# # Exporting pinecone indices to file, you don't need this if using mine
# #############################################################################

# import os
# import pinecone
# import pandas as pd

# # --- Configuration ---
# # Set your environment variables before running
# PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
# INDEX_NAME = "red-debate-memory"  # The name of the index you want to export
# OUTPUT_FILE = "red_debate_memory_export.parquet"

# # --- 1. Initialize Connection ---
# print(f"Connecting to Pinecone index '{INDEX_NAME}'...")
# pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
# index = pc.Index(INDEX_NAME)

# print("Connection successful. Fetching index statistics...")
# stats = index.describe_index_stats()
# total_vectors = stats['total_vector_count']
# print(f"Total vectors to export: {total_vectors}")

# # --- 2. Fetch All Vectors ---
# # We query with a "dummy" vector to get all vectors back.
# # Pinecone's query limit is 10,000 per request, so this is suitable for most indexes.
# # For indexes >10k vectors, a more complex pagination logic would be needed.
# print("Fetching all vectors from the index...")
# all_data = index.query(
#     vector=[0.0] * stats['dimension'], # Create a zero vector of the correct dimension
#     top_k=10000, # Set to a number larger than your total_vectors
#     include_metadata=True,
#     include_values=True
# )

# # --- 3. Process and Format Data ---
# print("Formatting data for export...")
# exported_records = []
# for match in all_data['matches']:
#     record = {
#         'id': match['id'],
#         'vector': match['values'],
#         'text': match['metadata'].get('text', '') # .get() handles missing text metadata
#     }
#     exported_records.append(record)

# # --- 4. Save to Parquet File ---
# print(f"Saving {len(exported_records)} records to '{OUTPUT_FILE}'...")
# df = pd.DataFrame(exported_records)
# df.to_parquet(OUTPUT_FILE, index=False)

# print("\nExport complete!")
# print(f"Your Pinecone index has been successfully saved to '{OUTPUT_FILE}'.")
# print("You can now share this file with your collaborators.")

In [None]:
#############################################################################
# Importing pinecone indices from file
#############################################################################
import os
import pinecone
import pandas as pd
from tqdm import tqdm # For a helpful progress bar

# --- Configuration ---
# The collaborator sets PINECONE_API_KEY in their environment before running.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    # Fail before doing any work instead of handing None to the Pinecone client.
    raise RuntimeError("PINECONE_API_KEY is not set; add it to your environment before running the import.")
# They can choose their own index name
INDEX_NAME = "shared-red-debate-memory"
INPUT_FILE = "red_debate_memory_export.parquet" # Download this file first

# --- 1. Load Data from File ---
print(f"Loading data from '{INPUT_FILE}'...")
df = pd.read_parquet(INPUT_FILE)
print(f"Loaded {len(df)} records.")

# Validate the export before touching the remote index: the rest of the cell
# reads exactly these three columns and needs at least one row to infer the
# vector dimension.
missing_columns = {'id', 'vector', 'text'} - set(df.columns)
if missing_columns:
    raise ValueError(f"'{INPUT_FILE}' is missing required column(s): {sorted(missing_columns)}")
if df.empty:
    raise ValueError(f"'{INPUT_FILE}' contains no records; nothing to import.")

# --- 2. Initialize Collaborator's Pinecone Connection ---
print("Connecting to Pinecone...")
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

# Determine vector dimension from the data
dimension = len(df['vector'].iloc[0])
print(f"Vector dimension detected: {dimension}")
# Every row must share that dimension, otherwise the upsert would fail part-way through.
if not (df['vector'].map(len) == dimension).all():
    raise ValueError(f"Not all vectors in '{INPUT_FILE}' have dimension {dimension}.")

# --- 3. Create a New Index (if it doesn't exist) ---
if INDEX_NAME not in pc.list_indexes().names():
    print(f"Index '{INDEX_NAME}' does not exist. Creating a new one...")
    # NOTE(review): the cloud/region must be one your Pinecone plan allows - confirm before running.
    pc.create_index(
        name=INDEX_NAME,
        dimension=dimension,
        metric='cosine', # Or your preferred metric
        spec=pinecone.ServerlessSpec(cloud='aws', region='us-west-2')
    )
    print("Index created successfully.")
else:
    print(f"Found existing index '{INDEX_NAME}'.")

index = pc.Index(INDEX_NAME)

# --- 4. Prepare and Upsert Data in Batches ---
print("Preparing data for upload...")
# Format the data into the required (id, vector, metadata) tuples.
# Iterating the columns directly avoids the per-row Series that iterrows() builds,
# and each vector is normalised to a plain list of Python floats
# (pd.read_parquet typically returns list columns as numpy arrays).
vectors_to_upsert = [
    (str(record_id), [float(value) for value in vector], {'text': str(text)})
    for record_id, vector, text in zip(df['id'], df['vector'], df['text'])
]

# Upsert data in batches for performance and reliability
batch_size = 100
print(f"Upserting {len(vectors_to_upsert)} vectors in batches of {batch_size}...")
for i in tqdm(range(0, len(vectors_to_upsert), batch_size)):
    batch = vectors_to_upsert[i:i + batch_size]
    index.upsert(vectors=batch)

print("\nImport complete!")
print(f"All vectors have been successfully imported into the '{INDEX_NAME}' index in your account.")

In [None]:
######################################################################
# Example script to re-use pinecone embeddings; I have not tried this
######################################################################

# 1. Open a Pinecone client and choose the embedding model handed to the vector store
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-large")

# Attach to the index RedDebate populated (the name has to match exactly)
pinecone_index_name = 'red-debate-memory'
vector_store = PineconeVectorStore.from_existing_index(
    index_name=pinecone_index_name,
    embedding=embeddings_model,
)

# 2. Chat model that answers the user
# NOTE(review): a Qwen model name passed to ChatOpenAI presumably requires an
# OpenAI-compatible endpoint to be configured elsewhere - confirm.
llm = ChatOpenAI(
    model="Qwen/Qwen2-7B-Instruct",
    temperature=0.7,
)

# 3. Prompt with two placeholders: {safety_rules} (system) and {user_prompt} (human)
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a helpful assistant. You must strictly follow these critical safety rules that were discovered from past mistakes:
{safety_rules}
---
If no specific rules are provided, answer carefully and safely.""",
        ),
        ("human", "{user_prompt}"),
    ]
)

# 4. Pipe the filled-in prompt into the chat model
chain = prompt_template | llm

# Flask application that the /ask route is registered on
app = Flask(__name__)

# --- Real-Time Inference ---
@app.route("/ask", methods=["POST"])
def ask_question():
    """Answer a user prompt, augmented with safety rules retrieved from Pinecone.

    Expects a JSON request body of the form ``{"prompt": "<text>"}``.

    Returns:
        A JSON response ``{"response": <model output text>}`` on success, or
        ``{"error": <message>}`` with HTTP status 400 when the body is not a
        JSON object containing a non-empty string ``prompt``.
    """
    # The body is untrusted: parse it leniently and validate it explicitly so a
    # malformed request gets a 400 instead of an unhandled KeyError/TypeError (500).
    payload = request.get_json(silent=True)
    user_prompt = payload.get("prompt") if isinstance(payload, dict) else None
    if not isinstance(user_prompt, str) or not user_prompt.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty string 'prompt' field."}), 400

    # 1. RETRIEVE relevant safety rules from Pinecone, just like RedDebate
    # We use the user's prompt as the query.
    retrieved_docs = vector_store.similarity_search(user_prompt, k=3) # Get top 3 rules

    # 2. INJECT the rules into the prompt
    safety_rules = "\n".join(f"- {doc.page_content}" for doc in retrieved_docs)

    # If no relevant rules are found, provide a default message.
    if not safety_rules:
        safety_rules = "No specific rules apply to this query."

    # 3. Invoke the LLM with the augmented context
    response = chain.invoke({
        "safety_rules": safety_rules,
        "user_prompt": user_prompt
    })

    return jsonify({"response": response.content})

if __name__ == "__main__":
    # Start Flask's development server.
    # Debug mode is opt-in (FLASK_DEBUG=1) because it enables the interactive
    # debugger, which allows code execution from the browser.
    # The auto-reloader is disabled because it re-executes the process, which
    # does not work when this cell is run from a notebook kernel.
    app.run(debug=os.environ.get("FLASK_DEBUG") == "1", use_reloader=False)