In the first stage, we run the data generation pipeline.

In [None]:
# --- Cell 1: Installation & Setup ---
# Install required libraries.  Uncomment for installation.
# !pip install -q transformers accelerate bitsandbytes datasets sentencepiece protobuf openai

In [None]:
import sys
import os
from google.colab import drive, userdata

# Mount Google Drive so we can save our checkpoints
drive.mount('/content/drive')

# Add the project directory to Python's import path so the project modules
# imported below can be found. The membership check keeps sys.path free of
# duplicate entries when this cell is re-run in the same session.
project_dir = '/content/spectral-llm_hallucinations-project'
if project_dir not in sys.path:
    sys.path.append(project_dir)

# Import our modules
from spectral_detection import config, data_generation
from spectral_detection.data import loaders

# --- Cell 2: Run Data Generation ---

# Build the generation pipeline first.
# NOTE(review): presumably this loads the Llama-3.2-3B model -- confirm in
# data_generation.Pipeline.
pipeline = data_generation.Pipeline()

# Datasets to process. Each entry is (dataset_name, loaded_examples): the
# loader functions are CALLED here, so every dataset is read into memory
# before any generation starts.
tasks = [
    ("truthfulqa", loaders.load_truthfulqa()),

    # TriviaQA, MMLU, and MATH are sub-sampled to 2000 examples each.
    ("triviaqa", loaders.load_triviaqa(n=2000)),
    # ("mmlu", loaders.load_mmlu(n=2000)), # Uncomment to run MMLU
    # ("math", loaders.load_math(n=2000))  # Uncomment to run MATH
]

# Lazily enumerate every (dataset, temperature) combination, dataset-major,
# so each dataset is generated at every temperature in config.TEMPERATURES
# before moving on to the next dataset.
generation_jobs = (
    (name, examples, temperature)
    for name, examples in tasks
    for temperature in config.TEMPERATURES
)

# The master loop: one generation run per job.
for dataset_name, data, temp in generation_jobs:
    pipeline.run_generation(data, dataset_name, temperature=temp)

# --- Cell 3: Run LLM Judge ---

# Get API Key from Colab Secrets (User must set this up in the sidebar)
api_key = userdata.get('OPENAI_API_KEY')
judge = data_generation.LLMJudge(api_key=api_key)

# Judge every checkpoint file expected for each (dataset, temperature) pair.
# NOTE(review): assumes the generation step writes
# "<dataset>_t<temperature>.jsonl" under config.CHECKPOINT_DIR -- confirm
# against Pipeline.run_generation.
missing_checkpoints = []
for dataset_name, _ in tasks:
    for temp in config.TEMPERATURES:
        # Path to the checkpoint file
        file_path = config.CHECKPOINT_DIR / f"{dataset_name}_t{temp}.jsonl"

        # Record missing files instead of skipping them silently, so an
        # incomplete generation run is visible in the cell output.
        if not file_path.exists():
            missing_checkpoints.append(file_path)
            continue

        judge.evaluate_file(file_path)

if missing_checkpoints:
    print(
        f"Warning: {len(missing_checkpoints)} expected checkpoint file(s) "
        "were not found and were not judged:"
    )
    for missing_path in missing_checkpoints:
        print(f"  - {missing_path}")

print("Phase 1 Complete. Data is saved in Google Drive.")

ModuleNotFoundError: No module named 'google'