In [1]:
# Environment sanity check: show the GPUs/driver visible to this kernel,
# then the version of the CUDA compiler (nvcc) on the PATH.
!nvidia-smi
!nvcc --version

Wed Feb 19 13:10:50 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-SXM2-32GB           On  |   00000000:1A:00.0 Off |                    0 |
| N/A   34C    P0             39W /  300W |       0MiB /  32768MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla V100-SXM2-32GB           On  |   00

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Move to the project root (the parent of the directory the notebook started in).
# The start directory is recorded only once per kernel session, so re-running
# this cell always lands in the same place instead of climbing one level higher
# on every execution (which a bare `os.chdir("..")` would do).
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_DIR))

In [10]:
import argparse
from tqdm import tqdm
from utils.io_utils import IOHandler, DebiasedOutput
from main import MultiLLMDebiasing
from prompts import get_feedback_prompt
# IOHandler and MultiLLMDebiasing are the real project implementations imported above (nothing is mocked here).

In [5]:
# Run configuration. These values are bundled into the `args` namespace below.
harm_assignments_file = "harm_assignments.yaml"

# NOTE(review): absolute, user-specific paths -- adjust before running elsewhere.
input_file = "/home/sklioui/captions/samples.csv"
output_file = "/home/sklioui/captions/debiased_samples_wtf.json"

# Settings copied into the `config` dict given to MultiLLMDebiasing.
max_rounds = 3
max_new_tokens = 512
temperature = 0.0

# Flags: each is defined exactly once and must be a plain bool. A trailing
# comma (`True,`) would silently turn the value into the tuple `(True,)`.
return_lineage = True
return_feedback = True
include_metadata = True

In [6]:
# Bundle the notebook-level settings into an argparse.Namespace so the rest of
# the notebook can read them as `args.<name>`, like parsed CLI arguments.
args = argparse.Namespace(**{
    "harm_assignments": harm_assignments_file,
    "input_file": input_file,
    "output_file": output_file,
    "max_rounds": max_rounds,
    "max_new_tokens": max_new_tokens,
    "temperature": temperature,
    "return_lineage": return_lineage,
    "return_feedback": return_feedback,
    "include_metadata": include_metadata,
})

In [7]:
# Parse the harm-assignment file; the call returns the per-model assignments
# together with a strategy value, both of which are handed to MultiLLMDebiasing.
harm_assignments, strategy = IOHandler.process_harm_assignments(args.harm_assignments)

# Read the queries to be processed from the input file.
queries = IOHandler.load_queries(args.input_file)

# Copy the relevant settings out of `args` into the config dict.
config = {
    setting: getattr(args, setting)
    for setting in ("max_rounds", "max_new_tokens", "temperature")
}

In [8]:
# Display the mapping returned by IOHandler.process_harm_assignments
# (model name -> list of harm-type labels) as a quick visual check.
harm_assignments

{'Qwen/Qwen2.5-14B-Instruct': [],
 'tiiuae/Falcon3-10B-Instruct': ['DEROGATORY',
  'DISPARATE_PERFORMANCE',
  'ERASURE',
  'EXCLUSIONARY',
  'MISREPRESENTATION',
  'STEREOTYPING',
  'TOXICITY',
  'DIRECT_DISCRIMINATION',
  'INDIRECT_DISCRIMINATION'],
 'meta-llama/Llama-3.1-8B-Instruct': ['DEROGATORY',
  'DISPARATE_PERFORMANCE',
  'ERASURE',
  'EXCLUSIONARY',
  'MISREPRESENTATION',
  'STEREOTYPING',
  'TOXICITY',
  'DIRECT_DISCRIMINATION',
  'INDIRECT_DISCRIMINATION'],
 'unsloth/phi-4': ['DEROGATORY',
  'DISPARATE_PERFORMANCE',
  'ERASURE',
  'EXCLUSIONARY',
  'MISREPRESENTATION',
  'STEREOTYPING',
  'TOXICITY',
  'DIRECT_DISCRIMINATION',
  'INDIRECT_DISCRIMINATION']}

In [9]:
# Build the multi-model debiasing pipeline from the parsed assignments.
# NOTE: in the recorded run this cell loaded four models one after another and
# took roughly 11 minutes, so avoid re-running it unnecessarily.
debiasing = MultiLLMDebiasing(
    harm_assignments=harm_assignments,
    config=config,
    strategy=strategy,
)

2025-02-19 13:16:19,464 - main - INFO - Initializing MultiLLMDebiasing with strategy: centralized
2025-02-19 13:16:19,467 - main - INFO - Loading model: Qwen/Qwen2.5-14B-Instruct


Found token file at: config/hf_token.yml


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

2025-02-19 13:20:30,540 - main - INFO - Assigned harm types for Qwen/Qwen2.5-14B-Instruct: set()
2025-02-19 13:20:30,542 - main - INFO - Loading model: tiiuae/Falcon3-10B-Instruct


Found token file at: config/hf_token.yml


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

2025-02-19 13:23:02,899 - main - INFO - Assigned harm types for tiiuae/Falcon3-10B-Instruct: {'ERASURE', 'STEREOTYPING', 'EXCLUSIONARY', 'DEROGATORY', 'DIRECT_DISCRIMINATION', 'INDIRECT_DISCRIMINATION', 'MISREPRESENTATION', 'TOXICITY', 'DISPARATE_PERFORMANCE'}
2025-02-19 13:23:02,901 - main - INFO - Loading model: meta-llama/Llama-3.1-8B-Instruct


Found token file at: config/hf_token.yml


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

2025-02-19 13:24:54,887 - main - INFO - Assigned harm types for meta-llama/Llama-3.1-8B-Instruct: {'ERASURE', 'STEREOTYPING', 'EXCLUSIONARY', 'DEROGATORY', 'DIRECT_DISCRIMINATION', 'INDIRECT_DISCRIMINATION', 'MISREPRESENTATION', 'TOXICITY', 'DISPARATE_PERFORMANCE'}
2025-02-19 13:24:54,889 - main - INFO - Loading model: unsloth/phi-4


Found token file at: config/hf_token.yml


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

2025-02-19 13:27:50,489 - main - INFO - Assigned harm types for unsloth/phi-4: {'ERASURE', 'STEREOTYPING', 'EXCLUSIONARY', 'DEROGATORY', 'DIRECT_DISCRIMINATION', 'INDIRECT_DISCRIMINATION', 'MISREPRESENTATION', 'TOXICITY', 'DISPARATE_PERFORMANCE'}
2025-02-19 13:27:50,491 - main - INFO - Successfully initialized centralized reducer


In [19]:
# Run every query through the debiasing pipeline and collect the results.
# A failing query is reported and skipped so that one error does not abort the
# whole run; the indices of skipped queries are kept in `failed_indices` so the
# gaps in the saved output can be traced back afterwards.
outputs = []
failed_indices = []
for i, query in tqdm(enumerate(queries), desc="Processing queries", total=len(queries)):
    try:
        result = debiasing.get_debiased_response(
            query,
            args.return_lineage,
            args.return_feedback,
        )
    except Exception as e:
        # tqdm.write prints the message without corrupting the progress bar.
        tqdm.write(f"Error processing query {i}: {e}")
        failed_indices.append(i)
        continue

    # Attach the query's position only when metadata was requested.
    metadata = {"query_index": i} if args.include_metadata else None

    outputs.append(
        DebiasedOutput(
            original_query=query,
            debiased_response=result.final_response,
            lineage=result.lineage,
            feedback=result.feedback,
            metadata=metadata,
        )
    )

# Make partial results explicit before they are written to disk.
if failed_indices:
    print(f"{len(failed_indices)} of {len(queries)} queries failed: {failed_indices}")

# Save results
IOHandler.save_outputs(outputs, args.output_file)

Processing queries: 100%|██████████| 3/3 [06:33<00:00, 131.15s/it]
