In [2]:

%pip install openai --upgrade

Note: you may need to restart the kernel to use updated packages.


In [8]:
import json
from openai import OpenAI
import pandas as pd
import os
from dotenv import load_dotenv
import sys

In [9]:
# Load environment variables from .env file
load_dotenv()

# Now you can access variables using os.getenv()
api_key = os.getenv("OPENAI_API_KEY")
print("API Key loaded successfully.")
# Initialize OpenAI API client
openai = OpenAI(api_key=api_key)

API Key loaded successfully.


## Create the batch files

In [10]:
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "utils")))
from create_batchFiles_openAi import generate_all_batches
question_bank = generate_all_batches()

Processing: choice_with_norm
Batch file created: /home/maloti12/shreshtho/self/Research/ethics/illusion_of_ethics/data/batch/choice_prompts_with_norm_batch.jsonl
Processing: choice_without_norm
Batch file created: /home/maloti12/shreshtho/self/Research/ethics/illusion_of_ethics/data/batch/choice_prompts_without_norm_batch.jsonl
Processing: immoral_with_norm
Batch file created: /home/maloti12/shreshtho/self/Research/ethics/illusion_of_ethics/data/batch/immoral_act_with_norm_prompts_batch.jsonl
Processing: immoral_without_norm
Batch file created: /home/maloti12/shreshtho/self/Research/ethics/illusion_of_ethics/data/batch/immoral_act_without_norm_prompts_batch.jsonl
Processing: moral_action_immoral_outcome
Batch file created: /home/maloti12/shreshtho/self/Research/ethics/illusion_of_ethics/data/batch/with_moralAction_immoralConsequnece_prompts_batch.jsonl
Processing: immoral_action_moral_outcome
Batch file created: /home/maloti12/shreshtho/self/Research/ethics/illusion_of_ethics/data/batc

In [11]:
info_dir = os.path.join("../data", "info")
os.makedirs(info_dir, exist_ok=True)
question_bank_path = os.path.join(info_dir, "question_bank.json")

with open(question_bank_path, "w", encoding="utf-8") as f:
    json.dump(question_bank, f, indent=2, ensure_ascii=False)

print(f"Question bank saved to {question_bank_path}")


Question bank saved to ../data/info/question_bank.json


In [7]:

BATCH_DIR = "../data/batch"
TASKS_DIR = "../data/tasks"
QUESTION_BANK_PATH = "../data/info/question_bank.json"

def load_question_bank(path=QUESTION_BANK_PATH):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def extract_key_from_filename(filename):
    return filename.replace("_batch.jsonl", "")

def create_task_files_for_batches(model="gpt-4o-mini", temperature=0.1):
    os.makedirs(TASKS_DIR, exist_ok=True)
    question_bank = load_question_bank()

    for filename in os.listdir(BATCH_DIR):
        if not filename.endswith("_batch.jsonl"):
            continue

        key = extract_key_from_filename(filename)
        question = None

        for q_key, value in question_bank.items():
            if value["file"] == f"{key}.jsonl":
                question = value["question"]
                break

        if question is None:
            print(f"[Warning] No question found for batch file: {filename}")
            continue

        batch_file_path = os.path.join(BATCH_DIR, filename)
        task_file_path = os.path.join(TASKS_DIR, f"{key}_tasks.jsonl")

        with open(batch_file_path, 'r', encoding='utf-8') as infile, \
             open(task_file_path, 'w', encoding='utf-8') as outfile:

            for idx, line in enumerate(infile):
                data = json.loads(line)
                prompt_text = data.get("prompt", "")
                task = {
                    "custom_id": f"{key}-task-{idx}",
                    "method": "POST",
                    "url": "/v1/chat/completions",
                    "body": {
                        "model": model,
                        "temperature": temperature,
                        "response_format": { "type": "json_object" },
                        "messages": [
                            {"role": "system", "content": question},
                            {"role": "user", "content": prompt_text}
                        ]
                    }
                }
                outfile.write(json.dumps(task) + "\n")

        print(f"✅ Task file created: {task_file_path}")


In [12]:
create_task_files_for_batches()


✅ Task file created: ../data/tasks/choice_prompts_with_norm_tasks.jsonl
✅ Task file created: ../data/tasks/choice_prompts_without_norm_tasks.jsonl
✅ Task file created: ../data/tasks/immoral_act_with_norm_prompts_tasks.jsonl
✅ Task file created: ../data/tasks/immoral_act_without_norm_prompts_tasks.jsonl
✅ Task file created: ../data/tasks/with_moralAction_immoralConsequnece_prompts_tasks.jsonl
✅ Task file created: ../data/tasks/with_immoralAction_moralConsequnece_prompts_tasks.jsonl
✅ Task file created: ../data/tasks/injection_moralAction_immoralOutcome_prompts_tasks.jsonl
✅ Task file created: ../data/tasks/anti_action_immoralAction_prompts_tasks.jsonl
✅ Task file created: ../data/tasks/pro_outcome_immoralAction_prompts_tasks.jsonl
✅ Task file created: ../data/tasks/outcome_weighted_moralAction_prompts_tasks.jsonl


In [13]:
def upload_all_batch_files(tasks_dir=TASKS_DIR):
    batch_file_ids = {}

    for file_name in os.listdir(tasks_dir):
        if not file_name.endswith(".jsonl"):
            continue

        file_path = os.path.join(tasks_dir, file_name)

        with open(file_path, "rb") as f:
            uploaded_file = openai.files.create(
                file=f,
                purpose="batch"
            )
            batch_file_ids[file_name] = uploaded_file.id
            print(f"✅ Uploaded: {file_name} → File ID: {uploaded_file.id}")

    return batch_file_ids

In [14]:
file_id_map = upload_all_batch_files()
def save_file_id_map(file_id_map, output_path="../data/file_id_map.json"):
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(file_id_map, f, indent=2, ensure_ascii=False)
    print(f"✅ File ID map saved to {output_path}")
save_file_id_map(file_id_map)


✅ Uploaded: choice_prompts_with_norm_tasks.jsonl → File ID: file-AwnthnRvAwNHNPWQV9tYzo
✅ Uploaded: choice_prompts_without_norm_tasks.jsonl → File ID: file-UxhyCJvAo2KYRutkbZaf5d
✅ Uploaded: immoral_act_with_norm_prompts_tasks.jsonl → File ID: file-RSPzHMSnYK8ZkxwsvFyNZ7
✅ Uploaded: immoral_act_without_norm_prompts_tasks.jsonl → File ID: file-H2v4RGhDCXW8GVu4Aoficz
✅ Uploaded: with_moralAction_immoralConsequnece_prompts_tasks.jsonl → File ID: file-SX8DdcV73dYsux6GstsY3Q
✅ Uploaded: with_immoralAction_moralConsequnece_prompts_tasks.jsonl → File ID: file-Eb5NHoeWbJgae75oGLCFSD
✅ Uploaded: injection_moralAction_immoralOutcome_prompts_tasks.jsonl → File ID: file-DhnGstxrpFDM3d3ccDCe4Q
✅ Uploaded: anti_action_immoralAction_prompts_tasks.jsonl → File ID: file-1gf5QVEYpfncieGuNcpJ7C
✅ Uploaded: pro_outcome_immoralAction_prompts_tasks.jsonl → File ID: file-MuFdEbqeDgDkwytfC4TQ1U
✅ Uploaded: outcome_weighted_moralAction_prompts_tasks.jsonl → File ID: file-B9WC1rmKvB6LqiDc4WWjSj
✅ File ID map sa

In [15]:

FILE_ID_MAP_PATH = "../data/file_id_map.json"

def create_batch_jobs_from_existing_files(tasks_dir=TASKS_DIR, file_id_map_path=FILE_ID_MAP_PATH):
    # Load file ID map
    with open(file_id_map_path, "r", encoding="utf-8") as f:
        file_id_map = json.load(f)

    batch_jobs = {}

    for file_name in os.listdir(tasks_dir):
        if not file_name.endswith(".jsonl"):
            continue

        if file_name not in file_id_map:
            print(f"⚠️ Skipping {file_name} — not found in file_id_map.")
            continue

        file_id = file_id_map[file_name]

        # Create batch job
        batch_job = openai.batches.create(
            input_file_id=file_id,
            endpoint="/v1/chat/completions",
            completion_window="24h"
        )
        print(f"🚀 Batch job submitted for {file_name} → Job ID: {batch_job.id}")

        batch_jobs[file_name] = {
            "file_id": file_id,
            "batch_job_id": batch_job.id,
            "status": batch_job.status
        }

    return batch_jobs


In [16]:
job_info = create_batch_jobs_from_existing_files()
def save_job_info(job_info, output_path="../data/job_info.json"):
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(job_info, f, indent=2, ensure_ascii=False)
    print(f"✅ Job info saved to {output_path}")
save_job_info(job_info)

🚀 Batch job submitted for choice_prompts_with_norm_tasks.jsonl → Job ID: batch_6820e081b6a48190a2089a2a1a52dd28
🚀 Batch job submitted for choice_prompts_without_norm_tasks.jsonl → Job ID: batch_6820e0822c68819090fdee2646c5ce80
🚀 Batch job submitted for immoral_act_with_norm_prompts_tasks.jsonl → Job ID: batch_6820e082c5708190a75bf2abd17db752
🚀 Batch job submitted for immoral_act_without_norm_prompts_tasks.jsonl → Job ID: batch_6820e0834ebc8190b119eeb79976325a
🚀 Batch job submitted for with_moralAction_immoralConsequnece_prompts_tasks.jsonl → Job ID: batch_6820e083c0948190a1911d7e7df0d769
🚀 Batch job submitted for with_immoralAction_moralConsequnece_prompts_tasks.jsonl → Job ID: batch_6820e08bbc0c8190a553670997ad26e5
🚀 Batch job submitted for injection_moralAction_immoralOutcome_prompts_tasks.jsonl → Job ID: batch_6820e08c46b48190a6211e34a0f1e936
🚀 Batch job submitted for anti_action_immoralAction_prompts_tasks.jsonl → Job ID: batch_6820e08caaf48190b87734efac4d5def
🚀 Batch job submitted

In [24]:
JOB_INFO_PATH = "../data/job_info.json"

def check_job_status(job_id):
    try:
        job_info = openai.batches.retrieve(job_id)
        return job_info
    except Exception as e:
        print(f"⚠️ Error retrieving job status for {job_id}: {e}")
        return None

def check_all_jobs_status(job_info_path=JOB_INFO_PATH):
    with open(job_info_path, "r", encoding="utf-8") as f:
        job_info = json.load(f)

    for file_name, info in job_info.items():
        job_id = info.get("batch_job_id")
        status = check_job_status(job_id)
        if status:
            print(f"📄 {file_name} → Job ID: {job_id}, Status: {status.status}")
        else:
            print(f"⚠️ Could not retrieve status for {file_name}")


In [26]:
check_all_jobs_status()


📄 choice_prompts_with_norm_tasks.jsonl → Job ID: batch_6820e081b6a48190a2089a2a1a52dd28, Status: in_progress
📄 choice_prompts_without_norm_tasks.jsonl → Job ID: batch_6820e0822c68819090fdee2646c5ce80, Status: failed
📄 immoral_act_with_norm_prompts_tasks.jsonl → Job ID: batch_6820e082c5708190a75bf2abd17db752, Status: failed
📄 immoral_act_without_norm_prompts_tasks.jsonl → Job ID: batch_6820e0834ebc8190b119eeb79976325a, Status: failed
📄 with_moralAction_immoralConsequnece_prompts_tasks.jsonl → Job ID: batch_6820e083c0948190a1911d7e7df0d769, Status: failed
📄 with_immoralAction_moralConsequnece_prompts_tasks.jsonl → Job ID: batch_6820e08bbc0c8190a553670997ad26e5, Status: failed
📄 injection_moralAction_immoralOutcome_prompts_tasks.jsonl → Job ID: batch_6820e08c46b48190a6211e34a0f1e936, Status: failed
📄 anti_action_immoralAction_prompts_tasks.jsonl → Job ID: batch_6820e08caaf48190b87734efac4d5def, Status: failed
📄 pro_outcome_immoralAction_prompts_tasks.jsonl → Job ID: batch_6820e08d0de88190

In [27]:
def get_batch_errors(job_id):
    try:
        return openai.batches.retrieve(batch_id=job_id).errors
    except Exception as e:
        print(f"⚠️ Failed to retrieve errors for job {job_id}: {e}")
        return None

# Example: check one failed job
get_batch_errors("batch_6820e0822c68819090fdee2646c5ce80")


Errors(data=[BatchError(code='token_limit_exceeded', line=None, message='Enqueued token limit reached for gpt-4o-mini in organization org-M75PpyAXTAENv6t6agrahFM4. Limit: 2,000,000 enqueued tokens. Please try again once some in_progress batches have been completed.', param=None)], object='list')

In [None]:
import openai
import json

batch_id = "batch_6820e081b6a48190a2089a2a1a52dd28"


batch_job = openai.batches.retrieve(batch_id)
result_file_id = batch_job.output_file_id

# Get result content
result = openai.files.content(result_file_id).content

# Save to a file
result_file_name = "../data/batch_job_results_choice_with_norm.jsonl"
with open(result_file_name, 'wb') as file:
    file.write(result)

# Load and parse results
results = []
with open(result_file_name, 'r') as file:
    for line in file:
        results.append(json.loads(line.strip()))

print(f"✅ Loaded {len(results)} results from {result_file_name}")


Batch(id='batch_6820e081b6a48190a2089a2a1a52dd28', completion_window='24h', created_at=1746985089, endpoint='/v1/chat/completions', input_file_id='file-AwnthnRvAwNHNPWQV9tYzo', object='batch', status='in_progress', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1747071489, failed_at=None, finalizing_at=None, in_progress_at=1746985092, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=11947, total=12000))


ValueError: Expected a non-empty value for `file_id` but received None