In [None]:
!pip install pandas
!pip install openpyxl
!pip install openai
!pip install google-generativeai
!pip install pip install google-api-core
!pip install grpcio

In [None]:
from openai import OpenAI
import google.generativeai as genai
from google.api_core import exceptions
import pandas as pd
import requests
import time
import random
import os
import json

# API credentials are read from environment variables so that keys are never
# saved inside the notebook. An unset variable falls back to "" - the same
# placeholder value the clients were previously constructed with.
client_db = OpenAI(
    api_key=os.environ.get("DEEPBRICKS_API_KEY", ""),
    base_url="https://api.deepbricks.ai/v1/",
)
client_ds = OpenAI(
    api_key=os.environ.get("DEEPSEEK_API_KEY", ""),
    base_url="https://api.deepseek.com",
)
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))

In [None]:
def wait_with_exponential_backoff(retries, maximum_backoff=64):
    """Pause the current thread for a jittered, exponentially growing interval.

    The interval is ``2 ** retries`` seconds plus a random fraction of a second
    drawn from ``random.uniform(0, 1)``, and never exceeds ``maximum_backoff``.
    The chosen interval is printed before sleeping.

    Args:
        retries: Number of attempts already made; used as the exponent.
        maximum_backoff: Upper bound, in seconds, for the pause.
    """
    base_delay = 2 ** retries
    jittered_delay = base_delay + random.uniform(0, 1)
    # Clamp to the ceiling (same selection rule as min(jittered_delay, maximum_backoff)).
    delay = maximum_backoff if maximum_backoff < jittered_delay else jittered_delay
    print(f"Waiting for {delay:.2f} seconds before the next attempt...")
    time.sleep(delay)


def call_api_with_backoff(api_call, *args, **kwargs):
    """Call ``api_call(*args, **kwargs)``, retrying with exponential backoff.

    Each failure is reported and followed by a pause from
    ``wait_with_exponential_backoff`` before the call is attempted again. After
    ``max_retries`` retries (7 attempts in total) the last exception is
    re-raised so that a persistent failure cannot loop forever.

    Args:
        api_call: Callable to invoke.
        *args: Positional arguments forwarded to ``api_call``.
        **kwargs: Keyword arguments forwarded to ``api_call``.

    Returns:
        Whatever ``api_call`` returns on its first successful attempt.

    Raises:
        Exception: The exception raised by the final failed attempt.
    """
    maximum_backoff = 64
    # Kept as a local rather than a parameter: every keyword argument of this
    # function is forwarded to ``api_call``, so a new keyword could shadow one.
    max_retries = 6
    retries = 0
    while True:
        try:
            return api_call(*args, **kwargs)
        except Exception as e:
            if retries >= max_retries:
                print(f"Giving up after {retries + 1} attempts: {str(e)}")
                raise
            print(f"Unexpected error: {str(e)}. Applying exponential backoff...")
            wait_with_exponential_backoff(retries, maximum_backoff)
            retries += 1


def invoke_db(model_name, prompt):
    """Send ``prompt`` to ``model_name`` through ``client_db`` and return the reply text.

    The prompt is sent as a single ``user`` message; the request goes through
    ``call_api_with_backoff``.

    Args:
        model_name: Model identifier passed to the chat-completions endpoint.
        prompt: Text of the user message.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    user_turn = {"role": "user", "content": prompt}
    completion = call_api_with_backoff(
        lambda: client_db.chat.completions.create(
            model=model_name,
            messages=[user_turn],
        )
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def invoke_ds(prompt):
    """Send ``prompt`` to the ``deepseek-chat`` model via ``client_ds``.

    The prompt is sent as a single ``user`` message with streaming disabled;
    the request goes through ``call_api_with_backoff``.

    Args:
        prompt: Text of the user message.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    request_options = dict(
        model="deepseek-chat",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )
    reply = call_api_with_backoff(
        lambda: client_ds.chat.completions.create(**request_options)
    )
    return reply.choices[0].message.content


def invoke_gemini(prompt):
    """Send ``prompt`` to ``gemini-2.0-flash`` and return the reply text.

    Every attempt builds a new ``GenerativeModel``, starts a chat with an empty
    history and sends the prompt as its only message; the attempt goes through
    ``call_api_with_backoff``.

    Args:
        prompt: Text to send to the model.

    Returns:
        The ``text`` attribute of the response.
    """
    def ask_once():
        gemini = genai.GenerativeModel(model_name="gemini-2.0-flash")
        return gemini.start_chat(history=[]).send_message(prompt)

    return call_api_with_backoff(ask_once).text


def invoke_ollama(model_name, prompt):
    """POST a single-message, non-streaming chat request to the local Ollama server.

    Args:
        model_name: Value sent as the ``model`` field of the request.
        prompt: Text sent as the content of the single ``user`` message.

    Returns:
        A ``(json_res, response)`` tuple. ``json_res`` is the decoded JSON body
        and ``response`` is ``json_res['message']['content']``. ``response`` is
        ``None`` when those keys are absent, and both values are ``None`` when
        a ``requests`` exception occurs.
    """
    endpoint = "http://localhost:11434/api/chat"
    payload = {
        "model": model_name,
        "stream": False,
        "messages": [{"role": "user", "content": prompt}],
    }
    try:
        http_reply = requests.post(
            endpoint,
            data=json.dumps(payload),
            headers={"Content-Type": "application/json"},
        )
        http_reply.raise_for_status()
        json_res = http_reply.json()
    except requests.exceptions.RequestException as e:
        print(f"Connection error: {e}")
        return None, None

    # Guard clause: bail out when the body lacks the expected nested keys.
    if not ('message' in json_res and 'content' in json_res['message']):
        print("Error: The response does not contain the 'message' or 'content' key.")
        return json_res, None

    return json_res, json_res['message']['content']


def _visible_subdirs(parent_dir):
    """Yield ``(name, path)`` for every non-hidden sub-directory of ``parent_dir``."""
    for entry in os.listdir(parent_dir):
        if entry.startswith('.'):
            continue  # Exclude hidden files/directories
        entry_path = os.path.join(parent_dir, entry)
        if os.path.isdir(entry_path):
            yield entry, entry_path


def process_all_jailbreak_prompts(input_base_dir, output_base_dir, models, categories):
    """Run every jailbreak prompt workbook against every model and save the replies.

    Walks ``input_base_dir/<task>/<attack>/`` and reads each ``.xlsx``/``.xls``
    file found there. Each row's ``PROMPT`` is sent to the model, and the
    replies for one workbook are written to
    ``output_base_dir/<MODEL>/<task>/<attack>/<MODEL>_<workbook>.csv``.
    Rows whose ``BIAS CATEGORY`` is listed in ``categories`` are skipped.

    Args:
        input_base_dir: Root directory holding the task/attack folders.
        output_base_dir: Root directory for the CSV results.
        models: Iterable of model names. ``"deepseek"`` and ``"gemini"``
            (case-insensitive) use their dedicated clients, names in the Ollama
            list use ``invoke_ollama``, everything else uses ``invoke_db``.
        categories: Bias categories to skip, compared case-insensitively.
            ``None`` or an empty collection skips nothing.
    """
    # Upper-case both sides of the comparison; previously only the row value
    # was upper-cased, so a mixed-case entry in `categories` never matched.
    skipped_categories = {str(category).upper() for category in (categories or [])}

    for model in models:
        model_name = model.upper()
        model_key = model.lower()

        for task, task_path in _visible_subdirs(input_base_dir):
            for attack_name, attack_path in _visible_subdirs(task_path):
                excel_files = [
                    f for f in os.listdir(attack_path)
                    if not f.startswith('.') and f.endswith(('.xlsx', '.xls'))
                ]
                if not excel_files:
                    print(f"No Excel file found in {attack_path}")
                    continue

                out_dir = os.path.join(output_base_dir, model_name, task, attack_name)

                for excel_file in excel_files:
                    file_path = os.path.join(attack_path, excel_file)
                    excel_base = os.path.splitext(excel_file)[0]
                    try:
                        df = pd.read_excel(file_path)
                    except Exception as e:
                        print(f"Error reading {file_path}: {e}")
                        continue

                    has_category = "BIAS CATEGORY" in df.columns
                    os.makedirs(out_dir, exist_ok=True)
                    results = []

                    for _, row in df.iterrows():
                        prompt = row["PROMPT"]
                        raw_category = row["BIAS CATEGORY"] if has_category else ""
                        # A blank Excel cell is read as NaN (a float) and a numeric
                        # cell as a number; neither has .upper(), so normalise to
                        # text before comparing.
                        bias_category = "" if pd.isna(raw_category) else str(raw_category)
                        if bias_category.upper() in skipped_categories:
                            continue

                        if model_key in ["{your_ollama_models}"]:
                            _, response = invoke_ollama(model, prompt)
                        elif model_key == "deepseek":
                            response = invoke_ds(prompt)
                        elif model_key == "gemini":
                            response = invoke_gemini(prompt)
                        else:
                            response = invoke_db(model, prompt)

                        results.append({
                            "MODEL": model_name,
                            "BIAS CATEGORY": bias_category,
                            "PROMPT": prompt,
                            "RESPONSE": response,
                        })

                    if results:
                        output_file = os.path.join(out_dir, f"{model_name}_{excel_base}.csv")
                        df_out = pd.DataFrame(results)
                        df_out.to_csv(output_file, index=False, encoding="utf-8")
                        print(f"Saved {output_file} with {len(results)} responses")


def process_all_base_prompts(input_base_dir, output_base_dir, models):
    """Run every base prompt workbook against every model and save the replies.

    Reads each non-hidden ``.xlsx``/``.xls`` file directly inside
    ``input_base_dir``, sends each row's ``PROMPT`` to the model, and writes the
    replies for one workbook to
    ``output_base_dir/<MODEL>/<MODEL>_<workbook>.csv``. A workbook that cannot
    be read is reported and skipped.

    Args:
        input_base_dir: Directory containing the prompt workbooks.
        output_base_dir: Root directory for the CSV results.
        models: Iterable of model names. ``"deepseek"`` and ``"gemini"``
            (case-insensitive) use their dedicated clients, names in the Ollama
            list use ``invoke_ollama``, everything else uses ``invoke_db``.
    """
    excel_suffixes = ('.xlsx', '.xls')

    for model in models:
        model_name = model.upper()
        backend_key = model.lower()

        for file in os.listdir(input_base_dir):
            is_visible_workbook = (not file.startswith('.')) and file.endswith(excel_suffixes)
            if not is_visible_workbook:
                continue

            file_path = os.path.join(input_base_dir, file)
            excel_base, _extension = os.path.splitext(file)

            try:
                df = pd.read_excel(file_path)
            except Exception as e:
                print(f"Error reading {file_path}: {e}")
                continue

            # Gather every (prompt, category) pair before any model is called.
            entries = [
                (row["PROMPT"], row["BIAS CATEGORY"] if "BIAS CATEGORY" in row else "")
                for _, row in df.iterrows()
            ]

            out_dir = os.path.join(output_base_dir, model_name)
            os.makedirs(out_dir, exist_ok=True)

            results = []
            for prompt, bias_category in entries:
                if backend_key in ["{your_ollama_models}"]:
                    response = invoke_ollama(model, prompt)[1]
                elif backend_key == "deepseek":
                    response = invoke_ds(prompt)
                elif backend_key == "gemini":
                    response = invoke_gemini(prompt)
                else:
                    response = invoke_db(model, prompt)

                results.append({
                    "MODEL": model_name,
                    "BIAS CATEGORY": bias_category,
                    "PROMPT": prompt,
                    "RESPONSE": response,
                })

            if not results:
                continue

            output_file = os.path.join(out_dir, f"{model_name}_{excel_base}.csv")
            pd.DataFrame(results).to_csv(output_file, index=False, encoding="utf-8")
            print(f"Saved {output_file} with {len(results)} responses")

In [None]:
# ===============================
# ======== Base prompts =========
# ===============================

# Models to query; replace the placeholder with real model names.
models = ['{your_models}']

print("Started")
start_time = time.time()
process_all_base_prompts("prompts", "results_base", models)
end_time = time.time()

# Elapsed wall-clock time in minutes, then split into hours and leftover minutes.
total_time = (end_time - start_time) / 60
hours, minutes = divmod(total_time, 60)
print(f"Total time: {hours} hours and {minutes} minutes")

In [None]:
# ===============================
# ====== Jailbreak prompts ======
# ===============================

# Maps each model name to the bias categories passed as `categories` for that model;
# replace the placeholders with real values.
bias_category_no_attack = {"{model}": ['{bias_categories}']}

print("Started")
start_time = time.time()
# One pass per configured model, handing over that model's own category list.
for model, model_categories in bias_category_no_attack.items():
    process_all_jailbreak_prompts(
        "prompts",
        "results_jailbreak",
        models=[model],
        categories=model_categories,
    )
end_time = time.time()

# Elapsed wall-clock time in minutes, then split into hours and leftover minutes.
total_time = (end_time - start_time) / 60
hours, minutes = divmod(total_time, 60)
print(f"Total time: {hours} hours and {minutes} minutes")