# Imports and installations

In [1]:
import pandas as pd
import logging
import json
import time
import os
from getpass import getpass

from nemoguardrails import LLMRails, RailsConfig

In [2]:
# Ask for the key only when it is not already configured, so re-running this
# cell (or starting the kernel with the variable exported) neither prompts
# again nor overwrites the existing value. getpass keeps the key out of the
# notebook's saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("🔑 Enter your OpenAI API key: ")

🔑 Enter your OpenAI API key: ········


In [3]:
# Install the default root handler, then keep the root logger at WARNING.
logging.basicConfig()
root_logger = logging.getLogger()
root_logger.setLevel(logging.WARNING)

# Instrumentation

In [4]:
!pip install git+https://github.com/Arize-ai/openinference/@nemo#subdirectory=python/instrumentation/openinference-instrumentation-nemo-guardrails/

Collecting git+https://github.com/Arize-ai/openinference/@nemo#subdirectory=python/instrumentation/openinference-instrumentation-nemo-guardrails/
  Cloning https://github.com/Arize-ai/openinference/ (to revision nemo) to /private/var/folders/rj/076x1by56wn8qvbncwq6jszh0000gn/T/pip-req-build-26ew2kqb
  Running command git clone --filter=blob:none --quiet https://github.com/Arize-ai/openinference/ /private/var/folders/rj/076x1by56wn8qvbncwq6jszh0000gn/T/pip-req-build-26ew2kqb
  Running command git checkout -b nemo --track origin/nemo
  Switched to a new branch 'nemo'
  branch 'nemo' set up to track 'origin/nemo'.
  Resolved https://github.com/Arize-ai/openinference/ to commit c50fb225ae261989b259e6ef89dec8b1359c0f17
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [5]:
import phoenix as px

# Start the local Phoenix app before any spans are exported to it.
px.launch_app()

from opentelemetry import trace as trace_api
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from openinference.instrumentation.openai import OpenAIInstrumentor
from openinference.instrumentation.nemo_guardrails import NemoGuardrailsInstrumentor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Wire a tracer provider whose spans go to the OTLP/HTTP traces endpoint on
# 127.0.0.1:6006, then register it as the global provider.
endpoint = "http://127.0.0.1:6006/v1/traces"
trace_provider = TracerProvider()
otlp_exporter = OTLPSpanExporter(endpoint)
span_processor = SimpleSpanProcessor(otlp_exporter)
trace_provider.add_span_processor(span_processor)
trace_api.set_tracer_provider(trace_provider)

# Instrument NeMo Guardrails first, then OpenAI (same order as before).
# NOTE(review): skip_dep_check=True presumably bypasses the instrumentor's
# dependency check - confirm this is still required.
nemo_instrumentor = NemoGuardrailsInstrumentor()
nemo_instrumentor.instrument(skip_dep_check=True)
openai_instrumentor = OpenAIInstrumentor()
openai_instrumentor.instrument()

  from .autonotebook import tqdm as notebook_tqdm


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix
Successfully wrapped function: nemoguardrails.actions.action_dispatcher.ActionDispatcher.execute_action


# Load Jailbreak and Regular Prompt Datasets

In [6]:
# Jailbreak prompts sourced from the "Do Anything Now" dataset (described in a paper on arXiv):
# https://github.com/verazuo/jailbreak_llms
JAILBREAK_DATASET_FILEPATH = "./jailbreak_prompts_2023_05_07.csv"
# Regular prompts sourced from the HuggingFace dataset:
# https://huggingface.co/datasets/MohamedRashad/ChatGPT-prompts
VANILLA_PROMPTS_DATASET_FILEPATH = "./regular_prompts.json"
# Maximum number of prompts kept for evaluation from each dataset.
NUM_EVAL_EXAMPLES = 500
# Number of leading jailbreak prompts set aside as few-shot ("train") examples.
NUM_FEW_SHOT_EXAMPLES = 10
# Model name; not referenced by the cells shown in this part of the notebook.
MODEL = "gpt-4o-mini"

In [7]:
def split_dataset(sources, num_few_shot=None, num_eval=None):
    """Split a prompt table into few-shot ("train") and evaluation ("test") prompts.

    The first ``num_few_shot`` rows become the train prompts and the next
    ``num_eval`` rows become the test prompts. Rows are taken by position.

    Args:
        sources: DataFrame with a ``"prompt"`` column.
        num_few_shot: Number of leading rows used as train prompts. Defaults
            to the notebook-level ``NUM_FEW_SHOT_EXAMPLES``.
        num_eval: Maximum number of following rows used as test prompts.
            Defaults to the notebook-level ``NUM_EVAL_EXAMPLES``.

    Returns:
        A ``(train_prompts, test_prompts)`` tuple of plain Python lists.

    Raises:
        KeyError: If ``sources`` has no ``"prompt"`` column.
    """
    # Resolve the defaults at call time so later edits to the config cell are honoured.
    if num_few_shot is None:
        num_few_shot = NUM_FEW_SHOT_EXAMPLES
    if num_eval is None:
        num_eval = NUM_EVAL_EXAMPLES

    prompts = sources["prompt"]
    # .iloc keeps the split positional regardless of the frame's index labels.
    train_prompts = prompts.iloc[:num_few_shot]
    test_prompts = prompts.iloc[num_few_shot:num_few_shot + num_eval]
    return train_prompts.tolist(), test_prompts.tolist()

In [8]:
# Jailbreak prompts: these are the inputs we expect the guard to reject.
sources = pd.read_csv(JAILBREAK_DATASET_FILEPATH)
jailbreak_splits = split_dataset(sources)
jailbreak_train_prompts, jailbreak_test_prompts = jailbreak_splits

In [9]:
# Load the regular prompts and keep at most NUM_EVAL_EXAMPLES of them.
# The encoding is given explicitly so the read does not depend on the
# platform's default encoding.
with open(VANILLA_PROMPTS_DATASET_FILEPATH, "r", encoding="utf-8") as f:
    all_vanilla_prompts = json.load(f)

# The parsed value is sliced, so the JSON root is expected to be a list.
# Slicing happens after the file is closed; it only needs to be open for parsing.
vanilla_prompts = all_vanilla_prompts[:NUM_EVAL_EXAMPLES]

# Load NeMo Guards from Configs

In [10]:
# Build the default jailbreak guard from its local config directory.
default_rails_config = RailsConfig.from_path("./default_jailbreak_guard")
default_nemo_rails = LLMRails(default_rails_config)

I0000 00:00:1730230202.187088 16886270 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers
Fetching 5 files: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 55480.21it/s]


In [11]:
# Build the dataset-embeddings guard from its local config directory.
# NOTE(review): the recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'arize'"; the missing package must be
# installed before the cells that use `arize_embeddings_rails` can run.
arize_embeddings_config = RailsConfig.from_path("./dataset_guard_config")
arize_embeddings_rails = LLMRails(arize_embeddings_config)

ModuleNotFoundError: No module named 'arize'

# Load Sample Jailbreak Prompt

In [None]:
# Position (within the jailbreak test split) of the prompt used as the running example.
SAMPLE_JAILBREAK_PROMPT_INDEX = 116
sample_jailbreak_prompt = jailbreak_test_prompts[SAMPLE_JAILBREAK_PROMPT_INDEX]
sample_jailbreak_prompt

# Run Off-the-Shelf NeMo Guard

In [None]:
start_time = time.perf_counter()
default_nemo_response = await default_nemo_rails.generate_async(messages=[{
    "role": "user",
    "content": sample_jailbreak_prompt
}])
default_nemo_latency = time.perf_counter() - start_time

In [None]:
# Display the response returned by the default guard for the sample prompt.
default_nemo_response

In [None]:
# Display the elapsed time of the default guard call, in seconds
# (difference of two time.perf_counter() readings).
default_nemo_latency

# Run Arize Dataset Embeddings NeMo Guard 

In [None]:
start_time = time.perf_counter()
arize_embeddings_nemo_response = await arize_embeddings_rails.generate_async(messages=[{
    "role": "user",
    "content": sample_jailbreak_prompt
}])
arize_embeddings_nemo_latency = time.perf_counter() - start_time

In [None]:
# Display the response returned by the dataset-embeddings guard for the sample prompt.
arize_embeddings_nemo_response

In [None]:
# Display the elapsed time of the dataset-embeddings guard call, in seconds
# (difference of two time.perf_counter() readings).
arize_embeddings_nemo_latency