In [None]:
invalid_topics = ["bike"]
valid_topics = ["phone", "tablet", "computer"]

In [1]:
prompt = """Introducing the Galaxy Tab S7, a sleek and sophisticated device that seamlessly combines \
cutting-edge technology with unparalleled design. With a stunning 5.1-inch Quad HD Super AMOLED display, \
every detail comes to life in vibrant clarity. The Samsung Galaxy S7 boasts a powerful processor, \
ensuring swift and responsive performance for all your tasks. \
Capture your most cherished moments with the advanced camera system, which delivers stunning photos in any lighting conditions."""

In [2]:
device = -1


In [3]:
import guardrails as gd
from guardrails.hub import RestrictToTopic
from guardrails.errors import ValidationError
# Create the Guard with the OnTopic Validator
# Create the Guard with the OnTopic Validator
guard = gd.Guard.from_string(
    validators=[
        RestrictToTopic(
            valid_topics=[
                "math",
                "algebra",
                "calculus",
                "geometry",
                "equations",
                "statistics",
                "probability",
                "theorems",
                "proofs",
            ],
            # invalid_topics=invalid_topics,
            device=device,
            disable_classifier=False,
            disable_llm=True,
            on_fail="exception",
        )
    ]
)

# Test with a given text
try:
    test = guard.parse(
        llm_output="What is Fourier series?",
    )
    print(test)
except ValidationError as e:
    print(e)
# print(test)



ValidationOutcome(
    call_id='127258179664576',
    raw_llm_output='What is Fourier series?',
    validation_summaries=[],
    validated_output='What is Fourier series?',
    reask=None,
    validation_passed=True,
    error=None
)


In [11]:
import litellm
import json
def openrouter_llm_callable_FIXED(prompt, topics):
    """
    This function now re-creates the JSON prompt, just like
    the internal Guardrails code.
    """

    # 1. Get the topics, which Guardrails passes in kwargs
    
    print("--- üïµÔ∏è‚Äç‚ôÇÔ∏è DEBUGGING VALIDATOR ---")
    print(f"Validator received text:\n{prompt[:100]}...")  # Print first 100 chars
    print(f"Validator is using topics:\n{topics}")

    # 2. Build the special JSON-forcing prompt
    # This is the same prompt you just found!
    json_prompt = f"""
        Given a text and a list of topics, return a valid json list of which topics are present in the text. If none, just return an empty list.

        Output Format:
        -------------
        {{"topics_present": []}}

        Text:
        ----
        "{prompt}"

        Topics: 
        ------
        {topics}

        Result:
        ------ 
    """

    print("Validator is sending this JSON prompt to Mistral...")

    # 3. Call the LLM with the *new* JSON prompt
    try:
        response = litellm.completion(
            model="openrouter/google/gemma-3-4b-it",
            messages=[{"role": "user", "content": json_prompt}],
            api_base="https://openrouter.ai/api/v1",
            api_key=os.getenv("OPENROUTER_API_KEY"),
            # We must force the model to return JSON
            response_format={"type": "json_object"},
        )
        
        raw_response = response.choices[0].message.content
        print(f"Validator got this raw JSON response:\n{raw_response}")

        # 4. Parse the JSON and return the list
        # We need to clean the response to find the JSON
        json_str = raw_response[raw_response.find("{") : raw_response.rfind("}") + 1]
        json_data = json.loads(json_str)

        # This is the list Guardrails is expecting
        topics_found = json_data.get("topics_present", [])

        print(f"Validator is returning this list: {topics_found}")
        print("-------------------------------")

        return topics_found

    except Exception as e:
        print(f"Validator failed to parse JSON: {e}")
        return []  # Return empty list on failure


In [42]:
import os
from guardrails import Guard
from guardrails.errors import ValidationError
from guardrails.hub import RestrictToTopic  # We will only use this validator
from openai import OpenAI

# --- Global Client for Embeddings ---
# This client is used by BOTH guards for semantic search.
try:
    embedding_client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )
except Exception as e:
    print(f"Warning: Failed to initialize embedding client: {e}")
    embedding_client = None


class InputGuard:
    """
    Validates the user's INPUT.
    Checks if the query IS a valid math topic.
    """

    def __init__(self):
        if not embedding_client:
            raise ConnectionError("Embedding client not initialized.")

        self.guard = Guard().use(
            RestrictToTopic(
                valid_topics=[
                    "math",
                    "algebra",
                    "calculus",
                    "geometry",
                    "equations",
                    "statistics",
                    "probability",
                    "theorems",
                    "proofs",
                ],
                # invalid_topics=invalid_topics,
                device=device,
                llm_callable=openrouter_llm_callable_FIXED,
                disable_classifier=False,
                disable_llm=False,
                on_fail="reask",
            )
        )
        self.embedding_client = embedding_client

    def validate(self, text_to_validate: str):
        """
        Validates the user's question string.
        Raises ValidationError on failure.
        """
        validation_result = self.guard.parse(
            llm_output=text_to_validate,
            # llm_api=self.embedding_client
        )

        if not validation_result.validation_passed:
            raise ValidationError("Input is not a math-related topic.")

        return True


class OutputGuard:
    """
    Validates the LLM's OUTPUT.
    Checks if the response IS a refusal.
    """

    def __init__(self):
        if not embedding_client:
            raise ConnectionError("Embedding client not initialized.")

        # [MODIFICATION] We use the same validator, but in "reverse"
        self.guard = Guard().use(
            RestrictToTopic(
                # We check if the response is semantically
                # similar to one of these invalid topics.
                invalid_topics=[
                    "I don't know",
                    "I cannot answer",
                    "I'm sorry, I can't",
                    "I am not programmed to",
                    "I don't have enough information",
                    "As an AI, I cannot help with that",
                ],
                # Force fast, non-LLM embedding search
                device=device,
                llm_callable=openrouter_llm_callable_FIXED,
                disable_classifier=True,
                disable_llm=False,
                # This time, on_fail="noop" because we
                # check the validation result manually.
                on_fail="reask",
            )
        )
        self.embedding_client = embedding_client

    def validate(self, text_to_validate: str):
        """
        Validates the LLM's response string.
        Raises ValidationError on failure.
        """
        validation_result = self.guard.parse(
            llm_output=text_to_validate,
        )

        # [REVERSE LOGIC]
        # We used `invalid_topics`. This means:
        # PASSED = The text was NOT a refusal. (This is good!)
        # FAILED = The text WAS a refusal. (This is bad!)

        if not validation_result.validation_passed:
            # The guard FAILED, meaning it found a refusal.
            # So, we raise an error.
            print(validation_result)
            raise ValidationError("The LLM refused to answer.")

        # The guard PASSED, meaning the text was clean.
        return True

In [43]:
input_g = InputGuard()
output_g = OutputGuard()

In [44]:
input_g.validate("What is Fourier series?")



--- üïµÔ∏è‚Äç‚ôÇÔ∏è DEBUGGING VALIDATOR ---
Validator received text:
What is Fourier series?...
Validator is using topics:
['geometry', 'theorems', 'algebra', 'statistics', 'probability', 'equations', 'math', 'calculus', 'proofs']
Validator is sending this JSON prompt to Mistral...
Validator got this raw JSON response:
{"topics_present": ["math"]}
Validator is returning this list: ['math']
-------------------------------


True

In [45]:
output_g.validate(prompt)

--- üïµÔ∏è‚Äç‚ôÇÔ∏è DEBUGGING VALIDATOR ---
Validator received text:
Introducing the Galaxy Tab S7, a sleek and sophisticated device that seamlessly combines cutting-edg...
Validator is using topics:
['As an AI, I cannot help with that', 'I am not programmed to', "I don't know", 'I cannot answer', "I'm sorry, I can't", "I don't have enough information"]
Validator is sending this JSON prompt to Mistral...
Validator got this raw JSON response:
{}
Validator is returning this list: []
-------------------------------


True