In [None]:
# Clone the RestrictToTopic validator from GitHub
git clone https://github.com/tryolabs/restricttotopic.git
cd restricttotopic
# Check out a fixed commit rather than whatever the default branch currently points at
git checkout 7290756
# Editable install from the cloned working tree
pip install -e .
# Step back out to the directory the clone was started from
cd ..

In [None]:
import os
import gradio as gr
import guardrails as gd
from guardrails.errors import ValidationError
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames, GenTextModerationsMetaNames

# Import validators
import sys
sys.path.insert(0, './ban_list')
from validator import BanList as BanListValidator

sys.path.insert(0, './restricttotopic')
from restricttotopic.validator import RestrictToTopic

# --- watsonx.ai connection -------------------------------------------------
# Only an endpoint URL and a project id are supplied here; no API key is set
# in this cell.
project_id = "skills-network"
my_credentials = dict(url="https://us-south.ml.cloud.ibm.com")

# --- Foundation model handle -----------------------------------------------
model = ModelInference(
    project_id=project_id,
    credentials=my_credentials,
    model_id='ibm/granite-3-8b-instruct',
)

# Upper bound on generated tokens, referenced by generation_params below.
max_new_tokens = 1000

# --- Text generation parameters --------------------------------------------
generation_params = {}
generation_params[GenTextParamsMetaNames.MAX_NEW_TOKENS] = max_new_tokens
generation_params[GenTextParamsMetaNames.DECODING_METHOD] = 'greedy'

# --- Built-in moderation settings ------------------------------------------
# PII moderation: enabled for both input and output, with the MASK option
# set to {"remove_entity_value": True}.
guardrails_pii_params = {}
guardrails_pii_params[GenTextModerationsMetaNames.INPUT] = True
guardrails_pii_params[GenTextModerationsMetaNames.OUTPUT] = True
guardrails_pii_params[GenTextModerationsMetaNames.MASK] = {"remove_entity_value": True}

# Granite Guardian moderation: enabled for both input and output with a
# threshold of 0.5.
guardrails_granite_guardian_params = {}
guardrails_granite_guardian_params[GenTextModerationsMetaNames.INPUT] = True
guardrails_granite_guardian_params[GenTextModerationsMetaNames.OUTPUT] = True
guardrails_granite_guardian_params[GenTextModerationsMetaNames.THRESHOLD] = 0.5

# Terms that must never appear in a query or a response
classified_operations = ["Eagle Shelter"]

# Guard that raises (on_fail="exception") when a banned term is found
ban_list_validator = BanListValidator(
    banned_words=classified_operations,
    on_fail="exception",
)
operation_guard = gd.Guard().use(ban_list_validator)

# Subjects the assistant is allowed to discuss
allowed_topics = [
    "emergency preparedness",
    "weather safety",
    "evacuation procedures",
    "hurricane preparedness",
    "emergency supplies",
]

# Subjects the assistant must refuse
blocked_topics = [
    "politics",
    "unverified rumors",
    "entertainment",
    "sports",
]

# Topic restriction guard: classifier enabled, LLM check disabled, and a
# failed validation raises (on_fail="exception")
topic_validator = RestrictToTopic(
    valid_topics=allowed_topics,
    invalid_topics=blocked_topics,
    disable_classifier=False,
    disable_llm=True,
    on_fail="exception",
)
topic_guard = gd.Guard.from_string(validators=[topic_validator])

def _load_database_context(path='database.md'):
    """Return the text of the optional context file, or '' if it does not exist."""
    try:
        with open(path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        return ''


def _format_pii_warning(pii_findings):
    """Build the user-facing warning listing the PII entity types that were detected."""
    # dict.fromkeys de-duplicates while keeping first-seen order, so the same
    # entity type is not listed several times in the warning.
    categories = list(dict.fromkeys(finding['entity'] for finding in pii_findings))
    if len(categories) > 1:
        listing = ", ".join(categories[:-1]) + " and " + categories[-1]
    else:
        listing = "".join(categories)
    return (
        "[WARNING] Request contains personally identifiable information (PII): "
        + listing
        + ". Please refrain from sharing PII in the chat."
    )


def _passes_guard(guard, text):
    """Return True when `guard` validates `text` without raising ValidationError."""
    try:
        guard.validate(llm_output=text)
    except ValidationError:
        return False
    return True


def stormguard_v5_complete(user_input):
    """Answer a user question after passing it through every guardrail layer.

    Layers, in order:
      1. BanList check of the user input (classified operation names).
      2. RestrictToTopic check of the user input.
      3. Model generation with PII and Granite Guardian moderation enabled.
      4. BanList check of the generated response.
      5. RestrictToTopic check of the generated response.

    Args:
        user_input: The question typed by the user.

    Returns:
        The generated answer, or a "[WARNING] ..." string naming the layer
        that blocked the request or the response.
    """
    # Layer 1: Check for classified operations
    if not _passes_guard(operation_guard, user_input):
        return "[WARNING] Your query references restricted information."

    # Layer 2: Check topic boundaries
    if not _passes_guard(topic_guard, user_input):
        return "[WARNING] Please keep questions focused on emergency preparedness topics."

    # Optional context: prepend the database file only when it has content,
    # separated from the question by a real blank line ("\n\n", not "/n/n").
    file_content = _load_database_context()
    if file_content:
        user_message = f"{file_content}\n\n{user_input}"
    else:
        user_message = user_input

    # Layer 3: Generate response with Granite Guardian and PII protection
    # Construct a prompt template using standard Granite prompt tags
    full_prompt = f"""
<|start_of_role|>system<|end_of_role|>
You are an emergency management assistant for Orlando, FL.
Provide helpful information about storm preparedness and emergency procedures.
<|end_of_text|>
<|start_of_role|>user<|end_of_role|>
{user_message}
<|end_of_text|>
<|start_of_role|>assistant<|end_of_role|>
"""

    response = model.generate(
        prompt=full_prompt,
        params=generation_params,
        guardrails=True,
        guardrails_pii_params=guardrails_pii_params,
        guardrails_granite_guardian_params=guardrails_granite_guardian_params
    )

    first_result = response['results'][0]

    # Check for violations reported by the built-in moderations
    if 'moderations' in first_result:
        moderations = first_result['moderations']
        if 'pii' in moderations:
            return _format_pii_warning(moderations['pii'])
        if 'granite_guardian' in moderations:
            return "[WARNING] Your query violates safety guidelines."

    response_text = first_result['generated_text']

    # Layer 4: Verify response doesn't contain classified terms
    if not _passes_guard(operation_guard, response_text):
        return "[WARNING] Response blocked due to restricted content."

    # Layer 5: Verify response stays on topic
    if not _passes_guard(topic_guard, response_text):
        return "[WARNING] Response blocked - off-topic content detected."

    return response_text


# Gradio front end: one multi-line question box in, one multi-line answer box out
question_box = gr.Textbox(
    label="Your Question",
    placeholder="Ask about emergency preparedness...",
    lines=10,
)
answer_box = gr.Textbox(label="StormGuard Response", lines=10)

demo = gr.Interface(
    fn=stormguard_v5_complete,
    inputs=question_box,
    outputs=answer_box,
    title="StormGuard V5: Complete Multi-Layer Defense",
    description="Emergency management chatbot with complete layered protection: PII masking, Granite Guardian (HAP/jailbreak), BanList (classified ops), and RestrictToTopic (conversation boundaries)",
    flagging_mode="never",
)

# Start the app only when this file is run as a script (no public share link)
if __name__ == "__main__":
    demo.launch(server_port=7864, share=False)

### Test Query 1

In [1]:
"""
What is the safest way to cook food during a power outage caused by a hurricane?
"""

'\nWhat is the safest way to cook food during a power outage caused by a hurricane?\n'