In [1]:
import os
from pathlib import Path
from tempfile import mkdtemp
from warnings import filterwarnings
from dotenv import load_dotenv

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.llms.openai import OpenAI
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.node_parser import MarkdownNodeParser, HierarchicalNodeParser, SemanticSplitterNodeParser
from llama_index.readers.docling import DoclingReader
from llama_index.vector_stores.milvus import MilvusVectorStore
from llama_index.core.chat_engine import CondenseQuestionChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.readers.docling import DoclingReader
from llama_index.core.postprocessor import SentenceTransformerRerank


load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
#""
'''BAAI/bge-small-en-v1.5 - Beijing Academy of Artificial Intelligence (BAAI)
Sentence Embedding Model / Text Embedding Model
Specifically designed for English text.
Based on E5 architecture, which itself is a modification of the MiniLM 
(or similar lightweight Transformer) architecture optimized for embedding tasks.
~60 million parameters'''

EMBED_MODEL = HuggingFaceEmbedding("BAAI/bge-small-en-v1.5")
embed_dim = len(EMBED_MODEL.get_text_embedding("Burger"))#
print(embed_dim)

384


In [11]:
SOURCE = "Guardrail_Scenarios.pdf"

In [12]:
reader = DoclingReader()
document = reader.load_data(SOURCE) 

node_parser_mk = MarkdownNodeParser()

node_parser_semantic = SemanticSplitterNodeParser(
    buffer_size=1, breakpoint_percentile_threshold=95, embed_model=EMBED_MODEL
)

#semantic_node_parser = SemanticSplitterNodeParser()
MILVUS_URI = str(Path(mkdtemp())/ 'docling_ahtsham.db')
vector_store = MilvusVectorStore(uri=MILVUS_URI,dim=embed_dim,overwrite=True)
index = VectorStoreIndex.from_documents(
    documents=document,
    transformations=[node_parser_mk, node_parser_semantic],
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
    embed_model=EMBED_MODEL,
)


2025-07-12 14:47:51,040 [DEBUG][_create_connection]: Created new connection using: 161cc7a226b64520aea23dc1a2a85a29 (async_milvus_client.py:599)


In [13]:
models = ['gpt-3.5-turbo','text-davinci-003']

llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    max_tokens=1024,
    frequency_penalty=0
)



In [15]:


#QUERY =  'How do you ensure software compliance (licensing) in an organization '
QUERY = "when was the last olympic held?"
query_engine = index.as_query_engine(similarity_top_k=10,llm=llm)
result = query_engine.query(QUERY)
print(f"Q: {QUERY}\nA: {result.response.strip()}")
#display([(n.text, n.metadata) for n in result.source_nodes])

Q: when was the last olympic held?
A: The last Olympic Games were held in 2021.


#### Building Guardrails using Guardrails AI

In [8]:
pip uninstall guardrails-ai
pip install "guardrails-ai>=0.4.0"

Found existing installation: guardrails-ai 0.3.1
Uninstalling guardrails-ai-0.3.1:
  Would remove:
    /opt/anaconda3/envs/langchain_env/bin/guardrails
    /opt/anaconda3/envs/langchain_env/lib/python3.10/site-packages/guardrails/*
    /opt/anaconda3/envs/langchain_env/lib/python3.10/site-packages/guardrails_ai-0.3.1.dist-info/*
Proceed (Y/n)? ^C
[31mERROR: Operation cancelled by user[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [None]:
guardrails hub install hub://guardrails/competitor_check --no-install-local-models -q
guardrails hub install hub://guardrails/detect_pii --no-install-local-models -q
guardrails hub install hub://tryolabs/restricttotopic --no-install-local-models -q
guardrails hub install hub://groundedai/grounded_ai_hallucination

In [2]:
import guardrails as gd
from guardrails.hub import CompetitorCheck, DetectPII
from guardrails.integrations.llama_index import GuardrailsQueryEngine
from guardrails import OnFailAction
from pydantic import BaseModel, Field

  import pkg_resources


#### Competitor check and PII

In [8]:
class SafeResponse(BaseModel):
    response : str = Field(description="The safe and appropriate response to the user's query.",
    validators=[
            CompetitorCheck(
                competitors=["Global AI Corp", "Global AI", "GlobalAICorp"],
                on_fail=OnFailAction.REASK # If competitor is mentioned, reask the LLM
            ),
            DetectPII(
                pii_entities=["EMAIL_ADDRESS", "PHONE_NUMBER"], # Detect emails and phone numbers
                on_fail=OnFailAction.FIX # Try to fix/redact PII
            )
        ]
    )


In [16]:
guard =  gd.Guard.for_pydantic(output_class=SafeResponse)

In [18]:
guard_query_engine = GuardrailsQueryEngine(query_engine,guard=guard)


In [24]:
Query = "How old is the Roman Colloseum"
response_without_guard = query_engine.query(Query)
print(response_without_guard)

response_with_guard = guard_query_engine.query(Query)
print(response_with_guard)

The Roman Colosseum is around 1,943 years old.


ValidationError: Validation failed: Validation failed: None

In [26]:
test_queries = [
    "What does InnovateTech do?",
    "Who is InnovateTech's main competitor?",
    "How can I contact InnovateTech support?",
    "Tell me about InnovateTech's partnerships.",
    "What is the capital of France?"
]

for i, query_text in enumerate(test_queries):
    print(f"\nQuery {i+1}: {query_text}")
    try:
        response = guard_query_engine.query(query_text)
        
        # Access the validated output directly from the Guardrails response
        validated_output = response.response.validated_output
        print(f"Validated Response: {validated_output.response}")

    except Exception as e:
        print(f"An error occurred during query: {e}")


Query 1: What does InnovateTech do?
An error occurred during query: 'NoneType' object has no attribute 'validated_output'

Query 2: Who is InnovateTech's main competitor?
An error occurred during query: Validation failed: Validation failed: None

Query 3: How can I contact InnovateTech support?
An error occurred during query: Validation failed: Validation failed: None

Query 4: Tell me about InnovateTech's partnerships.
An error occurred during query: Validation failed: Validation failed: None

Query 5: What is the capital of France?
An error occurred during query: Validation failed: Validation failed: None


#### Hallucination with MNLi pipeline

In [None]:
@register_validator(name="is_grounded_in_context", data_type="string")
class IsGroundedInContext(Validator):
    def __init__(self, nli_confidence_threshold: float = 0.7,
                 on_fail: OnFailAction = OnFailAction.REASK, **kwargs):
        super().__init__(name="is_grounded_in_context", on_fail=on_fail, **kwargs)
        self.nli_confidence_threshold = nli_confidence_threshold
        # Initialize NLI model ONCE when the validator is created
        self.nli_classifier = pipeline("text-classification", model="MoritzLaurer/deberta-v3-small-mnli", device=-1) # Use -1 for CPU, 0 for GPU

    def validate_and_fix(self, text: str, metadata: dict, *args, **kwargs) -> ValidationResult:
        """
        Validates if the text is grounded in the provided context from metadata.
        `metadata` will contain `retrieved_context_nodes` from GuardrailsQueryEngine.
        """
        retrieved_context_nodes = metadata.get("retrieved_context_nodes", [])
        if not retrieved_context_nodes:
            # If no context, or no relevant nodes retrieved, consider it a failure for grounding.
            return FailResult(
                outcome="fail",
                metadata={"reason": "No retrieved context available for groundedness check."},
                fix_value="I cannot answer this question as I lack sufficient contextual information to ensure groundedness."
            )

        # Combine all retrieved text into a single premise for simplicity
        # For more robustness, you might compare each sentence in 'text' against each 'chunk' individually.
        full_context_text = " ".join([node.get_text() for node in retrieved_context_nodes])

        # Split the generated text into claims/sentences for individual checking
        # Basic split by period for demo. Use a proper sentence tokenizer for production.
        generated_claims = [s.strip() for s in text.split('.') if s.strip()]

        ungrounded_claims = []
        for claim in generated_claims:
            # Check if this claim is entailed by the full context using NLI model
            nli_result = self.nli_classifier(f"{full_context_text} {claim}")
            
            # The result is typically a list of dicts, e.g., [{'label': 'ENTAILMENT', 'score': 0.95}]
            if nli_result and nli_result[0]['label'] == 'ENTAILMENT' and nli_result[0]['score'] >= self.nli_confidence_threshold:
                # This claim is grounded
                continue
            else:
                ungrounded_claims.append(claim)

        if ungrounded_claims:
            return FailResult(
                outcome="fail",
                metadata={"ungrounded_claims": ungrounded_claims, "reason": "Generated text contains claims not grounded in context."},
                fix_value=None # Let Guardrails reask the LLM
            )
        else:
            return PassResult(outcome="pass")


In [None]:
import nltk
entailment_model = 'GuardrailsAI/finetuned_nli_provenance'
NLI_PIPELINE = pipeline("text-classification", model=entailment_model)

#### Keeping it on the topic

In [4]:
from guardrails.hub import GroundedAIHallucination,RestrictToTopic,CompetitorCheck,DetectPII
# fro zero shot classisfication
from transformers import pipeline
from guardrails.validator_base import FailResult, PassResult, ValidationResult, Validator, register_validator

#### Building Custom nli pipeline for topic

In [None]:
import os
from dotenv import load_dotenv
from typing import List, Dict, Any

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Guardrails AI imports
import guardrails as gd
from guardrails.integrations.llama_index import GuardrailsQueryEngine
from guardrails.hub import CompetitorCheck, DetectPII
from guardrails import OnFailAction
from guardrails.validator_base import FailResult, PassResult, ValidationResult, Validator, register_validator

# Pydantic imports
from pydantic import BaseModel, Field

# For NLI model (Hugging Face Transformers) - for hallucination and zero-shot topic
from transformers import pipeline

# --- Custom Hallucination Validator (from previous response, re-confirmed) ---
@register_validator(name="is_on_topic_bart", data_type="string")
class IsOnTopicBart(Validator):
    def __init__(self, allowed_topics: List[str], topic_threshold: float = 0.7, on_fail: OnFailAction = OnFailAction.REASK, **kwargs):
        super().__init__(name="is_on_topic_bart", on_fail=on_fail, **kwargs)
        self.allowed_topics = allowed_topics
        self.topic_threshold = topic_threshold
        self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=-1)

    def validate_and_fix(self, text: str, *args, **kwargs) -> ValidationResult:
        if not text.strip():
            return PassResult(outcome="pass", metadata={"reason": "Empty response, skipping topic check."})

        result = self.classifier(text, self.allowed_topics, multi_label=False)
        predicted_label = result['labels'][0]
        predicted_score = result['scores'][0]

        if predicted_score >= self.topic_threshold and predicted_label in self.allowed_topics:
            return PassResult(outcome="pass", metadata={"classified_topic": predicted_label, "score": predicted_score})
        else:
            return FailResult(
                outcome="fail",
                metadata={"classified_topic": predicted_label, "score": predicted_score,
                          "reason": f"Response topic '{predicted_label}' (score: {predicted_score:.2f}) is below threshold or not strong enough for allowed topics."},
                fix_value=None
            )

#### Guardraila Hub
Using inbuilt Guardrails AI functions from guardrail.hub. For more from hub you can look into https://hub.guardrailsai.com

In [7]:
import os
from dotenv import load_dotenv

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Guardrails AI imports
import guardrails as gd
from guardrails.integrations.llama_index import GuardrailsQueryEngine
from guardrails import OnFailAction

# --- Import Validators directly from Guardrails Hub ---
from guardrails.hub import CompetitorCheck
# For topic adherence
from guardrails.hub import RestrictToTopic
# For hallucination check
from guardrails.hub import GroundedAIHallucination

# Pydantic imports
from pydantic import BaseModel, Field

# --- No custom validators needed for topic or hallucination anymore ---
# We are using the Hub validators which encapsulate the logic.


# 1. Load Environment Variables
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found. Please set it in your .env file.")

# 2. Configure LlamaIndex LLM and Embedding Model
Settings.llm = OpenAI(model="gpt-3.5-turbo", api_key=OPENAI_API_KEY, temperature=0.0)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
    cache_folder="./models"
)
print("LLM and Embedding model configured for LlamaIndex.")

# 3. Create a dummy data directory and file
os.makedirs("data", exist_ok=True)
with open("data/company_info.txt", "w") as f:
    f.write("""
Our company, InnovateTech, specializes in cutting-edge AI solutions.
We are a leader in natural language processing and computer vision.
InnovateTech was founded in 2015 by Dr. Elena Petrova.
Our main competitor is Global AI Corp. We aim to offer better value than Global AI Corp.
For support, contact support@innovatetech.com or call 555-123-4567.
InnovateTech also has a partnership with ResearchLabs Inc.
""")
print("Dummy data created in 'data/' directory.")

# 4. Load Documents and Create LlamaIndex VectorStoreIndex
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)
print("LlamaIndex VectorStoreIndex created.")

# 5. Define Pydantic Model with Hub Validators
class SafeResponse(BaseModel):
    response: str = Field(
        description="The safe and appropriate response to the user's query.",
        validators=[
            CompetitorCheck(
                competitors=["Global AI Corp", "Global AI", "GlobalAICorp"],
                on_fail=OnFailAction.REASK
            ),
            # --- RestrictToTopic for Topic Adherence ---
            # Using 'facebook/bart-large-mnli' as the model for zero-shot classification
            # disable_llm=True ensures it only uses the classifier, not an LLM fallback.
            RestrictToTopic(
                valid_topics=["Technology", "Business", "Support", "Partnerships", "Company History"],
                invalid_topics=["music"],
                model="facebook/bart-large-mnli", # Specify BART model for classification
                disable_llm=True,                 # Disable LLM fallback, rely on BART classifier
                model_threshold=0.8,              # Confidence threshold for topic adherence
                on_fail=OnFailAction.REASK
            ),
            # --- GroundedAIHallucination for Hallucination Check ---
            # This validator uses a fine-tuned model (often NLI-based) internally.
            # It expects 'query' and 'reference' (context) in metadata.
            # LlamaIndex's GuardrailsQueryEngine passes 'retrieved_context_nodes' in metadata.
            # GroundedAIHallucination expects 'reference' to be a string or list of strings.
            # We will ensure the metadata is correctly formatted.
            GroundedAIHallucination(quant=False,
                on_fail=OnFailAction.REASK
                # This validator often uses its own internal NLI-like model
                # based on its implementation from GroundedAI.
                # No direct 'mnli_model' parameter here as it's encapsulated.
            )
        ]
    )

# 6. Create the Guardrails AI Guard object
guard = gd.Guard.for_pydantic(output_class=SafeResponse)
print("Guardrails AI Guard created with Competitor, RestrictToTopic, and GroundedAIHallucination validators.")

# 7. Create the LlamaIndex QueryEngine and wrap it with GuardrailsQueryEngine
query_engine = index.as_query_engine(
    similarity_top_k=5
)

# This is a crucial step for GroundedAIHallucination.
# GuardrailsQueryEngine automatically passes `retrieved_context_nodes` from LlamaIndex.
# However, GroundedAIHallucination typically expects a 'reference' key in metadata,
# which needs to be the text content. We will transform the nodes into a string for it.
class CustomGuardrailsQueryEngine(GuardrailsQueryEngine):
    async def _acall(self, query: str) -> Any:
        # First, call the base query engine to get the LLM response and source nodes
        llama_response = await self.query_engine.aquery(query)
        
        # Prepare metadata for Guardrails validators
        # GroundedAIHallucination expects 'query' and 'reference'.
        # 'reference' should be a string of the context.
        # 'retrieved_context_nodes' is what GuardrailsQueryEngine typically passes.
        retrieved_context_texts = [node.get_text() for node in llama_response.source_nodes]
        
        # Add the 'query' and 'reference' (context) to the metadata
        # This aligns with what GroundedAIHallucination expects
        # The GuardrailsQueryEngine will merge this with its default metadata.
        # We also pass the original retrieved_context_nodes, as some validators might use them raw.
        metadata_for_guard = {
            "query": query,
            "reference": " ".join(retrieved_context_texts), # GroundedAIHallucination expects a string
            "retrieved_context_nodes": llama_response.source_nodes # Keep for other validators if needed
        }

        # Call the Guardrails Guard with the LLM's response and prepared metadata
        validated_output = await self.guard.validate(
            llm_output=llama_response.response, # The LLM's raw response
            metadata=metadata_for_guard
        )

        # The GuardrailsQueryEngine expects a Guardrails LlamaIndex Response object
        # It handles wrapping the validated_output in the correct LlamaIndex Response type.
        return self._create_response_from_guard_result(
            validated_output=validated_output,
            raw_response=llama_response.response,
            metadata=metadata_for_guard # Include metadata if you want it in the final response
        )

guardrails_query_engine = CustomGuardrailsQueryEngine(
    query_engine=query_engine,
    guard=guard
)
print("LlamaIndex QueryEngine wrapped with GuardrailsQueryEngine.")
print("Custom GuardrailsQueryEngine created to pass 'query' and 'reference' metadata for GroundedAIHallucination.")

# 8. Test Queries with Guardrails
print("\n--- Testing Queries with Guardrails ---")

test_queries = [
    "What does InnovateTech do?", # On-topic & Grounded
    "Who is InnovateTech's main competitor?", # On-topic & Grounded
    "How can I contact InnovateTech support?", # On-topic & Grounded
    "Tell me about InnovateTech's partnerships.", # On-topic & Grounded
    "When was InnovateTech founded?", # On-topic & Grounded
    "What is the capital of France?", # Off-topic & Ungrounded
    "InnovateTech invented time travel in 2020.", # On-topic (Company) but Ungrounded
    "Tell me about the weather today in Mumbai." # Clearly off-topic
]

# Use an async loop to run queries
import asyncio

async def run_queries():
    for i, query_text in enumerate(test_queries):
        print(f"\nQuery {i+1}: {query_text}")
        try:
            # Await the async query
            response = await guardrails_query_engine.aquery(query_text)
            
            validated_output = response.response.validated_output
            print(f"Validated Response: {validated_output.response}")

            if guard.history.last:
                print(f"  Guardrails Outcome: {guard.history.last.outcome}")
                if guard.history.last.validation_result and guard.history.last.validation_result.errors:
                    print(f"  Guardrails Errors: {guard.history.last.validation_result.errors}")

        except Exception as e:
            print(f"An error occurred during query: {e}")

# Run the async function
if __name__ == "__main__":
    asyncio.run(run_queries())

LLM and Embedding model configured for LlamaIndex.
Dummy data created in 'data/' directory.
LlamaIndex VectorStoreIndex created.


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]