In [1]:
import os
import warnings
from pathlib import Path
from datetime import datetime

warnings.filterwarnings('ignore')

from dotenv import load_dotenv

from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS

from ragas import evaluate, EvaluationDataset, SingleTurnSample
from ragas.metrics import (
    Faithfulness,
    AnswerRelevancy,
    ContextPrecision,
    ContextRecall,
    ContextEntityRecall,
    NoiseSensitivity,
)
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

from datasets import Dataset

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

plt.style.use('default')
sns.set_palette("husl")

print("âœ“ All imports successful!")
print("âœ“ Using LangChain 1.0+ with LCEL patterns")
print("âœ“ Ragas 0.3.7 evaluation framework loaded")

âœ“ All imports successful!
âœ“ Using LangChain 1.0+ with LCEL patterns
âœ“ Ragas 0.3.7 evaluation framework loaded


In [2]:
load_dotenv()

True

In [3]:
cloudflow_docs = [
    # ============================================================================
    # ARCHITECTURE DOCUMENTS (3)
    # ============================================================================
    Document(
        page_content="""CloudFlow Architecture Overview

CloudFlow is a distributed cloud platform built on microservices architecture. The platform consists of three main layers that work together to provide a robust, scalable infrastructure.

The API Gateway layer handles all incoming requests using OAuth 2.0 authentication and routes them through our service mesh powered by Istio. This layer provides load balancing, SSL termination, and request routing capabilities.

The Service Mesh layer orchestrates communication between microservices, providing service discovery, health checking, and automatic failover. It uses Kubernetes for container orchestration across multiple availability zones.

The Data Storage layer implements a distributed database system with automatic replication across three availability zones. This ensures data durability and supports horizontal scaling based on demand.

CloudFlow guarantees 99.99% uptime SLA with triple redundancy across availability zones. The platform supports horizontal scaling with automatic load balancing, allowing each service to scale independently based on CPU and memory metrics.""",
        metadata={"source": "architecture_overview", "topic": "architecture", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Scaling Mechanisms

CloudFlow implements sophisticated auto-scaling mechanisms to handle varying workloads efficiently. The platform monitors real-time metrics to make intelligent scaling decisions.

Horizontal Pod Autoscaling (HPA) adjusts the number of pod replicas based on CPU utilization (target: 70%) and memory usage (target: 80%). When these thresholds are exceeded for more than 3 consecutive minutes, the system automatically provisions additional pods.

Vertical scaling adjusts resource allocation for individual services. CloudFlow can increase or decrease CPU and memory limits without downtime, using Kubernetes resource management capabilities.

The platform supports bursting to handle sudden traffic spikes. During burst periods, CloudFlow can temporarily scale up to 500% of baseline capacity for up to 15 minutes before triggering permanent scaling.

Load balancing distributes traffic across all available pods using a weighted round-robin algorithm. Health checks run every 10 seconds, and unhealthy pods are automatically removed from the rotation within 30 seconds.""",
        metadata={"source": "scaling_guide", "topic": "architecture", "difficulty": "advanced"}
    ),
    
    Document(
        page_content="""CloudFlow System Components

CloudFlow's architecture comprises several key components that work in harmony to deliver reliable cloud services.

The Control Plane manages the overall system state, including service registration, configuration management, and orchestration. It runs on a dedicated cluster with five replicas for high availability.

The Data Plane handles actual request processing and data flow. It consists of worker nodes that execute application workloads and process user requests. Each data plane node has 16 CPU cores and 64GB RAM.

The Observability Stack includes Prometheus for metrics collection, Grafana for visualization, and ELK (Elasticsearch, Logstash, Kibana) for log aggregation. Metrics are collected every 15 seconds and retained for 90 days.

The Service Registry maintains a real-time directory of all available services and their endpoints. It uses etcd for distributed consensus and supports automatic service discovery with DNS-based lookups.

The Message Queue system, based on Apache Kafka, handles asynchronous communication between services with guaranteed message delivery and ordering.""",
        metadata={"source": "system_components", "topic": "architecture", "difficulty": "intermediate"}
    ),
    
    # ============================================================================
    # API DOCUMENTATION (4)
    # ============================================================================
    Document(
        page_content="""CloudFlow API Authentication

CloudFlow APIs support two authentication methods: OAuth 2.0 and API Keys. Both methods provide secure access to platform resources.

OAuth 2.0 is recommended for user-facing applications. It supports the Authorization Code flow and provides access tokens valid for 1 hour and refresh tokens valid for 30 days. To implement OAuth 2.0, direct users to the authorization endpoint at https://auth.cloudflow.io/oauth/authorize with your client_id and redirect_uri parameters.

API Keys are ideal for server-to-server communication and background jobs. Each API key has the format "cf_live_" followed by 32 alphanumeric characters. API keys never expire unless explicitly revoked.

To authenticate requests, include your API key in the Authorization header: "Authorization: Bearer YOUR_API_KEY". All API requests must be made over HTTPS; HTTP requests will be rejected with a 403 error.

API keys can be scoped to specific permissions (read, write, admin) and restricted to specific IP addresses for enhanced security. You can manage your API keys through the CloudFlow dashboard or the /api/v1/keys endpoint.""",
        metadata={"source": "api_authentication", "topic": "api", "difficulty": "beginner"}
    ),
    
    Document(
        page_content="""CloudFlow REST API Endpoints

CloudFlow provides a comprehensive REST API with endpoints organized by resource type. All endpoints follow RESTful conventions and return JSON responses.

Base URL: https://api.cloudflow.io/v1

Resources endpoint: GET /api/v1/resources - List all resources with pagination (max 100 per page). Supports filtering by type, status, and creation date.

Resource creation: POST /api/v1/resources - Create a new resource. Required fields: name (string), type (string), config (object). Returns 201 Created on success.

Resource details: GET /api/v1/resources/{id} - Retrieve detailed information about a specific resource by ID.

Resource update: PUT /api/v1/resources/{id} - Update an existing resource. Supports partial updates with PATCH /api/v1/resources/{id}.

Resource deletion: DELETE /api/v1/resources/{id} - Delete a resource. Returns 204 No Content on success. Deleted resources are soft-deleted and can be recovered within 30 days.

All list endpoints support query parameters: limit (default: 25, max: 100), offset (default: 0), sort (default: created_at), order (asc|desc).""",
        metadata={"source": "api_endpoints", "topic": "api", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow API Rate Limiting

CloudFlow implements rate limiting to ensure fair usage and platform stability. Rate limits vary by pricing tier and authentication method.

Standard Tier: 1,000 requests per hour per API key. Burst capacity allows up to 100 requests per minute. Exceeding limits returns HTTP 429 (Too Many Requests).

Premium Tier: 10,000 requests per hour per API key with burst capacity of 500 requests per minute. Premium tier also includes priority request processing.

Enterprise Tier: Custom rate limits negotiated based on usage patterns. Typically starts at 100,000 requests per hour with dedicated infrastructure.

Rate limit headers are included in every response:
- X-RateLimit-Limit: Maximum requests per hour
- X-RateLimit-Remaining: Remaining requests in current window
- X-RateLimit-Reset: Unix timestamp when the limit resets

When rate limited, the Retry-After header indicates how many seconds to wait before retrying. Implement exponential backoff: wait 1s, then 2s, then 4s, etc.

OAuth 2.0 authenticated requests have separate, higher limits: 5,000 requests per hour for Standard tier.""",
        metadata={"source": "api_rate_limits", "topic": "api", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow API Error Codes

CloudFlow APIs use standard HTTP status codes and provide detailed error messages in JSON format to help diagnose issues.

Authentication Errors:
- 401 Unauthorized: Missing or invalid API key. Check the Authorization header.
- 403 Forbidden: Valid API key but insufficient permissions for the requested operation.

Client Errors:
- 400 Bad Request: Invalid request format or missing required fields. The response includes a "details" field explaining what's wrong.
- 404 Not Found: Requested resource doesn't exist. Verify the resource ID.
- 409 Conflict: Request conflicts with current resource state (e.g., duplicate name).
- 422 Unprocessable Entity: Request format is valid but contains semantic errors.
- 429 Too Many Requests: Rate limit exceeded. Check X-RateLimit-Reset header.

Server Errors:
- 500 Internal Server Error: Unexpected server error. CloudFlow team is automatically notified.
- 502 Bad Gateway: Temporary issue with upstream services. Retry after a few seconds.
- 503 Service Unavailable: Scheduled maintenance or system overload. Check status.cloudflow.io.

Error Response Format: {"error": {"code": "error_code", "message": "Human-readable message", "details": {...}}}""",
        metadata={"source": "api_error_codes", "topic": "api", "difficulty": "beginner"}
    ),
    
    # ============================================================================
    # SECURITY DOCUMENTATION (2)
    # ============================================================================
    Document(
        page_content="""CloudFlow Security Features

Security is a top priority at CloudFlow. We implement industry-leading security practices to protect your data and applications.

Encryption: All data is encrypted at rest using AES-256 encryption. Data in transit uses TLS 1.3 with perfect forward secrecy. Encryption keys are rotated every 90 days using AWS KMS.

Network Security: CloudFlow runs in a Virtual Private Cloud (VPC) with strict network segmentation. Public endpoints are protected by Web Application Firewall (WAF) rules that block common attack patterns. DDoS protection is provided by Cloudflare with mitigation capacity up to 50 Gbps.

Access Control: All resources support Role-Based Access Control (RBAC) with customizable roles and permissions. We support integration with external identity providers via SAML 2.0 and OpenID Connect.

Audit Logging: Every API call is logged with timestamp, user identity, IP address, and action taken. Audit logs are immutable and retained for 2 years. You can access logs via the /api/v1/audit-logs endpoint.

Vulnerability Management: CloudFlow undergoes quarterly penetration testing by independent security firms. We maintain a bug bounty program and respond to security reports within 24 hours.""",
        metadata={"source": "security_features", "topic": "security", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Compliance Standards

CloudFlow maintains compliance with major industry standards and regulations to ensure your data is handled responsibly.

SOC 2 Type II: CloudFlow is SOC 2 Type II certified, demonstrating our commitment to security, availability, and confidentiality. Audit reports are available to enterprise customers under NDA.

GDPR Compliance: CloudFlow is fully compliant with the European Union's General Data Protection Regulation. We support data residency requirements, right to erasure, data portability, and provide Data Processing Agreements (DPA) to all customers.

HIPAA: For healthcare customers, CloudFlow offers HIPAA-compliant infrastructure with Business Associate Agreements (BAA). HIPAA features include enhanced audit logging, encrypted backups, and strict access controls.

ISO 27001: CloudFlow's information security management system is certified to ISO 27001:2013 standards. We maintain comprehensive security policies and undergo annual recertification audits.

PCI DSS: For customers processing payment card data, CloudFlow provides PCI DSS Level 1 certified infrastructure. However, we recommend using dedicated payment processors rather than storing card data.

Data Residency: CloudFlow supports data residency in US, EU, UK, and APAC regions to meet local regulatory requirements.""",
        metadata={"source": "compliance_standards", "topic": "security", "difficulty": "advanced"}
    ),
    
    # ============================================================================
    # PRICING DOCUMENTATION (2)
    # ============================================================================
    Document(
        page_content="""CloudFlow Pricing Tiers

CloudFlow offers three pricing tiers designed to meet the needs of individuals, teams, and enterprises.

Standard Tier ($99/month):
- 1,000 API requests per hour
- 100 GB storage included
- 10 GB bandwidth per month
- Community support via forums
- 99.9% uptime SLA
- Up to 5 team members

Premium Tier ($499/month):
- 10,000 API requests per hour
- 1 TB storage included
- 100 GB bandwidth per month
- Email support with 24-hour response time
- 99.95% uptime SLA
- Up to 25 team members
- Advanced monitoring and alerting
- Custom domain support

Enterprise Tier (Custom pricing):
- Custom API rate limits (100,000+ requests/hour)
- Unlimited storage and bandwidth
- 24/7 phone and email support with 1-hour response time
- 99.99% uptime SLA with service credits
- Unlimited team members
- Dedicated account manager
- Custom integrations and professional services
- Private cloud deployment options

All tiers include: SSL certificates, daily backups, API access, and dashboard analytics. Annual billing provides 15% discount.""",
        metadata={"source": "pricing_tiers", "topic": "pricing", "difficulty": "beginner"}
    ),
    
    Document(
        page_content="""CloudFlow Billing Information

Understanding CloudFlow's billing model helps you manage costs effectively and avoid unexpected charges.

Billing Cycle: Subscriptions are billed monthly on the date you signed up. Annual subscriptions are billed upfront with a 15% discount. Billing date can be changed once per year.

Usage-Based Charges: Beyond included quotas, additional usage is billed at:
- API requests: $0.01 per 1,000 requests
- Storage: $0.10 per GB per month
- Bandwidth: $0.08 per GB
- Backup retention (beyond 30 days): $0.05 per GB per month

Payment Methods: CloudFlow accepts credit cards (Visa, Mastercard, Amex), ACH transfers (US only), and wire transfers for invoices over $1,000. Cryptocurrency payments available for annual plans.

Invoicing: Invoices are emailed on the billing date and available in the dashboard. Enterprise customers receive consolidated monthly invoices with 30-day payment terms.

Upgrades and Downgrades: Upgrade anytime to immediately access higher tier features. Downgrades take effect at the next billing cycle. Prorated credits are applied to your account balance.

Free Trial: New customers get 14-day free trial on Premium tier with no credit card required. Trial includes 1,000 API requests and 10 GB storage.""",
        metadata={"source": "billing_info", "topic": "pricing", "difficulty": "beginner"}
    ),
    
    # ============================================================================
    # BEST PRACTICES DOCUMENTATION (3)
    # ============================================================================
    Document(
        page_content="""CloudFlow Performance Optimization

Following these best practices will help you achieve optimal performance from your CloudFlow applications.

Caching Strategy: Implement caching at multiple levels. Use CloudFlow's built-in Redis cache for frequently accessed data with TTL between 5-60 minutes. Cache API responses on the client side and respect Cache-Control headers.

Request Optimization: Batch multiple operations into single API calls when possible. Use pagination for large result sets (recommended page size: 50-100 items). Implement request compression using gzip to reduce bandwidth.

Connection Management: Reuse HTTP connections with keep-alive headers. Maintain a connection pool with 5-10 concurrent connections per API key. Set appropriate timeouts: connection timeout 10s, read timeout 30s.

Query Efficiency: Use field filtering to request only required data: /resources?fields=id,name,status. Leverage server-side filtering instead of retrieving all data and filtering locally.

Asynchronous Processing: For long-running operations, use CloudFlow's async API endpoints. Poll for results using the returned job_id rather than blocking on the initial request.

CDN Usage: Serve static assets through CloudFlow's global CDN with 150+ edge locations. Configure appropriate cache headers for optimal performance: max-age=3600 for semi-static content.""",
        metadata={"source": "performance_optimization", "topic": "best_practices", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Monitoring and Observability

Effective monitoring ensures your CloudFlow applications remain healthy and performant.

Metrics Collection: CloudFlow automatically collects key metrics including request rate, error rate, latency (p50, p95, p99), and resource utilization. Access metrics via the dashboard or Metrics API at /api/v1/metrics.

Custom Metrics: Send custom application metrics using the StatsD protocol. CloudFlow aggregates custom metrics every 60 seconds and retains them for 90 days.

Alerting: Configure alerts for critical conditions like error rate >5%, latency >500ms, or approaching rate limits. CloudFlow supports alerting via email, SMS, Slack, PagerDuty, and webhooks.

Distributed Tracing: Enable distributed tracing to track requests across services. CloudFlow supports OpenTelemetry and provides trace visualization in the dashboard. Sample rate: 10% of requests (configurable up to 100%).

Log Management: CloudFlow retains logs for 7 days by default (30 days for Premium, 90 days for Enterprise). Use structured logging with JSON format for better searchability. Maximum log line length: 32KB.

Dashboard Widgets: Create custom dashboards with real-time metrics, SLA compliance, and cost tracking. Share dashboards with team members or embed in external tools using iframe integration.""",
        metadata={"source": "monitoring_observability", "topic": "best_practices", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Disaster Recovery

CloudFlow implements comprehensive disaster recovery capabilities to protect your data and ensure business continuity.

Backup Strategy: CloudFlow performs automatic daily backups of all data at 2 AM UTC. Backups are encrypted and stored in geographically diverse locations. Retention: 30 days for Standard tier, 90 days for Premium, 1 year for Enterprise.

Point-in-Time Recovery: Enterprise customers can restore data to any point within the retention period with 5-minute granularity. Recovery operations typically complete within 15-30 minutes.

Multi-Region Replication: Enable multi-region replication for critical data. Data is asynchronously replicated to a secondary region within 60 seconds. Failover to secondary region is automatic and takes approximately 5 minutes.

Backup Verification: CloudFlow performs monthly backup restoration tests to ensure data recoverability. Test results are available in your compliance dashboard.

Export Capabilities: Export your data anytime in JSON, CSV, or Parquet format. Full exports are available via the /api/v1/export endpoint. Large exports (>10 GB) are delivered to your S3 bucket.

RTO and RPO: CloudFlow guarantees Recovery Time Objective (RTO) of 4 hours and Recovery Point Objective (RPO) of 1 hour for Enterprise tier. Contact support to initiate disaster recovery procedures.""",
        metadata={"source": "disaster_recovery", "topic": "best_practices", "difficulty": "advanced"}
    ),
    
    # ============================================================================
    # TROUBLESHOOTING DOCUMENTATION (3)
    # ============================================================================
    Document(
        page_content="""Common CloudFlow Errors and Solutions

This guide covers the most common errors encountered when using CloudFlow and their solutions.

Error: "Invalid API Key" (401)
Solution: Verify your API key format starts with "cf_live_" and is exactly 40 characters. Check for extra spaces or newlines. Generate a new API key if the issue persists. API keys are case-sensitive.

Error: "Rate Limit Exceeded" (429)
Solution: Implement exponential backoff in your retry logic. Check X-RateLimit-Reset header to know when limits reset. Consider upgrading to a higher tier if you consistently hit limits. Use batch endpoints to reduce request count.

Error: "Resource Not Found" (404)
Solution: Verify the resource ID is correct and the resource hasn't been deleted. Use the /api/v1/resources endpoint to list available resources. Check if you're using the correct API version (/v1).

Error: "Timeout" (504)
Solution: Increase client timeout to at least 30 seconds. For long-running operations, use async endpoints and poll for results. Check CloudFlow status page for any service degradation.

Error: "Validation Error" (422)
Solution: Review the error details field for specific validation failures. Common issues: missing required fields, invalid data types, values outside allowed ranges. Consult API documentation for correct request format.""",
        metadata={"source": "common_errors", "topic": "troubleshooting", "difficulty": "beginner"}
    ),
    
    Document(
        page_content="""CloudFlow Debugging Guide

When troubleshooting issues with CloudFlow, follow this systematic debugging approach.

Step 1 - Check Service Status: Visit status.cloudflow.io to verify all systems are operational. Subscribe to status updates to receive notifications about incidents and maintenance.

Step 2 - Review API Logs: Access detailed API logs in the CloudFlow dashboard under Analytics > API Logs. Filter by time range, status code, and endpoint. Look for patterns in failed requests.

Step 3 - Enable Debug Mode: Add X-CloudFlow-Debug: true header to requests to receive detailed debug information in responses. Debug mode provides request ID, processing time breakdown, and backend service information.

Step 4 - Test with curl: Isolate issues by testing with curl commands. Example: curl -H "Authorization: Bearer YOUR_API_KEY" -H "X-CloudFlow-Debug: true" https://api.cloudflow.io/v1/resources

Step 5 - Check Network Connectivity: Ensure your network allows outbound HTTPS traffic to *.cloudflow.io on port 443. Verify DNS resolution is working correctly.

Step 6 - Verify SDK Version: If using CloudFlow SDK, ensure you're running the latest version. Outdated SDKs may not support new API features or may have known bugs.

Step 7 - Contact Support: If issues persist, contact CloudFlow support with the request ID from failed requests. Support responds within 24 hours for Standard tier, 4 hours for Premium, 1 hour for Enterprise.""",
        metadata={"source": "debugging_guide", "topic": "troubleshooting", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Support Escalation Process

Understanding CloudFlow's support escalation process ensures your issues are resolved efficiently.

Support Channels:
- Community Forums (All tiers): community.cloudflow.io - Best for general questions, feature requests, and sharing knowledge
- Email Support (Premium & Enterprise): support@cloudflow.io - Include account ID and request ID in subject line
- Phone Support (Enterprise only): +1-888-CLOUDFLOW - Available 24/7 for critical issues
- Slack Channel (Enterprise only): Direct access to engineering team

Issue Severity Levels:
- P0 (Critical): Complete service outage affecting production. Response time: 1 hour for Enterprise, 4 hours for Premium
- P1 (High): Major functionality impaired but workarounds available. Response time: 4 hours for Enterprise, 8 hours for Premium
- P2 (Medium): Minor functionality issues with workarounds. Response time: 24 hours
- P3 (Low): Questions, feature requests, documentation issues. Response time: 48 hours

Escalation Path: If your issue isn't resolved within SLA, it automatically escalates to the next support tier. Enterprise customers can request immediate escalation to engineering team.

Required Information: Include account ID, request ID, error messages, timestamps, steps to reproduce, and expected vs actual behavior. Screenshots and API logs are helpful.""",
        metadata={"source": "support_escalation", "topic": "troubleshooting", "difficulty": "beginner"}
    ),
]

print(f" Created {len(cloudflow_docs)} CloudFlow documentation documents")
print("\nDocument breakdown by category: ")
for topic in ["architecture", "api", "security", "pricing", "best_practices", "troubleshooting"]:
    count = len([doc for doc in cloudflow_docs if doc.metadata["topic"]==topic])
    print(f"    -{topic.title()}: {count} documents")

 Created 17 CloudFlow documentation documents

Document breakdown by category: 
    -Architecture: 3 documents
    -Api: 4 documents
    -Security: 2 documents
    -Pricing: 2 documents
    -Best_Practices: 3 documents
    -Troubleshooting: 3 documents


In [5]:
sample_doc = cloudflow_docs[0]

print("Sample Document: ")
print("="*80)
print(f"Content preview(first 300 chars): \n{sample_doc.page_content[:300]}...\n")
print(f"Metadata: {sample_doc.metadata}")
print("="*80)

total_char = sum(len(doc.page_content) for doc in cloudflow_docs)
avg_char = total_char/ len(cloudflow_docs)

print(f"\nDataset statistics:")
print(f"   Total documents: {len(cloudflow_docs)}")
print(f"   Total charactor: {total_char}")
print(f"   Average documents length: {int(avg_char)}")

Sample Document: 
Content preview(first 300 chars): 
CloudFlow Architecture Overview

CloudFlow is a distributed cloud platform built on microservices architecture. The platform consists of three main layers that work together to provide a robust, scalable infrastructure.

The API Gateway layer handles all incoming requests using OAuth 2.0 authenticat...

Metadata: {'source': 'architecture_overview', 'topic': 'architecture', 'difficulty': 'intermediate'}

Dataset statistics:
   Total documents: 17
   Total charactor: 20987
   Average documents length: 1234


### **Basic RAG Pipeline**

In [8]:
#Step 1:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=128,
    separators = ["\n\n","\n", "."," ",""]
)

chunks = text_splitter.split_documents(cloudflow_docs)

print(f"âœ“ Split {len(cloudflow_docs)} documents into {len(chunks)} chunks")
print(f"\nChunk statics:")
chunks_length = [len(chunk.page_content) for chunk in chunks]

print(f"   Average chunk size: {int(np.mean(chunks_length))} characotrs")
print(f"   Max chunk size: {min(chunks_length)} charactors")
print(f"   Max chunk size: {max(chunks_length)} charactors")

print(f"\nSample chunk:")
print(f"Content: {chunks[0].page_content[:200]}...")
print(f"Metadata: {chunks[0].metadata}")

âœ“ Split 17 documents into 34 chunks

Chunk statics:
   Average chunk size: 616 characotrs
   Max chunk size: 105 charactors
   Max chunk size: 1012 charactors

Sample chunk:
Content: CloudFlow Architecture Overview

CloudFlow is a distributed cloud platform built on microservices architecture. The platform consists of three main layers that work together to provide a robust, scala...
Metadata: {'source': 'architecture_overview', 'topic': 'architecture', 'difficulty': 'intermediate'}


In [9]:
#Step 2: Create embeddings and vector store
from langchain_openai import AzureOpenAIEmbeddings
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
)

print("Creating FAISS vector store...")
print(f"   Embedding {len(chunks)} chunks")

vector_store = FAISS.from_documents(
    chunks, embeddings
)
vectorstore_path = "./ragas_evaluation_faiss"
vector_store.save_local(vectorstore_path)


print(f"âœ“ FAISS vector store created and saved to '{vectorstore_path}'")
print(f"âœ“ Indexed {len(chunks)} document chunks")
print("\nVector store can be reloaded with:")
print(f"  vectorstore = FAISS.load_local('{vectorstore_path}', embeddings, allow_dangerous_deserialization=True)")

Creating FAISS vector store...
   Embedding 34 chunks
âœ“ FAISS vector store created and saved to './ragas_evaluation_faiss'
âœ“ Indexed 34 document chunks

Vector store can be reloaded with:
  vectorstore = FAISS.load_local('./ragas_evaluation_faiss', embeddings, allow_dangerous_deserialization=True)


In [10]:
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k":4}
)

print("âœ“ Retriever configured successfully")
print("  Search type: similarity")
print("  Number of results (k): 4")

test_query = "What is CloudFlow's uptime SLA?"
print(f"\nTesting retriever with query: '{test_query}'")

retrieved_docs = retriever.invoke(test_query)
print(f"âœ“ Retrieved {len(retrieved_docs)} documents")
print(f"\nFirst retrieved chunk preview:")
print(f"{retrieved_docs[0].page_content[:250]}...")

âœ“ Retriever configured successfully
  Search type: similarity
  Number of results (k): 4

Testing retriever with query: 'What is CloudFlow's uptime SLA?'
âœ“ Retrieved 4 documents

First retrieved chunk preview:
CloudFlow guarantees 99.99% uptime SLA with triple redundancy across availability zones. The platform supports horizontal scaling with automatic load balancing, allowing each service to scale independently based on CPU and memory metrics....


In [17]:
from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    temperature=0,
    max_tokens=500
)
print("âœ“ Language model configured")
print("  Model: gpt-4o-mini")
print("  Temperature: 0 (deterministic)")
print("  Max tokens: 500")

test_response = llm.invoke("Say 'LLM is ready!'")
print(f"\nLLM test: {test_response.content}")

âœ“ Language model configured
  Model: gpt-4o-mini
  Temperature: 0 (deterministic)
  Max tokens: 500

LLM test: LLM is ready! ðŸš€


In [18]:
#Create prompt template
template = """You are a helpful assistant for CloudFlow Platform documentation.
Answer the question based on the following context. If you cannot answer based on
the context, say "I don't have enough information to answer that question."

Be concise and accurate. Include specific details like numbers, limits, and technical
specifications when available in the context.

Context:
{context}

Question: {question}

Answer:"""

prompt = ChatPromptTemplate.from_template(template)


print("âœ“ Prompt template created")
print("\nPrompt structure:")
print("  1. System instruction (CloudFlow assistant role)")
print("  2. Context from retrieved documents")
print("  3. User question")
print("  4. Answer placeholder")

âœ“ Prompt template created

Prompt structure:
  1. System instruction (CloudFlow assistant role)
  2. Context from retrieved documents
  3. User question
  4. Answer placeholder


In [None]:
#Step 4: Building chain
def doc_format(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context":retriever | doc_format, "question":RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("âœ“ RAG chain created using LCEL")
print("\nChain flow:")
print("  1. Question â†’ Retriever â†’ Get relevant documents")
print("  2. Documents â†’ format_docs â†’ Create context string")
print("  3. Context + Question â†’ Prompt template")
print("  4. Prompt â†’ LLM â†’ Generate answer")
print("  5. LLM output â†’ StrOutputParser â†’ Extract text")

âœ“ RAG chain created using LCEL

Chain flow:
  1. Question â†’ Retriever â†’ Get relevant documents
  2. Documents â†’ format_docs â†’ Create context string
  3. Context + Question â†’ Prompt template
  4. Prompt â†’ LLM â†’ Generate answer
  5. LLM output â†’ StrOutputParser â†’ Extract text


In [None]:
#Step 5: Test RAG pipeline
test_questions = [
    "What is CloudFlow's uptime SLA?",
    "How do I authenticate with CloudFlow APIs?",
    "What are the pricing tiers?"
]

print("Testing RAG pipeline with sample questions:\n")
print("=" * 80)

for i, q in enumerate(test_questions,1):
    print(f"Question {i}: {q}")
    answer = rag_chain.invoke(q)
    print(f"Answer: {answer}")
    print("-"*80)

Testing RAG pipeline with sample questions:

Question 1: What is CloudFlow's uptime SLA?
Answer: CloudFlow's uptime SLA is 99.99% for the Enterprise tier and 99.95% for the Premium tier. The Standard tier offers a 99.9% uptime SLA.
--------------------------------------------------------------------------------
Question 2: How do I authenticate with CloudFlow APIs?
Answer: You can authenticate with CloudFlow APIs using either OAuth 2.0 or API Keys:

1. **OAuth 2.0**: Recommended for user-facing applications.  
   - Use the Authorization Code flow to obtain access tokens (valid for 1 hour) and refresh tokens (valid for 30 days).  
   - Direct users to the authorization endpoint at `https://auth.cloudflow.io/oauth/authorize` with your `client_id` and `redirect_uri` parameters.

2. **API Keys**: Ideal for server-to-server communication and background jobs.  
   - API keys have the format `cf_live_` followed by 32 alphanumeric characters.  
   - API keys do not expire unless explicitly rev

In [22]:
#Step 6: evaluation

test_cases = [
    # ========== SIMPLE FACTUAL (5) ==========
    {
        "question": "What is CloudFlow's uptime SLA?",
        "ground_truth": "CloudFlow guarantees 99.99% uptime SLA with triple redundancy across availability zones."
    },
    {
        "question": "What authentication protocol does CloudFlow use?",
        "ground_truth": "CloudFlow uses OAuth 2.0 for user-facing applications and API keys for server-to-server communication."
    },
    {
        "question": "What is the service mesh technology used by CloudFlow?",
        "ground_truth": "CloudFlow uses Istio as the service mesh technology to orchestrate communication between microservices."
    },
    {
        "question": "What compliance standards does CloudFlow support?",
        "ground_truth": "CloudFlow supports SOC 2 Type II, GDPR, HIPAA, ISO 27001, and PCI DSS Level 1 compliance standards."
    },
    {
        "question": "How long are CloudFlow audit logs retained?",
        "ground_truth": "CloudFlow audit logs are immutable and retained for 2 years."
    },
    
    # ========== MULTI-FACT (4) ==========
    {
        "question": "What are the three main layers of CloudFlow architecture?",
        "ground_truth": "The three main layers are: API Gateway layer (handles authentication and routing), Service Mesh layer (orchestrates microservices), and Data Storage layer (distributed database with replication)."
    },
    {
        "question": "What are CloudFlow's pricing tiers and their API rate limits?",
        "ground_truth": "Standard tier costs $99/month with 1,000 requests/hour, Premium tier costs $499/month with 10,000 requests/hour, and Enterprise tier has custom pricing with 100,000+ requests/hour."
    },
    {
        "question": "What HTTP status codes indicate authentication failures in CloudFlow API?",
        "ground_truth": "401 Unauthorized indicates missing or invalid API key, and 403 Forbidden indicates valid API key but insufficient permissions."
    },
    {
        "question": "What auto-scaling metrics does CloudFlow monitor?",
        "ground_truth": "CloudFlow monitors CPU utilization (target 70%), memory usage (target 80%), and triggers scaling when thresholds are exceeded for more than 3 consecutive minutes."
    },
    
    # ========== PROCEDURAL (3) ==========
    {
        "question": "How do I authenticate with CloudFlow APIs using an API key?",
        "ground_truth": "Include your API key in the Authorization header as 'Authorization: Bearer YOUR_API_KEY'. All requests must be made over HTTPS, and API keys have the format 'cf_live_' followed by 32 alphanumeric characters."
    },
    {
        "question": "How do I handle rate limit errors in CloudFlow?",
        "ground_truth": "When you receive a 429 error, implement exponential backoff in retry logic, check the X-RateLimit-Reset header to know when limits reset, and use the Retry-After header to determine wait time (1s, then 2s, then 4s, etc.)."
    },
    {
        "question": "What steps should I follow to optimize CloudFlow API performance?",
        "ground_truth": "Implement caching with Redis (TTL 5-60 minutes), batch multiple operations into single API calls, use pagination for large result sets (50-100 items), enable request compression with gzip, and maintain a connection pool with 5-10 concurrent connections."
    },
    
    # ========== COMPARISON (2) ==========
    {
        "question": "What's the difference between Standard and Premium tier rate limits?",
        "ground_truth": "Standard tier allows 1,000 requests per hour with 100 requests per minute burst, while Premium tier allows 10,000 requests per hour with 500 requests per minute burst. Premium also includes priority request processing."
    },
    {
        "question": "How does OAuth 2.0 authentication differ from API key authentication in CloudFlow?",
        "ground_truth": "OAuth 2.0 is recommended for user-facing applications with access tokens valid for 1 hour and provides the Authorization Code flow, while API keys are ideal for server-to-server communication, never expire unless revoked, and have a simpler implementation."
    },
    
    # ========== TROUBLESHOOTING (2) ==========
    {
        "question": "What should I do if I receive a 504 timeout error?",
        "ground_truth": "Increase client timeout to at least 30 seconds, use async endpoints for long-running operations and poll for results, and check the CloudFlow status page for any service degradation."
    },
    {
        "question": "How do I debug slow API response times in CloudFlow?",
        "ground_truth": "Add X-CloudFlow-Debug: true header to requests for detailed debug information, review API logs in the dashboard under Analytics > API Logs, test with curl commands, and verify network connectivity to *.cloudflow.io on port 443."
    },
    
    # ========== EDGE CASES (2) ==========
    {
        "question": "What happens if I use an expired OAuth token?",
        "ground_truth": "If you use an expired access token, you'll receive a 401 Unauthorized error. You should use your refresh token to obtain a new access token. Access tokens are valid for 1 hour and refresh tokens are valid for 30 days."
    },
    {
        "question": "Does CloudFlow support blockchain integration?",
        "ground_truth": "I don't have enough information to answer that question."  # Tests 'I don't know' handling
    },
]

print(f"âœ“ Created {len(test_cases)} test questions with ground truth answers\n")
print("Question breakdown by category:")
print("  - Simple Factual: 5 questions")
print("  - Multi-Fact: 4 questions")
print("  - Procedural: 3 questions")
print("  - Comparison: 2 questions")
print("  - Troubleshooting: 2 questions")
print("  - Edge Cases: 2 questions")

âœ“ Created 18 test questions with ground truth answers

Question breakdown by category:
  - Simple Factual: 5 questions
  - Multi-Fact: 4 questions
  - Procedural: 3 questions
  - Comparison: 2 questions
  - Troubleshooting: 2 questions
  - Edge Cases: 2 questions


In [25]:
print("Generating answers and capturing contexts for all test questions...\n")
evaluation_data = {
    "user_input":[],
    "reference":[],
    "response":[],
    "retrieved_contexts":[]
}
for i, test_case in enumerate(test_cases,1):
    question = test_case['question']
    referance = test_case['ground_truth']

    print(f"[{i}/{len(test_cases)}] Processing: {question[:60]}...")
    answer = rag_chain.invoke(question)

    retrieved_docs = retriever.invoke(question)
    context = [doc.page_content for doc in retrieved_docs]

    evaluation_data['user_input'].append(question)
    evaluation_data['reference'].append(referance)
    evaluation_data["response"].append(answer)
    evaluation_data['retrieved_contexts'].append(context)

print(f"\n   Generated {len(evaluation_data['user_input'])} answer-context pairs")
print("\nDataset structure: ")
print(f"   - user_imput: {len(evaluation_data['user_input'])} questions")
print(f"  - reference: {len(evaluation_data['reference'])} ground truth answers")
print(f"  - response: {len(evaluation_data['response'])} RAG-generated answers")
print(f"  - retrieved_contexts: {len(evaluation_data['retrieved_contexts'])} context lists")

Generating answers and capturing contexts for all test questions...

[1/18] Processing: What is CloudFlow's uptime SLA?...
[2/18] Processing: What authentication protocol does CloudFlow use?...
[3/18] Processing: What is the service mesh technology used by CloudFlow?...
[4/18] Processing: What compliance standards does CloudFlow support?...
[5/18] Processing: How long are CloudFlow audit logs retained?...
[6/18] Processing: What are the three main layers of CloudFlow architecture?...
[7/18] Processing: What are CloudFlow's pricing tiers and their API rate limits...
[8/18] Processing: What HTTP status codes indicate authentication failures in C...
[9/18] Processing: What auto-scaling metrics does CloudFlow monitor?...
[10/18] Processing: How do I authenticate with CloudFlow APIs using an API key?...
[11/18] Processing: How do I handle rate limit errors in CloudFlow?...
[12/18] Processing: What steps should I follow to optimize CloudFlow API perform...
[13/18] Processing: What's the diff

In [26]:
samples = [
    SingleTurnSample(
        user_input=evaluation_data['user_input'][i],
        response=evaluation_data['response'][i],
        retrieved_contexts=evaluation_data['retrieved_contexts'][i],
        reference=evaluation_data['reference'][i]
    ) for i in range(len(evaluation_data['user_input']))
]
eval_dataset = EvaluationDataset(samples=samples)

print("âœ“ Converted to Ragas EvaluationDataset format")
print(f"  Total samples: {len(eval_dataset)}")
print(f"\nSample preview:")
print(f"  Question: {samples[0].user_input}")
print(f"  Answer: {samples[0].response[:100]}...")
print(f"  Contexts: {len(samples[0].retrieved_contexts)} chunks retrieved")


âœ“ Converted to Ragas EvaluationDataset format
  Total samples: 18

Sample preview:
  Question: What is CloudFlow's uptime SLA?
  Answer: CloudFlow's uptime SLA is 99.99% for the Enterprise tier and 99.95% for the Premium tier. The Standa...
  Contexts: 4 chunks retrieved


In [28]:
print("\n" + "=" * 80)
print("CONFIGURING LLM & EMBEDDINGS (WITH FIX)")
print("=" * 80)

print("\nðŸ”§ Creating SEPARATE evaluator LLM with increased limits...")

evaluator_llm =AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    temperature=0,
    max_tokens=3000,
    request_timeout=480,
    n=3
)

print("âœ… Evaluator LLM configured:")
print(f"   Model: gpt-4o")
print(f"   Temperature: 0 (deterministic)")
print(f"   Max tokens: 2000 (sufficient for Ragas evaluations)")
print(f"   Request timeout: 120 seconds")


print("\nðŸ”§ Creating embeddings model...")
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
)

print("âœ… Embeddings configured:")
print(f"   Model: text-embedding-3-small")
print(f"   Dimensions: 1536")

print("\nðŸ”§ Wrapping LLM and embeddings for Ragas...")
ragas_llm = LangchainLLMWrapper(evaluator_llm)
ragas_embeddings = LangchainEmbeddingsWrapper(embeddings)
print("âœ… Wrappers created successfully")

from ragas.metrics import (
    Faithfulness,
    AnswerRelevancy,
    ContextPrecision,
    ContextRecall,
    ContextEntityRecall,
    NoiseSensitivity
)

metrics = [
    Faithfulness(llm=ragas_llm),
    AnswerRelevancy(llm=ragas_llm, embeddings=ragas_embeddings),
    ContextPrecision(llm=ragas_llm),
    ContextRecall(llm=ragas_llm),
    ContextEntityRecall(llm=ragas_llm),
    NoiseSensitivity(llm=ragas_llm)
]

print("âœ“ Configured 6 Ragas metrics (v0.3.9) - All non-multimodal metrics\n")
print("Metric Details:")
print("=" * 80)
print("\n1. FAITHFULNESS")
print("   - Measures: Answer groundedness in retrieved context")
print("   - Type: LLM-based (uses GPT-4o-mini for verification)")
print("   - Initialization: Faithfulness(llm=ragas_llm)")
print("   - Target: >0.7")
print("   - Speed: ~2-3s per question")
print("   - Why: Prevents hallucinations and ensures factual accuracy")

print("\n2. ANSWER RELEVANCY (Response Relevancy)")
print("   - Measures: Semantic relevance of answer to question")
print("   - Type: Embedding-based (uses cosine similarity)")
print("   - Initialization: AnswerRelevancy(embeddings=ragas_embeddings)")
print("   - Target: >0.7")
print("   - Speed: ~0.5s per question")
print("   - Why: Ensures on-topic, focused responses")

print("\n3. CONTEXT PRECISION")
print("   - Measures: Ranking quality of retrieved chunks")
print("   - Type: LLM-based ground truth comparison")
print("   - Initialization: ContextPrecision(llm=ragas_llm)")
print("   - Target: >0.6")
print("   - Speed: ~1s per question")
print("   - Why: Evaluates retrieval algorithm effectiveness")

print("\n4. CONTEXT RECALL")
print("   - Measures: Completeness of retrieved information")
print("   - Type: LLM-based ground truth comparison")
print("   - Initialization: ContextRecall(llm=ragas_llm)")
print("   - Target: >0.6")
print("   - Speed: ~1s per question")
print("   - Why: Ensures no critical information is missed")

print("\n5. CONTEXT ENTITY RECALL")
print("   - Measures: Key entities from ground truth present in retrieved contexts")
print("   - Type: LLM-based entity extraction and matching")
print("   - Initialization: ContextEntityRecall(llm=ragas_llm)")
print("   - Target: >0.6")
print("   - Speed: ~1-2s per question")
print("   - Why: Ensures important named entities are retrieved")

print("\n6. NOISE SENSITIVITY")
print("   - Measures: Robustness to irrelevant/noisy context")
print("   - Type: LLM-based evaluation with added noise")
print("   - Initialization: NoiseSensitivity(llm=ragas_llm)")
print("   - Target: >0.7")
print("   - Speed: ~2-3s per question")
print("   - Why: Tests if system can ignore distractors")
print("=" * 80)

print("\nðŸ“Œ Note on Ragas 0.3.9:")
print("   All 6 non-multimodal metrics are now configured.")
print("   Metrics are initialized as classes with wrapped LLM/embeddings.")
print("   LangChain components must be wrapped using Ragas wrapper classes.")


CONFIGURING LLM & EMBEDDINGS (WITH FIX)

ðŸ”§ Creating SEPARATE evaluator LLM with increased limits...
âœ… Evaluator LLM configured:
   Model: gpt-4o
   Temperature: 0 (deterministic)
   Max tokens: 2000 (sufficient for Ragas evaluations)
   Request timeout: 120 seconds

ðŸ”§ Creating embeddings model...
âœ… Embeddings configured:
   Model: text-embedding-3-small
   Dimensions: 1536

ðŸ”§ Wrapping LLM and embeddings for Ragas...
âœ… Wrappers created successfully
âœ“ Configured 6 Ragas metrics (v0.3.9) - All non-multimodal metrics

Metric Details:

1. FAITHFULNESS
   - Measures: Answer groundedness in retrieved context
   - Type: LLM-based (uses GPT-4o-mini for verification)
   - Initialization: Faithfulness(llm=ragas_llm)
   - Target: >0.7
   - Speed: ~2-3s per question
   - Why: Prevents hallucinations and ensures factual accuracy

2. ANSWER RELEVANCY (Response Relevancy)
   - Measures: Semantic relevance of answer to question
   - Type: Embedding-based (uses cosine similarity)
   - 

In [29]:
print("Starting Ragas evaluation...")
print(f"  Dataset: {len(eval_dataset)} questions")
print(f"  Metrics: {len(metrics)} (all 6 non-multimodal metrics)")
print(f"  Estimated time: 7-12 minutes\n")

result = evaluate(
    dataset=eval_dataset,
    metrics=metrics
)

print("\nâœ“ Evaluation completed!")
print("\n" + "=" * 80)
print("RAGAS EVALUATION RESULTS (6 Metrics)")
print("=" * 80)
print(result)
print("=" * 80)

Starting Ragas evaluation...
  Dataset: 18 questions
  Metrics: 6 (all 6 non-multimodal metrics)
  Estimated time: 7-12 minutes



Evaluating:   0%|          | 0/108 [00:00<?, ?it/s]

LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
Exception raised in Job[11]: TimeoutError()
Exception raised in Job[5]: TimeoutError()
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
Exception raised in Job[17]: TimeoutError()
Exception raised in Job[23]: TimeoutError()
Exception raised in Job[29]: TimeoutError()
Exception raised in Job[41]: TimeoutError()
Exception raised in Job[35]: TimeoutError()
Exception raised in Job[47]: TimeoutError()
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
Exception raised in Job[53]: TimeoutError()
LLM returned 1 generations inst


âœ“ Evaluation completed!

RAGAS EVALUATION RESULTS (6 Metrics)
{'faithfulness': 0.9768, 'answer_relevancy': 0.8739, 'context_precision': 0.7870, 'context_recall': 0.9815, 'context_entity_recall': 0.4850, 'noise_sensitivity(mode=relevant)': 0.5000}


In [30]:
result_df = result.to_pandas()

print("Detailed Results by Question:\n")
print(result_df.to_string())

Detailed Results by Question:

                                                                            user_input                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

In [31]:
# Calculate summary statistics
print("\n" + "=" * 80)
print("SUMMARY STATISTICS")
print("=" * 80)
print(result_df.describe())



SUMMARY STATISTICS
       faithfulness  answer_relevancy  context_precision  context_recall  \
count     18.000000         18.000000          18.000000       18.000000   
mean       0.976812          0.873870           0.787037        0.981481   
std        0.067554          0.318361           0.364053        0.078567   
min        0.782609          0.000000           0.000000        0.666667   
25%        1.000000          0.963209           0.604167        1.000000   
50%        1.000000          0.986402           1.000000        1.000000   
75%        1.000000          0.995950           1.000000        1.000000   
max        1.000000          1.000000           1.000000        1.000000   

       context_entity_recall  noise_sensitivity(mode=relevant)  
count              18.000000                          2.000000  
mean                0.484965                          0.500000  
std                 0.235395                          0.707107  
min                 0.000000       