In [2]:
import os
import warnings
from typing import List

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from pydantic import BaseModel, Field
from enum import Enum

from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

from dotenv import load_dotenv

print("✓ All libraries imported successfully")

✓ All libraries imported successfully


In [3]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Every setting below is read with os.getenv(), which returns None for an unset
# variable; that would otherwise only surface later as an opaque client error.
# Warn up front (rather than raise) so that any fallback configuration the
# client libraries support still gets a chance to work.
_missing_env = [
    name
    for name in (
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_API_VERSION",
        "AZURE_OPENAI_DEPLOYMENT_NAME",
        "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME",
    )
    if not os.getenv(name)
]
if _missing_env:
    warnings.warn(
        "Missing Azure OpenAI environment variables: " + ", ".join(_missing_env)
    )

# Connection settings shared by all three clients, defined once so they cannot drift apart.
azure_connection = {
    "azure_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.getenv("AZURE_OPENAI_API_KEY"),
    "api_version": os.getenv("AZURE_OPENAI_API_VERSION"),
}
chat_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")

# Embedding client used to vectorise the document chunks.
embeddings = AzureOpenAIEmbeddings(
    model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"),
    **azure_connection,
)

# Chat client for the RAG chain; completions are capped at 500 tokens.
llm = AzureChatOpenAI(
    deployment_name=chat_deployment,
    max_tokens=500,
    **azure_connection,
)

# Second chat client on the same deployment with a larger 1000-token budget.
judge_llm = AzureChatOpenAI(
    deployment_name=chat_deployment,
    max_tokens=1000,
    **azure_connection,
)

# Smoke test: one real round-trip to confirm the chat deployment is reachable.
test_response = llm.invoke("Say, 'LLM is Ready!'")
print(f"✓ LLM Response: {test_response.content}")
print("\n✓ All models initialized successfully")


✓ LLM Response: LLM is Ready!

✓ All models initialized successfully


In [5]:
# CloudFlow documentation corpus: the knowledge base that later cells split,
# embed and retrieve from. Each entry is a langchain Document whose
# page_content holds one documentation page and whose metadata carries three
# string keys:
#   "source"     - short slug identifying the page
#   "topic"      - category; matches the section banners below
#   "difficulty" - "beginner", "intermediate" or "advanced"
# The page text is runtime data that gets embedded, so edit it deliberately.
cloudflow_docs = [
    # ============================================================================
    # ARCHITECTURE DOCUMENTS (3)
    # ============================================================================
    Document(
        page_content="""CloudFlow Architecture Overview

CloudFlow is a distributed cloud platform built on microservices architecture. The platform consists of three main layers that work together to provide a robust, scalable infrastructure.

The API Gateway layer handles all incoming requests using OAuth 2.0 authentication and routes them through our service mesh powered by Istio. This layer provides load balancing, SSL termination, and request routing capabilities.

The Service Mesh layer orchestrates communication between microservices, providing service discovery, health checking, and automatic failover. It uses Kubernetes for container orchestration across multiple availability zones.

The Data Storage layer implements a distributed database system with automatic replication across three availability zones. This ensures data durability and supports horizontal scaling based on demand.

CloudFlow guarantees 99.99% uptime SLA with triple redundancy across availability zones. The platform supports horizontal scaling with automatic load balancing, allowing each service to scale independently based on CPU and memory metrics.""",
        metadata={"source": "architecture_overview", "topic": "architecture", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Scaling Mechanisms

CloudFlow implements sophisticated auto-scaling mechanisms to handle varying workloads efficiently. The platform monitors real-time metrics to make intelligent scaling decisions.

Horizontal Pod Autoscaling (HPA) adjusts the number of pod replicas based on CPU utilization (target: 70%) and memory usage (target: 80%). When these thresholds are exceeded for more than 3 consecutive minutes, the system automatically provisions additional pods.

Vertical scaling adjusts resource allocation for individual services. CloudFlow can increase or decrease CPU and memory limits without downtime, using Kubernetes resource management capabilities.

The platform supports bursting to handle sudden traffic spikes. During burst periods, CloudFlow can temporarily scale up to 500% of baseline capacity for up to 15 minutes before triggering permanent scaling.

Load balancing distributes traffic across all available pods using a weighted round-robin algorithm. Health checks run every 10 seconds, and unhealthy pods are automatically removed from the rotation within 30 seconds.""",
        metadata={"source": "scaling_guide", "topic": "architecture", "difficulty": "advanced"}
    ),
    
    Document(
        page_content="""CloudFlow System Components

CloudFlow's architecture comprises several key components that work in harmony to deliver reliable cloud services.

The Control Plane manages the overall system state, including service registration, configuration management, and orchestration. It runs on a dedicated cluster with five replicas for high availability.

The Data Plane handles actual request processing and data flow. It consists of worker nodes that execute application workloads and process user requests. Each data plane node has 16 CPU cores and 64GB RAM.

The Observability Stack includes Prometheus for metrics collection, Grafana for visualization, and ELK (Elasticsearch, Logstash, Kibana) for log aggregation. Metrics are collected every 15 seconds and retained for 90 days.

The Service Registry maintains a real-time directory of all available services and their endpoints. It uses etcd for distributed consensus and supports automatic service discovery with DNS-based lookups.

The Message Queue system, based on Apache Kafka, handles asynchronous communication between services with guaranteed message delivery and ordering.""",
        metadata={"source": "system_components", "topic": "architecture", "difficulty": "intermediate"}
    ),
    
    # ============================================================================
    # API DOCUMENTATION (4)
    # ============================================================================
    Document(
        page_content="""CloudFlow API Authentication

CloudFlow APIs support two authentication methods: OAuth 2.0 and API Keys. Both methods provide secure access to platform resources.

OAuth 2.0 is recommended for user-facing applications. It supports the Authorization Code flow and provides access tokens valid for 1 hour and refresh tokens valid for 30 days. To implement OAuth 2.0, direct users to the authorization endpoint at https://auth.cloudflow.io/oauth/authorize with your client_id and redirect_uri parameters.

API Keys are ideal for server-to-server communication and background jobs. Each API key has the format "cf_live_" followed by 32 alphanumeric characters. API keys never expire unless explicitly revoked.

To authenticate requests, include your API key in the Authorization header: "Authorization: Bearer YOUR_API_KEY". All API requests must be made over HTTPS; HTTP requests will be rejected with a 403 error.

API keys can be scoped to specific permissions (read, write, admin) and restricted to specific IP addresses for enhanced security. You can manage your API keys through the CloudFlow dashboard or the /api/v1/keys endpoint.""",
        metadata={"source": "api_authentication", "topic": "api", "difficulty": "beginner"}
    ),
    
    Document(
        page_content="""CloudFlow REST API Endpoints

CloudFlow provides a comprehensive REST API with endpoints organized by resource type. All endpoints follow RESTful conventions and return JSON responses.

Base URL: https://api.cloudflow.io/v1

Resources endpoint: GET /api/v1/resources - List all resources with pagination (max 100 per page). Supports filtering by type, status, and creation date.

Resource creation: POST /api/v1/resources - Create a new resource. Required fields: name (string), type (string), config (object). Returns 201 Created on success.

Resource details: GET /api/v1/resources/{id} - Retrieve detailed information about a specific resource by ID.

Resource update: PUT /api/v1/resources/{id} - Update an existing resource. Supports partial updates with PATCH /api/v1/resources/{id}.

Resource deletion: DELETE /api/v1/resources/{id} - Delete a resource. Returns 204 No Content on success. Deleted resources are soft-deleted and can be recovered within 30 days.

All list endpoints support query parameters: limit (default: 25, max: 100), offset (default: 0), sort (default: created_at), order (asc|desc).""",
        metadata={"source": "api_endpoints", "topic": "api", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow API Rate Limiting

CloudFlow implements rate limiting to ensure fair usage and platform stability. Rate limits vary by pricing tier and authentication method.

Standard Tier: 1,000 requests per hour per API key. Burst capacity allows up to 100 requests per minute. Exceeding limits returns HTTP 429 (Too Many Requests).

Premium Tier: 10,000 requests per hour per API key with burst capacity of 500 requests per minute. Premium tier also includes priority request processing.

Enterprise Tier: Custom rate limits negotiated based on usage patterns. Typically starts at 100,000 requests per hour with dedicated infrastructure.

Rate limit headers are included in every response:
- X-RateLimit-Limit: Maximum requests per hour
- X-RateLimit-Remaining: Remaining requests in current window
- X-RateLimit-Reset: Unix timestamp when the limit resets

When rate limited, the Retry-After header indicates how many seconds to wait before retrying. Implement exponential backoff: wait 1s, then 2s, then 4s, etc.

OAuth 2.0 authenticated requests have separate, higher limits: 5,000 requests per hour for Standard tier.""",
        metadata={"source": "api_rate_limits", "topic": "api", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow API Error Codes

CloudFlow APIs use standard HTTP status codes and provide detailed error messages in JSON format to help diagnose issues.

Authentication Errors:
- 401 Unauthorized: Missing or invalid API key. Check the Authorization header.
- 403 Forbidden: Valid API key but insufficient permissions for the requested operation.

Client Errors:
- 400 Bad Request: Invalid request format or missing required fields. The response includes a "details" field explaining what's wrong.
- 404 Not Found: Requested resource doesn't exist. Verify the resource ID.
- 409 Conflict: Request conflicts with current resource state (e.g., duplicate name).
- 422 Unprocessable Entity: Request format is valid but contains semantic errors.
- 429 Too Many Requests: Rate limit exceeded. Check X-RateLimit-Reset header.

Server Errors:
- 500 Internal Server Error: Unexpected server error. CloudFlow team is automatically notified.
- 502 Bad Gateway: Temporary issue with upstream services. Retry after a few seconds.
- 503 Service Unavailable: Scheduled maintenance or system overload. Check status.cloudflow.io.

Error Response Format: {"error": {"code": "error_code", "message": "Human-readable message", "details": {...}}}""",
        metadata={"source": "api_error_codes", "topic": "api", "difficulty": "beginner"}
    ),
    
    # ============================================================================
    # SECURITY DOCUMENTATION (2)
    # ============================================================================
    Document(
        page_content="""CloudFlow Security Features

Security is a top priority at CloudFlow. We implement industry-leading security practices to protect your data and applications.

Encryption: All data is encrypted at rest using AES-256 encryption. Data in transit uses TLS 1.3 with perfect forward secrecy. Encryption keys are rotated every 90 days using AWS KMS.

Network Security: CloudFlow runs in a Virtual Private Cloud (VPC) with strict network segmentation. Public endpoints are protected by Web Application Firewall (WAF) rules that block common attack patterns. DDoS protection is provided by Cloudflare with mitigation capacity up to 50 Gbps.

Access Control: All resources support Role-Based Access Control (RBAC) with customizable roles and permissions. We support integration with external identity providers via SAML 2.0 and OpenID Connect.

Audit Logging: Every API call is logged with timestamp, user identity, IP address, and action taken. Audit logs are immutable and retained for 2 years. You can access logs via the /api/v1/audit-logs endpoint.

Vulnerability Management: CloudFlow undergoes quarterly penetration testing by independent security firms. We maintain a bug bounty program and respond to security reports within 24 hours.""",
        metadata={"source": "security_features", "topic": "security", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Compliance Standards

CloudFlow maintains compliance with major industry standards and regulations to ensure your data is handled responsibly.

SOC 2 Type II: CloudFlow is SOC 2 Type II certified, demonstrating our commitment to security, availability, and confidentiality. Audit reports are available to enterprise customers under NDA.

GDPR Compliance: CloudFlow is fully compliant with the European Union's General Data Protection Regulation. We support data residency requirements, right to erasure, data portability, and provide Data Processing Agreements (DPA) to all customers.

HIPAA: For healthcare customers, CloudFlow offers HIPAA-compliant infrastructure with Business Associate Agreements (BAA). HIPAA features include enhanced audit logging, encrypted backups, and strict access controls.

ISO 27001: CloudFlow's information security management system is certified to ISO 27001:2013 standards. We maintain comprehensive security policies and undergo annual recertification audits.

PCI DSS: For customers processing payment card data, CloudFlow provides PCI DSS Level 1 certified infrastructure. However, we recommend using dedicated payment processors rather than storing card data.

Data Residency: CloudFlow supports data residency in US, EU, UK, and APAC regions to meet local regulatory requirements.""",
        metadata={"source": "compliance_standards", "topic": "security", "difficulty": "advanced"}
    ),
    
    # ============================================================================
    # PRICING DOCUMENTATION (2)
    # ============================================================================
    Document(
        page_content="""CloudFlow Pricing Tiers

CloudFlow offers three pricing tiers designed to meet the needs of individuals, teams, and enterprises.

Standard Tier ($99/month):
- 1,000 API requests per hour
- 100 GB storage included
- 10 GB bandwidth per month
- Community support via forums
- 99.9% uptime SLA
- Up to 5 team members

Premium Tier ($499/month):
- 10,000 API requests per hour
- 1 TB storage included
- 100 GB bandwidth per month
- Email support with 24-hour response time
- 99.95% uptime SLA
- Up to 25 team members
- Advanced monitoring and alerting
- Custom domain support

Enterprise Tier (Custom pricing):
- Custom API rate limits (100,000+ requests/hour)
- Unlimited storage and bandwidth
- 24/7 phone and email support with 1-hour response time
- 99.99% uptime SLA with service credits
- Unlimited team members
- Dedicated account manager
- Custom integrations and professional services
- Private cloud deployment options

All tiers include: SSL certificates, daily backups, API access, and dashboard analytics. Annual billing provides 15% discount.""",
        metadata={"source": "pricing_tiers", "topic": "pricing", "difficulty": "beginner"}
    ),
    
    Document(
        page_content="""CloudFlow Billing Information

Understanding CloudFlow's billing model helps you manage costs effectively and avoid unexpected charges.

Billing Cycle: Subscriptions are billed monthly on the date you signed up. Annual subscriptions are billed upfront with a 15% discount. Billing date can be changed once per year.

Usage-Based Charges: Beyond included quotas, additional usage is billed at:
- API requests: $0.01 per 1,000 requests
- Storage: $0.10 per GB per month
- Bandwidth: $0.08 per GB
- Backup retention (beyond 30 days): $0.05 per GB per month

Payment Methods: CloudFlow accepts credit cards (Visa, Mastercard, Amex), ACH transfers (US only), and wire transfers for invoices over $1,000. Cryptocurrency payments available for annual plans.

Invoicing: Invoices are emailed on the billing date and available in the dashboard. Enterprise customers receive consolidated monthly invoices with 30-day payment terms.

Upgrades and Downgrades: Upgrade anytime to immediately access higher tier features. Downgrades take effect at the next billing cycle. Prorated credits are applied to your account balance.

Free Trial: New customers get 14-day free trial on Premium tier with no credit card required. Trial includes 1,000 API requests and 10 GB storage.""",
        metadata={"source": "billing_info", "topic": "pricing", "difficulty": "beginner"}
    ),
    
    # ============================================================================
    # BEST PRACTICES DOCUMENTATION (3)
    # ============================================================================
    Document(
        page_content="""CloudFlow Performance Optimization

Following these best practices will help you achieve optimal performance from your CloudFlow applications.

Caching Strategy: Implement caching at multiple levels. Use CloudFlow's built-in Redis cache for frequently accessed data with TTL between 5-60 minutes. Cache API responses on the client side and respect Cache-Control headers.

Request Optimization: Batch multiple operations into single API calls when possible. Use pagination for large result sets (recommended page size: 50-100 items). Implement request compression using gzip to reduce bandwidth.

Connection Management: Reuse HTTP connections with keep-alive headers. Maintain a connection pool with 5-10 concurrent connections per API key. Set appropriate timeouts: connection timeout 10s, read timeout 30s.

Query Efficiency: Use field filtering to request only required data: /resources?fields=id,name,status. Leverage server-side filtering instead of retrieving all data and filtering locally.

Asynchronous Processing: For long-running operations, use CloudFlow's async API endpoints. Poll for results using the returned job_id rather than blocking on the initial request.

CDN Usage: Serve static assets through CloudFlow's global CDN with 150+ edge locations. Configure appropriate cache headers for optimal performance: max-age=3600 for semi-static content.""",
        metadata={"source": "performance_optimization", "topic": "best_practices", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Monitoring and Observability

Effective monitoring ensures your CloudFlow applications remain healthy and performant.

Metrics Collection: CloudFlow automatically collects key metrics including request rate, error rate, latency (p50, p95, p99), and resource utilization. Access metrics via the dashboard or Metrics API at /api/v1/metrics.

Custom Metrics: Send custom application metrics using the StatsD protocol. CloudFlow aggregates custom metrics every 60 seconds and retains them for 90 days.

Alerting: Configure alerts for critical conditions like error rate >5%, latency >500ms, or approaching rate limits. CloudFlow supports alerting via email, SMS, Slack, PagerDuty, and webhooks.

Distributed Tracing: Enable distributed tracing to track requests across services. CloudFlow supports OpenTelemetry and provides trace visualization in the dashboard. Sample rate: 10% of requests (configurable up to 100%).

Log Management: CloudFlow retains logs for 7 days by default (30 days for Premium, 90 days for Enterprise). Use structured logging with JSON format for better searchability. Maximum log line length: 32KB.

Dashboard Widgets: Create custom dashboards with real-time metrics, SLA compliance, and cost tracking. Share dashboards with team members or embed in external tools using iframe integration.""",
        metadata={"source": "monitoring_observability", "topic": "best_practices", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Disaster Recovery

CloudFlow implements comprehensive disaster recovery capabilities to protect your data and ensure business continuity.

Backup Strategy: CloudFlow performs automatic daily backups of all data at 2 AM UTC. Backups are encrypted and stored in geographically diverse locations. Retention: 30 days for Standard tier, 90 days for Premium, 1 year for Enterprise.

Point-in-Time Recovery: Enterprise customers can restore data to any point within the retention period with 5-minute granularity. Recovery operations typically complete within 15-30 minutes.

Multi-Region Replication: Enable multi-region replication for critical data. Data is asynchronously replicated to a secondary region within 60 seconds. Failover to secondary region is automatic and takes approximately 5 minutes.

Backup Verification: CloudFlow performs monthly backup restoration tests to ensure data recoverability. Test results are available in your compliance dashboard.

Export Capabilities: Export your data anytime in JSON, CSV, or Parquet format. Full exports are available via the /api/v1/export endpoint. Large exports (>10 GB) are delivered to your S3 bucket.

RTO and RPO: CloudFlow guarantees Recovery Time Objective (RTO) of 4 hours and Recovery Point Objective (RPO) of 1 hour for Enterprise tier. Contact support to initiate disaster recovery procedures.""",
        metadata={"source": "disaster_recovery", "topic": "best_practices", "difficulty": "advanced"}
    ),
    
    # ============================================================================
    # TROUBLESHOOTING DOCUMENTATION (3)
    # ============================================================================
    Document(
        page_content="""Common CloudFlow Errors and Solutions

This guide covers the most common errors encountered when using CloudFlow and their solutions.

Error: "Invalid API Key" (401)
Solution: Verify your API key format starts with "cf_live_" and is exactly 40 characters. Check for extra spaces or newlines. Generate a new API key if the issue persists. API keys are case-sensitive.

Error: "Rate Limit Exceeded" (429)
Solution: Implement exponential backoff in your retry logic. Check X-RateLimit-Reset header to know when limits reset. Consider upgrading to a higher tier if you consistently hit limits. Use batch endpoints to reduce request count.

Error: "Resource Not Found" (404)
Solution: Verify the resource ID is correct and the resource hasn't been deleted. Use the /api/v1/resources endpoint to list available resources. Check if you're using the correct API version (/v1).

Error: "Timeout" (504)
Solution: Increase client timeout to at least 30 seconds. For long-running operations, use async endpoints and poll for results. Check CloudFlow status page for any service degradation.

Error: "Validation Error" (422)
Solution: Review the error details field for specific validation failures. Common issues: missing required fields, invalid data types, values outside allowed ranges. Consult API documentation for correct request format.""",
        metadata={"source": "common_errors", "topic": "troubleshooting", "difficulty": "beginner"}
    ),
    
    Document(
        page_content="""CloudFlow Debugging Guide

When troubleshooting issues with CloudFlow, follow this systematic debugging approach.

Step 1 - Check Service Status: Visit status.cloudflow.io to verify all systems are operational. Subscribe to status updates to receive notifications about incidents and maintenance.

Step 2 - Review API Logs: Access detailed API logs in the CloudFlow dashboard under Analytics > API Logs. Filter by time range, status code, and endpoint. Look for patterns in failed requests.

Step 3 - Enable Debug Mode: Add X-CloudFlow-Debug: true header to requests to receive detailed debug information in responses. Debug mode provides request ID, processing time breakdown, and backend service information.

Step 4 - Test with curl: Isolate issues by testing with curl commands. Example: curl -H "Authorization: Bearer YOUR_API_KEY" -H "X-CloudFlow-Debug: true" https://api.cloudflow.io/v1/resources

Step 5 - Check Network Connectivity: Ensure your network allows outbound HTTPS traffic to *.cloudflow.io on port 443. Verify DNS resolution is working correctly.

Step 6 - Verify SDK Version: If using CloudFlow SDK, ensure you're running the latest version. Outdated SDKs may not support new API features or may have known bugs.

Step 7 - Contact Support: If issues persist, contact CloudFlow support with the request ID from failed requests. Support responds within 24 hours for Standard tier, 4 hours for Premium, 1 hour for Enterprise.""",
        metadata={"source": "debugging_guide", "topic": "troubleshooting", "difficulty": "intermediate"}
    ),
    
    Document(
        page_content="""CloudFlow Support Escalation Process

Understanding CloudFlow's support escalation process ensures your issues are resolved efficiently.

Support Channels:
- Community Forums (All tiers): community.cloudflow.io - Best for general questions, feature requests, and sharing knowledge
- Email Support (Premium & Enterprise): support@cloudflow.io - Include account ID and request ID in subject line
- Phone Support (Enterprise only): +1-888-CLOUDFLOW - Available 24/7 for critical issues
- Slack Channel (Enterprise only): Direct access to engineering team

Issue Severity Levels:
- P0 (Critical): Complete service outage affecting production. Response time: 1 hour for Enterprise, 4 hours for Premium
- P1 (High): Major functionality impaired but workarounds available. Response time: 4 hours for Enterprise, 8 hours for Premium
- P2 (Medium): Minor functionality issues with workarounds. Response time: 24 hours
- P3 (Low): Questions, feature requests, documentation issues. Response time: 48 hours

Escalation Path: If your issue isn't resolved within SLA, it automatically escalates to the next support tier. Enterprise customers can request immediate escalation to engineering team.

Required Information: Include account ID, request ID, error messages, timestamps, steps to reproduce, and expected vs actual behavior. Screenshots and API logs are helpful.""",
        metadata={"source": "support_escalation", "topic": "troubleshooting", "difficulty": "beginner"}
    ),
]

print(f"Created {len(cloudflow_docs)} CloudFlow documentation documents.")
print("\nDocument breakdown by category:")

# Tally topics straight from the metadata in a single pass instead of scanning
# the corpus once per hard-coded topic name; a document with a new topic then
# shows up in the breakdown rather than being silently left out. Dicts keep
# insertion order, so topics print in order of first appearance in the corpus.
topic_counts = {}
for doc in cloudflow_docs:
    topic = doc.metadata["topic"]
    topic_counts[topic] = topic_counts.get(topic, 0) + 1

for topic, count in topic_counts.items():
    print(f"   - {topic.title()}: {count} documents")

Created 17 CloudFlow documentation documents.

Document breakdown by category:
   - Architecture: 3 documents
   - Api: 4 documents
   - Security: 2 documents
   - Pricing: 2 documents
   - Best_Practices: 3 documents
   - Troubleshooting: 3 documents


In [6]:
# Splitter settings gathered in one place: chunk size, overlap between
# neighbouring chunks, and the separator list ordered from coarsest
# (blank line) to finest (empty string).
splitter_options = dict(
    chunk_size=1024,
    chunk_overlap=128,
    separators=["\n\n", "\n", ".", " ", ""],
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break every documentation page into chunks ready for embedding.
chunks = text_splitter.split_documents(cloudflow_docs)

print(f"Split {len(cloudflow_docs)} documents into {len(chunks)} chunks")

# Show the beginning of the first chunk as a quick sanity check.
first_chunk_preview = chunks[0].page_content[:200]
print(f"\nExample chunk (first 200 chars): \n{first_chunk_preview}...")

Split 17 documents into 34 chunks

Example chunk (first 200 chars): 
CloudFlow Architecture Overview

CloudFlow is a distributed cloud platform built on microservices architecture. The platform consists of three main layers that work together to provide a robust, scala...


In [7]:
print("Creating FAISS vector store...")
# Build the index from the chunks using the embeddings client defined earlier.
vector_store = FAISS.from_documents(chunks, embeddings)

# Persist the index so it can be reloaded without rebuilding it.
vectorstore_path = "./llm_judge_faiss"
vector_store.save_local(vectorstore_path)


print(f"✓ FAISS vector store created with {len(chunks)} document chunks")
print(f"✓ Vector store saved to '{vectorstore_path}'")
print("\nTo reload later, use:")
# The hint assigns to `vector_store`, the name the rest of the notebook uses,
# so pasting it restores the variable later cells depend on.
# SECURITY NOTE: allow_dangerous_deserialization=True opts in to loading a
# pickled docstore; only reload index folders that this notebook wrote itself.
print(f"  vector_store = FAISS.load_local('{vectorstore_path}', embeddings, allow_dangerous_deserialization=True)")

Creating FAISS vector store...
✓ FAISS vector store created with 34 document chunks
✓ Vector store saved to './llm_judge_faiss'

To reload later, use:
  vectorstore = FAISS.load_local('./llm_judge_faiss', embeddings, allow_dangerous_deserialization=True)


In [9]:
# Similarity-search retriever over the FAISS index, configured with k=4.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs=dict(k=4))

# Smoke test: run one known question through the retriever.
test_query = "What is CloudFlow's uptime SLA?"
retrieved_docs = retriever.invoke(test_query)

hit_count = len(retrieved_docs)
print(f"Retrieved {hit_count} documents.")

# Preview the first returned chunk.
top_hit_preview = retrieved_docs[0].page_content[:200]
print(f"\nFirst relevant documents (200 chars): \n{top_hit_preview}...")

Retrieved 4 documents.

First relevant documents (200 chars): 
CloudFlow guarantees 99.99% uptime SLA with triple redundancy across availability zones. The platform supports horizontal scaling with automatic load balancing, allowing each service to scale independ...


In [16]:
# Prompt for the answer-generation step of the RAG chain. It has two
# placeholders: {context} (the retrieved chunk text) and {question} (the user's
# question). The wording is runtime behaviour: changing it changes what the
# model is asked to do, so edit it deliberately.
template = """You are a helpful assistant for CloudFlow Platform documentation.
Answer the question based on the following context. If you cannot answer based on
the context, say "I don't have enough information to answer that question."

Be concise and accurate. Include specific details like numbers, limits, and technical
specifications when available in the context. Answer must be in 200 words

Context:
{context}

Question: {question}

Answer:
"""
# Wrap the raw string in a chat prompt template so it can be piped into the LLM.
prompt = ChatPromptTemplate.from_template(template)

def format_doc(docs: "List[Document]") -> str:
    """Join the text of ``docs`` into a single context string.

    Args:
        docs: Objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    passages = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(passages)

# LCEL pipeline. The input question fans out into two prompt variables:
#   "context"  - the question goes through the retriever, and the returned
#                documents are flattened to text by format_doc
#   "question" - the question is passed through unchanged
# The filled prompt is sent to the LLM and the reply is reduced to a plain string.
rag_chain = (
    {"context": retriever | format_doc, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


print("✓ RAG chain created using LCEL")
# The flow description names the helper as it is actually defined (format_doc).
print("\nChain flow: Question → Retriever → format_doc → Prompt → LLM → Answer")

✓ RAG chain created using LCEL

Chain flow: Question → Retriever → format_docs → Prompt → LLM → Answer


In [17]:
# Sample questions for an end-to-end smoke test of the RAG chain.
test_questions = [
    "What is CloudFlow's uptime SLA?",
    "What authentication methods does CloudFlow support?",
    "How do I handle rate limit errors?"
]

print("Testing RAG pipeline with sample questions:\n")

for i, q in enumerate(test_questions,1):
    ans = rag_chain.invoke(q)
    # Minimal check so the success message below is earned: the chain must
    # return a non-empty string for every question.
    assert isinstance(ans, str) and ans.strip(), f"RAG chain returned an empty answer for: {q!r}"
    print(f"{i}. Q: {q}")
    print(f"   A: {ans}")

print("✓ RAG pipeline is working correctly!")


Testing RAG pipeline with sample questions:

1. Q: What is CloudFlow's uptime SLA?
   A: CloudFlow's uptime SLA varies by pricing tier:

- **Standard Tier**: Guarantees a 99.9% uptime SLA.  
- **Premium Tier**: Guarantees a 99.95% uptime SLA.  
- **Enterprise Tier**: Guarantees a 99.99% uptime SLA with service credits available in case of SLA violations.  

For the Enterprise Tier, the platform also offers triple redundancy across availability zones to support the 99.99% uptime guarantee.
2. Q: What authentication methods does CloudFlow support?
   A: CloudFlow supports two authentication methods: **OAuth 2.0** and **API Keys**. 

1. **OAuth 2.0**:
   - Recommended for user-facing applications.
   - Supports the Authorization Code flow.
   - Provides access tokens valid for 1 hour and refresh tokens valid for 30 days.
   - Requires directing users to the authorization endpoint at `https://auth.cloudflow.io/oauth/authorize` with `client_id` and `redirect_uri` parameters.

2. **API Keys*