In [2]:
%pip install pymilvus



Note: you may need to restart the kernel to use updated packages.


In [32]:

import os
from dotenv import load_dotenv
load_dotenv(override=True, dotenv_path="../.env.local")

from docx import Document
from sentence_transformers import SentenceTransformer

my_api_key = os.getenv("OPENAI_API_KEY")



In [5]:
# Connect to  Milvus.
from pymilvus import MilvusClient
from pymilvus import connections

# 1. Configuration
load_dotenv(override=True, dotenv_path="../.env.local")

milvus_uri = os.getenv("MILVUS_URI")
milvus_token = os.getenv("MILVUS_API_KEY")

milvus_conn = connections.connect(
    alias="default",
    uri=milvus_uri,
    token=milvus_token
)

print("Connected to Milvus on Zilliz Cloud")

from pymilvus import Collection

collection = Collection("policy_docs_collection")
collection.load()

collection.schema


Connected to Milvus on Zilliz Cloud


{'auto_id': False, 'description': 'Policy documents with embeddings', 'fields': [{'name': 'doc_id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'title', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'domain', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}}, {'name': 'content', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 2500}}, {'name': 'embedding', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 384}}], 'enable_dynamic_field': False, 'enable_namespace': False}

In [6]:
import os
import time
from docx import Document
from sentence_transformers import SentenceTransformer
from pymilvus import Collection

# 1. Setup
model = SentenceTransformer("all-MiniLM-L6-v2")
data_dir = './documents'
docx_files = [f for f in os.listdir(data_dir) if f.endswith('.docx')][:3]

# 2. Prepare column-wise lists
data_to_insert = []

for i, filename in enumerate(docx_files):
    file_path = os.path.join(data_dir, filename)
    try:
        doc = Document(file_path)
        full_text = " ".join([p.text for p in doc.paragraphs if p.text.strip()])
        
        if full_text:
            vector = model.encode(full_text).tolist()
            
            # Create a dictionary matching your EXACT schema field names
            entity = {
                "doc_id": int(time.time() + i),  # PK
                "embedding": vector,             # FLOAT_VECTOR
                "title": filename[:200],         # VARCHAR
                "domain": "Policy",              # VARCHAR
                "content": full_text[:2000]      # VARCHAR
            }
            data_to_insert.append(entity)
            print(f"Prepared: {filename}")
    except Exception as e:
        print(f"Error: {e}")


Prepared: Customer_Journey_Map.docx
Prepared: Privacy_Policy.docx
Prepared: Terms_ And_Conditions.docx


In [7]:

# 3. Insert into Collection
if data_to_insert:
    try:
        # In this version, we pass the list of dictionaries directly
        res = collection.insert(data_to_insert)
        print(f"Success! Inserted {res.insert_count} records.")
        
        # Make it searchable
        collection.flush()
    except Exception as e:
        print(f"Insertion Error: {e}")

Success! Inserted 3 records.


In [28]:
# Write a query function that takes a user question, retrieves the top 3 most similar chunks, and prints them.

def query_policy_docs(question):
    # 1. Convert question to embedding
    question_embedding = model.encode(question).tolist()

    # 2. Define search parameters
    search_params = {
        "metric_type": "COSINE", # Use COSINE or L2 based on your index
        "params": {"nprobe": 10},
    }

    try:
        # 3. Perform the search
        # We explicitly request the 'title' and 'content' fields to be returned
        results = collection.search(
            data=[question_embedding], 
            anns_field="embedding", 
            param=search_params,
            limit=3,
            output_fields=["title", "content"]
        )
        
        # 4. Print the results
        print(f"\n--- Search Results for: '{question}' ---")
        all_contents = []
        for i, hits in enumerate(results):
            for hit in hits:
                content = hit.entity.get('content')
                all_contents.append(content) # Store for RAG context
                print(f"\nRank: {i+1} | Score: {hit.score:.4f}")
                print(f"Source File: {hit.entity.get('title')}")
                print(f"Snippet: {hit.entity.get('content')[:300]}...") # Print first 300 chars
                print("-" * 30)
                           
        # Join all retrieved chunks into one string
        return "\n\n".join(all_contents)
    except Exception as e:
        print(f"Search Error: {e}")

# --- Example Usage ---
# Ensure the collection is loaded before searching
collection.load()

user_query = "Are SpeakEQ terms and conditions reasonable? Are they standard across mobile apps?"
rag_result = query_policy_docs(user_query)
print(f"rag_result: {rag_result}")



--- Search Results for: 'Are SpeakEQ terms and conditions reasonable? Are they standard across mobile apps?' ---

Rank: 1 | Score: 0.6848
Source File: Terms_ And_Conditions.docx
Snippet: Terms & Conditions Welcome to SpeakEQ! These Terms & Conditions ("Terms") explain the rules for using the SpeakEQ mobile appprovided by Yellow Sapphire Consulting, located in Washington, USA. ownloading and using  to these Terms.   Account
 Voice Recordings and Data  voice recordings, audio files, a...
------------------------------

Rank: 1 | Score: 0.6848
Source File: Terms_ And_Conditions.docx
Snippet: Terms & Conditions Welcome to SpeakEQ! These Terms & Conditions ("Terms") explain the rules for using the SpeakEQ mobile appprovided by Yellow Sapphire Consulting, located in Washington, USA. ownloading and using  to these Terms.   Account
 Voice Recordings and Data  voice recordings, audio files, a...
------------------------------
rag_result: Terms & Conditions Welcome to SpeakEQ! These Terms & Con

In [30]:
# Pass the retrieved context to an OpenAI model (or a local LLM) and generate an answer.


client = OpenAI(api_key=my_api_key)


def generate_rag_answer(question):
    # 2. Get the context from your existing Milvus query function
    # (Assuming query_policy_docs returns the text chunks)
    question_embedding = model.encode(question).tolist()
    
    results = collection.search(
        data=[question_embedding], 
        anns_field="embedding", 
        param={"metric_type": "COSINE", "params": {"nprobe": 10}},
        limit=3,
        output_fields=["content", "title"]
    )

    # 3. Combine retrieved chunks into a single context string
    retrieved_context = ""
    sources = []
    for hit in results[0]:
        retrieved_context += f"\nSource ({hit.entity.get('title')}): {hit.entity.get('content')}\n"
        sources.append(hit.entity.get('title'))

    # 4. Create the System Prompt
    system_prompt = """
    You are a helpful assistant. Use the provided context from our internal  
    documents to answer the user's question. If the answer is not in the context, 
    say you don't know based on internal docs. Do not use outside knowledge.
    """

    user_prompt = f"Context: {retrieved_context}\n\nQuestion: {question}"

    # 5. Generate Answer via OpenAI
    response = client.chat.completions.create( 
    model="gpt-4o", 
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
)

    return response.choices[0].message.content, sources

# --- Execution ---
query = "Are SpeakEQ terms and conditions reasonable? Are they standard across mobile apps?"
answer, source_files = generate_rag_answer(query)

print(f"\nAI ANSWER:\n{answer}")
print(f"\nSOURCES USED: {list(set(source_files))}")



AI ANSWER:
I don't know based on internal docs. The provided context does not specifically evaluate the reasonableness or standard nature of SpeakEQ's terms and conditions compared to other mobile apps. It only details the terms themselves.

SOURCES USED: ['Terms_ And_Conditions.docx']


In [31]:
def evaluate_results(query, milvus_context, rag_answer, ai_search_answer):
    judge_prompt = f"""
    You are an objective auditor. Your job is to compare two AI-generated answers based on the provided Context.
    
    USER QUERY: {query}
    
    INTERNAL DOCUMENT CONTEXT:
    {milvus_context}
    
    ---
    ANSWER A (RAG System): {rag_answer}
    ---
    ANSWER B (Standard AI Search): {ai_search_answer}
    ---
    
    EVALUATION CRITERIA:
    1. Grounding: Does the answer stay loyal to the Internal Context?
    2. Hallucination: Does it invent facts not found in the context?
    3. Accuracy: Which is more correct for this specific company?

    Provide a final verdict on which is better and why.
    """

    # Call the judge (using your client object)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": judge_prompt}],
        temperature=0
    )
    
    return response.choices[0].message.content

# --- How to run the comparison ---
# 1. Get RAG result (uses Milvus)
rag_answer, context_used = generate_rag_answer(user_query)

# 2. Get Standard AI result (no context provided)
standard_response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": user_query}]
)
ai_search_answer = standard_response.choices[0].message.content

# 3. Run the Judge
verdict = evaluate_results("user_query", context_used, rag_answer, ai_search_answer)
print(f"user_query: {user_query}")
print(f"context_used: {context_used}")
print(f"rag_answer: {rag_answer}")
print(f"ai_search_answer: {ai_search_answer}")
print(f"verdict: {verdict}")


user_query: Are SpeakEQ terms and conditions reasonable? Are they standard across mobile apps?
context_used: ['Terms_ And_Conditions.docx', 'Terms_ And_Conditions.docx']
rag_answer: The context provided does not explicitly state whether the terms and conditions of SpeakEQ are reasonable or how they compare to terms of other mobile apps. However, the terms include standard elements such as voice data handling, subscription management, non-liability clauses, and a disclaimer that the service is not a substitute for professional advice, which are common in mobile app agreements. For a determination of reasonableness or a comparison to industry standards, further analysis or comparison would be necessary, which is not available in the provided context.
ai_search_answer: To evaluate if the terms and conditions of SpeakEQ are reasonable, we would need to look at the specific content of those terms. However, I can provide you with some general guidance on assessing their reasonableness and st