In [1]:
### requirements.txt
        # pymilvus==2.5.10
        # sentence-transformers==3.1.1
        # langchain==0.1.13
        # ollama==0.4.3
        # python-docx==0.8.11
        # pypdf==4.3.1
        # SpeechRecognition==3.14.3
        # PyAudio==0.2.14

In [2]:
import warnings
import os
import time
import speech_recognition as sr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader
from docx import Document
from pymilvus import connections, utility, FieldSchema, CollectionSchema, DataType, Collection
from sentence_transformers import SentenceTransformer
from langchain.llms import Ollama
from langchain.prompts import PromptTemplate

# Suppress warnings
warnings.filterwarnings('ignore')

  from tqdm.autonotebook import tqdm, trange


In [3]:
# Configuration
# Tunable settings for the whole notebook; every function below reads these
# module-level constants rather than taking them as parameters.
MILVUS_HOST = "localhost"  # host passed to connections.connect() in init_milvus()
MILVUS_PORT = "19530"  # port passed to connections.connect() in init_milvus()
COLLECTION_NAME = "voice_rag"  # collection that setup_milvus_collection() drops and recreates
EMBEDDING_MODEL = "all-MiniLM-L6-v2"  # model name handed to SentenceTransformer in main()
OLLAMA_MODEL = "gemma2:2b"  # model name handed to the Ollama LLM wrapper in main()
OLLAMA_BASE_URL = "http://localhost:11434"  # base_url handed to the Ollama LLM wrapper in main()
CHUNK_SIZE = 500  # chunk_size for the text splitter; measured with len(), i.e. characters
CHUNK_OVERLAP = 100  # chunk_overlap for the text splitter, in the same unit as CHUNK_SIZE

In [4]:
# Initialize speech recognizer
# Both objects are module-level globals that get_voice_command() uses on every call.
recognizer = sr.Recognizer()
# No device is specified here — presumably this selects the system default
# input device; verify against the SpeechRecognition documentation.
microphone = sr.Microphone()

In [5]:
def get_voice_command():
    """Capture one spoken question from the microphone and return it as text.

    Uses the module-level ``recognizer`` and ``microphone``: calibrates for
    ambient noise, records a single phrase, then transcribes it with
    ``recognizer.recognize_google``.

    Returns:
        str | None: The recognized text, or ``None`` when no speech started
        before the listen timeout, the audio could not be understood, the
        speech service request failed, or another recognition error occurred.
        Callers are expected to treat ``None`` as "try again".
    """
    try:
        with microphone as source:
            print("\nListening... (speak your question)")
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
    except sr.WaitTimeoutError:
        # listen() raises this when nobody starts speaking within `timeout`
        # seconds. Silence is a normal event in an interactive loop, so report
        # it and let the caller retry instead of crashing the session.
        # Only this exception is handled here: device/driver errors still
        # propagate so a broken microphone does not cause an endless retry loop.
        print("No speech detected. Please try again.")
        return None

    try:
        question = recognizer.recognize_google(audio)
        print(f"\nYou asked: {question}")
        return question
    except sr.UnknownValueError:
        print("Sorry, I didn't understand that.")
        return None
    except sr.RequestError:
        print("Speech service unavailable.")
        return None
    except Exception as e:
        print(f"Error in voice recognition: {e}")
        return None

In [6]:
def load_pdf(pdf_path):
    """Return the text of a PDF as one string, pages separated by newlines.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.

    Returns:
        str | None: The ``page_content`` of every loaded page joined with
        ``"\\n"``, or ``None`` if anything goes wrong (the error is printed).
    """
    try:
        page_docs = PyPDFLoader(pdf_path).load()
        page_texts = (doc.page_content for doc in page_docs)
        return "\n".join(page_texts)
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"Error loading PDF: {err}")
        return None

In [7]:
def load_docx(docx_path):
    """Return the text of a DOCX file as one newline-separated string.

    Paragraphs whose text is empty or whitespace-only are skipped.

    Args:
        docx_path: Path of the ``.docx`` file handed to ``Document``.

    Returns:
        str | None: The non-blank paragraph texts joined with ``"\\n"``, or
        ``None`` if anything goes wrong (the error is printed).
    """
    try:
        paragraphs = Document(docx_path).paragraphs
        non_blank = [p.text for p in paragraphs if p.text.strip()]
        return "\n".join(non_blank)
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"Error loading DOCX: {err}")
        return None

In [8]:
def load_document(file_path):
    """Load a PDF or DOCX file and return its text.

    The loader is chosen from the (case-insensitive) file extension.

    Args:
        file_path: Path of the document to read.

    Returns:
        str | None: Whatever ``load_pdf`` / ``load_docx`` returns for the
        file, or ``None`` when the path does not exist or the extension is
        neither ``.pdf`` nor ``.docx`` (a message is printed in both cases).
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None

    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return load_pdf(file_path)
    if lowered.endswith('.docx'):
        return load_docx(file_path)

    print("Unsupported file format! Please provide a PDF or DOCX file.")
    return None

In [9]:
def split_text(text):
    """Split text into overlapping chunks for embedding.

    Args:
        text: The document text; may be empty or ``None``.

    Returns:
        list: The chunks produced by a ``RecursiveCharacterTextSplitter``
        configured with ``CHUNK_SIZE`` / ``CHUNK_OVERLAP`` and ``len`` as the
        length function, or an empty list when ``text`` is falsy.
    """
    if text:
        splitter_options = {
            "chunk_size": CHUNK_SIZE,
            "chunk_overlap": CHUNK_OVERLAP,
            "length_function": len,
        }
        return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
    # Nothing to split.
    return []

In [10]:
def init_milvus():
    """Open the connection to Milvus and print a confirmation.

    Connects to ``MILVUS_HOST``:``MILVUS_PORT`` via ``connections.connect``;
    any error raised by the connect call propagates to the caller.
    """
    target = {"host": MILVUS_HOST, "port": MILVUS_PORT}
    connections.connect(**target)
    print(f"Connected to Milvus at {MILVUS_HOST}:{MILVUS_PORT}")

In [11]:
def setup_milvus_collection(embedding_dim):
    """Recreate the ``COLLECTION_NAME`` collection and index its vector field.

    Any existing collection with the same name is dropped first, so earlier
    contents are discarded on every call.

    Args:
        embedding_dim: Dimension used for the ``embedding`` vector field.

    Returns:
        Collection: The new collection with an index created on ``embedding``.
    """
    # Start from a clean slate.
    if utility.has_collection(COLLECTION_NAME):
        utility.drop_collection(COLLECTION_NAME)

    # Schema: auto-generated primary key, the chunk text, and its embedding.
    id_field = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True)
    content_field = FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=65535)
    vector_field = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=embedding_dim)
    schema = CollectionSchema([id_field, content_field, vector_field], "Voice RAG Collection")
    voice_collection = Collection(COLLECTION_NAME, schema)

    voice_collection.create_index(
        "embedding",
        {
            "index_type": "IVF_FLAT",
            "metric_type": "L2",
            "params": {"nlist": 128},
        },
    )
    return voice_collection

In [12]:
def process_document(file_path):
    """Load a document and split it into chunks, printing progress.

    Args:
        file_path: Path of the PDF or DOCX file to process.

    Returns:
        list | None: The chunks returned by ``split_text``, or ``None`` when
        ``load_document`` yields no text.
    """
    doc_name = os.path.basename(file_path)
    print(f"\nProcessing document: {doc_name}")

    text = load_document(file_path)
    if text:
        print(f"Loaded document with {len(text)} characters")
        chunks = split_text(text)
        print(f"Split into {len(chunks)} chunks")
        return chunks

    # Loading failed or produced an empty string.
    return None

In [13]:
def stream_response(response):
    """Print ``response`` one item at a time with a short pause between items.

    Each item is written without a trailing newline and flushed immediately,
    followed by a 0.02 second sleep, giving a typewriter-like effect.

    Args:
        response: The text to print (iterated item by item).
    """
    delay_seconds = 0.02
    for piece in response:
        print(piece, end='', flush=True)
        time.sleep(delay_seconds)

In [14]:
def main():
    """Run the interactive voice question-answering session.

    Steps, in order:
      1. Ask for a document path and chunk the document.
      2. Connect to Milvus, embed the chunks, recreate the collection, and
         store the chunks with their embeddings.
      3. Build the Ollama LLM and the prompt template.
      4. Loop: capture a spoken question, retrieve the 3 nearest chunks,
         ask the LLM, and stream the answer. A question containing
         "exit" or "quit" ends the loop.

    Returns early (``None``) when the document yields no chunks.
    """
    # Document processing
    doc_path = input("Enter path to your document (PDF or DOCX): ").strip()
    chunks = process_document(doc_path)
    if not chunks:
        return

    # Initialize components
    init_milvus()
    encoder = SentenceTransformer(EMBEDDING_MODEL)
    chunk_vectors = encoder.encode(chunks, show_progress_bar=True)
    vector_dim = chunk_vectors.shape[1]
    collection = setup_milvus_collection(vector_dim)

    # Store embeddings (column order matches the non-auto-id schema fields)
    collection.insert([chunks, chunk_vectors.tolist()])
    collection.load()
    print(f"Stored {len(chunks)} chunks in Milvus")

    # Initialize LLM
    llm = Ollama(base_url=OLLAMA_BASE_URL, model=OLLAMA_MODEL)

    # Simple prompt template
    prompt_template = """Answer this question based on the context:
    Context: {context}
    Question: {question}
    Answer concisely:"""

    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    # Voice interaction loop
    print("\nVoice RAG system ready! Speak your questions...")
    while True:
        question = get_voice_command()
        if not question:
            # Nothing usable was captured; listen again.
            continue

        spoken = question.lower()
        if "exit" in spoken or "quit" in spoken:
            break

        # Search Milvus
        question_vector = encoder.encode([question])
        search_results = collection.search(
            data=question_vector,
            anns_field="embedding",
            param={"metric_type": "L2", "params": {"nprobe": 10}},
            limit=3,
            output_fields=["content"]
        )

        # Prepare context from the hits of the single query vector
        top_hits = search_results[0]
        context = "\n".join(hit.entity.get("content") for hit in top_hits)

        # Generate answer
        filled_prompt = prompt.format(context=context, question=question)
        answer = llm(filled_prompt)

        # Print only the answer
        stream_response(answer.strip())

In [15]:
# Entry point: start the interactive session when this code runs as the main
# module. main() blocks on input() and on the voice loop until the user says
# "exit" or "quit".
if __name__ == "__main__":
    main()

Enter path to your document (PDF or DOCX):  test_data/CAG-test_data.docx



Processing document: CAG-test_data.docx
Loaded document with 2944 characters
Split into 10 chunks
Connected to Milvus at localhost:19530


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Stored 10 chunks in Milvus

Voice RAG system ready! Speak your questions...

Listening... (speak your question)

You asked: tell me about the key points of this document
Here are the key points from the document:

* **Autonomous Driving:** Advanced sensors (LiDAR, radar, cameras) enable vehicles to navigate complex environments without human input. This technology is expected to become more common on highways in the near future. 
* **Connected & Smart Vehicles:** V2X technology enables cars to communicate with other vehicles, infrastructure, and cloud platforms for increased efficiency and safety.  The document also mentions "connected cars" and the use of IoT. 
* **IoT and Connected Features:** The internet of things (IoT) is driving the development of "connected cars" which offer features like real-time traffic updates, remote diagnostics, and over-the-air software updates.


**In short:**  The document discusses how technology is transforming vehicles into smart devices with increas