# Milvus Vector Store Usage

This notebook demonstrates how to use the Milvus vector store for storing and retrieving transcript analyses.

## Prerequisites
- A running Milvus database (Docker or standalone)
- Azure OpenAI configured for embeddings

In [None]:
import json
import os
import sys
import uuid

# Make the repository root importable before pulling in project modules
sys.path.append('..')

from src.agent.vector_store import MilvusVectorStore
from src.agent.transcript_analyzer import TranscriptAnalyzer

## 1. Initialize Components

In [None]:
# Initialize vector store and analyzer.
# Both objects are built with default arguments; any connection or model
# settings they use are not visible in this notebook.
# NOTE(review): the message below is printed unconditionally. Presumably
# MilvusVectorStore() connects (or raises) during construction, so reaching
# the print implies a live connection — confirm against the class.
vector_store = MilvusVectorStore()
analyzer = TranscriptAnalyzer()

print("Components initialized and connected to Milvus!")

## 2. Store Transcript Analysis

In [None]:
# Load one sample transcript, analyze it, and store the result in Milvus.
# The encoding is pinned so the read does not depend on the platform's
# default locale encoding (assumes the sample files are UTF-8).
with open('../data/text/sample_transcript_1.txt', 'r', encoding='utf-8') as f:
    transcript = f.read()

# Run the analyzer over the raw transcript text
analysis_result = analyzer.analyze_transcript(transcript)

# Store in Milvus under a freshly generated random UUID; later cells reuse
# `transcript_id` to fetch this record back.
transcript_id = str(uuid.uuid4())
success = vector_store.store_transcript(
    transcript_id=transcript_id,
    transcript_text=transcript,
    analysis_result=analysis_result,
    source_type="text"
)

# store_transcript's return value is used as a success flag
if success:
    print(f"✓ Transcript stored successfully with ID: {transcript_id}")
else:
    print("✗ Failed to store transcript")

## 3. Retrieve Transcript by ID

In [None]:
# Fetch the record back using the ID generated when it was stored
retrieved = vector_store.get_transcript_by_id(transcript_id)

if not retrieved:
    # A falsy result is treated as "no such transcript"
    print("Transcript not found")
else:
    print("Retrieved transcript:")
    print(f"ID: {retrieved['transcript_id']}")
    print(f"Source: {retrieved['source_type']}")
    print(f"Timestamp: {retrieved['timestamp']}")

    # Show only the first 200 characters of the transcript text
    text_preview = retrieved['transcript_text'][:200]
    print(f"\nTranscript preview: {text_preview}...")

    # Pretty-print the analysis, truncated to the first 500 characters
    analysis_json = json.dumps(retrieved['analysis_result'], indent=2)
    print(f"\nAnalysis: {analysis_json[:500]}...")

## 4. Search for Similar Transcripts

In [None]:
# Query text handed to the similarity search
query = "CRM system with mobile access and integration capabilities"

# Ask the vector store for at most the three closest matches
similar_transcripts = vector_store.search_similar_transcripts(query_text=query, top_k=3)

print(f"Found {len(similar_transcripts)} similar transcripts:\n")

# Number the hits from 1 for display
for rank, hit in enumerate(similar_transcripts, start=1):
    print(f"Result {rank}:")
    print(f"  ID: {hit['transcript_id']}")
    print(f"  Distance: {hit['distance']:.4f}")
    print(f"  Source: {hit['source_type']}")
    # Only the first 150 characters of the transcript are shown
    preview = hit['transcript_text'][:150]
    print(f"  Preview: {preview}...")
    print()

## 5. Store Multiple Transcripts

In [None]:
# Analyze and store every .txt transcript in the sample directory.
# `os` is imported in the notebook's top import cell.
# Note: the directory also contains sample_transcript_1.txt, which section 2
# stores as well, so running both cells stores that file again under a new ID.
transcript_dir = '../data/text/'
stored_ids = []

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and the printed log) reproducible across runs and platforms.
for filename in sorted(os.listdir(transcript_dir)):
    if not filename.endswith('.txt'):
        continue

    filepath = os.path.join(transcript_dir, filename)

    # Pin the encoding so the read does not depend on the platform's default
    # locale encoding (assumes the sample files are UTF-8).
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Analyze
    analysis = analyzer.analyze_transcript(content)

    # Store under a freshly generated random UUID
    tid = str(uuid.uuid4())
    success = vector_store.store_transcript(
        transcript_id=tid,
        transcript_text=content,
        analysis_result=analysis,
        source_type="text"
    )

    # Only IDs of successfully stored transcripts are collected
    if success:
        stored_ids.append(tid)
        print(f"✓ Stored {filename} with ID: {tid}")
    else:
        print(f"✗ Failed to store {filename}")

print(f"\nTotal transcripts stored: {len(stored_ids)}")

## 6. Semantic Search Examples

In [None]:
# A handful of topic-style queries to run against the stored transcripts
queries = [
    "security and compliance requirements",
    "pricing and budget discussions",
    "integration with existing systems",
    "mobile and remote access needs"
]

# Horizontal rule printed above and below each query heading
separator = '=' * 60

for query in queries:
    print(f"\n{separator}")
    print(f"Query: {query}")
    print(separator)

    # Keep at most the two closest matches per query
    results = vector_store.search_similar_transcripts(query, top_k=2)

    for rank, hit in enumerate(results, start=1):
        print(f"\nResult {rank} (distance: {hit['distance']:.4f}):")
        # Only the first 200 characters of the transcript are shown
        preview = hit['transcript_text'][:200]
        print(f"Preview: {preview}...")

## 7. Cleanup (Optional)

In [None]:
# Disconnect from Milvus.
# NOTE(review): after this cell `vector_store` is presumably unusable until
# the initialization cell (section 1) is re-run — confirm against
# MilvusVectorStore.disconnect().
vector_store.disconnect()
print("Disconnected from Milvus")