# Embedding Service Testing Notebook
This notebook tests the document embedding and vector database functionality.

In [1]:
# Setup and imports
import os
import sys

# Put the working directory at the front of sys.path so the project-local
# modules imported by later cells (config, services.*) are resolved from here.
# Any existing entry for the same directory is dropped first, so re-running
# this cell keeps exactly one entry instead of stacking duplicates.
current_dir = os.getcwd()
sys.path[:] = [current_dir] + [entry for entry in sys.path if entry != current_dir]

print(f"Current working directory: {current_dir}")
print(f"Python path includes: {current_dir}")

Current working directory: c:\Users\Chutchanan.Ma\Desktop\Project\Chatbot
Python path includes: c:\Users\Chutchanan.Ma\Desktop\Project\Chatbot


In [2]:
# Check required packages
required_packages = ['langchain', 'chromadb', 'openai', 'tiktoken']

# Attempt to import each dependency by name and report the outcome,
# including the pip command to run when one is missing.
for pkg_name in required_packages:
    try:
        __import__(pkg_name)
    except ImportError:
        print(f"‚ùå {pkg_name} is NOT installed")
        print(f"   Install with: pip install {pkg_name}")
    else:
        print(f"‚úÖ {pkg_name} is installed")

‚úÖ langchain is installed
‚úÖ chromadb is installed
‚úÖ openai is installed
‚úÖ tiktoken is installed


In [3]:
# Test config and environment
try:
    import config
    print("‚úÖ Config loaded successfully")

    # Check OpenAI API key: an empty/missing value and the untouched
    # template placeholder are both treated as "not set".
    key_missing = (
        not config.OPENAI_API_KEY
        or config.OPENAI_API_KEY == "your-api-key-here"
    )
    if key_missing:
        print("‚ùå OpenAI API key is not set properly")
        print("   Please update your .env file or config.py")
    else:
        print("‚úÖ OpenAI API key is set")

    # Echo the vector-store settings read from config
    print(f"üìÅ Chroma directory: {config.CHROMA_PERSIST_DIRECTORY}")
    print(f"üìö Collection name: {config.COLLECTION_NAME}")

except Exception as config_error:
    print(f"‚ùå Error loading config: {config_error}")

‚úÖ Config loaded successfully
‚úÖ OpenAI API key is set
üìÅ Chroma directory: ./chroma_db
üìö Collection name: company_documents


In [4]:
# Create test documents
import os

# Directory holding the sample corpus; created if it does not exist yet
data_dir = "data/company_docs"
os.makedirs(data_dir, exist_ok=True)

# Sample file name -> body text (leading/trailing whitespace is trimmed on write)
test_docs = {
    "company_overview.txt": """
Our company is a leading provider of AI chatbot solutions.
We specialize in natural language processing and customer support automation.
Founded in 2020, we have helped over 100 companies improve their customer service.
Our team consists of AI engineers, data scientists, and customer success specialists.
""",
    "company_policy.txt": """
Company Policy Guidelines:

1. Security: All employees must follow security protocols and use VPN when working remotely.
2. Remote Work: We offer flexible work arrangements including remote and hybrid options.
3. Benefits: Full health insurance, dental coverage, and 401k matching.
4. Time Off: 20 days PTO plus national holidays.
5. Professional Development: $2000 annual budget for training and conferences.
""",
    "technical_specs.txt": """
Technical Specifications:

Our chatbot platform supports:
- Multiple programming languages (Python, JavaScript, Java)
- Integration with popular CRM systems (Salesforce, HubSpot)
- Real-time analytics and reporting
- Multi-language support (English, Spanish, French, German)
- API endpoints for custom integrations
- Enterprise-grade security with SOC 2 compliance
"""
}

# Write each sample to disk as UTF-8, overwriting any earlier copy
for doc_name, doc_body in test_docs.items():
    doc_path = os.path.join(data_dir, doc_name)
    with open(doc_path, 'w', encoding='utf-8') as handle:
        handle.write(doc_body.strip())
    print(f"‚úÖ Created: {doc_path}")

# List everything now present in the directory (pre-existing files included)
print(f"\nüìÅ Files in {data_dir}:")
for entry in os.listdir(data_dir):
    print(f"   - {entry}")

‚úÖ Created: data/company_docs\company_overview.txt
‚úÖ Created: data/company_docs\company_policy.txt
‚úÖ Created: data/company_docs\technical_specs.txt

üìÅ Files in data/company_docs:
   - BluebikVulcan_website.pdf
   - company_overview.txt
   - company_policy.txt
   - technical_specs.txt


In [8]:
# Test embedding service import
# Confirms that the project's EmbeddingService class can be imported; on
# failure the error is printed and the reader is pointed at the simpler test.
try:
    from services.embedding_service import EmbeddingService
except Exception as import_error:
    print(f"‚ùå Error importing EmbeddingService: {import_error}")
    print("\nLet's try a simpler approach...")
else:
    print("‚úÖ EmbeddingService imported successfully")

‚úÖ EmbeddingService imported successfully


In [None]:
# Simple embedding test (if import failed above)
# This cell provides a basic embedding test without the full service:
# it constructs the OpenAI embeddings object and a text splitter, then loads
# one of the sample documents and splits it into chunks.

try:
    import os  # needed for the existence check below; keeps this cell self-contained

    from langchain.embeddings import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.document_loaders import TextLoader
    import config
    
    print("üß™ Testing basic embedding functionality...")
    
    # Initialize embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=config.OPENAI_API_KEY)
    print("‚úÖ OpenAI embeddings initialized")
    
    # Test text splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    print("‚úÖ Text splitter initialized")
    
    # Test loading a document
    test_file = "data/company_docs/company_overview.txt"
    if os.path.exists(test_file):
        # The sample files are written as UTF-8, so read them back as UTF-8
        # rather than relying on the platform's default encoding.
        loader = TextLoader(test_file, encoding="utf-8")
        documents = loader.load()
        print(f"‚úÖ Loaded document: {len(documents)} doc(s)")
        print(f"üìÑ Content preview: {documents[0].page_content[:100]}...")
        
        # Test splitting
        texts = text_splitter.split_documents(documents)
        print(f"‚úÖ Split into {len(texts)} chunks")
    else:
        print(f"‚ùå Test file not found: {test_file}")
    
except Exception as e:
    # Best-effort smoke test: report the failure instead of aborting the notebook
    print(f"‚ùå Error in basic embedding test: {e}")

In [None]:
# Test ChromaDB initialization
# Opens the persistent Chroma collection named in config, stores one test
# document and runs a similarity search against it.
try:
    import chromadb
    from langchain.vectorstores import Chroma
    from langchain.embeddings import OpenAIEmbeddings
    import config
    
    print("üß™ Testing ChromaDB...")
    
    # Initialize embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=config.OPENAI_API_KEY)
    
    # Initialize Chroma
    vectorstore = Chroma(
        collection_name=config.COLLECTION_NAME,
        embedding_function=embeddings,
        persist_directory=config.CHROMA_PERSIST_DIRECTORY
    )
    
    print("‚úÖ ChromaDB initialized successfully")
    
    # Test adding a simple document
    test_texts = ["This is a test document for our company chatbot system."]
    test_metadatas = [{"source": "test", "type": "example"}]
    # Fixed id so that re-running this cell reuses the same record instead of
    # storing yet another copy of the test document in the persistent collection.
    # NOTE(review): relies on add_texts honouring explicit ids — confirm against
    # the installed langchain/chromadb versions.
    test_ids = ["notebook-chromadb-smoke-test"]
    
    vectorstore.add_texts(test_texts, test_metadatas, ids=test_ids)
    print("‚úÖ Test document added to vector store")
    
    # Test search
    results = vectorstore.similarity_search("chatbot", k=1)
    print(f"‚úÖ Search test successful: found {len(results)} result(s)")
    
    if results:
        print(f"üìÑ Result: {results[0].page_content}")
    
except Exception as e:
    # Best-effort smoke test: report the failure instead of aborting the notebook
    print(f"‚ùå Error testing ChromaDB: {e}")
    print("This might be due to missing OpenAI API key or network issues.")

In [None]:
# Full embedding service test (if everything above worked)
# Builds the project's EmbeddingService, ingests the sample documents,
# runs a few searches and prints the collection info.
try:
    # Re-attempt importing the full service
    from services.embedding_service import EmbeddingService

    print("üöÄ Testing full EmbeddingService...")

    # Initialize service
    embedding_service = EmbeddingService()
    print("‚úÖ EmbeddingService initialized")

    # Process documents
    print("üìö Processing company documents...")
    embedding_service.process_and_store_documents("./data/company_docs")
    print("‚úÖ Documents processed and stored")

    # Test search: one query per sample document topic
    test_queries = [
        "What is our company policy?",
        "Tell me about remote work",
        "What programming languages do we support?"
    ]

    for question in test_queries:
        print(f"\nüîç Testing query: '{question}'")
        results = embedding_service.search_similar_documents(question, k=2)

        # Show the first 100 characters of each hit, numbered from 1
        for rank, hit in enumerate(results, start=1):
            print(f"   Result {rank}: {hit.page_content[:100]}...")

    # Get collection info
    info = embedding_service.get_collection_info()
    print(f"\nüìä Collection info: {info}")

except Exception as service_error:
    print(f"‚ùå Error testing full EmbeddingService: {service_error}")
    print("This is likely due to OpenAI API key issues or missing dependencies.")

In [None]:
# Summary and next steps
# The closing message is assembled as a list of lines and emitted in one go.
summary_lines = [
    "üìã EMBEDDING SERVICE TEST SUMMARY",
    "=" * 40,
    "",
    "If all tests above passed:",
    "‚úÖ Your embedding service is ready!",
    "‚úÖ You can proceed to test the chatbot service",
    "",
    "If some tests failed:",
    "‚ùå Check your OpenAI API key in .env file",
    "‚ùå Make sure all packages are installed: pip install -r requirements.txt",
    "‚ùå Check internet connection for OpenAI API calls",
    "",
    "Next: Run the chatbot service test notebook",
]
print("\n".join(summary_lines))