In [6]:
from agentcortex import RAGPipeline

# Initialize pipeline
pipeline = RAGPipeline()

# Add some documents
texts = [
    "Python is a powerful programming language for AI development.",
    "Machine learning models require large datasets for training.",
    "Natural language processing helps computers understand human language."
]

# Process and store documents
stats = pipeline.add_documents_from_text(texts)
print(f"Added {stats['total_chunks']} chunks from {stats['text_documents_processed']} documents")

# Query the pipeline
results = pipeline.query("What is Python used for?", k=2)
for result in results:
    print(f"Similarity: {result['similarity']:.3f}")
    print(f"Text: {result['text']}")
    print("-" * 50)

Device set to use cpu


Added 3 chunks from 3 documents
Similarity: 0.670
Text: Python is a powerful programming language for AI development.
--------------------------------------------------
Similarity: 0.357
Text: Natural language processing helps computers understand human language.
--------------------------------------------------


In [5]:
# Clear module cache and reload agentcortex modules
import sys
import importlib

# Remove agentcortex modules from cache
modules_to_remove = [mod for mod in sys.modules.keys() if mod.startswith('agentcortex')]
for mod in modules_to_remove:
    del sys.modules[mod]

print(f"Cleared {len(modules_to_remove)} modules from cache")

Cleared 14 modules from cache


In [9]:
# Process PDF files
pdf_files = ["DDPM.pdf", "DeepSeek-R1.pdf"]
stats = pipeline.add_documents_from_pdfs(pdf_files)

# Query across all documents
results = pipeline.query("Tell me about the research findings", k=3)

Model-based detection failed: The size of tensor a (529) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (529) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (529) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (2639) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (2639) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (2639) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1567) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (2639) must match the size of tensor b (512) at non-singleton dimension 1
Mod

In [10]:
# Check current pipeline state and test with just one PDF first
print("Testing with single PDF first...")

try:
    # Test with just one PDF
    stats = pipeline.add_documents_from_pdfs(["DDPM.pdf"])
    print("✅ Single PDF processed successfully!")
    print(f"Documents processed: {stats.get('pdf_documents_processed', 'N/A')}")
    print(f"Total chunks: {stats.get('total_chunks', 'N/A')}")
    
except Exception as e:
    print(f"❌ Error processing single PDF: {e}")
    # Check if it's a file not found error
    import os
    pdf_path = "DDPM.pdf"
    if os.path.exists(pdf_path):
        print(f"✅ File {pdf_path} exists")
        print(f"File size: {os.path.getsize(pdf_path)} bytes")
    else:
        print(f"❌ File {pdf_path} not found")

Testing with single PDF first...


Model-based detection failed: The size of tensor a (529) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (529) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (2639) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (2639) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1567) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1567) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1474) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1474) must match the size of tensor b (512) at non-singleton dimension 1
Mo

✅ Single PDF processed successfully!
Documents processed: N/A
Total chunks: 17


In [11]:
# Set logging level to reduce verbose output
import logging

# Reduce agentcortex logging verbosity
logging.getLogger('agentcortex').setLevel(logging.WARNING)
logging.getLogger('sentence_transformers').setLevel(logging.WARNING)
logging.getLogger('transformers').setLevel(logging.WARNING)

print("Logging levels adjusted - testing PDF processing with both files now...")

# Process both PDF files
pdf_files = ["DDPM.pdf", "DeepSeek-R1.pdf"]
try:
    stats = pipeline.add_documents_from_pdfs(pdf_files)
    print("✅ Both PDFs processed successfully!")
    print(f"PDF documents processed: {stats.get('pdf_documents_processed', 'N/A')}")
    print(f"Successful extractions: {stats.get('successful_extractions', 'N/A')}")
    print(f"Total chunks: {stats.get('total_chunks', 'N/A')}")
    
    # Query across all documents
    results = pipeline.query("Tell me about the research findings", k=3)
    print(f"\n🔍 Query results:")
    print(f"Found {len(results)} results")
    
    for i, result in enumerate(results, 1):
        print(f"\nResult {i}:")
        print(f"Similarity: {result['similarity']:.3f}")
        print(f"Text preview: {result['text'][:150]}...")
        if i < len(results):
            print("-" * 40)
    
except Exception as e:
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()

Logging levels adjusted - testing PDF processing with both files now...


Model-based detection failed: The size of tensor a (529) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (529) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (2639) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (2639) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1567) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1567) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1474) must match the size of tensor b (512) at non-singleton dimension 1
Model-based detection failed: The size of tensor a (1474) must match the size of tensor b (512) at non-singleton dimension 1
Mo

✅ Both PDFs processed successfully!
PDF documents processed: N/A
Successful extractions: 2
Total chunks: 28

🔍 Query results:
Found 3 results

Result 1:
Similarity: 0.227
Text preview: 

Article
R. L. Jin1, Ruyi Chen1, Shanghao Lu1, Shangyan Zhou1, Shanhuang Chen1, Shengfeng Ye1, Yanping Huang1, Yaohui Li1, Yi Zheng1, Yuchen Zhu1, Yu...
----------------------------------------

Result 2:
Similarity: 0.227
Text preview: 

Article
R. L. Jin1, Ruyi Chen1, Shanghao Lu1, Shangyan Zhou1, Shanhuang Chen1, Shengfeng Ye1, Yanping Huang1, Yaohui Li1, Yi Zheng1, Yuchen Zhu1, Yu...
----------------------------------------

Result 3:
Similarity: 0.227
Text preview: 

Article
R. L. Jin1, Ruyi Chen1, Shanghao Lu1, Shangyan Zhou1, Shanhuang Chen1, Shengfeng Ye1, Yanping Huang1, Yaohui Li1, Yi Zheng1, Yuchen Zhu1, Yu...
