In [5]:
from ingramdocai.services.weaviate_client import get_weaviate_client

client = get_weaviate_client()
tenant_id = "tenant-xyz"

results = client.collections.get("DocumentChunk", tenant=tenant_id).query.hybrid(
    query="What are the key terms?",
    limit=5
)

for res in results.objects:
    print(res.properties.get("content", "[No content]"))


TypeError: _CollectionsBase.get() got an unexpected keyword argument 'tenant'

In [8]:
from datetime import datetime
from pathlib import Path

from ingramdocai.core.logger import setup_logger
from ingramdocai.services.document_processing_service import DocumentProcessingService
from ingramdocai.tools.save_session_record import SaveSessionRecordTool
from ingramdocai.services.weaviate_class_manager import sync_schema, ensure_tenant_registered
from ingramdocai.services.document_upsert_embedding import bulk_upsert_document_chunks
from ingramdocai.persistence.db import Base, engine
from ingramdocai.persistence.models import DocumentSession

logger = setup_logger("inject-document")


class TestInjectRunner:
    def __init__(self):
        self.state = type("State", (), {
            "session_id": "session-localtest-001",
            "chunk_count": 0,
            "user_info": {
                "tenant_id": "tenant-xyz",
                "user_id": "user-123"
            }
        })()

    def inject_document(self):
        logger.info("⏳ Checking and initializing database schema if needed...")
        Base.metadata.create_all(bind=engine)

        try:
            sample_docs_dir = Path("tests/sample_docs").resolve()
            sample_docs_dir.mkdir(parents=True, exist_ok=True)

            file_paths = [str(f) for f in sample_docs_dir.glob("*") if f.is_file()]
            if not file_paths:
                logger.warning("⚠️ No documents found in tests/sample_docs. Nothing to process.")
                return

            session_id = self.state.session_id
            tenant_id = self.state.user_info.get("tenant_id")
            user_id = self.state.user_info.get("user_id")

            logger.info(f"Injecting {len(file_paths)} document(s)")
            logger.debug(f"Session → ID: {session_id}, Tenant: {tenant_id}, User: {user_id}")

            SaveSessionRecordTool()._run(
                session_id=session_id,
                tenant_id=tenant_id,
                user_id=user_id,
                file_path=";".join(file_paths),
                status="in_progress",
                created_at=datetime.utcnow(),
                updated_at=datetime.utcnow()
            )

            processor = DocumentProcessingService()
            all_chunks = []

            for file_path in file_paths:
                logger.info(f"Processing file: {file_path}")
                result = processor.process(file_path)
                for chunk in result["chunks"]:
                    chunk.metadata.update({
                        "file_name": Path(file_path).name,
                        "file_type": Path(file_path).suffix.lstrip("."),
                        "tenant_id": tenant_id,
                        "session_id": session_id
                    })
                    all_chunks.append(chunk)

            if not all_chunks:
                logger.warning("⚠️ No chunks generated from input documents.")
                return

            payloads = [{
                "tenant_id": chunk.metadata["tenant_id"],
                "session_id": chunk.metadata["session_id"],
                "file_name": chunk.metadata["file_name"],
                "file_type": chunk.metadata["file_type"],
                "text": chunk.page_content,
                "chunk_id": f"{i+1}",
                "char_count": len(chunk.page_content),
                "source": "document_upload",
                "created_at": datetime.utcnow().isoformat() + "Z"
            } for i, chunk in enumerate(all_chunks)]

            logger.info(f"Prepared {len(payloads)} chunks for upsert")

            sync_schema(tenant_id)
            ensure_tenant_registered(tenant_id)

            bulk_upsert_document_chunks(payloads)
            logger.info(f"Upserted {len(payloads)} document chunks into Weaviate")

            SaveSessionRecordTool()._run(
                session_id=session_id,
                tenant_id=tenant_id,
                user_id=user_id,
                status="completed",
                chunk_count=len(payloads),
                updated_at=datetime.utcnow()
            )

            self.state.chunk_count = len(payloads)

            print("\n====== Document Injection Completed ======")
            print(f"Session ID: {session_id}")
            print(f"Total Chunks: {self.state.chunk_count}")
            print("==========================================\n")

        except Exception as e:
            logger.error(f"Injection failed: {str(e)}")
            SaveSessionRecordTool()._run(
                session_id=self.state.session_id,
                tenant_id=self.state.user_info.get("tenant_id"),
                user_id=self.state.user_info.get("user_id"),
                status="failed",
                error_message=str(e),
                updated_at=datetime.utcnow()
            )
            raise


# ✅ Entry Point
if __name__ == "__main__":
    runner = TestInjectRunner()
    runner.inject_document()


2025-07-12 09:30:47,412 [INFO] inject-document: ⏳ Checking and initializing database schema if needed...


2025-07-12 09:30:47,414 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-12 09:30:47,415 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("document_sessions")
2025-07-12 09:30:47,416 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-07-12 09:30:47,420 INFO sqlalchemy.engine.Engine COMMIT


2025-07-12 09:30:47,422 [INFO] inject-document: Injecting 2 document(s)


2025-07-12 09:30:47,424 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-12 09:30:47,425 INFO sqlalchemy.engine.Engine SELECT document_sessions.session_id AS document_sessions_session_id, document_sessions.tenant_id AS document_sessions_tenant_id, document_sessions.user_id AS document_sessions_user_id, document_sessions.file_path AS document_sessions_file_path, document_sessions.status AS document_sessions_status, document_sessions.chunk_count AS document_sessions_chunk_count, document_sessions.error_message AS document_sessions_error_message, document_sessions.created_at AS document_sessions_created_at, document_sessions.updated_at AS document_sessions_updated_at 
FROM document_sessions 
WHERE document_sessions.session_id = ?
 LIMIT ? OFFSET ?
2025-07-12 09:30:47,426 INFO sqlalchemy.engine.Engine [cached since 1475s ago] ('session-localtest-001', 1, 0)
2025-07-12 09:30:47,427 INFO sqlalchemy.engine.Engine UPDATE document_sessions SET status=?, updated_at=? WHERE document_session

  created_at=datetime.utcnow(),
  updated_at=datetime.utcnow()
2025-07-12 09:30:47,431 [INFO] save_session_record: Session session-localtest-001 updated → status=in_progress
2025-07-12 09:30:47,431 [INFO] save_session_record: Database session closed in SaveSessionRecordTool
2025-07-12 09:30:47,432 [INFO] inject-document: Processing file: /Users/gabrielohaike/Desktop/IngramDocAI/tests/sample_docs/ingram_fact_sheet.pdf
2025-07-12 09:30:51,106 [INFO] document_processor: Processed 6 chunks from /Users/gabrielohaike/Desktop/IngramDocAI/tests/sample_docs/ingram_fact_sheet.pdf
2025-07-12 09:30:51,106 [INFO] inject-document: Processing file: /Users/gabrielohaike/Desktop/IngramDocAI/tests/sample_docs/test_contract.txt
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
2025-07-12 09:30:51,110 [INFO] document_processor: Processed 1 chunks from /Users/gabrielohaike/Desktop/IngramDocAI/tests/sample_docs/test_contract.txt
  "created_at"

2025-07-12 09:30:53,631 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-12 09:30:53,632 INFO sqlalchemy.engine.Engine SELECT document_sessions.session_id AS document_sessions_session_id, document_sessions.tenant_id AS document_sessions_tenant_id, document_sessions.user_id AS document_sessions_user_id, document_sessions.file_path AS document_sessions_file_path, document_sessions.status AS document_sessions_status, document_sessions.chunk_count AS document_sessions_chunk_count, document_sessions.error_message AS document_sessions_error_message, document_sessions.created_at AS document_sessions_created_at, document_sessions.updated_at AS document_sessions_updated_at 
FROM document_sessions 
WHERE document_sessions.session_id = ?
 LIMIT ? OFFSET ?
2025-07-12 09:30:53,632 INFO sqlalchemy.engine.Engine [cached since 1481s ago] ('session-localtest-001', 1, 0)
2025-07-12 09:30:53,633 INFO sqlalchemy.engine.Engine UPDATE document_sessions SET status=?, chunk_count=?, updated_at=? WHERE d

  updated_at=datetime.utcnow()
2025-07-12 09:30:53,634 [INFO] save_session_record: Session session-localtest-001 updated → status=completed
2025-07-12 09:30:53,634 [INFO] save_session_record: Database session closed in SaveSessionRecordTool



Session ID: session-localtest-001
Total Chunks: 7



In [9]:
from ingramdocai.services.weaviate_client import get_weaviate_client

def query_chunks(tenant_id: str, query: str, limit: int = 3):
    client = get_weaviate_client()

    print(f"🔍 Querying chunks for tenant '{tenant_id}' with query: '{query}'")

    # ✅ Proper multi-tenant usage
    collection = client.collections.get("DocumentChunk").with_tenant(tenant_id)

    results = collection.query.hybrid(
        query=query,
        limit=limit
    )

    if not results.objects:
        print("❌ No chunks found.")
        return

    print(f"✅ Found {len(results.objects)} result(s):\n")

    for i, obj in enumerate(results.objects, start=1):
        props = obj.properties
        print(f"🔹 Result #{i}")
        print(f"📄 File: {props.get('file_name')}")
        print(f"📑 Text: {props.get('text')}")
        print(f"📦 Session: {props.get('session_id')}")
        print(f"📅 Created: {props.get('created_at')}")
        print("-" * 80)

if __name__ == "__main__":
    query_chunks(tenant_id="tenant-xyz", query="What is this contract about?")


🔍 Querying chunks for tenant 'tenant-xyz' with query: 'What is this contract about?'
✅ Found 3 result(s):

🔹 Result #1
📄 File: test_contract.txt
📑 Text: This is a fallback sample contract for testing Weaviate injection.
📦 Session: session-localtest-001
📅 Created: 2025-07-12 14:30:51.110777+00:00
--------------------------------------------------------------------------------
🔹 Result #2
📄 File: test_contract.txt
📑 Text: This is a fallback sample contract for testing Weaviate injection.
📦 Session: session-localtest-001
📅 Created: 2025-07-12 14:09:17.667995+00:00
--------------------------------------------------------------------------------
🔹 Result #3
📄 File: ingram_fact_sheet.pdf
📑 Text:  Solid balance sheet and flexible structure 
o Quarter-end cash and cash equivalents balance of $891 million 
o Continued focus on working capital management  
 
 $400 million share repurchase program 
o 12.5 million shares of common stock purchased as of February 8, 2012 for approximately $226 mil

In [12]:
# 🔧 Step 1: Required Imports
from sqlalchemy.orm import Session
from ingramdocai.persistence.db import Base, engine
from ingramdocai.persistence.models import DocumentSession

# 🔎 Step 2: Start a new DB session
db: Session = SessionLocal()

# 📄 Step 3: Query recent sessions
results = db.query(DocumentSession).order_by(DocumentSession.updated_at.desc()).all()

# 🧾 Step 4: Print summary
for r in results:
    print(f"Session ID   : {r.session_id}")
    print(f"Tenant ID    : {r.tenant_id}")
    print(f"User ID      : {r.user_id}")
    print(f"File Path    : {r.file_path}")
    print(f"Status       : {r.status}")
    print(f"Chunk Count  : {r.chunk_count}")
    print(f"Error        : {r.error_message}")
    print(f"Created At   : {r.created_at}")
    print(f"Updated At   : {r.updated_at}")
    print("-" * 80)

# ✅ Step 5: Optional: close connection when done
db.close()


2025-07-12 10:54:05,619 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-07-12 10:54:05,627 INFO sqlalchemy.engine.Engine SELECT document_sessions.session_id AS document_sessions_session_id, document_sessions.tenant_id AS document_sessions_tenant_id, document_sessions.user_id AS document_sessions_user_id, document_sessions.file_path AS document_sessions_file_path, document_sessions.status AS document_sessions_status, document_sessions.chunk_count AS document_sessions_chunk_count, document_sessions.error_message AS document_sessions_error_message, document_sessions.created_at AS document_sessions_created_at, document_sessions.updated_at AS document_sessions_updated_at 
FROM document_sessions ORDER BY document_sessions.updated_at DESC
2025-07-12 10:54:05,627 INFO sqlalchemy.engine.Engine [generated in 0.00112s] ()
Session ID   : session-localtest-001
Tenant ID    : tenant-xyz
User ID      : user-123
File Path    : /Users/gabrielohaike/Desktop/IngramDocAI/tests/sample_docs/test_contrac