# Memoirr: Preprocessor + Chunker Pipeline Smoke Test

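This notebook smoke-tests the Memoirr Haystack pipelines: it runs the end-to-end SRT-to-Qdrant indexing pipeline (SRT preprocessor → semantic chunker → embedder → Qdrant writer) on a small sample, queries the indexed content through the RAG pipeline, and finally batch-processes a directory of SRT files.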

Requirements:
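- Ensure a local sentence-transformers model is available under `models/<EMBEDDING_MODEL_NAME>/` (with `model.safetensors` and tokenizer/config files). A folder with that name nested deeper under `models/` (e.g., `models/chunker/<EMBEDDING_MODEL_NAME>/`) is also accepted; it is located by a case-insensitive recursive search.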
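- `.env` should set `EMBEDDING_MODEL_NAME` (default included in repo). Optionally set `EMBEDDING_DEVICE` (e.g., `cuda:0`).
- A Qdrant instance must be running and reachable; the recorded runs in this notebook use `http://localhost:6300` with the `memoirr` collection.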

Notes:
- The preprocessor emits cleaned JSONL lines, one per caption.
- The chunker uses Chonkie SemanticChunker with the self-hosted embeddings to create time-aware chunks.


In [None]:

import pathlib
import textwrap
from src.core.config import get_settings

# Load the runtime settings and echo the embedding configuration in use.
settings = get_settings()
print('EMBEDDING_MODEL_NAME =', settings.embedding_model_name)
print('EMBEDDING_DEVICE     =', settings.device)

# Friendly pre-flight check: confirm the model folder exists before the
# heavier pipeline cells try to load it.
models_root = pathlib.Path('models')
model_path = models_root / settings.embedding_model_name
if not model_path.exists():
    # Direct path is missing: scan everything under models/ for a directory
    # whose name equals the last path segment of the model name, ignoring
    # case (the original note says this mirrors the runtime resolver).
    target = settings.embedding_model_name.rsplit('/', 1)[-1].lower()
    candidates = list(
        filter(
            lambda entry: entry.is_dir() and entry.name.lower() == target,
            models_root.rglob('*'),
        )
    )
    if not candidates:
        print('WARNING: Expected model folder not found under models/. The chunker cell may fail.')
    else:
        print('Found candidate model dir at:', candidates[0])


In [None]:
# Tiny three-caption SRT fixture used as input for the pipeline smoke run.
# Built from one literal per SRT line so every line break is explicit; the
# value (including the leading blank line) matches a triple-quoted block.
sample_srt = textwrap.dedent(
    '\n'
    '1\n'
    '00:00:01,000 --> 00:00:02,000\n'
    '- Hello there!\n'
    '\n'
    '2\n'
    '00:00:02,100 --> 00:00:03,000\n'
    'How are you doing?\n'
    '\n'
    '3\n'
    '00:00:03,100 --> 00:00:04,000\n'
    "I'm fine. Thanks!\n"
)
print(sample_srt)


In [None]:
# End-to-end smoke run: SRT → Preprocess → Chunk → Embed → Qdrant
from src.pipelines.srt_to_qdrant import build_srt_to_qdrant_pipeline

print('Building the complete SRT-to-Qdrant pipeline...')
pipe = build_srt_to_qdrant_pipeline()

# Describe the pipeline graph: component names first, then directed links.
print('Pipeline components:')
for node in pipe.graph.nodes:
    print(f'  - {node}')

print('Pipeline connections:')
for link in pipe.graph.edges:
    sender, receiver = link[0], link[1]
    print(f'  - {sender} → {receiver}')

# Feed the sample subtitle text to the 'pre' component's srt_text input.
print('Running pipeline on sample SRT...')
pipeline_input = {'pre': {'srt_text': sample_srt}}
result = pipe.run(pipeline_input)

print('Pipeline Results:')
print('=================')

# Report each stage's stats in pipeline order. A missing key raises
# KeyError right here, which is the desired loud failure for a smoke test.
pre_stats = result['pre']['stats']
print(f'Preprocessor: {pre_stats}')

chunk_stats = result['chunk']['stats']
print(f'Chunker: {chunk_stats}')

write_stats = result['write']['stats']
print(f'Writer: {write_stats}')

print('✅ SUCCESS! Check Qdrant UI at http://localhost:6300/dashboard to see the embedded chunks!')
print('Collection name: memoirr')

In [None]:
# Retrieval Pipeline Test
# Demonstrates how to use the RAG pipeline to retrieve and generate answers
# from indexed content. The query is best-effort: a failure is reported in
# the cell output instead of being raised, and the success banner is only
# printed when the query and the result reporting both finished cleanly.

from src.pipelines.rag_pipeline import RAGPipeline

# Initialize the RAG pipeline
print("Initializing RAG pipeline...")
rag = RAGPipeline()

# Test basic retrieval with a sample query
print("\nTesting basic retrieval...")
query = "What is the main character's name?"

try:
    result = rag.query(query)

    print(f"Query: {query}")
    print(f"Retrieved documents: {result['summary']['documents_retrieved']}")
    print(f"Generated replies: {result['summary']['replies_generated']}")

    # 'answer' and 'sources' are treated as optional parts of the result.
    if result.get('answer'):
        print(f"Answer: {result['answer']}")

    # Show source documents
    if result.get('sources'):
        print("\nTop source documents:")
        # Show the top 3 sources, numbered from 1, with a 100-char preview.
        for rank, source in enumerate(result['sources'][:3], start=1):
            print(f"  {rank}. {source['content'][:100]}... (score: {source['score']:.3f})")

except Exception as e:
    # Keep the notebook running, but never claim success after a failure.
    print(f"Error during retrieval: {e}")
    print("\n❌ Retrieval pipeline test failed - see the error above.")
else:
    print("\n✅ Retrieval pipeline tests completed!")


In [1]:
import torch
# Display whether PyTorch reports a usable CUDA device in this kernel.
# NOTE(review): the saved output of this cell is False, while the saved
# batch-processing log further down reports device cuda:0 - the stored
# outputs may come from different sessions; re-run to confirm.
torch.cuda.is_available()

  return torch._C._cuda_getDeviceCount() > 0


False

In [2]:
from src.database_population import process_srt_directory

[2m2025-10-03 21:53:54[0m [[32m[1minfo     [0m] [1mPyTorch CUDA memory configuration set[0m [36mcomponent[0m=[35mmemory_management[0m [36mconfig[0m=[35mexpandable_segments:True,garbage_collection_threshold:0.8,max_split_size_mb:128[0m
{"module": "pipelines", "available_pipelines": ["srt_to_qdrant"], "event": "Pipelines module initialized", "level": "info", "timestamp": "2025-10-03T19:53:54.727210Z", "service": "memoirr", "environment": "development"}


In [3]:
# Batch-process the SRT files found under the 'data' directory.
# WARNING: with overwrite=True the saved log below shows the existing Qdrant
# collection ('memoirr') being deleted and recreated before any file is
# processed, so only run this against a collection you can afford to lose.
process_srt_directory('data', overwrite=True)

[2m2025-10-03T19:54:02.138024Z[0m [[32m[1minfo     [0m] [1mbatch_srt_processing_started  [0m [36mdirectory[0m=[35mdata[0m [36moperation[0m=[35mbatch_srt_processing[0m [36moverwrite[0m=[35mTrue[0m


{'operation': 'batch_srt_processing', 'directory': 'data', 'overwrite': True, 'event': 'batch_srt_processing_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:02.138024Z'}


[2m2025-10-03T19:54:02.139268Z[0m [[32m[1minfo     [0m] [1mStarting batch SRT processing [0m [36mcomponent[0m=[35mbatch_processor[0m [36mdirectory[0m=[35mdata[0m [36moverwrite[0m=[35mTrue[0m


{'directory': 'data', 'overwrite': True, 'component': 'batch_processor', 'event': 'Starting batch SRT processing', 'level': 'info', 'timestamp': '2025-10-03T19:54:02.139268Z'}


[2m2025-10-03T19:54:02.140741Z[0m [[32m[1minfo     [0m] [1mSRT file discovery completed  [0m [36mcomponent[0m=[35mbatch_processor[0m [36mdirectory[0m=[35mdata[0m [36mfiles_found[0m=[35m3[0m


{'directory': 'data', 'files_found': 3, 'component': 'batch_processor', 'event': 'SRT file discovery completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:02.140741Z'}


[2m2025-10-03T19:54:02.141373Z[0m [[32m[1minfo     [0m] [1mClearing database before processing[0m [36mcomponent[0m=[35mbatch_processor[0m


{'component': 'batch_processor', 'event': 'Clearing database before processing', 'level': 'info', 'timestamp': '2025-10-03T19:54:02.141373Z'}


[2m2025-10-03T19:54:02.737610Z[0m [[32m[1minfo     [0m] [1mQdrantWriter initialized successfully[0m [36mcollection_name[0m=[35mmemoirr[0m [36mcomponent[0m=[35mqdrant_writer[0m [36membedding_dimension[0m=[35m1024[0m [36mqdrant_url[0m=[35mhttp://localhost:6300[0m


{'qdrant_url': 'http://localhost:6300', 'collection_name': 'memoirr', 'embedding_dimension': 1024, 'component': 'qdrant_writer', 'event': 'QdrantWriter initialized successfully', 'level': 'info', 'timestamp': '2025-10-03T19:54:02.737610Z'}


[2m2025-10-03T19:54:02.922494Z[0m [[32m[1minfo     [0m] [1mHTTP Request: GET http://localhost:6300 "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: GET http://localhost:6300 "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:02.925447Z[0m [[32m[1minfo     [0m] [1mHTTP Request: GET http://localhost:6300/collections/memoirr/exists "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: GET http://localhost:6300/collections/memoirr/exists "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:02.933043Z[0m [[32m[1minfo     [0m] [1mHTTP Request: DELETE http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: DELETE http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:02.979713Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:02.984513Z[0m [[32m[1minfo     [0m] [1mHTTP Request: POST http://localhost:6300/collections/memoirr/points/count "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: POST http://localhost:6300/collections/memoirr/points/count "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:02.987226Z[0m [[32m[1minfo     [0m] [1mClearing Qdrant database      [0m [36mcomponent[0m=[35mbatch_processor[0m [36mdocuments_before[0m=[35m0[0m


{'documents_before': 0, 'component': 'batch_processor', 'event': 'Clearing Qdrant database', 'level': 'info', 'timestamp': '2025-10-03T19:54:02.987226Z'}


[2m2025-10-03T19:54:02.988293Z[0m [[32m[1minfo     [0m] [1mcollection_clearing_started   [0m [36moperation[0m=[35mcollection_clearing[0m


{'operation': 'collection_clearing', 'event': 'collection_clearing_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:02.988293Z'}


[2m2025-10-03T19:54:02.989318Z[0m [[32m[1minfo     [0m] [1mClearing Qdrant collection via recreation[0m [36mcollection_name[0m=[35mmemoirr[0m [36mcomponent[0m=[35mqdrant_writer[0m [36membedding_dimension[0m=[35m1024[0m


{'collection_name': 'memoirr', 'embedding_dimension': 1024, 'component': 'qdrant_writer', 'event': 'Clearing Qdrant collection via recreation', 'level': 'info', 'timestamp': '2025-10-03T19:54:02.989318Z'}


[2m2025-10-03T19:54:02.992961Z[0m [[32m[1minfo     [0m] [1mHTTP Request: GET http://localhost:6300/collections/memoirr/exists "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: GET http://localhost:6300/collections/memoirr/exists "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:03.001283Z[0m [[32m[1minfo     [0m] [1mHTTP Request: DELETE http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: DELETE http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:03.046258Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:03.049574Z[0m [[32m[1minfo     [0m] [1mQdrant collection cleared successfully via recreation[0m [36mcollection_name[0m=[35mmemoirr[0m [36mcomponent[0m=[35mqdrant_writer[0m [36membedding_dimension[0m=[35m1024[0m


{'collection_name': 'memoirr', 'embedding_dimension': 1024, 'component': 'qdrant_writer', 'event': 'Qdrant collection cleared successfully via recreation', 'level': 'info', 'timestamp': '2025-10-03T19:54:03.049574Z'}


[2m2025-10-03T19:54:03.051268Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mqdrant_writer[0m [36mmetric_name[0m=[35mcollection_cleared_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m1[0m


{'metric_name': 'collection_cleared_total', 'metric_type': 'counter', 'value': 1, 'component': 'qdrant_writer', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:03.051268Z'}


[2m2025-10-03T19:54:03.052602Z[0m [[32m[1minfo     [0m] [1mcollection_clearing_completed [0m [36mcollection_cleared[0m=[35mTrue[0m [36mduration_ms[0m=[35m64[0m [36membedding_dimension[0m=[35m1024[0m [36mmethod[0m=[35mrecreation[0m [36moperation[0m=[35mcollection_clearing[0m


{'operation': 'collection_clearing', 'duration_ms': 64, 'collection_cleared': True, 'method': 'recreation', 'embedding_dimension': 1024, 'event': 'collection_clearing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:03.052602Z'}


[2m2025-10-03T19:54:03.058107Z[0m [[32m[1minfo     [0m] [1mHTTP Request: POST http://localhost:6300/collections/memoirr/points/count "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: POST http://localhost:6300/collections/memoirr/points/count "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:03.060092Z[0m [[32m[1minfo     [0m] [1mDatabase cleared successfully [0m [36mcomponent[0m=[35mbatch_processor[0m [36mdocuments_after[0m=[35m0[0m [36mdocuments_before[0m=[35m0[0m


{'documents_before': 0, 'documents_after': 0, 'component': 'batch_processor', 'event': 'Database cleared successfully', 'level': 'info', 'timestamp': '2025-10-03T19:54:03.060092Z'}


[2m2025-10-03T19:54:03.314412Z[0m [[32m[1minfo     [0m] [1mMemory usage - pipeline_build - start[0m [36mallocated_mb[0m=[35m0.0[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m7495.8125[0m [36mmax_allocated_mb[0m=[35m0.0[0m [36mmax_reserved_mb[0m=[35m0.0[0m [36mreserved_mb[0m=[35m0.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m4.10346619757726[0m


{'device': 0, 'allocated_mb': 0.0, 'reserved_mb': 0.0, 'max_allocated_mb': 0.0, 'max_reserved_mb': 0.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 7495.8125, 'utilization_percent': 4.10346619757726, 'component': 'memory_management', 'event': 'Memory usage - pipeline_build - start', 'level': 'info', 'timestamp': '2025-10-03T19:54:03.314412Z'}


[2m2025-10-03T19:54:03.315398Z[0m [[32m[1minfo     [0m] [1mBuilding SRT-to-Qdrant pipeline[0m [36mcomponent[0m=[35mpipeline_builder[0m [36mpipeline_type[0m=[35msrt_to_qdrant[0m


{'pipeline_type': 'srt_to_qdrant', 'component': 'pipeline_builder', 'event': 'Building SRT-to-Qdrant pipeline', 'level': 'info', 'timestamp': '2025-10-03T19:54:03.315398Z'}


[2m2025-10-03T19:54:04.994868Z[0m [[32m[1minfo     [0m] [1mSRTPreprocessor initialized   [0m [36mcomponent[0m=[35mpreprocessor[0m [36mdedupe_window_ms[0m=[35m1000[0m [36mmin_len[0m=[35m1[0m


{'min_len': 1, 'dedupe_window_ms': 1000, 'component': 'preprocessor', 'event': 'SRTPreprocessor initialized', 'level': 'info', 'timestamp': '2025-10-03T19:54:04.994868Z'}


[2m2025-10-03T19:54:04.996452Z[0m [[32m[1minfo     [0m] [1mSemanticChunker initialized   [0m [36mchunk_size[0m=[35m512[0m [36mcomponent[0m=[35mchunker[0m [36mmin_sentences[0m=[35m2[0m [36msimilarity_window[0m=[35m3[0m [36mthreshold[0m=[35mauto[0m


{'threshold': 'auto', 'chunk_size': 512, 'similarity_window': 3, 'min_sentences': 2, 'component': 'chunker', 'event': 'SemanticChunker initialized', 'level': 'info', 'timestamp': '2025-10-03T19:54:04.996452Z'}


[2m2025-10-03T19:54:04.997547Z[0m [[32m[1minfo     [0m] [1mChunkJsonlToTexts initialized [0m [36mcomponent[0m=[35mpipeline_glue[0m


{'component': 'pipeline_glue', 'event': 'ChunkJsonlToTexts initialized', 'level': 'info', 'timestamp': '2025-10-03T19:54:04.997547Z'}


[2m2025-10-03T19:54:04.999190Z[0m [[32m[1minfo     [0m] [1mModel resolved via recursive search[0m [36mcomponent[0m=[35mmodel_utils[0m [36mmethod[0m=[35mrecursive[0m [36mmodel_name[0m=[35mqwen3-embedding-0.6B[0m [36mresolved_path[0m=[35mmodels/chunker/qwen3-embedding-0.6B[0m [36mtotal_candidates[0m=[35m1[0m


{'model_name': 'qwen3-embedding-0.6B', 'resolved_path': 'models/chunker/qwen3-embedding-0.6B', 'method': 'recursive', 'total_candidates': 1, 'component': 'model_utils', 'event': 'Model resolved via recursive search', 'level': 'info', 'timestamp': '2025-10-03T19:54:04.999190Z'}


[2m2025-10-03T19:54:04.999958Z[0m [[32m[1minfo     [0m] [1mEmbedding model resolved successfully[0m [36mcomponent[0m=[35membedder[0m [36mmodel_name[0m=[35mqwen3-embedding-0.6B[0m [36mmodel_path[0m=[35mmodels/chunker/qwen3-embedding-0.6B[0m


{'model_name': 'qwen3-embedding-0.6B', 'model_path': 'models/chunker/qwen3-embedding-0.6B', 'component': 'embedder', 'event': 'Embedding model resolved successfully', 'level': 'info', 'timestamp': '2025-10-03T19:54:04.999958Z'}


[2m2025-10-03T19:54:05.001939Z[0m [[32m[1minfo     [0m] [1mLoad pretrained SentenceTransformer: models/chunker/qwen3-embedding-0.6B[0m [36mlineno[0m=[35m227[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


Load pretrained SentenceTransformer: models/chunker/qwen3-embedding-0.6B


[2m2025-10-03T19:54:06.219945Z[0m [[32m[1minfo     [0m] [1m1 prompt is loaded, with the key: query[0m [36mlineno[0m=[35m378[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


1 prompt is loaded, with the key: query


[2m2025-10-03T19:54:06.221187Z[0m [[32m[1minfo     [0m] [1mTextEmbedder initialized successfully[0m [36mcomponent[0m=[35membedder[0m [36membedding_dimension[0m=[35m1024[0m [36mmodel_name[0m=[35mqwen3-embedding-0.6B[0m [36mmodel_path[0m=[35mmodels/chunker/qwen3-embedding-0.6B[0m


{'model_name': 'qwen3-embedding-0.6B', 'embedding_dimension': 1024, 'model_path': 'models/chunker/qwen3-embedding-0.6B', 'component': 'embedder', 'event': 'TextEmbedder initialized successfully', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.221187Z'}


[2m2025-10-03T19:54:06.222167Z[0m [[32m[1minfo     [0m] [1mBuildDocuments initialized    [0m [36mcomponent[0m=[35mpipeline_glue[0m


{'component': 'pipeline_glue', 'event': 'BuildDocuments initialized', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.222167Z'}


[2m2025-10-03T19:54:06.223721Z[0m [[32m[1minfo     [0m] [1mQdrantWriter initialized successfully[0m [36mcollection_name[0m=[35mmemoirr[0m [36mcomponent[0m=[35mqdrant_writer[0m [36membedding_dimension[0m=[35m1024[0m [36mqdrant_url[0m=[35mhttp://localhost:6300[0m


{'qdrant_url': 'http://localhost:6300', 'collection_name': 'memoirr', 'embedding_dimension': 1024, 'component': 'qdrant_writer', 'event': 'QdrantWriter initialized successfully', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.223721Z'}


[2m2025-10-03T19:54:06.224974Z[0m [[32m[1minfo     [0m] [1mSRT-to-Qdrant pipeline built successfully[0m [36mcomponent[0m=[35mpipeline_builder[0m [36mcomponent_names[0m=[35m['pre', 'chunk', 'explode', 'embed', 'docs', 'write'][0m [36mpipeline_type[0m=[35msrt_to_qdrant[0m [36mtotal_components[0m=[35m6[0m [36mtotal_connections[0m=[35m7[0m


{'total_components': 6, 'component_names': ['pre', 'chunk', 'explode', 'embed', 'docs', 'write'], 'total_connections': 7, 'pipeline_type': 'srt_to_qdrant', 'component': 'pipeline_builder', 'event': 'SRT-to-Qdrant pipeline built successfully', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.224974Z'}


[2m2025-10-03T19:54:06.226899Z[0m [[32m[1minfo     [0m] [1mMemory usage - pipeline built [0m [36mallocated_mb[0m=[35m2272.70751953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m5213.8125[0m [36mmax_allocated_mb[0m=[35m2272.70751953125[0m [36mmax_reserved_mb[0m=[35m2282.0[0m [36mreserved_mb[0m=[35m2282.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m33.29788509974813[0m


{'device': 0, 'allocated_mb': 2272.70751953125, 'reserved_mb': 2282.0, 'max_allocated_mb': 2272.70751953125, 'max_reserved_mb': 2282.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 5213.8125, 'utilization_percent': 33.29788509974813, 'component': 'memory_management', 'event': 'Memory usage - pipeline built', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.226899Z'}


[2m2025-10-03T19:54:06.227735Z[0m [[32m[1minfo     [0m] [1mPipeline built successfully for batch processing[0m [36mcomponent[0m=[35mbatch_processor[0m


{'component': 'batch_processor', 'event': 'Pipeline built successfully for batch processing', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.227735Z'}


[2m2025-10-03T19:54:06.229460Z[0m [[32m[1minfo     [0m] [1mMemory usage - pipeline_build - end[0m [36mallocated_mb[0m=[35m2272.70751953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m5213.8125[0m [36mmax_allocated_mb[0m=[35m2272.70751953125[0m [36mmax_reserved_mb[0m=[35m2282.0[0m [36mreserved_mb[0m=[35m2282.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m33.29788509974813[0m


{'device': 0, 'allocated_mb': 2272.70751953125, 'reserved_mb': 2282.0, 'max_allocated_mb': 2272.70751953125, 'max_reserved_mb': 2282.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 5213.8125, 'utilization_percent': 33.29788509974813, 'component': 'memory_management', 'event': 'Memory usage - pipeline_build - end', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.229460Z'}


[2m2025-10-03T19:54:06.230376Z[0m [[32m[1minfo     [0m] [1mProcessing SRT file           [0m [36mcomponent[0m=[35mbatch_processor[0m [36mfile_index[0m=[35m1[0m [36mfile_path[0m=[35mdata/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt[0m [36mtotal_files[0m=[35m3[0m


{'file_index': 1, 'total_files': 3, 'file_path': 'data/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt', 'component': 'batch_processor', 'event': 'Processing SRT file', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.230376Z'}


[2m2025-10-03T19:54:06.231812Z[0m [[32m[1minfo     [0m] [1mMemory usage - before file 1/3[0m [36mallocated_mb[0m=[35m2272.70751953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m5213.8125[0m [36mmax_allocated_mb[0m=[35m2272.70751953125[0m [36mmax_reserved_mb[0m=[35m2282.0[0m [36mreserved_mb[0m=[35m2282.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m33.29788509974813[0m


{'device': 0, 'allocated_mb': 2272.70751953125, 'reserved_mb': 2282.0, 'max_allocated_mb': 2272.70751953125, 'max_reserved_mb': 2282.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 5213.8125, 'utilization_percent': 33.29788509974813, 'component': 'memory_management', 'event': 'Memory usage - before file 1/3', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.231812Z'}


[2m2025-10-03T19:54:06.232644Z[0m [[32m[1minfo     [0m] [1msingle_file_processing_started[0m [36mfile_path[0m=[35mdata/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt[0m [36moperation[0m=[35msingle_file_processing[0m


{'operation': 'single_file_processing', 'file_path': 'data/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt', 'event': 'single_file_processing_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.232644Z'}


[2m2025-10-03T19:54:06.262313Z[0m [[32m[1minfo     [0m] [1mRunning component pre         [0m [36mcomponent_name[0m=[35mpre[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component pre


[2m2025-10-03T19:54:06.263714Z[0m [[32m[1minfo     [0m] [1msrt_preprocessing_started     [0m [36minput_length[0m=[35m125522[0m [36moperation[0m=[35msrt_preprocessing[0m


{'operation': 'srt_preprocessing', 'input_length': 125522, 'event': 'srt_preprocessing_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.263714Z'}


[2m2025-10-03T19:54:06.265277Z[0m [[32m[1minfo     [0m] [1mStarting SRT preprocessing    [0m [36mcomponent[0m=[35mpreprocessor[0m [36mdedupe_window_ms[0m=[35m1000[0m [36minput_size_chars[0m=[35m125522[0m [36mmin_len[0m=[35m1[0m


{'input_size_chars': 125522, 'min_len': 1, 'dedupe_window_ms': 1000, 'component': 'preprocessor', 'event': 'Starting SRT preprocessing', 'level': 'info', 'timestamp': '2025-10-03T19:54:06.265277Z'}


[2m2025-10-03T19:54:12.616693Z[0m [[32m[1minfo     [0m] [1mCaption language filtering completed[0m [36mcomponent[0m=[35mlanguage_filter[0m [36mdropped_captions[0m=[35m371[0m [36mdropped_lines[0m=[35m559[0m [36mkept_captions[0m=[35m1412[0m [36mkept_lines[0m=[35m1708[0m [36mtotal_captions[0m=[35m1783[0m [36mtotal_lines[0m=[35m2267[0m


{'total_captions': 1783, 'kept_captions': 1412, 'dropped_captions': 371, 'total_lines': 2267, 'kept_lines': 1708, 'dropped_lines': 559, 'component': 'language_filter', 'event': 'Caption language filtering completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.616693Z'}


[2m2025-10-03T19:54:12.617689Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_language_filtered_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m1783[0m


{'metric_name': 'captions_language_filtered_total', 'metric_type': 'counter', 'value': 1783, 'component': 'language_filter', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.617689Z'}


[2m2025-10-03T19:54:12.618242Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_kept_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1412[0m


{'metric_name': 'captions_kept_after_language_filter', 'metric_type': 'counter', 'value': 1412, 'component': 'language_filter', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.618242Z'}


[2m2025-10-03T19:54:12.618801Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_dropped_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mdropped[0m [36mvalue[0m=[35m371[0m


{'metric_name': 'captions_dropped_after_language_filter', 'metric_type': 'counter', 'value': 371, 'component': 'language_filter', 'status': 'dropped', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.618801Z'}


[2m2025-10-03T19:54:12.619395Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_language_filtered_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m2267[0m


{'metric_name': 'lines_language_filtered_total', 'metric_type': 'counter', 'value': 2267, 'component': 'language_filter', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.619395Z'}


[2m2025-10-03T19:54:12.619950Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_kept_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1708[0m


{'metric_name': 'lines_kept_after_language_filter', 'metric_type': 'counter', 'value': 1708, 'component': 'language_filter', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.619950Z'}


[2m2025-10-03T19:54:12.621313Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_dropped_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mdropped[0m [36mvalue[0m=[35m559[0m


{'metric_name': 'lines_dropped_after_language_filter', 'metric_type': 'counter', 'value': 559, 'component': 'language_filter', 'status': 'dropped', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.621313Z'}


[2m2025-10-03T19:54:12.639678Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m0[0m


{'metric_name': 'captions_processed_total', 'metric_type': 'counter', 'value': 0, 'component': 'preprocessor', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.639678Z'}


[2m2025-10-03T19:54:12.641272Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_kept_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1353[0m


{'metric_name': 'captions_kept_total', 'metric_type': 'counter', 'value': 1353, 'component': 'preprocessor', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.641272Z'}


[2m2025-10-03T19:54:12.642380Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_dropped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mempty[0m [36mvalue[0m=[35m59[0m


{'metric_name': 'captions_dropped_total', 'metric_type': 'counter', 'value': 59, 'component': 'preprocessor', 'reason': 'empty', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.642380Z'}


[2m2025-10-03T19:54:12.643817Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_dropped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mnon_english[0m [36mvalue[0m=[35m371[0m


{'metric_name': 'captions_dropped_total', 'metric_type': 'counter', 'value': 371, 'component': 'preprocessor', 'reason': 'non_english', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.643817Z'}


[2m2025-10-03T19:54:12.646098Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_deduped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mduplicate[0m [36mvalue[0m=[35m0[0m


{'metric_name': 'captions_deduped_total', 'metric_type': 'counter', 'value': 0, 'component': 'preprocessor', 'reason': 'duplicate', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.646098Z'}


[2m2025-10-03T19:54:12.647178Z[0m [[32m[1minfo     [0m] [1mSRT preprocessing completed   [0m [36mcomponent[0m=[35mpreprocessor[0m [36moutput_lines[0m=[35m1353[0m [36mprocessing_stats[0m=[35m{'total_captions': 1783, 'kept': 1353, 'dropped_empty': 59, 'dropped_non_english': 371, 'deduped': 0}[0m


{'output_lines': 1353, 'processing_stats': {'total_captions': 1783, 'kept': 1353, 'dropped_empty': 59, 'dropped_non_english': 371, 'deduped': 0}, 'component': 'preprocessor', 'event': 'SRT preprocessing completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.647178Z'}


[2m2025-10-03T19:54:12.648151Z[0m [[32m[1minfo     [0m] [1msrt_preprocessing_completed   [0m [36mcaptions_dropped[0m=[35m-1353[0m [36mcaptions_kept[0m=[35m1353[0m [36mduration_ms[0m=[35m6384[0m [36minput_length[0m=[35m125522[0m [36moperation[0m=[35msrt_preprocessing[0m [36moutput_lines[0m=[35m1353[0m


{'operation': 'srt_preprocessing', 'duration_ms': 6384, 'input_length': 125522, 'output_lines': 1353, 'captions_kept': 1353, 'captions_dropped': -1353, 'event': 'srt_preprocessing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.648151Z'}


[2m2025-10-03T19:54:12.659361Z[0m [[32m[1minfo     [0m] [1mRunning component chunk       [0m [36mcomponent_name[0m=[35mchunk[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component chunk


[2m2025-10-03T19:54:12.660655Z[0m [[32m[1minfo     [0m] [1msemantic_chunking_started     [0m [36minput_captions[0m=[35m1353[0m [36moperation[0m=[35msemantic_chunking[0m


{'operation': 'semantic_chunking', 'input_captions': 1353, 'event': 'semantic_chunking_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.660655Z'}


[2m2025-10-03T19:54:12.661926Z[0m [[32m[1minfo     [0m] [1mStarting semantic chunking    [0m [36mchunk_size[0m=[35m512[0m [36mcomponent[0m=[35mchunker[0m [36minput_captions[0m=[35m1353[0m [36mthreshold[0m=[35mauto[0m


{'input_captions': 1353, 'threshold': 'auto', 'chunk_size': 512, 'component': 'chunker', 'event': 'Starting semantic chunking', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.661926Z'}


[2m2025-10-03T19:54:12.671773Z[0m [[32m[1minfo     [0m] [1mMemory usage - semantic_chunking - start[0m [36mallocated_mb[0m=[35m2272.70751953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m5213.8125[0m [36mmax_allocated_mb[0m=[35m2272.70751953125[0m [36mmax_reserved_mb[0m=[35m2282.0[0m [36mreserved_mb[0m=[35m2282.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m33.29788509974813[0m


{'device': 0, 'allocated_mb': 2272.70751953125, 'reserved_mb': 2282.0, 'max_allocated_mb': 2272.70751953125, 'max_reserved_mb': 2282.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 5213.8125, 'utilization_percent': 33.29788509974813, 'component': 'memory_management', 'event': 'Memory usage - semantic_chunking - start', 'level': 'info', 'timestamp': '2025-10-03T19:54:12.671773Z'}


[2m2025-10-03T19:54:12.674619Z[0m [[32m[1minfo     [0m] [1mUse pytorch device_name: cuda:0[0m [36mlineno[0m=[35m219[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


Use pytorch device_name: cuda:0


[2m2025-10-03T19:54:12.675504Z[0m [[32m[1minfo     [0m] [1mLoad pretrained SentenceTransformer: models/chunker/qwen3-embedding-0.6B[0m [36mlineno[0m=[35m227[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


Load pretrained SentenceTransformer: models/chunker/qwen3-embedding-0.6B


[2m2025-10-03T19:54:13.717351Z[0m [[32m[1minfo     [0m] [1m1 prompt is loaded, with the key: query[0m [36mlineno[0m=[35m378[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


1 prompt is loaded, with the key: query


Batches:   0%|          | 0/36 [00:00<?, ?it/s]

Batches:   0%|          | 0/36 [00:00<?, ?it/s]

[2m2025-10-03T19:54:35.373641Z[0m [[32m[1minfo     [0m] [1mMemory usage - semantic_chunking - end[0m [36mallocated_mb[0m=[35m4553.5400390625[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m2115.8125[0m [36mmax_allocated_mb[0m=[35m5278.37939453125[0m [36mmax_reserved_mb[0m=[35m5358.0[0m [36mreserved_mb[0m=[35m5358.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m72.93167552872507[0m


{'device': 0, 'allocated_mb': 4553.5400390625, 'reserved_mb': 5358.0, 'max_allocated_mb': 5278.37939453125, 'max_reserved_mb': 5358.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 2115.8125, 'utilization_percent': 72.93167552872507, 'component': 'memory_management', 'event': 'Memory usage - semantic_chunking - end', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.373641Z'}


[2m2025-10-03T19:54:35.648459Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mcaptions_chunked_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m1353[0m


{'metric_name': 'captions_chunked_total', 'metric_type': 'counter', 'value': 1353, 'component': 'chunker', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.648459Z'}


[2m2025-10-03T19:54:35.649415Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mchunks_generated_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m420[0m


{'metric_name': 'chunks_generated_total', 'metric_type': 'counter', 'value': 420, 'component': 'chunker', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.649415Z'}


[2m2025-10-03T19:54:35.650073Z[0m [[32m[1minfo     [0m] [1mmetric_histogram              [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mavg_tokens_per_chunk[0m [36mmetric_type[0m=[35mhistogram[0m [36mvalue[0m=[35m33.069047619047616[0m


{'metric_name': 'avg_tokens_per_chunk', 'metric_type': 'histogram', 'value': 33.069047619047616, 'component': 'chunker', 'event': 'metric_histogram', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.650073Z'}


[2m2025-10-03T19:54:35.650644Z[0m [[32m[1minfo     [0m] [1mSemantic chunking completed   [0m [36mavg_tokens_per_chunk[0m=[35m33.069047619047616[0m [36mcomponent[0m=[35mchunker[0m [36minput_captions[0m=[35m1353[0m [36moutput_chunks[0m=[35m420[0m


{'input_captions': 1353, 'output_chunks': 420, 'avg_tokens_per_chunk': 33.069047619047616, 'component': 'chunker', 'event': 'Semantic chunking completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.650644Z'}


[2m2025-10-03T19:54:35.651269Z[0m [[32m[1minfo     [0m] [1msemantic_chunking_completed   [0m [36mavg_tokens_per_chunk[0m=[35m33.069047619047616[0m [36mduration_ms[0m=[35m22990[0m [36minput_captions[0m=[35m1353[0m [36moperation[0m=[35msemantic_chunking[0m [36moutput_chunks[0m=[35m420[0m [36moutput_chunks_final[0m=[35m420[0m


{'operation': 'semantic_chunking', 'duration_ms': 22990, 'input_captions': 1353, 'output_chunks': 420, 'avg_tokens_per_chunk': 33.069047619047616, 'output_chunks_final': 420, 'event': 'semantic_chunking_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.651269Z'}


[2m2025-10-03T19:54:35.654792Z[0m [[32m[1minfo     [0m] [1mRunning component explode     [0m [36mcomponent_name[0m=[35mexplode[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component explode


[2m2025-10-03T19:54:35.655361Z[0m [[32m[1minfo     [0m] [1mchunk_jsonl_parsing_started   [0m [36minput_lines[0m=[35m420[0m [36moperation[0m=[35mchunk_jsonl_parsing[0m


{'operation': 'chunk_jsonl_parsing', 'input_lines': 420, 'event': 'chunk_jsonl_parsing_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.655361Z'}


[2m2025-10-03T19:54:35.658226Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mchunk_lines_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m420[0m


{'metric_name': 'chunk_lines_processed_total', 'metric_type': 'counter', 'value': 420, 'component': 'pipeline_glue', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.658226Z'}


[2m2025-10-03T19:54:35.658860Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mtexts_extracted_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m420[0m


{'metric_name': 'texts_extracted_total', 'metric_type': 'counter', 'value': 420, 'component': 'pipeline_glue', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.658860Z'}


[2m2025-10-03T19:54:35.659368Z[0m [[32m[1minfo     [0m] [1mChunk JSONL parsing completed [0m [36mcomponent[0m=[35mpipeline_glue[0m [36minput_lines[0m=[35m420[0m [36moutput_texts[0m=[35m420[0m [36mskipped_lines[0m=[35m0[0m


{'input_lines': 420, 'output_texts': 420, 'skipped_lines': 0, 'component': 'pipeline_glue', 'event': 'Chunk JSONL parsing completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.659368Z'}


[2m2025-10-03T19:54:35.659908Z[0m [[32m[1minfo     [0m] [1mchunk_jsonl_parsing_completed [0m [36mduration_ms[0m=[35m4[0m [36minput_lines[0m=[35m420[0m [36moperation[0m=[35mchunk_jsonl_parsing[0m [36moutput_texts[0m=[35m420[0m [36mskipped_lines[0m=[35m0[0m [36msuccess_rate[0m=[35m1.0[0m


{'operation': 'chunk_jsonl_parsing', 'duration_ms': 4, 'input_lines': 420, 'output_texts': 420, 'skipped_lines': 0, 'success_rate': 1.0, 'event': 'chunk_jsonl_parsing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.659908Z'}


[2m2025-10-03T19:54:35.663351Z[0m [[32m[1minfo     [0m] [1mRunning component embed       [0m [36mcomponent_name[0m=[35membed[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component embed


[2m2025-10-03T19:54:35.664068Z[0m [[32m[1minfo     [0m] [1mtext_embedding_started        [0m [36minput_texts[0m=[35m420[0m [36moperation[0m=[35mtext_embedding[0m


{'operation': 'text_embedding', 'input_texts': 420, 'event': 'text_embedding_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.664068Z'}


[2m2025-10-03T19:54:35.666447Z[0m [[32m[1minfo     [0m] [1mMemory usage - before embedding[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m2891.8125[0m [36mmax_allocated_mb[0m=[35m5278.37939453125[0m [36mmax_reserved_mb[0m=[35m5358.0[0m [36mreserved_mb[0m=[35m4582.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m63.00403790029184[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 4582.0, 'max_allocated_mb': 5278.37939453125, 'max_reserved_mb': 5358.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 2891.8125, 'utilization_percent': 63.00403790029184, 'component': 'memory_management', 'event': 'Memory usage - before embedding', 'level': 'info', 'timestamp': '2025-10-03T19:54:35.666447Z'}


Batches:   0%|          | 0/14 [00:00<?, ?it/s]

[2m2025-10-03T19:54:42.140183Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35membedder[0m [36mmetric_name[0m=[35membeddings_generated_total[0m [36mmetric_type[0m=[35mcounter[0m [36mmode[0m=[35mbatch[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m420[0m


{'metric_name': 'embeddings_generated_total', 'metric_type': 'counter', 'value': 420, 'component': 'embedder', 'mode': 'batch', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:42.140183Z'}


[2m2025-10-03T19:54:42.142062Z[0m [[32m[1minfo     [0m] [1mMemory usage - after batch embedding[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m981.8125[0m [36mmax_allocated_mb[0m=[35m5278.37939453125[0m [36mmax_reserved_mb[0m=[35m6492.0[0m [36mreserved_mb[0m=[35m6492.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m87.43933154759526[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 6492.0, 'max_allocated_mb': 5278.37939453125, 'max_reserved_mb': 6492.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 981.8125, 'utilization_percent': 87.43933154759526, 'component': 'memory_management', 'event': 'Memory usage - after batch embedding', 'level': 'info', 'timestamp': '2025-10-03T19:54:42.142062Z'}


[2m2025-10-03T19:54:42.143112Z[0m [[32m[1minfo     [0m] [1mtext_embedding_completed      [0m [36mduration_ms[0m=[35m6479[0m [36mfailed_embeddings[0m=[35m0[0m [36minput_texts[0m=[35m420[0m [36moperation[0m=[35mtext_embedding[0m [36mprocessing_mode[0m=[35mbatch[0m [36msuccessful_embeddings[0m=[35m420[0m


{'operation': 'text_embedding', 'duration_ms': 6479, 'input_texts': 420, 'successful_embeddings': 420, 'failed_embeddings': 0, 'processing_mode': 'batch', 'event': 'text_embedding_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:42.143112Z'}


[2m2025-10-03T19:54:44.819536Z[0m [[32m[1minfo     [0m] [1mRunning component docs        [0m [36mcomponent_name[0m=[35mdocs[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component docs


[2m2025-10-03T19:54:44.820786Z[0m [[32m[1minfo     [0m] [1mdocument_building_started     [0m [36minput_embeddings[0m=[35m420[0m [36minput_metas[0m=[35m420[0m [36minput_texts[0m=[35m420[0m [36moperation[0m=[35mdocument_building[0m


{'operation': 'document_building', 'input_texts': 420, 'input_metas': 420, 'input_embeddings': 420, 'event': 'document_building_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:44.820786Z'}


[2m2025-10-03T19:54:44.822136Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mdocuments_built_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m420[0m


{'metric_name': 'documents_built_total', 'metric_type': 'counter', 'value': 420, 'component': 'pipeline_glue', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:44.822136Z'}


[2m2025-10-03T19:54:44.822877Z[0m [[32m[1minfo     [0m] [1mmetric_histogram              [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mdocument_build_batch_size[0m [36mmetric_type[0m=[35mhistogram[0m [36mvalue[0m=[35m420[0m


{'metric_name': 'document_build_batch_size', 'metric_type': 'histogram', 'value': 420, 'component': 'pipeline_glue', 'event': 'metric_histogram', 'level': 'info', 'timestamp': '2025-10-03T19:54:44.822877Z'}


[2m2025-10-03T19:54:44.823754Z[0m [[32m[1minfo     [0m] [1mDocument building completed   [0m [36mcomponent[0m=[35mpipeline_glue[0m [36minput_embeddings[0m=[35m420[0m [36minput_metas[0m=[35m420[0m [36minput_texts[0m=[35m420[0m [36moutput_documents[0m=[35m420[0m


{'input_texts': 420, 'input_metas': 420, 'input_embeddings': 420, 'output_documents': 420, 'component': 'pipeline_glue', 'event': 'Document building completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:44.823754Z'}


[2m2025-10-03T19:54:44.824284Z[0m [[32m[1minfo     [0m] [1mdocument_building_completed   [0m [36mdocuments_built[0m=[35m420[0m [36mduration_ms[0m=[35m3[0m [36minput_alignment_ratio[0m=[35m1.0[0m [36minput_embeddings[0m=[35m420[0m [36minput_metas[0m=[35m420[0m [36minput_texts[0m=[35m420[0m [36moperation[0m=[35mdocument_building[0m [36moutput_documents[0m=[35m420[0m


{'operation': 'document_building', 'duration_ms': 3, 'input_texts': 420, 'input_metas': 420, 'input_embeddings': 420, 'output_documents': 420, 'input_alignment_ratio': 1.0, 'documents_built': 420, 'event': 'document_building_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:44.824284Z'}


[2m2025-10-03T19:54:47.426591Z[0m [[32m[1minfo     [0m] [1mRunning component write       [0m [36mcomponent_name[0m=[35mwrite[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component write


[2m2025-10-03T19:54:47.427737Z[0m [[32m[1minfo     [0m] [1mdocument_writing_started      [0m [36moperation[0m=[35mdocument_writing[0m [36mtotal_documents[0m=[35m420[0m


{'operation': 'document_writing', 'total_documents': 420, 'event': 'document_writing_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:47.427737Z'}


[2m2025-10-03T19:54:47.577837Z[0m [[32m[1minfo     [0m] [1mWriting documents to Qdrant   [0m [36mcomponent[0m=[35mqdrant_writer[0m [36mdocument_count[0m=[35m420[0m


{'document_count': 420, 'component': 'qdrant_writer', 'event': 'Writing documents to Qdrant', 'level': 'info', 'timestamp': '2025-10-03T19:54:47.577837Z'}


[2m2025-10-03T19:54:47.625365Z[0m [[32m[1minfo     [0m] [1mHTTP Request: GET http://localhost:6300 "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: GET http://localhost:6300 "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:47.629478Z[0m [[32m[1minfo     [0m] [1mHTTP Request: GET http://localhost:6300/collections/memoirr/exists "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: GET http://localhost:6300/collections/memoirr/exists "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:47.636886Z[0m [[32m[1minfo     [0m] [1mHTTP Request: DELETE http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: DELETE http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:47.686509Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr "HTTP/1.1 200 OK"


[2m2025-10-03T19:54:47.691581Z[0m [[32m[1minfo     [0m] [1mHTTP Request: POST http://localhost:6300/collections/memoirr/points "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: POST http://localhost:6300/collections/memoirr/points "HTTP/1.1 200 OK"


  0%|          | 0/420 [00:00<?, ?it/s][2m2025-10-03T19:54:47.850406Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 24%|██▍       | 100/420 [00:00<00:00, 626.52it/s][2m2025-10-03T19:54:48.015044Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 48%|████▊     | 200/420 [00:00<00:00, 618.64it/s][2m2025-10-03T19:54:48.168717Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 71%|███████▏  | 300/420 [00:00<00:00, 633.39it/s][2m2025-10-03T19:54:48.322165Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 95%|█████████▌| 400/420 [00:00<00:00, 639.20it/s][2m2025-10-03T19:54:48.363294Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


500it [00:00, 745.52it/s]                         
[2m2025-10-03T19:54:48.365357Z[0m [[32m[1minfo     [0m] [1mDocuments written successfully[0m [36mcomponent[0m=[35mqdrant_writer[0m [36mwritten_count[0m=[35m420[0m


{'written_count': 420, 'component': 'qdrant_writer', 'event': 'Documents written successfully', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.365357Z'}


[2m2025-10-03T19:54:48.365986Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mqdrant_writer[0m [36mmetric_name[0m=[35mdocuments_written_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m420[0m


{'metric_name': 'documents_written_total', 'metric_type': 'counter', 'value': 420, 'component': 'qdrant_writer', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.365986Z'}


[2m2025-10-03T19:54:48.366607Z[0m [[32m[1minfo     [0m] [1mdocument_writing_completed    [0m [36mdocuments_skipped[0m=[35m0[0m [36mdocuments_written[0m=[35m420[0m [36mduration_ms[0m=[35m938[0m [36moperation[0m=[35mdocument_writing[0m [36msuccess_rate[0m=[35m1.0[0m [36mtotal_documents[0m=[35m420[0m


{'operation': 'document_writing', 'duration_ms': 938, 'total_documents': 420, 'documents_written': 420, 'documents_skipped': 0, 'success_rate': 1.0, 'event': 'document_writing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.366607Z'}


[2m2025-10-03T19:54:48.372339Z[0m [[32m[1minfo     [0m] [1mSRT file processed successfully[0m [36mcomponent[0m=[35mbatch_processor[0m [36mdocuments_written[0m=[35m420[0m [36mfile_path[0m=[35mdata/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt[0m


{'file_path': 'data/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt', 'documents_written': 420, 'component': 'batch_processor', 'event': 'SRT file processed successfully', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.372339Z'}


[2m2025-10-03T19:54:48.373915Z[0m [[32m[1minfo     [0m] [1msingle_file_processing_completed[0m [36mdocuments_written[0m=[35m420[0m [36mduration_ms[0m=[35m42141[0m [36mfile_path[0m=[35mdata/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt[0m [36mfile_size_bytes[0m=[35m125522[0m [36moperation[0m=[35msingle_file_processing[0m


{'operation': 'single_file_processing', 'duration_ms': 42141, 'file_path': 'data/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt', 'documents_written': 420, 'file_size_bytes': 125522, 'event': 'single_file_processing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.373915Z'}


[2m2025-10-03T19:54:48.377064Z[0m [[32m[1minfo     [0m] [1mMemory usage - after file 1/3 [0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m981.8125[0m [36mmax_allocated_mb[0m=[35m5278.37939453125[0m [36mmax_reserved_mb[0m=[35m6492.0[0m [36mreserved_mb[0m=[35m6492.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m87.43933154759526[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 6492.0, 'max_allocated_mb': 5278.37939453125, 'max_reserved_mb': 6492.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 981.8125, 'utilization_percent': 87.43933154759526, 'component': 'memory_management', 'event': 'Memory usage - after file 1/3', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.377064Z'}


[2m2025-10-03T19:54:48.378180Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mbatch_processor[0m [36mmetric_name[0m=[35msrt_files_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m1[0m


{'metric_name': 'srt_files_processed_total', 'metric_type': 'counter', 'value': 1, 'component': 'batch_processor', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.378180Z'}


[2m2025-10-03T19:54:48.379128Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mbatch_processor[0m [36mmetric_name[0m=[35mdocuments_written_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m420[0m


{'metric_name': 'documents_written_total', 'metric_type': 'counter', 'value': 420, 'component': 'batch_processor', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.379128Z'}


[2m2025-10-03T19:54:48.380092Z[0m [[32m[1minfo     [0m] [1mProcessing SRT file           [0m [36mcomponent[0m=[35mbatch_processor[0m [36mfile_index[0m=[35m2[0m [36mfile_path[0m=[35mdata/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt[0m [36mtotal_files[0m=[35m3[0m


{'file_index': 2, 'total_files': 3, 'file_path': 'data/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt', 'component': 'batch_processor', 'event': 'Processing SRT file', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.380092Z'}


[2m2025-10-03T19:54:48.702458Z[0m [[32m[1minfo     [0m] [1mMemory usage - before file 2/3[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m5151.8125[0m [36mmax_allocated_mb[0m=[35m5278.37939453125[0m [36mmax_reserved_mb[0m=[35m6492.0[0m [36mreserved_mb[0m=[35m2322.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m34.091072642226045[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 2322.0, 'max_allocated_mb': 5278.37939453125, 'max_reserved_mb': 6492.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 5151.8125, 'utilization_percent': 34.091072642226045, 'component': 'memory_management', 'event': 'Memory usage - before file 2/3', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.702458Z'}


[2m2025-10-03T19:54:48.704189Z[0m [[32m[1minfo     [0m] [1msingle_file_processing_started[0m [36mfile_path[0m=[35mdata/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt[0m [36moperation[0m=[35msingle_file_processing[0m


{'operation': 'single_file_processing', 'file_path': 'data/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt', 'event': 'single_file_processing_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.704189Z'}


[2m2025-10-03T19:54:48.707600Z[0m [[32m[1minfo     [0m] [1mRunning component pre         [0m [36mcomponent_name[0m=[35mpre[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component pre


[2m2025-10-03T19:54:48.708821Z[0m [[32m[1minfo     [0m] [1msrt_preprocessing_started     [0m [36minput_length[0m=[35m118551[0m [36moperation[0m=[35msrt_preprocessing[0m


{'operation': 'srt_preprocessing', 'input_length': 118551, 'event': 'srt_preprocessing_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.708821Z'}


[2m2025-10-03T19:54:48.710219Z[0m [[32m[1minfo     [0m] [1mStarting SRT preprocessing    [0m [36mcomponent[0m=[35mpreprocessor[0m [36mdedupe_window_ms[0m=[35m1000[0m [36minput_size_chars[0m=[35m118551[0m [36mmin_len[0m=[35m1[0m


{'input_size_chars': 118551, 'min_len': 1, 'dedupe_window_ms': 1000, 'component': 'preprocessor', 'event': 'Starting SRT preprocessing', 'level': 'info', 'timestamp': '2025-10-03T19:54:48.710219Z'}


[2m2025-10-03T19:54:55.284989Z[0m [[32m[1minfo     [0m] [1mCaption language filtering completed[0m [36mcomponent[0m=[35mlanguage_filter[0m [36mdropped_captions[0m=[35m488[0m [36mdropped_lines[0m=[35m628[0m [36mkept_captions[0m=[35m1368[0m [36mkept_lines[0m=[35m1666[0m [36mtotal_captions[0m=[35m1856[0m [36mtotal_lines[0m=[35m2294[0m


{'total_captions': 1856, 'kept_captions': 1368, 'dropped_captions': 488, 'total_lines': 2294, 'kept_lines': 1666, 'dropped_lines': 628, 'component': 'language_filter', 'event': 'Caption language filtering completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.284989Z'}


[2m2025-10-03T19:54:55.286212Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_language_filtered_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m1856[0m


{'metric_name': 'captions_language_filtered_total', 'metric_type': 'counter', 'value': 1856, 'component': 'language_filter', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.286212Z'}


[2m2025-10-03T19:54:55.286753Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_kept_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1368[0m


{'metric_name': 'captions_kept_after_language_filter', 'metric_type': 'counter', 'value': 1368, 'component': 'language_filter', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.286753Z'}


[2m2025-10-03T19:54:55.287209Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_dropped_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mdropped[0m [36mvalue[0m=[35m488[0m


{'metric_name': 'captions_dropped_after_language_filter', 'metric_type': 'counter', 'value': 488, 'component': 'language_filter', 'status': 'dropped', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.287209Z'}


[2m2025-10-03T19:54:55.287733Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_language_filtered_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m2294[0m


{'metric_name': 'lines_language_filtered_total', 'metric_type': 'counter', 'value': 2294, 'component': 'language_filter', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.287733Z'}


[2m2025-10-03T19:54:55.288636Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_kept_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1666[0m


{'metric_name': 'lines_kept_after_language_filter', 'metric_type': 'counter', 'value': 1666, 'component': 'language_filter', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.288636Z'}


[2m2025-10-03T19:54:55.289106Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_dropped_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mdropped[0m [36mvalue[0m=[35m628[0m


{'metric_name': 'lines_dropped_after_language_filter', 'metric_type': 'counter', 'value': 628, 'component': 'language_filter', 'status': 'dropped', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.289106Z'}


[2m2025-10-03T19:54:55.300843Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m0[0m


{'metric_name': 'captions_processed_total', 'metric_type': 'counter', 'value': 0, 'component': 'preprocessor', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.300843Z'}


[2m2025-10-03T19:54:55.301752Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_kept_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1368[0m


{'metric_name': 'captions_kept_total', 'metric_type': 'counter', 'value': 1368, 'component': 'preprocessor', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.301752Z'}


[2m2025-10-03T19:54:55.302328Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_dropped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mempty[0m [36mvalue[0m=[35m0[0m


{'metric_name': 'captions_dropped_total', 'metric_type': 'counter', 'value': 0, 'component': 'preprocessor', 'reason': 'empty', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.302328Z'}


[2m2025-10-03T19:54:55.302856Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_dropped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mnon_english[0m [36mvalue[0m=[35m488[0m


{'metric_name': 'captions_dropped_total', 'metric_type': 'counter', 'value': 488, 'component': 'preprocessor', 'reason': 'non_english', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.302856Z'}


[2m2025-10-03T19:54:55.303715Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_deduped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mduplicate[0m [36mvalue[0m=[35m0[0m


{'metric_name': 'captions_deduped_total', 'metric_type': 'counter', 'value': 0, 'component': 'preprocessor', 'reason': 'duplicate', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.303715Z'}


[2m2025-10-03T19:54:55.304178Z[0m [[32m[1minfo     [0m] [1mSRT preprocessing completed   [0m [36mcomponent[0m=[35mpreprocessor[0m [36moutput_lines[0m=[35m1368[0m [36mprocessing_stats[0m=[35m{'total_captions': 1856, 'kept': 1368, 'dropped_empty': 0, 'dropped_non_english': 488, 'deduped': 0}[0m


{'output_lines': 1368, 'processing_stats': {'total_captions': 1856, 'kept': 1368, 'dropped_empty': 0, 'dropped_non_english': 488, 'deduped': 0}, 'component': 'preprocessor', 'event': 'SRT preprocessing completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.304178Z'}


[2m2025-10-03T19:54:55.304671Z[0m [[32m[1minfo     [0m] [1msrt_preprocessing_completed   [0m [36mcaptions_dropped[0m=[35m-1368[0m [36mcaptions_kept[0m=[35m1368[0m [36mduration_ms[0m=[35m6595[0m [36minput_length[0m=[35m118551[0m [36moperation[0m=[35msrt_preprocessing[0m [36moutput_lines[0m=[35m1368[0m


{'operation': 'srt_preprocessing', 'duration_ms': 6595, 'input_length': 118551, 'output_lines': 1368, 'captions_kept': 1368, 'captions_dropped': -1368, 'event': 'srt_preprocessing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.304671Z'}


[2m2025-10-03T19:54:55.314148Z[0m [[32m[1minfo     [0m] [1mRunning component chunk       [0m [36mcomponent_name[0m=[35mchunk[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component chunk


[2m2025-10-03T19:54:55.314944Z[0m [[32m[1minfo     [0m] [1msemantic_chunking_started     [0m [36minput_captions[0m=[35m1368[0m [36moperation[0m=[35msemantic_chunking[0m


{'operation': 'semantic_chunking', 'input_captions': 1368, 'event': 'semantic_chunking_started', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.314944Z'}


[2m2025-10-03T19:54:55.315496Z[0m [[32m[1minfo     [0m] [1mStarting semantic chunking    [0m [36mchunk_size[0m=[35m512[0m [36mcomponent[0m=[35mchunker[0m [36minput_captions[0m=[35m1368[0m [36mthreshold[0m=[35mauto[0m


{'input_captions': 1368, 'threshold': 'auto', 'chunk_size': 512, 'component': 'chunker', 'event': 'Starting semantic chunking', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.315496Z'}


[2m2025-10-03T19:54:55.321199Z[0m [[32m[1minfo     [0m] [1mMemory usage - semantic_chunking - start[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m5151.8125[0m [36mmax_allocated_mb[0m=[35m5278.37939453125[0m [36mmax_reserved_mb[0m=[35m6492.0[0m [36mreserved_mb[0m=[35m2322.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m34.091072642226045[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 2322.0, 'max_allocated_mb': 5278.37939453125, 'max_reserved_mb': 6492.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 5151.8125, 'utilization_percent': 34.091072642226045, 'component': 'memory_management', 'event': 'Memory usage - semantic_chunking - start', 'level': 'info', 'timestamp': '2025-10-03T19:54:55.321199Z'}


[2m2025-10-03T19:54:55.324028Z[0m [[32m[1minfo     [0m] [1mUse pytorch device_name: cuda:0[0m [36mlineno[0m=[35m219[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


Use pytorch device_name: cuda:0


[2m2025-10-03T19:54:55.325010Z[0m [[32m[1minfo     [0m] [1mLoad pretrained SentenceTransformer: models/chunker/qwen3-embedding-0.6B[0m [36mlineno[0m=[35m227[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


Load pretrained SentenceTransformer: models/chunker/qwen3-embedding-0.6B


[2m2025-10-03T19:54:56.326938Z[0m [[32m[1minfo     [0m] [1m1 prompt is loaded, with the key: query[0m [36mlineno[0m=[35m378[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


1 prompt is loaded, with the key: query


Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

[2m2025-10-03T19:55:15.258494Z[0m [[32m[1minfo     [0m] [1mMemory usage - semantic_chunking - end[0m [36mallocated_mb[0m=[35m4553.5400390625[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m2005.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6492.0[0m [36mreserved_mb[0m=[35m5468.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m74.33894374925039[0m


{'device': 0, 'allocated_mb': 4553.5400390625, 'reserved_mb': 5468.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6492.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 2005.8125, 'utilization_percent': 74.33894374925039, 'component': 'memory_management', 'event': 'Memory usage - semantic_chunking - end', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.258494Z'}


[2m2025-10-03T19:55:15.532227Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mcaptions_chunked_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m1368[0m


{'metric_name': 'captions_chunked_total', 'metric_type': 'counter', 'value': 1368, 'component': 'chunker', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.532227Z'}


[2m2025-10-03T19:55:15.533393Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mchunks_generated_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m367[0m


{'metric_name': 'chunks_generated_total', 'metric_type': 'counter', 'value': 367, 'component': 'chunker', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.533393Z'}


[2m2025-10-03T19:55:15.534032Z[0m [[32m[1minfo     [0m] [1mmetric_histogram              [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mavg_tokens_per_chunk[0m [36mmetric_type[0m=[35mhistogram[0m [36mvalue[0m=[35m32.403269754768395[0m


{'metric_name': 'avg_tokens_per_chunk', 'metric_type': 'histogram', 'value': 32.403269754768395, 'component': 'chunker', 'event': 'metric_histogram', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.534032Z'}


[2m2025-10-03T19:55:15.534474Z[0m [[32m[1minfo     [0m] [1mSemantic chunking completed   [0m [36mavg_tokens_per_chunk[0m=[35m32.403269754768395[0m [36mcomponent[0m=[35mchunker[0m [36minput_captions[0m=[35m1368[0m [36moutput_chunks[0m=[35m367[0m


{'input_captions': 1368, 'output_chunks': 367, 'avg_tokens_per_chunk': 32.403269754768395, 'component': 'chunker', 'event': 'Semantic chunking completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.534474Z'}


[2m2025-10-03T19:55:15.535209Z[0m [[32m[1minfo     [0m] [1msemantic_chunking_completed   [0m [36mavg_tokens_per_chunk[0m=[35m32.403269754768395[0m [36mduration_ms[0m=[35m20220[0m [36minput_captions[0m=[35m1368[0m [36moperation[0m=[35msemantic_chunking[0m [36moutput_chunks[0m=[35m367[0m [36moutput_chunks_final[0m=[35m367[0m


{'operation': 'semantic_chunking', 'duration_ms': 20220, 'input_captions': 1368, 'output_chunks': 367, 'avg_tokens_per_chunk': 32.403269754768395, 'output_chunks_final': 367, 'event': 'semantic_chunking_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.535209Z'}


[2m2025-10-03T19:55:15.538647Z[0m [[32m[1minfo     [0m] [1mRunning component explode     [0m [36mcomponent_name[0m=[35mexplode[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component explode


[2m2025-10-03T19:55:15.539163Z[0m [[32m[1minfo     [0m] [1mchunk_jsonl_parsing_started   [0m [36minput_lines[0m=[35m367[0m [36moperation[0m=[35mchunk_jsonl_parsing[0m


{'operation': 'chunk_jsonl_parsing', 'input_lines': 367, 'event': 'chunk_jsonl_parsing_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.539163Z'}


[2m2025-10-03T19:55:15.541771Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mchunk_lines_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m367[0m


{'metric_name': 'chunk_lines_processed_total', 'metric_type': 'counter', 'value': 367, 'component': 'pipeline_glue', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.541771Z'}


[2m2025-10-03T19:55:15.542302Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mtexts_extracted_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m367[0m


{'metric_name': 'texts_extracted_total', 'metric_type': 'counter', 'value': 367, 'component': 'pipeline_glue', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.542302Z'}


[2m2025-10-03T19:55:15.542756Z[0m [[32m[1minfo     [0m] [1mChunk JSONL parsing completed [0m [36mcomponent[0m=[35mpipeline_glue[0m [36minput_lines[0m=[35m367[0m [36moutput_texts[0m=[35m367[0m [36mskipped_lines[0m=[35m0[0m


{'input_lines': 367, 'output_texts': 367, 'skipped_lines': 0, 'component': 'pipeline_glue', 'event': 'Chunk JSONL parsing completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.542756Z'}


[2m2025-10-03T19:55:15.543182Z[0m [[32m[1minfo     [0m] [1mchunk_jsonl_parsing_completed [0m [36mduration_ms[0m=[35m4[0m [36minput_lines[0m=[35m367[0m [36moperation[0m=[35mchunk_jsonl_parsing[0m [36moutput_texts[0m=[35m367[0m [36mskipped_lines[0m=[35m0[0m [36msuccess_rate[0m=[35m1.0[0m


{'operation': 'chunk_jsonl_parsing', 'duration_ms': 4, 'input_lines': 367, 'output_texts': 367, 'skipped_lines': 0, 'success_rate': 1.0, 'event': 'chunk_jsonl_parsing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.543182Z'}


[2m2025-10-03T19:55:15.546550Z[0m [[32m[1minfo     [0m] [1mRunning component embed       [0m [36mcomponent_name[0m=[35membed[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component embed


[2m2025-10-03T19:55:15.547320Z[0m [[32m[1minfo     [0m] [1mtext_embedding_started        [0m [36minput_texts[0m=[35m367[0m [36moperation[0m=[35mtext_embedding[0m


{'operation': 'text_embedding', 'input_texts': 367, 'event': 'text_embedding_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.547320Z'}


[2m2025-10-03T19:55:15.549204Z[0m [[32m[1minfo     [0m] [1mMemory usage - before embedding[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m2891.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6492.0[0m [36mreserved_mb[0m=[35m4582.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m63.00403790029184[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 4582.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6492.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 2891.8125, 'utilization_percent': 63.00403790029184, 'component': 'memory_management', 'event': 'Memory usage - before embedding', 'level': 'info', 'timestamp': '2025-10-03T19:55:15.549204Z'}


Batches:   0%|          | 0/12 [00:00<?, ?it/s]

[2m2025-10-03T19:55:21.695397Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35membedder[0m [36mmetric_name[0m=[35membeddings_generated_total[0m [36mmetric_type[0m=[35mcounter[0m [36mmode[0m=[35mbatch[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m367[0m


{'metric_name': 'embeddings_generated_total', 'metric_type': 'counter', 'value': 367, 'component': 'embedder', 'mode': 'batch', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:21.695397Z'}


[2m2025-10-03T19:55:21.697529Z[0m [[32m[1minfo     [0m] [1mMemory usage - after batch embedding[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m695.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6778.0[0m [36mreserved_mb[0m=[35m6778.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m91.0982289209611[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 6778.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6778.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 695.8125, 'utilization_percent': 91.0982289209611, 'component': 'memory_management', 'event': 'Memory usage - after batch embedding', 'level': 'info', 'timestamp': '2025-10-03T19:55:21.697529Z'}


[2m2025-10-03T19:55:21.698553Z[0m [[32m[1minfo     [0m] [1mtext_embedding_completed      [0m [36mduration_ms[0m=[35m6151[0m [36mfailed_embeddings[0m=[35m0[0m [36minput_texts[0m=[35m367[0m [36moperation[0m=[35mtext_embedding[0m [36mprocessing_mode[0m=[35mbatch[0m [36msuccessful_embeddings[0m=[35m367[0m


{'operation': 'text_embedding', 'duration_ms': 6151, 'input_texts': 367, 'successful_embeddings': 367, 'failed_embeddings': 0, 'processing_mode': 'batch', 'event': 'text_embedding_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:21.698553Z'}


[2m2025-10-03T19:55:24.028574Z[0m [[32m[1minfo     [0m] [1mRunning component docs        [0m [36mcomponent_name[0m=[35mdocs[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component docs


[2m2025-10-03T19:55:24.030085Z[0m [[32m[1minfo     [0m] [1mdocument_building_started     [0m [36minput_embeddings[0m=[35m367[0m [36minput_metas[0m=[35m367[0m [36minput_texts[0m=[35m367[0m [36moperation[0m=[35mdocument_building[0m


{'operation': 'document_building', 'input_texts': 367, 'input_metas': 367, 'input_embeddings': 367, 'event': 'document_building_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:24.030085Z'}


[2m2025-10-03T19:55:24.031621Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mdocuments_built_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m367[0m


{'metric_name': 'documents_built_total', 'metric_type': 'counter', 'value': 367, 'component': 'pipeline_glue', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:24.031621Z'}


[2m2025-10-03T19:55:24.032283Z[0m [[32m[1minfo     [0m] [1mmetric_histogram              [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mdocument_build_batch_size[0m [36mmetric_type[0m=[35mhistogram[0m [36mvalue[0m=[35m367[0m


{'metric_name': 'document_build_batch_size', 'metric_type': 'histogram', 'value': 367, 'component': 'pipeline_glue', 'event': 'metric_histogram', 'level': 'info', 'timestamp': '2025-10-03T19:55:24.032283Z'}


[2m2025-10-03T19:55:24.032686Z[0m [[32m[1minfo     [0m] [1mDocument building completed   [0m [36mcomponent[0m=[35mpipeline_glue[0m [36minput_embeddings[0m=[35m367[0m [36minput_metas[0m=[35m367[0m [36minput_texts[0m=[35m367[0m [36moutput_documents[0m=[35m367[0m


{'input_texts': 367, 'input_metas': 367, 'input_embeddings': 367, 'output_documents': 367, 'component': 'pipeline_glue', 'event': 'Document building completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:24.032686Z'}


[2m2025-10-03T19:55:24.033063Z[0m [[32m[1minfo     [0m] [1mdocument_building_completed   [0m [36mdocuments_built[0m=[35m367[0m [36mduration_ms[0m=[35m2[0m [36minput_alignment_ratio[0m=[35m1.0[0m [36minput_embeddings[0m=[35m367[0m [36minput_metas[0m=[35m367[0m [36minput_texts[0m=[35m367[0m [36moperation[0m=[35mdocument_building[0m [36moutput_documents[0m=[35m367[0m


{'operation': 'document_building', 'duration_ms': 2, 'input_texts': 367, 'input_metas': 367, 'input_embeddings': 367, 'output_documents': 367, 'input_alignment_ratio': 1.0, 'documents_built': 367, 'event': 'document_building_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:24.033063Z'}


[2m2025-10-03T19:55:26.303907Z[0m [[32m[1minfo     [0m] [1mRunning component write       [0m [36mcomponent_name[0m=[35mwrite[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component write


[2m2025-10-03T19:55:26.304976Z[0m [[32m[1minfo     [0m] [1mdocument_writing_started      [0m [36moperation[0m=[35mdocument_writing[0m [36mtotal_documents[0m=[35m367[0m


{'operation': 'document_writing', 'total_documents': 367, 'event': 'document_writing_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:26.304976Z'}


[2m2025-10-03T19:55:26.433140Z[0m [[32m[1minfo     [0m] [1mWriting documents to Qdrant   [0m [36mcomponent[0m=[35mqdrant_writer[0m [36mdocument_count[0m=[35m367[0m


{'document_count': 367, 'component': 'qdrant_writer', 'event': 'Writing documents to Qdrant', 'level': 'info', 'timestamp': '2025-10-03T19:55:26.433140Z'}


[2m2025-10-03T19:55:26.438517Z[0m [[32m[1minfo     [0m] [1mHTTP Request: POST http://localhost:6300/collections/memoirr/points "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: POST http://localhost:6300/collections/memoirr/points "HTTP/1.1 200 OK"


  0%|          | 0/367 [00:00<?, ?it/s][2m2025-10-03T19:55:26.594606Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 27%|██▋       | 100/367 [00:00<00:00, 642.01it/s][2m2025-10-03T19:55:26.748337Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 54%|█████▍    | 200/367 [00:00<00:00, 644.94it/s][2m2025-10-03T19:55:26.909483Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 82%|████████▏ | 300/367 [00:00<00:00, 634.12it/s][2m2025-10-03T19:55:27.017911Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


400it [00:00, 689.01it/s]                         
[2m2025-10-03T19:55:27.021666Z[0m [[32m[1minfo     [0m] [1mDocuments written successfully[0m [36mcomponent[0m=[35mqdrant_writer[0m [36mwritten_count[0m=[35m367[0m


{'written_count': 367, 'component': 'qdrant_writer', 'event': 'Documents written successfully', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.021666Z'}


[2m2025-10-03T19:55:27.022622Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mqdrant_writer[0m [36mmetric_name[0m=[35mdocuments_written_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m367[0m


{'metric_name': 'documents_written_total', 'metric_type': 'counter', 'value': 367, 'component': 'qdrant_writer', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.022622Z'}


[2m2025-10-03T19:55:27.024110Z[0m [[32m[1minfo     [0m] [1mdocument_writing_completed    [0m [36mdocuments_skipped[0m=[35m0[0m [36mdocuments_written[0m=[35m367[0m [36mduration_ms[0m=[35m719[0m [36moperation[0m=[35mdocument_writing[0m [36msuccess_rate[0m=[35m1.0[0m [36mtotal_documents[0m=[35m367[0m


{'operation': 'document_writing', 'duration_ms': 719, 'total_documents': 367, 'documents_written': 367, 'documents_skipped': 0, 'success_rate': 1.0, 'event': 'document_writing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.024110Z'}


[2m2025-10-03T19:55:27.029017Z[0m [[32m[1minfo     [0m] [1mSRT file processed successfully[0m [36mcomponent[0m=[35mbatch_processor[0m [36mdocuments_written[0m=[35m367[0m [36mfile_path[0m=[35mdata/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt[0m


{'file_path': 'data/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt', 'documents_written': 367, 'component': 'batch_processor', 'event': 'SRT file processed successfully', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.029017Z'}


[2m2025-10-03T19:55:27.031184Z[0m [[32m[1minfo     [0m] [1msingle_file_processing_completed[0m [36mdocuments_written[0m=[35m367[0m [36mduration_ms[0m=[35m38327[0m [36mfile_path[0m=[35mdata/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt[0m [36mfile_size_bytes[0m=[35m118551[0m [36moperation[0m=[35msingle_file_processing[0m


{'operation': 'single_file_processing', 'duration_ms': 38327, 'file_path': 'data/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt', 'documents_written': 367, 'file_size_bytes': 118551, 'event': 'single_file_processing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.031184Z'}


[2m2025-10-03T19:55:27.034785Z[0m [[32m[1minfo     [0m] [1mMemory usage - after file 2/3 [0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m695.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6778.0[0m [36mreserved_mb[0m=[35m6778.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m91.0982289209611[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 6778.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6778.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 695.8125, 'utilization_percent': 91.0982289209611, 'component': 'memory_management', 'event': 'Memory usage - after file 2/3', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.034785Z'}


[2m2025-10-03T19:55:27.037112Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mbatch_processor[0m [36mmetric_name[0m=[35msrt_files_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m1[0m


{'metric_name': 'srt_files_processed_total', 'metric_type': 'counter', 'value': 1, 'component': 'batch_processor', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.037112Z'}


[2m2025-10-03T19:55:27.038672Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mbatch_processor[0m [36mmetric_name[0m=[35mdocuments_written_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m367[0m


{'metric_name': 'documents_written_total', 'metric_type': 'counter', 'value': 367, 'component': 'batch_processor', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.038672Z'}


[2m2025-10-03T19:55:27.039924Z[0m [[32m[1minfo     [0m] [1mProcessing SRT file           [0m [36mcomponent[0m=[35mbatch_processor[0m [36mfile_index[0m=[35m3[0m [36mfile_path[0m=[35m'data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt'[0m [36mtotal_files[0m=[35m3[0m


{'file_index': 3, 'total_files': 3, 'file_path': 'data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt', 'component': 'batch_processor', 'event': 'Processing SRT file', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.039924Z'}


[2m2025-10-03T19:55:27.383654Z[0m [[32m[1minfo     [0m] [1mMemory usage - before file 3/3[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m5151.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6778.0[0m [36mreserved_mb[0m=[35m2322.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m34.091072642226045[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 2322.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6778.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 5151.8125, 'utilization_percent': 34.091072642226045, 'component': 'memory_management', 'event': 'Memory usage - before file 3/3', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.383654Z'}


[2m2025-10-03T19:55:27.385004Z[0m [[32m[1minfo     [0m] [1msingle_file_processing_started[0m [36mfile_path[0m=[35m'data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt'[0m [36moperation[0m=[35msingle_file_processing[0m


{'operation': 'single_file_processing', 'file_path': 'data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt', 'event': 'single_file_processing_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.385004Z'}


[2m2025-10-03T19:55:27.388921Z[0m [[32m[1minfo     [0m] [1mRunning component pre         [0m [36mcomponent_name[0m=[35mpre[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component pre


[2m2025-10-03T19:55:27.390410Z[0m [[32m[1minfo     [0m] [1msrt_preprocessing_started     [0m [36minput_length[0m=[35m134817[0m [36moperation[0m=[35msrt_preprocessing[0m


{'operation': 'srt_preprocessing', 'input_length': 134817, 'event': 'srt_preprocessing_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.390410Z'}


[2m2025-10-03T19:55:27.391628Z[0m [[32m[1minfo     [0m] [1mStarting SRT preprocessing    [0m [36mcomponent[0m=[35mpreprocessor[0m [36mdedupe_window_ms[0m=[35m1000[0m [36minput_size_chars[0m=[35m134817[0m [36mmin_len[0m=[35m1[0m


{'input_size_chars': 134817, 'min_len': 1, 'dedupe_window_ms': 1000, 'component': 'preprocessor', 'event': 'Starting SRT preprocessing', 'level': 'info', 'timestamp': '2025-10-03T19:55:27.391628Z'}


[2m2025-10-03T19:55:33.795293Z[0m [[32m[1minfo     [0m] [1mCaption language filtering completed[0m [36mcomponent[0m=[35mlanguage_filter[0m [36mdropped_captions[0m=[35m428[0m [36mdropped_lines[0m=[35m564[0m [36mkept_captions[0m=[35m1633[0m [36mkept_lines[0m=[35m1785[0m [36mtotal_captions[0m=[35m2061[0m [36mtotal_lines[0m=[35m2349[0m


{'total_captions': 2061, 'kept_captions': 1633, 'dropped_captions': 428, 'total_lines': 2349, 'kept_lines': 1785, 'dropped_lines': 564, 'component': 'language_filter', 'event': 'Caption language filtering completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.795293Z'}


[2m2025-10-03T19:55:33.796309Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_language_filtered_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m2061[0m


{'metric_name': 'captions_language_filtered_total', 'metric_type': 'counter', 'value': 2061, 'component': 'language_filter', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.796309Z'}


[2m2025-10-03T19:55:33.797022Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_kept_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1633[0m


{'metric_name': 'captions_kept_after_language_filter', 'metric_type': 'counter', 'value': 1633, 'component': 'language_filter', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.797022Z'}


[2m2025-10-03T19:55:33.797881Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mcaptions_dropped_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mdropped[0m [36mvalue[0m=[35m428[0m


{'metric_name': 'captions_dropped_after_language_filter', 'metric_type': 'counter', 'value': 428, 'component': 'language_filter', 'status': 'dropped', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.797881Z'}


[2m2025-10-03T19:55:33.798627Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_language_filtered_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m2349[0m


{'metric_name': 'lines_language_filtered_total', 'metric_type': 'counter', 'value': 2349, 'component': 'language_filter', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.798627Z'}


[2m2025-10-03T19:55:33.799160Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_kept_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1785[0m


{'metric_name': 'lines_kept_after_language_filter', 'metric_type': 'counter', 'value': 1785, 'component': 'language_filter', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.799160Z'}


[2m2025-10-03T19:55:33.799627Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mlanguage_filter[0m [36mmetric_name[0m=[35mlines_dropped_after_language_filter[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mdropped[0m [36mvalue[0m=[35m564[0m


{'metric_name': 'lines_dropped_after_language_filter', 'metric_type': 'counter', 'value': 564, 'component': 'language_filter', 'status': 'dropped', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.799627Z'}


[2m2025-10-03T19:55:33.812443Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m0[0m


{'metric_name': 'captions_processed_total', 'metric_type': 'counter', 'value': 0, 'component': 'preprocessor', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.812443Z'}


[2m2025-10-03T19:55:33.813352Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_kept_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35mkept[0m [36mvalue[0m=[35m1578[0m


{'metric_name': 'captions_kept_total', 'metric_type': 'counter', 'value': 1578, 'component': 'preprocessor', 'status': 'kept', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.813352Z'}


[2m2025-10-03T19:55:33.813876Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_dropped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mempty[0m [36mvalue[0m=[35m55[0m


{'metric_name': 'captions_dropped_total', 'metric_type': 'counter', 'value': 55, 'component': 'preprocessor', 'reason': 'empty', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.813876Z'}


[2m2025-10-03T19:55:33.814338Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_dropped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mnon_english[0m [36mvalue[0m=[35m428[0m


{'metric_name': 'captions_dropped_total', 'metric_type': 'counter', 'value': 428, 'component': 'preprocessor', 'reason': 'non_english', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.814338Z'}


[2m2025-10-03T19:55:33.814834Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpreprocessor[0m [36mmetric_name[0m=[35mcaptions_deduped_total[0m [36mmetric_type[0m=[35mcounter[0m [36mreason[0m=[35mduplicate[0m [36mvalue[0m=[35m0[0m


{'metric_name': 'captions_deduped_total', 'metric_type': 'counter', 'value': 0, 'component': 'preprocessor', 'reason': 'duplicate', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.814834Z'}


[2m2025-10-03T19:55:33.816276Z[0m [[32m[1minfo     [0m] [1mSRT preprocessing completed   [0m [36mcomponent[0m=[35mpreprocessor[0m [36moutput_lines[0m=[35m1578[0m [36mprocessing_stats[0m=[35m{'total_captions': 2061, 'kept': 1578, 'dropped_empty': 55, 'dropped_non_english': 428, 'deduped': 0}[0m


{'output_lines': 1578, 'processing_stats': {'total_captions': 2061, 'kept': 1578, 'dropped_empty': 55, 'dropped_non_english': 428, 'deduped': 0}, 'component': 'preprocessor', 'event': 'SRT preprocessing completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.816276Z'}


[2m2025-10-03T19:55:33.817141Z[0m [[32m[1minfo     [0m] [1msrt_preprocessing_completed   [0m [36mcaptions_dropped[0m=[35m-1578[0m [36mcaptions_kept[0m=[35m1578[0m [36mduration_ms[0m=[35m6426[0m [36minput_length[0m=[35m134817[0m [36moperation[0m=[35msrt_preprocessing[0m [36moutput_lines[0m=[35m1578[0m


{'operation': 'srt_preprocessing', 'duration_ms': 6426, 'input_length': 134817, 'output_lines': 1578, 'captions_kept': 1578, 'captions_dropped': -1578, 'event': 'srt_preprocessing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.817141Z'}


[2m2025-10-03T19:55:33.829138Z[0m [[32m[1minfo     [0m] [1mRunning component chunk       [0m [36mcomponent_name[0m=[35mchunk[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component chunk


[2m2025-10-03T19:55:33.830656Z[0m [[32m[1minfo     [0m] [1msemantic_chunking_started     [0m [36minput_captions[0m=[35m1578[0m [36moperation[0m=[35msemantic_chunking[0m


{'operation': 'semantic_chunking', 'input_captions': 1578, 'event': 'semantic_chunking_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.830656Z'}


[2m2025-10-03T19:55:33.831645Z[0m [[32m[1minfo     [0m] [1mStarting semantic chunking    [0m [36mchunk_size[0m=[35m512[0m [36mcomponent[0m=[35mchunker[0m [36minput_captions[0m=[35m1578[0m [36mthreshold[0m=[35mauto[0m


{'input_captions': 1578, 'threshold': 'auto', 'chunk_size': 512, 'component': 'chunker', 'event': 'Starting semantic chunking', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.831645Z'}


[2m2025-10-03T19:55:33.838814Z[0m [[32m[1minfo     [0m] [1mMemory usage - semantic_chunking - start[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m5151.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6778.0[0m [36mreserved_mb[0m=[35m2322.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m34.091072642226045[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 2322.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6778.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 5151.8125, 'utilization_percent': 34.091072642226045, 'component': 'memory_management', 'event': 'Memory usage - semantic_chunking - start', 'level': 'info', 'timestamp': '2025-10-03T19:55:33.838814Z'}


[2m2025-10-03T19:55:33.842804Z[0m [[32m[1minfo     [0m] [1mUse pytorch device_name: cuda:0[0m [36mlineno[0m=[35m219[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


Use pytorch device_name: cuda:0


[2m2025-10-03T19:55:33.844108Z[0m [[32m[1minfo     [0m] [1mLoad pretrained SentenceTransformer: models/chunker/qwen3-embedding-0.6B[0m [36mlineno[0m=[35m227[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


Load pretrained SentenceTransformer: models/chunker/qwen3-embedding-0.6B


[2m2025-10-03T19:55:34.833702Z[0m [[32m[1minfo     [0m] [1m1 prompt is loaded, with the key: query[0m [36mlineno[0m=[35m378[0m [36mmodule[0m=[35msentence_transformers.SentenceTransformer[0m


1 prompt is loaded, with the key: query


Batches:   0%|          | 0/39 [00:00<?, ?it/s]

Batches:   0%|          | 0/39 [00:00<?, ?it/s]

[2m2025-10-03T19:55:57.263049Z[0m [[32m[1minfo     [0m] [1mMemory usage - semantic_chunking - end[0m [36mallocated_mb[0m=[35m4553.5400390625[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m2107.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6778.0[0m [36mreserved_mb[0m=[35m5366.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m73.03402230839964[0m


{'device': 0, 'allocated_mb': 4553.5400390625, 'reserved_mb': 5366.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6778.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 2107.8125, 'utilization_percent': 73.03402230839964, 'component': 'memory_management', 'event': 'Memory usage - semantic_chunking - end', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.263049Z'}


[2m2025-10-03T19:55:57.564449Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mcaptions_chunked_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m1578[0m


{'metric_name': 'captions_chunked_total', 'metric_type': 'counter', 'value': 1578, 'component': 'chunker', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.564449Z'}


[2m2025-10-03T19:55:57.565597Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mchunks_generated_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m446[0m


{'metric_name': 'chunks_generated_total', 'metric_type': 'counter', 'value': 446, 'component': 'chunker', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.565597Z'}


[2m2025-10-03T19:55:57.566222Z[0m [[32m[1minfo     [0m] [1mmetric_histogram              [0m [36mcomponent[0m=[35mchunker[0m [36mmetric_name[0m=[35mavg_tokens_per_chunk[0m [36mmetric_type[0m=[35mhistogram[0m [36mvalue[0m=[35m32.04932735426009[0m


{'metric_name': 'avg_tokens_per_chunk', 'metric_type': 'histogram', 'value': 32.04932735426009, 'component': 'chunker', 'event': 'metric_histogram', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.566222Z'}


[2m2025-10-03T19:55:57.566978Z[0m [[32m[1minfo     [0m] [1mSemantic chunking completed   [0m [36mavg_tokens_per_chunk[0m=[35m32.04932735426009[0m [36mcomponent[0m=[35mchunker[0m [36minput_captions[0m=[35m1578[0m [36moutput_chunks[0m=[35m446[0m


{'input_captions': 1578, 'output_chunks': 446, 'avg_tokens_per_chunk': 32.04932735426009, 'component': 'chunker', 'event': 'Semantic chunking completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.566978Z'}


[2m2025-10-03T19:55:57.567657Z[0m [[32m[1minfo     [0m] [1msemantic_chunking_completed   [0m [36mavg_tokens_per_chunk[0m=[35m32.04932735426009[0m [36mduration_ms[0m=[35m23736[0m [36minput_captions[0m=[35m1578[0m [36moperation[0m=[35msemantic_chunking[0m [36moutput_chunks[0m=[35m446[0m [36moutput_chunks_final[0m=[35m446[0m


{'operation': 'semantic_chunking', 'duration_ms': 23736, 'input_captions': 1578, 'output_chunks': 446, 'avg_tokens_per_chunk': 32.04932735426009, 'output_chunks_final': 446, 'event': 'semantic_chunking_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.567657Z'}


[2m2025-10-03T19:55:57.575868Z[0m [[32m[1minfo     [0m] [1mRunning component explode     [0m [36mcomponent_name[0m=[35mexplode[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component explode


[2m2025-10-03T19:55:57.576749Z[0m [[32m[1minfo     [0m] [1mchunk_jsonl_parsing_started   [0m [36minput_lines[0m=[35m446[0m [36moperation[0m=[35mchunk_jsonl_parsing[0m


{'operation': 'chunk_jsonl_parsing', 'input_lines': 446, 'event': 'chunk_jsonl_parsing_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.576749Z'}


[2m2025-10-03T19:55:57.583121Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mchunk_lines_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m446[0m


{'metric_name': 'chunk_lines_processed_total', 'metric_type': 'counter', 'value': 446, 'component': 'pipeline_glue', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.583121Z'}


[2m2025-10-03T19:55:57.584218Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mtexts_extracted_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m446[0m


{'metric_name': 'texts_extracted_total', 'metric_type': 'counter', 'value': 446, 'component': 'pipeline_glue', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.584218Z'}


[2m2025-10-03T19:55:57.584829Z[0m [[32m[1minfo     [0m] [1mChunk JSONL parsing completed [0m [36mcomponent[0m=[35mpipeline_glue[0m [36minput_lines[0m=[35m446[0m [36moutput_texts[0m=[35m446[0m [36mskipped_lines[0m=[35m0[0m


{'input_lines': 446, 'output_texts': 446, 'skipped_lines': 0, 'component': 'pipeline_glue', 'event': 'Chunk JSONL parsing completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.584829Z'}


[2m2025-10-03T19:55:57.585672Z[0m [[32m[1minfo     [0m] [1mchunk_jsonl_parsing_completed [0m [36mduration_ms[0m=[35m8[0m [36minput_lines[0m=[35m446[0m [36moperation[0m=[35mchunk_jsonl_parsing[0m [36moutput_texts[0m=[35m446[0m [36mskipped_lines[0m=[35m0[0m [36msuccess_rate[0m=[35m1.0[0m


{'operation': 'chunk_jsonl_parsing', 'duration_ms': 8, 'input_lines': 446, 'output_texts': 446, 'skipped_lines': 0, 'success_rate': 1.0, 'event': 'chunk_jsonl_parsing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.585672Z'}


[2m2025-10-03T19:55:57.592744Z[0m [[32m[1minfo     [0m] [1mRunning component embed       [0m [36mcomponent_name[0m=[35membed[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component embed


[2m2025-10-03T19:55:57.593826Z[0m [[32m[1minfo     [0m] [1mtext_embedding_started        [0m [36minput_texts[0m=[35m446[0m [36moperation[0m=[35mtext_embedding[0m


{'operation': 'text_embedding', 'input_texts': 446, 'event': 'text_embedding_started', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.593826Z'}


[2m2025-10-03T19:55:57.595613Z[0m [[32m[1minfo     [0m] [1mMemory usage - before embedding[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m2891.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6778.0[0m [36mreserved_mb[0m=[35m4582.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m63.00403790029184[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 4582.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6778.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 2891.8125, 'utilization_percent': 63.00403790029184, 'component': 'memory_management', 'event': 'Memory usage - before embedding', 'level': 'info', 'timestamp': '2025-10-03T19:55:57.595613Z'}


Batches:   0%|          | 0/14 [00:00<?, ?it/s]

[2m2025-10-03T19:56:04.150661Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35membedder[0m [36mmetric_name[0m=[35membeddings_generated_total[0m [36mmetric_type[0m=[35mcounter[0m [36mmode[0m=[35mbatch[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m446[0m


{'metric_name': 'embeddings_generated_total', 'metric_type': 'counter', 'value': 446, 'component': 'embedder', 'mode': 'batch', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:56:04.150661Z'}


[2m2025-10-03T19:56:04.152944Z[0m [[32m[1minfo     [0m] [1mMemory usage - after batch embedding[0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m1231.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6778.0[0m [36mreserved_mb[0m=[35m6242.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m84.24099468276496[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 6242.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6778.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 1231.8125, 'utilization_percent': 84.24099468276496, 'component': 'memory_management', 'event': 'Memory usage - after batch embedding', 'level': 'info', 'timestamp': '2025-10-03T19:56:04.152944Z'}


[2m2025-10-03T19:56:04.154353Z[0m [[32m[1minfo     [0m] [1mtext_embedding_completed      [0m [36mduration_ms[0m=[35m6560[0m [36mfailed_embeddings[0m=[35m0[0m [36minput_texts[0m=[35m446[0m [36moperation[0m=[35mtext_embedding[0m [36mprocessing_mode[0m=[35mbatch[0m [36msuccessful_embeddings[0m=[35m446[0m


{'operation': 'text_embedding', 'duration_ms': 6560, 'input_texts': 446, 'successful_embeddings': 446, 'failed_embeddings': 0, 'processing_mode': 'batch', 'event': 'text_embedding_completed', 'level': 'info', 'timestamp': '2025-10-03T19:56:04.154353Z'}


[2m2025-10-03T19:56:06.985422Z[0m [[32m[1minfo     [0m] [1mRunning component docs        [0m [36mcomponent_name[0m=[35mdocs[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component docs


[2m2025-10-03T19:56:06.986330Z[0m [[32m[1minfo     [0m] [1mdocument_building_started     [0m [36minput_embeddings[0m=[35m446[0m [36minput_metas[0m=[35m446[0m [36minput_texts[0m=[35m446[0m [36moperation[0m=[35mdocument_building[0m


{'operation': 'document_building', 'input_texts': 446, 'input_metas': 446, 'input_embeddings': 446, 'event': 'document_building_started', 'level': 'info', 'timestamp': '2025-10-03T19:56:06.986330Z'}


[2m2025-10-03T19:56:06.987817Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mdocuments_built_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m446[0m


{'metric_name': 'documents_built_total', 'metric_type': 'counter', 'value': 446, 'component': 'pipeline_glue', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:56:06.987817Z'}


[2m2025-10-03T19:56:06.988395Z[0m [[32m[1minfo     [0m] [1mmetric_histogram              [0m [36mcomponent[0m=[35mpipeline_glue[0m [36mmetric_name[0m=[35mdocument_build_batch_size[0m [36mmetric_type[0m=[35mhistogram[0m [36mvalue[0m=[35m446[0m


{'metric_name': 'document_build_batch_size', 'metric_type': 'histogram', 'value': 446, 'component': 'pipeline_glue', 'event': 'metric_histogram', 'level': 'info', 'timestamp': '2025-10-03T19:56:06.988395Z'}


[2m2025-10-03T19:56:06.988923Z[0m [[32m[1minfo     [0m] [1mDocument building completed   [0m [36mcomponent[0m=[35mpipeline_glue[0m [36minput_embeddings[0m=[35m446[0m [36minput_metas[0m=[35m446[0m [36minput_texts[0m=[35m446[0m [36moutput_documents[0m=[35m446[0m


{'input_texts': 446, 'input_metas': 446, 'input_embeddings': 446, 'output_documents': 446, 'component': 'pipeline_glue', 'event': 'Document building completed', 'level': 'info', 'timestamp': '2025-10-03T19:56:06.988923Z'}


[2m2025-10-03T19:56:06.989396Z[0m [[32m[1minfo     [0m] [1mdocument_building_completed   [0m [36mdocuments_built[0m=[35m446[0m [36mduration_ms[0m=[35m3[0m [36minput_alignment_ratio[0m=[35m1.0[0m [36minput_embeddings[0m=[35m446[0m [36minput_metas[0m=[35m446[0m [36minput_texts[0m=[35m446[0m [36moperation[0m=[35mdocument_building[0m [36moutput_documents[0m=[35m446[0m


{'operation': 'document_building', 'duration_ms': 3, 'input_texts': 446, 'input_metas': 446, 'input_embeddings': 446, 'output_documents': 446, 'input_alignment_ratio': 1.0, 'documents_built': 446, 'event': 'document_building_completed', 'level': 'info', 'timestamp': '2025-10-03T19:56:06.989396Z'}


[2m2025-10-03T19:56:09.852449Z[0m [[32m[1minfo     [0m] [1mRunning component write       [0m [36mcomponent_name[0m=[35mwrite[0m [36mlineno[0m=[35m67[0m [36mmodule[0m=[35mhaystack.core.pipeline.pipeline[0m


Running component write


[2m2025-10-03T19:56:09.853320Z[0m [[32m[1minfo     [0m] [1mdocument_writing_started      [0m [36moperation[0m=[35mdocument_writing[0m [36mtotal_documents[0m=[35m446[0m


{'operation': 'document_writing', 'total_documents': 446, 'event': 'document_writing_started', 'level': 'info', 'timestamp': '2025-10-03T19:56:09.853320Z'}


[2m2025-10-03T19:56:10.013946Z[0m [[32m[1minfo     [0m] [1mWriting documents to Qdrant   [0m [36mcomponent[0m=[35mqdrant_writer[0m [36mdocument_count[0m=[35m446[0m


{'document_count': 446, 'component': 'qdrant_writer', 'event': 'Writing documents to Qdrant', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.013946Z'}


[2m2025-10-03T19:56:10.019797Z[0m [[32m[1minfo     [0m] [1mHTTP Request: POST http://localhost:6300/collections/memoirr/points "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: POST http://localhost:6300/collections/memoirr/points "HTTP/1.1 200 OK"


  0%|          | 0/446 [00:00<?, ?it/s][2m2025-10-03T19:56:10.181563Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 22%|██▏       | 100/446 [00:00<00:00, 620.45it/s][2m2025-10-03T19:56:10.336346Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 45%|████▍     | 200/446 [00:00<00:00, 635.72it/s][2m2025-10-03T19:56:10.506606Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 67%|██████▋   | 300/446 [00:00<00:00, 612.19it/s][2m2025-10-03T19:56:10.653444Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


 90%|████████▉ | 400/446 [00:00<00:00, 638.35it/s][2m2025-10-03T19:56:10.727651Z[0m [[32m[1minfo     [0m] [1mHTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"[0m [36mlineno[0m=[35m1025[0m [36mmodule[0m=[35mhttpx[0m


HTTP Request: PUT http://localhost:6300/collections/memoirr/points?wait=true "HTTP/1.1 200 OK"


500it [00:00, 706.70it/s]                         
[2m2025-10-03T19:56:10.730197Z[0m [[32m[1minfo     [0m] [1mDocuments written successfully[0m [36mcomponent[0m=[35mqdrant_writer[0m [36mwritten_count[0m=[35m446[0m


{'written_count': 446, 'component': 'qdrant_writer', 'event': 'Documents written successfully', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.730197Z'}


[2m2025-10-03T19:56:10.731305Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mqdrant_writer[0m [36mmetric_name[0m=[35mdocuments_written_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m446[0m


{'metric_name': 'documents_written_total', 'metric_type': 'counter', 'value': 446, 'component': 'qdrant_writer', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.731305Z'}


[2m2025-10-03T19:56:10.732411Z[0m [[32m[1minfo     [0m] [1mdocument_writing_completed    [0m [36mdocuments_skipped[0m=[35m0[0m [36mdocuments_written[0m=[35m446[0m [36mduration_ms[0m=[35m879[0m [36moperation[0m=[35mdocument_writing[0m [36msuccess_rate[0m=[35m1.0[0m [36mtotal_documents[0m=[35m446[0m


{'operation': 'document_writing', 'duration_ms': 879, 'total_documents': 446, 'documents_written': 446, 'documents_skipped': 0, 'success_rate': 1.0, 'event': 'document_writing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.732411Z'}


[2m2025-10-03T19:56:10.738083Z[0m [[32m[1minfo     [0m] [1mSRT file processed successfully[0m [36mcomponent[0m=[35mbatch_processor[0m [36mdocuments_written[0m=[35m446[0m [36mfile_path[0m=[35m'data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt'[0m


{'file_path': 'data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt', 'documents_written': 446, 'component': 'batch_processor', 'event': 'SRT file processed successfully', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.738083Z'}


[2m2025-10-03T19:56:10.739323Z[0m [[32m[1minfo     [0m] [1msingle_file_processing_completed[0m [36mdocuments_written[0m=[35m446[0m [36mduration_ms[0m=[35m43354[0m [36mfile_path[0m=[35m'data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt'[0m [36mfile_size_bytes[0m=[35m134817[0m [36moperation[0m=[35msingle_file_processing[0m


{'operation': 'single_file_processing', 'duration_ms': 43354, 'file_path': 'data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt', 'documents_written': 446, 'file_size_bytes': 134817, 'event': 'single_file_processing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.739323Z'}


[2m2025-10-03T19:56:10.740981Z[0m [[32m[1minfo     [0m] [1mMemory usage - after file 3/3 [0m [36mallocated_mb[0m=[35m2280.83251953125[0m [36mcomponent[0m=[35mmemory_management[0m [36mdevice[0m=[35m0[0m [36mfree_memory_mb[0m=[35m1231.8125[0m [36mmax_allocated_mb[0m=[35m5399.22802734375[0m [36mmax_reserved_mb[0m=[35m6778.0[0m [36mreserved_mb[0m=[35m6242.0[0m [36mtotal_memory_mb[0m=[35m7816.5625[0m [36mutilization_percent[0m=[35m84.24099468276496[0m


{'device': 0, 'allocated_mb': 2280.83251953125, 'reserved_mb': 6242.0, 'max_allocated_mb': 5399.22802734375, 'max_reserved_mb': 6778.0, 'total_memory_mb': 7816.5625, 'free_memory_mb': 1231.8125, 'utilization_percent': 84.24099468276496, 'component': 'memory_management', 'event': 'Memory usage - after file 3/3', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.740981Z'}


[2m2025-10-03T19:56:10.741691Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mbatch_processor[0m [36mmetric_name[0m=[35msrt_files_processed_total[0m [36mmetric_type[0m=[35mcounter[0m [36mstatus[0m=[35msuccess[0m [36mvalue[0m=[35m1[0m


{'metric_name': 'srt_files_processed_total', 'metric_type': 'counter', 'value': 1, 'component': 'batch_processor', 'status': 'success', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.741691Z'}


[2m2025-10-03T19:56:10.742258Z[0m [[32m[1minfo     [0m] [1mmetric_counter                [0m [36mcomponent[0m=[35mbatch_processor[0m [36mmetric_name[0m=[35mdocuments_written_total[0m [36mmetric_type[0m=[35mcounter[0m [36mvalue[0m=[35m446[0m


{'metric_name': 'documents_written_total', 'metric_type': 'counter', 'value': 446, 'component': 'batch_processor', 'event': 'metric_counter', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.742258Z'}


[2m2025-10-03T19:56:10.742846Z[0m [[32m[1minfo     [0m] [1mBatch processing progress     [0m [36mcomponent[0m=[35mbatch_processor[0m [36mfailed_files[0m=[35m0[0m [36mprocessed_files[0m=[35m3[0m [36mprogress_percent[0m=[35m100.0[0m [36msuccessful_files[0m=[35m3[0m [36mtotal_files[0m=[35m3[0m


{'processed_files': 3, 'total_files': 3, 'successful_files': 3, 'failed_files': 0, 'progress_percent': 100.0, 'component': 'batch_processor', 'event': 'Batch processing progress', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.742846Z'}


[2m2025-10-03T19:56:10.743853Z[0m [[32m[1minfo     [0m] [1mmetric_histogram              [0m [36mcomponent[0m=[35mbatch_processor[0m [36mmetric_name[0m=[35mbatch_size_files[0m [36mmetric_type[0m=[35mhistogram[0m [36mvalue[0m=[35m3[0m


{'metric_name': 'batch_size_files', 'metric_type': 'histogram', 'value': 3, 'component': 'batch_processor', 'event': 'metric_histogram', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.743853Z'}


[2m2025-10-03T19:56:10.744582Z[0m [[32m[1minfo     [0m] [1mmetric_histogram              [0m [36mcomponent[0m=[35mbatch_processor[0m [36mmetric_name[0m=[35mbatch_success_rate[0m [36mmetric_type[0m=[35mhistogram[0m [36mvalue[0m=[35m1.0[0m


{'metric_name': 'batch_success_rate', 'metric_type': 'histogram', 'value': 1.0, 'component': 'batch_processor', 'event': 'metric_histogram', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.744582Z'}


[2m2025-10-03T19:56:10.745662Z[0m [[32m[1minfo     [0m] [1mBatch SRT processing completed[0m [36mcomponent[0m=[35mbatch_processor[0m [36mdirectory[0m=[35mdata[0m [36mfailed_files[0m=[35m0[0m [36mprocessing_time_ms[0m=[35m128607[0m [36msuccessful_files[0m=[35m3[0m [36mtotal_documents_written[0m=[35m1233[0m [36mtotal_files[0m=[35m3[0m


{'directory': 'data', 'total_files': 3, 'successful_files': 3, 'failed_files': 0, 'total_documents_written': 1233, 'processing_time_ms': 128607, 'component': 'batch_processor', 'event': 'Batch SRT processing completed', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.745662Z'}


[2m2025-10-03T19:56:10.746897Z[0m [[32m[1minfo     [0m] [1mbatch_srt_processing_completed[0m [36mdirectory[0m=[35mdata[0m [36mduration_ms[0m=[35m128608[0m [36mfailed_files[0m=[35m0[0m [36moperation[0m=[35mbatch_srt_processing[0m [36moverwrite[0m=[35mTrue[0m [36msuccess_rate[0m=[35m1.0[0m [36msuccessful_files[0m=[35m3[0m [36mtotal_documents_written[0m=[35m1233[0m [36mtotal_files[0m=[35m3[0m


{'operation': 'batch_srt_processing', 'duration_ms': 128608, 'directory': 'data', 'overwrite': True, 'total_files': 3, 'successful_files': 3, 'failed_files': 0, 'total_documents_written': 1233, 'success_rate': 1.0, 'event': 'batch_srt_processing_completed', 'level': 'info', 'timestamp': '2025-10-03T19:56:10.746897Z'}


BatchProcessingResult(total_files=3, successful_files=3, failed_files=0, total_documents_written=1233, file_results=[ProcessingResult(file_path='data/the.lord.of.the.rings.the.fellowship.of.the.ring.(2001).eng.1cd.(6053820)/The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.Extended.Editions.2001.1080p.BluRay.x264.srt', success=True, documents_written=420, error_message=None, processing_time_ms=42141), ProcessingResult(file_path='data/the.lord.of.the.rings.the.return.of.the.king.(2003).eng.1cd.(6256239)/The.Lord.of.the.Rings.The.Return.of.the.King.EXTENDED.2003.720p.BrRip.x264.BOKUTOX.YIFY.Perusoe.srt', success=True, documents_written=367, error_message=None, processing_time_ms=38327), ProcessingResult(file_path='data/the.lord.of.the.rings.the.two.towers.(2002).eng.1cd.(5581877)/The.Lord.of.the.Rings.The.Two.Towers.Extended.Editions.2002.1080p.BluRay.x264.DTS-WiKi .srt', success=True, documents_written=446, error_message=None, processing_time_ms=43354)], processing_time_ms=128607)