In [3]:
from story_sage import StorySageConfig, StorySageRetriever
from story_sage.utils import Embedder
from story_sage.utils.raptor import RaptorProcessor, Chunk, _RaptorResults
from openai import OpenAI
import yaml
import httpx
import os
import chromadb
from pprint import pprint
from typing import OrderedDict
import logging
import glob


# Reset the root logger before configuring it: basicConfig() does nothing when
# the root logger already has handlers, so detach any that are present first.
root_logger = logging.getLogger()
for stale_handler in list(root_logger.handlers):
    root_logger.removeHandler(stale_handler)

# Report WARNING and above through a default StreamHandler (which writes to
# sys.stderr), using a timestamp / logger name / level / message layout.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.WARNING,
)


# --- Run parameters ---------------------------------------------------------
# RERUN gates the processing cell; SERIES_METADATA_NAME selects both the
# series entry in the config and the sub-folder under ./books/.
RERUN = True
SERIES_METADATA_NAME = 'acotar'
book_nums = [1, 2, 3, 4, 5]

# One glob pattern per book; the book number is zero-padded to two digits
# (e.g. 1 -> "01_*.txt").
file_patterns = [
    f'./books/{SERIES_METADATA_NAME}/{str(book_num).zfill(2)}_*.txt'
    for book_num in book_nums
]

# NOTE(review): presumably disables parallelism in the HuggingFace tokenizers
# library to avoid fork-related warnings -- confirm against the embedder in use.
os.environ['TOKENIZERS_PARALLELISM'] = "false"

# Load the project configuration and pick out the entry whose
# series_metadata_name matches SERIES_METADATA_NAME.
config_path = './config.yml'
ssconfig = StorySageConfig.from_file(config_path)
selected_series = next(
    (series for series in ssconfig.series
     if series.series_metadata_name == SERIES_METADATA_NAME),
    None,
)
if selected_series is None:
    # Without a default, next() would raise a bare StopIteration here, which
    # gives no hint that the series name is simply missing from the config.
    known_series = [series.series_metadata_name for series in ssconfig.series]
    raise ValueError(
        f"Series {SERIES_METADATA_NAME!r} not found in {config_path}; "
        f"available series: {known_series}"
    )

# Build the processor used by the processing cell. The tuning values are
# passed straight through to RaptorProcessor; see its documentation for the
# exact meaning of each setting.
raptor = RaptorProcessor(
    config_path=config_path,
    skip_summarization=False,
    chunk_size=1000,
    max_tokens=200,
    target_dim=5,
    max_levels=3,
    max_processes=2,
    max_summary_threads=3,
)

In [4]:
# Process every book's file pattern with the RaptorProcessor and save the
# resulting chunk tree. Skipped entirely when RERUN is False.
if RERUN:
    # The output path does not depend on the loop variable, so build it once.
    processed_file_name = f'./chunks/{SERIES_METADATA_NAME}/raptor_chunks/{SERIES_METADATA_NAME}.json'
    for pattern in file_patterns:
        # `results` keeps the return value of the most recent book only.
        results: _RaptorResults = raptor.process_texts(pattern)
        # NOTE(review): every book is saved to the same path. Whether that
        # overwrites the previous book's output or accumulates depends on how
        # save_chunk_tree() derives its file names -- confirm against
        # RaptorProcessor before relying on the saved data for all books.
        raptor.save_chunk_tree(processed_file_name)

Name: 01_a_court_of_thrones_and_roses.txt Book Number: 1
Book 1 has 47 chapters (0 indexed to include prologue).
Processing chapter 1 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 0 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 2 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 3 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 4 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 5 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 6 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 7 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 8 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 10 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 9 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 11 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 12 in 01_a_court_of_thrones_and_roses.txt...
Processing chapter 13 in 01_a_court_of_thrones_and_roses.txt...
P

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Processing chapter 17 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 18 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 20 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 19 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 22 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 21 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 23 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 24 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 25 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 26 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 28 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 29 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 27 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 30 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 31 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 32 in 02_a_court_of_mist_and_fury.txt...
Processing chapter 34 in 02_a_court_of_m