In [17]:
import PyPDF2
import spacy
import json
from typing import List, Dict, Any
from pathlib import Path
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import defaultdict
import logging

# Set up logging
logging.basicConfig(level=logging.INFO,
                   format='%(asctime)s - %(levelname)s - %(message)s')

class SemanticChunker:
    def __init__(self, pdf_path: str):
        self.pdf_path = pdf_path
        logging.info(f"Initializing SemanticChunker with PDF: {pdf_path}")
        self.nlp = spacy.load("en_core_web_sm")
        self.toc = []
        
    def debug_pdf_content(self):
        """Debug function to check PDF content extraction"""
        logging.info("Running PDF content debug...")
        
        try:
            with open(self.pdf_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                
                # Check first few pages
                for i in range(min(5, len(reader.pages))):
                    page_text = reader.pages[i].extract_text()
                    logging.info(f"\nPage {i+1} Preview (first 200 chars):")
                    logging.info("-" * 50)
                    logging.info(page_text[:200])
                    logging.info("-" * 50)
                    logging.info(f"Page {i+1} length: {len(page_text)} characters")
        
        except Exception as e:
            logging.error(f"Error in PDF debug: {str(e)}")

    def extract_toc_and_content(self):
        """Extract table of contents and content from PDF"""
        logging.info("Extracting ToC and content from PDF...")
        try:
            with open(self.pdf_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                content_text = ""
                
                # Extract content from all pages
                for i in range(len(reader.pages)):
                    page_text = reader.pages[i].extract_text()
                    content_text += page_text + "\n\n"
                    
                logging.info(f"Extracted {len(content_text)} characters of content")
                return content_text
                
        except Exception as e:
            logging.error(f"Error reading PDF: {str(e)}")
            raise

    def create_chunks(self, content_text: str, chunk_size: int = 1000) -> List[Dict[str, Any]]:
        """Create semantic chunks from content"""
        logging.info("Creating semantic chunks...")
        
        # Split content into paragraphs
        paragraphs = [p.strip() for p in content_text.split('\n\n') if p.strip()]
        logging.info(f"Split content into {len(paragraphs)} paragraphs")
        
        if not paragraphs:
            logging.error("No paragraphs found in content")
            return []
        
        # Fit TF-IDF on all paragraphs
        vectorizer = TfidfVectorizer(
            max_features=100,
            stop_words='english',
            min_df=1
        )
        
        try:
            vectorizer.fit(paragraphs)
            logging.info("TF-IDF vectorizer fitted successfully")
        except Exception as e:
            logging.error(f"Error fitting TF-IDF vectorizer: {str(e)}")
            return []
        
        chunks = []
        current_chunk = []
        current_length = 0
        chunk_id = 1
        
        for para in paragraphs:
            para_length = len(para)
            
            if current_length + para_length > chunk_size and current_chunk:
                try:
                    # Process current chunk
                    chunk_text = '\n\n'.join(current_chunk)
                    doc = self.nlp(chunk_text)
                    
                    # Get topics
                    chunk_vector = vectorizer.transform([chunk_text])
                    feature_names = vectorizer.get_feature_names_out()
                    top_indices = chunk_vector.toarray()[0].argsort()[-5:][::-1]
                    top_terms = [feature_names[i] for i in top_indices if chunk_vector.toarray()[0][i] > 0]
                    
                    chunk_data = {
                        'id': f"chunk_{chunk_id}",
                        'content': chunk_text,
                        'topics': top_terms,
                        'tokens': len(doc)
                    }
                    
                    chunks.append(chunk_data)
                    logging.info(f"Created chunk {chunk_id}:")
                    logging.info(f"  Topics: {', '.join(top_terms)}")
                    logging.info(f"  Tokens: {len(doc)}")
                    
                    # Reset for next chunk
                    current_chunk = [para]
                    current_length = para_length
                    chunk_id += 1
                    
                except Exception as e:
                    logging.error(f"Error processing chunk {chunk_id}: {str(e)}")
                    current_chunk = [para]
                    current_length = para_length
                    chunk_id += 1
            else:
                current_chunk.append(para)
                current_length += para_length
        
        # Process the last chunk
        if current_chunk:
            try:
                chunk_text = '\n\n'.join(current_chunk)
                doc = self.nlp(chunk_text)
                
                chunk_vector = vectorizer.transform([chunk_text])
                feature_names = vectorizer.get_feature_names_out()
                top_indices = chunk_vector.toarray()[0].argsort()[-5:][::-1]
                top_terms = [feature_names[i] for i in top_indices if chunk_vector.toarray()[0][i] > 0]
                
                chunk_data = {
                    'id': f"chunk_{chunk_id}",
                    'content': chunk_text,
                    'topics': top_terms,
                    'tokens': len(doc)
                }
                
                chunks.append(chunk_data)
                logging.info(f"Created final chunk {chunk_id}")
                
            except Exception as e:
                logging.error(f"Error processing final chunk: {str(e)}")
        
        logging.info(f"Total chunks created: {len(chunks)}")
        return chunks

    def process_and_save(self, output_path: str):
        """Process PDF and save chunks to JSON"""
        logging.info("Starting PDF processing...")
        try:
            # Extract content
            content_text = self.extract_toc_and_content()
            if not content_text:
                logging.error("No content extracted from PDF")
                return
            
            # Create chunks
            chunks = self.create_chunks(content_text)
            
            # Save to JSON
            output_data = {
                'metadata': {
                    'source': self.pdf_path,
                    'total_chunks': len(chunks),
                    'chunking_strategy': 'semantic_with_toc'
                },
                'chunks': chunks
            }
            
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(output_data, f, indent=2, ensure_ascii=False)
            
            logging.info(f"Successfully saved {len(chunks)} chunks to {output_path}")
            
        except Exception as e:
            logging.error(f"Error in processing PDF: {str(e)}")
            raise


In [18]:
# Usage:
chunker = SemanticChunker('s3-userguide.pdf')
chunker.debug_pdf_content()  # Run this before process_and_save to check content


2025-02-07 13:42:44,265 - INFO - Initializing SemanticChunker with PDF: s3-userguide.pdf
2025-02-07 13:42:44,846 - INFO - Running PDF content debug...
2025-02-07 13:42:45,870 - INFO - 
Page 1 Preview (first 200 chars):
2025-02-07 13:42:45,871 - INFO - --------------------------------------------------
2025-02-07 13:42:45,871 - INFO - User Guide
Amazon Simple Storage Service
API Version 2006-03-01
Copyright © 2024 Amazon Web Services, Inc. and/or its aﬃliates. All rights reserved.
2025-02-07 13:42:45,872 - INFO - --------------------------------------------------
2025-02-07 13:42:45,872 - INFO - Page 1 length: 148 characters
2025-02-07 13:42:45,904 - INFO - 
Page 2 Preview (first 200 chars):
2025-02-07 13:42:45,905 - INFO - --------------------------------------------------
2025-02-07 13:42:45,906 - INFO - Amazon Simple Storage Service User Guide
Amazon Simple Storage Service: User Guide
Copyright © 2024 Amazon Web Services, Inc. and/or its aﬃliates. All rights reserved.
Amazon's tradem

In [19]:
# Then process if content looks good
chunker.process_and_save('NEW-s3-userguide-chunks.json')

2025-02-07 13:42:47,249 - INFO - Starting PDF processing...
2025-02-07 13:42:47,250 - INFO - Extracting ToC and content from PDF...
2025-02-07 13:45:10,863 - INFO - Extracted 4323539 characters of content
2025-02-07 13:45:10,864 - INFO - Creating semantic chunks...
2025-02-07 13:45:10,873 - INFO - Split content into 2500 paragraphs
2025-02-07 13:45:11,264 - INFO - TF-IDF vectorizer fitted successfully
2025-02-07 13:45:11,299 - INFO - Created chunk 1:
2025-02-07 13:45:11,300 - INFO -   Topics: amazon, service, simple, storage, user
2025-02-07 13:45:11,300 - INFO -   Tokens: 146
2025-02-07 13:45:11,360 - INFO - Created chunk 2:
2025-02-07 13:45:11,361 - INFO -   Topics: s3, amazon, access, management, aws
2025-02-07 13:45:11,361 - INFO -   Tokens: 224
2025-02-07 13:45:11,435 - INFO - Created chunk 3:
2025-02-07 13:45:11,436 - INFO -   Topics: bucket, buckets, object, access, objects
2025-02-07 13:45:11,436 - INFO -   Tokens: 262
2025-02-07 13:45:11,497 - INFO - Created chunk 4:
2025-02-0

2025-02-07 13:45:14,291 - INFO - Created chunk 45:
2025-02-07 13:45:14,292 - INFO -   Topics: bucket, delete, choose, use, objects
2025-02-07 13:45:14,293 - INFO -   Tokens: 290
2025-02-07 13:45:14,343 - INFO - Created chunk 46:
2025-02-07 13:45:14,344 - INFO -   Topics: use, s3, buckets, amazon, directory
2025-02-07 13:45:14,344 - INFO -   Tokens: 297
2025-02-07 13:45:14,434 - INFO - Created chunk 47:
2025-02-07 13:45:14,434 - INFO -   Topics: policies, access, iam, s3, acls
2025-02-07 13:45:14,435 - INFO -   Tokens: 541
2025-02-07 13:45:14,512 - INFO - Created chunk 48:
2025-02-07 13:45:14,513 - INFO -   Topics: s3, amazon, cli, use, aws
2025-02-07 13:45:14,514 - INFO -   Tokens: 500
2025-02-07 13:45:14,580 - INFO - Created chunk 49:
2025-02-07 13:45:14,581 - INFO -   Topics: aws, s3, amazon, use, operations
2025-02-07 13:45:14,582 - INFO -   Tokens: 400
2025-02-07 13:45:14,631 - INFO - Created chunk 50:
2025-02-07 13:45:14,631 - INFO -   Topics: s3, data, replication, batch, amazon


2025-02-07 13:45:17,124 - INFO -   Topics: bucket, lifecycle, objects, s3, rule
2025-02-07 13:45:17,124 - INFO -   Tokens: 475
2025-02-07 13:45:17,178 - INFO - Created chunk 92:
2025-02-07 13:45:17,178 - INFO -   Topics: bucket, command, s3, aws, cli
2025-02-07 13:45:17,179 - INFO -   Tokens: 341
2025-02-07 13:45:17,246 - INFO - Created chunk 93:
2025-02-07 13:45:17,246 - INFO -   Topics: lifecycle, bucket, rule, conﬁguration, objects
2025-02-07 13:45:17,247 - INFO -   Tokens: 463
2025-02-07 13:45:17,318 - INFO - Created chunk 94:
2025-02-07 13:45:17,319 - INFO -   Topics: bucket, delete, access, zone, points
2025-02-07 13:45:17,320 - INFO -   Tokens: 497
2025-02-07 13:45:17,374 - INFO - Created chunk 95:
2025-02-07 13:45:17,375 - INFO -   Topics: delete, bucket, iam, permissions, s3
2025-02-07 13:45:17,376 - INFO -   Tokens: 361
2025-02-07 13:45:17,423 - INFO - Created chunk 96:
2025-02-07 13:45:17,423 - INFO -   Topics: import, amazonaws, com, bucket, delete
2025-02-07 13:45:17,424 -

2025-02-07 13:45:19,709 - INFO -   Tokens: 357
2025-02-07 13:45:19,776 - INFO - Created chunk 138:
2025-02-07 13:45:19,777 - INFO -   Topics: object, value, api, upload, set
2025-02-07 13:45:19,778 - INFO -   Tokens: 423
2025-02-07 13:45:19,835 - INFO - Created chunk 139:
2025-02-07 13:45:19,836 - INFO -   Topics: amazonaws, demo, amzn, s3, key
2025-02-07 13:45:19,836 - INFO -   Tokens: 358
2025-02-07 13:45:19,914 - INFO - Created chunk 140:
2025-02-07 13:45:19,914 - INFO -   Topics: object, copy, upload, s3, want
2025-02-07 13:45:19,915 - INFO -   Tokens: 520
2025-02-07 13:45:19,974 - INFO - Created chunk 141:
2025-02-07 13:45:19,974 - INFO -   Topics: choose, destination, copy, objects, settings
2025-02-07 13:45:19,975 - INFO -   Tokens: 384
2025-02-07 13:45:20,047 - INFO - Created chunk 142:
2025-02-07 13:45:20,048 - INFO -   Topics: choose, destination, key, specify, copy
2025-02-07 13:45:20,049 - INFO -   Tokens: 454
2025-02-07 13:45:20,120 - INFO - Created chunk 143:
2025-02-07 1

2025-02-07 13:45:22,426 - INFO -   Tokens: 300
2025-02-07 13:45:22,463 - INFO - Created chunk 184:
2025-02-07 13:45:22,464 - INFO -   Topics: multipart, bucket, setting, s3, object
2025-02-07 13:45:22,465 - INFO -   Tokens: 236
2025-02-07 13:45:22,517 - INFO - Created chunk 185:
2025-02-07 13:45:22,518 - INFO -   Topics: upload, multipart, encryption, using, api
2025-02-07 13:45:22,519 - INFO -   Tokens: 336
2025-02-07 13:45:22,578 - INFO - Created chunk 186:
2025-02-07 13:45:22,579 - INFO -   Topics: import, amazonaws, com, upload, string
2025-02-07 13:45:22,580 - INFO -   Tokens: 305
2025-02-07 13:45:22,629 - INFO - Created chunk 187:
2025-02-07 13:45:22,630 - INFO -   Topics: upload, multipart, new, list, add
2025-02-07 13:45:22,631 - INFO -   Tokens: 270
2025-02-07 13:45:22,684 - INFO - Created chunk 188:
2025-02-07 13:45:22,684 - INFO -   Topics: using, multipart, upload, amazon, s3
2025-02-07 13:45:22,685 - INFO -   Tokens: 318
2025-02-07 13:45:22,734 - INFO - Created chunk 189:


2025-02-07 13:45:24,908 - INFO -   Topics: requests, copy, object, add, operation
2025-02-07 13:45:24,910 - INFO -   Tokens: 457
2025-02-07 13:45:24,964 - INFO - Created chunk 230:
2025-02-07 13:45:24,965 - INFO -   Topics: object, value, request, copy, specify
2025-02-07 13:45:24,965 - INFO -   Tokens: 374
2025-02-07 13:45:25,030 - INFO - Created chunk 231:
2025-02-07 13:45:25,031 - INFO -   Topics: source, object, copy, key, request
2025-02-07 13:45:25,032 - INFO -   Tokens: 436
2025-02-07 13:45:25,078 - INFO - Created chunk 232:
2025-02-07 13:45:25,079 - INFO -   Topics: object, use, key, cli, aws
2025-02-07 13:45:25,080 - INFO -   Tokens: 299
2025-02-07 13:45:25,136 - INFO - Created chunk 233:
2025-02-07 13:45:25,136 - INFO -   Topics: upload, multipart, command, cli, aws
2025-02-07 13:45:25,137 - INFO -   Tokens: 377
2025-02-07 13:45:25,195 - INFO - Created chunk 234:
2025-02-07 13:45:25,196 - INFO -   Topics: upload, multipart, operation, object, delete
2025-02-07 13:45:25,197 - 

2025-02-07 13:45:27,538 - INFO -   Tokens: 306
2025-02-07 13:45:27,604 - INFO - Created chunk 276:
2025-02-07 13:45:27,605 - INFO -   Topics: delete, object, information, objects, using
2025-02-07 13:45:27,605 - INFO -   Tokens: 314
2025-02-07 13:45:27,654 - INFO - Created chunk 277:
2025-02-07 13:45:27,655 - INFO -   Topics: import, amazonaws, com, string, s3
2025-02-07 13:45:27,655 - INFO -   Tokens: 305
2025-02-07 13:45:27,696 - INFO - Created chunk 278:
2025-02-07 13:45:27,697 - INFO -   Topics: import, amazonaws, com, object, id
2025-02-07 13:45:27,698 - INFO -   Tokens: 258
2025-02-07 13:45:27,747 - INFO - Created chunk 279:
2025-02-07 13:45:27,748 - INFO -   Topics: string, object, enabled, bucket, new
2025-02-07 13:45:27,749 - INFO -   Tokens: 308
2025-02-07 13:45:27,793 - INFO - Created chunk 280:
2025-02-07 13:45:27,794 - INFO -   Topics: object, specify, using, example, string
2025-02-07 13:45:27,795 - INFO -   Tokens: 299
2025-02-07 13:45:27,840 - INFO - Created chunk 281:


2025-02-07 13:45:30,127 - INFO - Created chunk 322:
2025-02-07 13:45:30,127 - INFO -   Topics: lambda, object, points, access, s3
2025-02-07 13:45:30,128 - INFO -   Tokens: 124
2025-02-07 13:45:30,185 - INFO - Created chunk 323:
2025-02-07 13:45:30,186 - INFO -   Topics: lambda, point, access, points, object
2025-02-07 13:45:30,187 - INFO -   Tokens: 355
2025-02-07 13:45:30,252 - INFO - Created chunk 324:
2025-02-07 13:45:30,253 - INFO -   Topics: lambda, points, access, point, choose
2025-02-07 13:45:30,254 - INFO -   Tokens: 467
2025-02-07 13:45:30,307 - INFO - Created chunk 325:
2025-02-07 13:45:30,307 - INFO -   Topics: lambda, access, point, points, object
2025-02-07 13:45:30,308 - INFO -   Tokens: 379
2025-02-07 13:45:30,362 - INFO - Created chunk 326:
2025-02-07 13:45:30,362 - INFO -   Topics: lambda, point, object, access, aws
2025-02-07 13:45:30,363 - INFO -   Tokens: 348
2025-02-07 13:45:30,419 - INFO - Created chunk 327:
2025-02-07 13:45:30,420 - INFO -   Topics: lambda, aws

2025-02-07 13:45:32,658 - INFO - Created chunk 368:
2025-02-07 13:45:32,659 - INFO -   Topics: string, value, owner, lambda, id
2025-02-07 13:45:32,660 - INFO -   Tokens: 393
2025-02-07 13:45:32,710 - INFO - Created chunk 369:
2025-02-07 13:45:32,710 - INFO -   Topics: lambda, point, object, arn, example
2025-02-07 13:45:32,712 - INFO -   Tokens: 332
2025-02-07 13:45:32,766 - INFO - Created chunk 370:
2025-02-07 13:45:32,767 - INFO -   Topics: lambda, object, arn, role, s3
2025-02-07 13:45:32,768 - INFO -   Tokens: 402
2025-02-07 13:45:32,804 - INFO - Created chunk 371:
2025-02-07 13:45:32,805 - INFO -   Topics: import, lambda, requests, create, object
2025-02-07 13:45:32,805 - INFO -   Tokens: 239
2025-02-07 13:45:32,863 - INFO - Created chunk 372:
2025-02-07 13:45:32,864 - INFO -   Topics: string, value, key, lambda, id
2025-02-07 13:45:32,864 - INFO -   Tokens: 349
2025-02-07 13:45:32,938 - INFO - Created chunk 373:
2025-02-07 13:45:32,939 - INFO -   Topics: string, lambda, owner, e

2025-02-07 13:45:35,399 - INFO - Created chunk 414:
2025-02-07 13:45:35,399 - INFO -   Topics: lambda, point, object, access, s3
2025-02-07 13:45:35,400 - INFO -   Tokens: 487
2025-02-07 13:45:35,451 - INFO - Created chunk 415:
2025-02-07 13:45:35,452 - INFO -   Topics: delete, lambda, point, object, access
2025-02-07 13:45:35,453 - INFO -   Tokens: 374
2025-02-07 13:45:35,499 - INFO - Created chunk 416:
2025-02-07 13:45:35,500 - INFO -   Topics: delete, choose, lambda, point, console
2025-02-07 13:45:35,500 - INFO -   Tokens: 325
2025-02-07 13:45:35,553 - INFO - Created chunk 417:
2025-02-07 13:45:35,554 - INFO -   Topics: delete, choose, console, bucket, lambda
2025-02-07 13:45:35,555 - INFO -   Tokens: 381
2025-02-07 13:45:35,610 - INFO - Created chunk 418:
2025-02-07 13:45:35,611 - INFO -   Topics: delete, choose, iam, lambda, console
2025-02-07 13:45:35,612 - INFO -   Tokens: 382
2025-02-07 13:45:35,674 - INFO - Created chunk 419:
2025-02-07 13:45:35,674 - INFO -   Topics: lambda,

2025-02-07 13:45:38,042 - INFO -   Topics: source, s3, bucket, acls, tags
2025-02-07 13:45:38,042 - INFO -   Tokens: 402
2025-02-07 13:45:38,101 - INFO - Created chunk 461:
2025-02-07 13:45:38,102 - INFO -   Topics: batch, job, operations, copy, s3
2025-02-07 13:45:38,103 - INFO -   Tokens: 385
2025-02-07 13:45:38,157 - INFO - Created chunk 462:
2025-02-07 13:45:38,158 - INFO -   Topics: role, policy, s3, account, operations
2025-02-07 13:45:38,158 - INFO -   Tokens: 380
2025-02-07 13:45:38,214 - INFO - Created chunk 463:
2025-02-07 13:45:38,215 - INFO -   Topics: bucket, source, arn, s3, role
2025-02-07 13:45:38,215 - INFO -   Tokens: 409
2025-02-07 13:45:38,269 - INFO - Created chunk 464:
2025-02-07 13:45:38,270 - INFO -   Topics: source, job, role, bucket, policy
2025-02-07 13:45:38,270 - INFO -   Tokens: 356
2025-02-07 13:45:38,338 - INFO - Created chunk 465:
2025-02-07 13:45:38,339 - INFO -   Topics: s3, keys, bucket, batch, operations
2025-02-07 13:45:38,340 - INFO -   Tokens: 48

2025-02-07 13:45:40,633 - INFO -   Topics: job, status, batch, aws, id
2025-02-07 13:45:40,633 - INFO -   Tokens: 320
2025-02-07 13:45:40,677 - INFO - Created chunk 507:
2025-02-07 13:45:40,678 - INFO -   Topics: import, amazonaws, com, string, batch
2025-02-07 13:45:40,678 - INFO -   Tokens: 265
2025-02-07 13:45:40,750 - INFO - Created chunk 508:
2025-02-07 13:45:40,751 - INFO -   Topics: job, operations, batch, s3, amazon
2025-02-07 13:45:40,752 - INFO -   Tokens: 504
2025-02-07 13:45:40,805 - INFO - Created chunk 509:
2025-02-07 13:45:40,806 - INFO -   Topics: job, batch, operations, amazon, s3
2025-02-07 13:45:40,806 - INFO -   Tokens: 380
2025-02-07 13:45:40,855 - INFO - Created chunk 510:
2025-02-07 13:45:40,856 - INFO -   Topics: job, batch, operations, amazonaws, com
2025-02-07 13:45:40,856 - INFO -   Tokens: 317
2025-02-07 13:45:40,903 - INFO - Created chunk 511:
2025-02-07 13:45:40,904 - INFO -   Topics: batch, operations, job, rule, s3
2025-02-07 13:45:40,905 - INFO -   Toke

2025-02-07 13:45:43,122 - INFO - Created chunk 553:
2025-02-07 13:45:43,123 - INFO -   Topics: batch, operations, role, s3, policy
2025-02-07 13:45:43,124 - INFO -   Tokens: 318
2025-02-07 13:45:43,166 - INFO - Created chunk 554:
2025-02-07 13:45:43,167 - INFO -   Topics: arn, demo, amzn, bucket, aws
2025-02-07 13:45:43,168 - INFO -   Tokens: 287
2025-02-07 13:45:43,216 - INFO - Created chunk 555:
2025-02-07 13:45:43,217 - INFO -   Topics: batch, string, role, operation, s3
2025-02-07 13:45:43,218 - INFO -   Tokens: 337
2025-02-07 13:45:43,259 - INFO - Created chunk 556:
2025-02-07 13:45:43,259 - INFO -   Topics: new, string, demo, amzn, arn
2025-02-07 13:45:43,260 - INFO -   Tokens: 262
2025-02-07 13:45:43,337 - INFO - Created chunk 557:
2025-02-07 13:45:43,338 - INFO -   Topics: batch, operations, request, s3, string
2025-02-07 13:45:43,339 - INFO -   Tokens: 332
2025-02-07 13:45:43,389 - INFO - Created chunk 558:
2025-02-07 13:45:43,390 - INFO -   Topics: lambda, batch, create, sour

2025-02-07 13:45:45,631 - INFO - Created chunk 599:
2025-02-07 13:45:45,632 - INFO -   Topics: batch, job, operations, permissions, object
2025-02-07 13:45:45,633 - INFO -   Tokens: 380
2025-02-07 13:45:45,690 - INFO - Created chunk 600:
2025-02-07 13:45:45,691 - INFO -   Topics: data, s3, amazon, aws, kms
2025-02-07 13:45:45,692 - INFO -   Tokens: 406
2025-02-07 13:45:45,747 - INFO - Created chunk 601:
2025-02-07 13:45:45,748 - INFO -   Topics: object, s3, using, amazon, request
2025-02-07 13:45:45,748 - INFO -   Tokens: 381
2025-02-07 13:45:45,815 - INFO - Created chunk 602:
2025-02-07 13:45:45,816 - INFO -   Topics: request, amazon, s3, requests, objects
2025-02-07 13:45:45,817 - INFO -   Tokens: 465
2025-02-07 13:45:45,864 - INFO - Created chunk 603:
2025-02-07 13:45:45,865 - INFO -   Topics: s3, console, amazon, choose, object
2025-02-07 13:45:45,865 - INFO -   Tokens: 325
2025-02-07 13:45:45,915 - INFO - Created chunk 604:
2025-02-07 13:45:45,916 - INFO -   Topics: import, amazon

2025-02-07 13:45:47,730 - INFO -   Tokens: 231
2025-02-07 13:45:47,798 - INFO - Created chunk 646:
2025-02-07 13:45:47,798 - INFO -   Topics: zone, buckets, directory, s3, data
2025-02-07 13:45:47,799 - INFO -   Tokens: 443
2025-02-07 13:45:47,880 - INFO - Created chunk 647:
2025-02-07 13:45:47,880 - INFO -   Topics: zone, operations, directory, s3, access
2025-02-07 13:45:47,881 - INFO -   Tokens: 455
2025-02-07 13:45:47,953 - INFO - Created chunk 648:
2025-02-07 13:45:47,954 - INFO -   Topics: access, iam, control, directory, bucket
2025-02-07 13:45:47,954 - INFO -   Tokens: 494
2025-02-07 13:45:48,023 - INFO - Created chunk 649:
2025-02-07 13:45:48,024 - INFO -   Topics: aws, operations, directory, s3, batch
2025-02-07 13:45:48,025 - INFO -   Tokens: 440
2025-02-07 13:45:48,097 - INFO - Created chunk 650:
2025-02-07 13:45:48,098 - INFO -   Topics: encryption, zone, aws, data, s3
2025-02-07 13:45:48,099 - INFO -   Tokens: 460
2025-02-07 13:45:48,195 - INFO - Created chunk 651:
2025-0

2025-02-07 13:45:50,562 - INFO -   Topics: policy, bucket, string, aws, public
2025-02-07 13:45:50,562 - INFO -   Tokens: 260
2025-02-07 13:45:50,603 - INFO - Created chunk 692:
2025-02-07 13:45:50,604 - INFO -   Topics: bucket, policy, aws, directory, command
2025-02-07 13:45:50,604 - INFO -   Tokens: 254
2025-02-07 13:45:50,651 - INFO - Created chunk 693:
2025-02-07 13:45:50,652 - INFO -   Topics: bucket, policy, cli, aws, directory
2025-02-07 13:45:50,653 - INFO -   Tokens: 243
2025-02-07 13:45:50,718 - INFO - Created chunk 694:
2025-02-07 13:45:50,719 - INFO -   Topics: directory, bucket, multipart, delete, operation
2025-02-07 13:45:50,720 - INFO -   Tokens: 356
2025-02-07 13:45:50,780 - INFO - Created chunk 695:
2025-02-07 13:45:50,781 - INFO -   Topics: directory, bucket, delete, choose, console
2025-02-07 13:45:50,782 - INFO -   Tokens: 336
2025-02-07 13:45:50,823 - INFO - Created chunk 696:
2025-02-07 13:45:50,824 - INFO -   Topics: bucket, directory, import, delete, aws
2025-

2025-02-07 13:45:53,087 - INFO -   Topics: import, string, upload, multipart, list
2025-02-07 13:45:53,087 - INFO -   Tokens: 218
2025-02-07 13:45:53,140 - INFO - Created chunk 737:
2025-02-07 13:45:53,141 - INFO -   Topics: multipart, upload, bucket, key, directory
2025-02-07 13:45:53,141 - INFO -   Tokens: 260
2025-02-07 13:45:53,198 - INFO - Created chunk 738:
2025-02-07 13:45:53,199 - INFO -   Topics: directory, copy, multipart, bucket, upload
2025-02-07 13:45:53,199 - INFO -   Tokens: 379
2025-02-07 13:45:53,269 - INFO - Created chunk 739:
2025-02-07 13:45:53,269 - INFO -   Topics: encryption, kms, directory, sse, buckets
2025-02-07 13:45:53,270 - INFO -   Tokens: 512
2025-02-07 13:45:53,328 - INFO - Created chunk 740:
2025-02-07 13:45:53,329 - INFO -   Topics: zone, copy, objects, s3, object
2025-02-07 13:45:53,330 - INFO -   Tokens: 395
2025-02-07 13:45:53,388 - INFO - Created chunk 741:
2025-02-07 13:45:53,389 - INFO -   Topics: copy, choose, settings, directory, destination
20

2025-02-07 13:45:55,680 - INFO -   Tokens: 191
2025-02-07 13:45:55,712 - INFO - Created chunk 782:
2025-02-07 13:45:55,713 - INFO -   Topics: keys, action, resource, requests, account
2025-02-07 13:45:55,713 - INFO -   Tokens: 180
2025-02-07 13:45:55,748 - INFO - Created chunk 783:
2025-02-07 13:45:55,749 - INFO -   Topics: keys, action, resource, requests, list
2025-02-07 13:45:55,750 - INFO -   Tokens: 194
2025-02-07 13:45:55,771 - INFO - Created chunk 784:
2025-02-07 13:45:55,771 - INFO -   Topics: keys, action, add, bucket, resource
2025-02-07 13:45:55,772 - INFO -   Tokens: 95
2025-02-07 13:45:55,808 - INFO - Created chunk 785:
2025-02-07 13:45:55,809 - INFO -   Topics: keys, zone, key, use, requests
2025-02-07 13:45:55,810 - INFO -   Tokens: 199
2025-02-07 13:45:55,847 - INFO - Created chunk 786:
2025-02-07 13:45:55,847 - INFO -   Topics: id, key, access, example, identity
2025-02-07 13:45:55,848 - INFO -   Tokens: 214
2025-02-07 13:45:55,892 - INFO - Created chunk 787:
2025-02-0

2025-02-07 13:45:58,256 - INFO - Created chunk 828:
2025-02-07 13:45:58,256 - INFO -   Topics: acls, owner, bucket, id, object
2025-02-07 13:45:58,257 - INFO -   Tokens: 384
2025-02-07 13:45:58,321 - INFO - Created chunk 829:
2025-02-07 13:45:58,321 - INFO -   Topics: acls, bucket, object, owner, data
2025-02-07 13:45:58,322 - INFO -   Tokens: 475
2025-02-07 13:45:58,379 - INFO - Created chunk 830:
2025-02-07 13:45:58,380 - INFO -   Topics: keys, s3, access, encryption, policy
2025-02-07 13:45:58,380 - INFO -   Tokens: 410
2025-02-07 13:45:58,450 - INFO - Created chunk 831:
2025-02-07 13:45:58,450 - INFO -   Topics: access, policy, management, bucket, identity
2025-02-07 13:45:58,451 - INFO -   Tokens: 526
2025-02-07 13:45:58,523 - INFO - Created chunk 832:
2025-02-07 13:45:58,524 - INFO -   Topics: access, points, grants, bucket, use
2025-02-07 13:45:58,524 - INFO -   Tokens: 532
2025-02-07 13:45:58,592 - INFO - Created chunk 833:
2025-02-07 13:45:58,593 - INFO -   Topics: access, buc

2025-02-07 13:46:00,961 - INFO -   Tokens: 307
2025-02-07 13:46:01,029 - INFO - Created chunk 874:
2025-02-07 13:46:01,029 - INFO -   Topics: policies, object, bucket, request, acl
2025-02-07 13:46:01,030 - INFO -   Tokens: 490
2025-02-07 13:46:01,075 - INFO - Created chunk 875:
2025-02-07 13:46:01,076 - INFO -   Topics: object, bucket, account, owner, operation
2025-02-07 13:46:01,076 - INFO -   Tokens: 279
2025-02-07 13:46:01,114 - INFO - Created chunk 876:
2025-02-07 13:46:01,114 - INFO -   Topics: owner, s3, amazon, object, account
2025-02-07 13:46:01,115 - INFO -   Tokens: 254
2025-02-07 13:46:01,172 - INFO - Created chunk 877:
2025-02-07 13:46:01,173 - INFO -   Topics: operations, permissions, policy, resource, s3
2025-02-07 13:46:01,173 - INFO -   Tokens: 409
2025-02-07 13:46:01,219 - INFO - Created chunk 878:
2025-02-07 13:46:01,220 - INFO -   Topics: point, access, operations, policies, permissions
2025-02-07 13:46:01,221 - INFO -   Tokens: 300
2025-02-07 13:46:01,265 - INFO -

2025-02-07 13:46:03,267 - INFO -   Topics: access, public, s3, policy, settings
2025-02-07 13:46:03,268 - INFO -   Tokens: 350
2025-02-07 13:46:03,297 - INFO - Created chunk 920:
2025-02-07 13:46:03,298 - INFO -   Topics: public, access, choose, bucket, settings
2025-02-07 13:46:03,298 - INFO -   Tokens: 152
2025-02-07 13:46:03,359 - INFO - Created chunk 921:
2025-02-07 13:46:03,360 - INFO -   Topics: kms, sse, key, encryption, public
2025-02-07 13:46:03,361 - INFO -   Tokens: 447
2025-02-07 13:46:03,416 - INFO - Created chunk 922:
2025-02-07 13:46:03,417 - INFO -   Topics: public, access, s3, policy, bucket
2025-02-07 13:46:03,418 - INFO -   Tokens: 394
2025-02-07 13:46:03,464 - INFO - Created chunk 923:
2025-02-07 13:46:03,466 - INFO -   Topics: acl, upload, account, control, aws
2025-02-07 13:46:03,466 - INFO -   Tokens: 328
2025-02-07 13:46:03,508 - INFO - Created chunk 924:
2025-02-07 13:46:03,509 - INFO -   Topics: s3, value, key, arn, aws
2025-02-07 13:46:03,510 - INFO -   Token

2025-02-07 13:46:05,920 - INFO - Created chunk 966:
2025-02-07 13:46:05,920 - INFO -   Topics: bucket, console, request, s3, list
2025-02-07 13:46:05,921 - INFO -   Tokens: 301
2025-02-07 13:46:05,982 - INFO - Created chunk 967:
2025-02-07 13:46:05,983 - INFO -   Topics: key, keys, s3, value, amazon
2025-02-07 13:46:05,984 - INFO -   Tokens: 322
2025-02-07 13:46:06,038 - INFO - Created chunk 968:
2025-02-07 13:46:06,039 - INFO -   Topics: action, console, s3, bucket, list
2025-02-07 13:46:06,039 - INFO -   Tokens: 386
2025-02-07 13:46:06,093 - INFO - Created chunk 969:
2025-02-07 13:46:06,093 - INFO -   Topics: iam, policy, console, s3, action
2025-02-07 13:46:06,094 - INFO -   Tokens: 358
2025-02-07 13:46:06,157 - INFO - Created chunk 970:
2025-02-07 13:46:06,158 - INFO -   Topics: grant, console, permissions, iam, policy
2025-02-07 13:46:06,159 - INFO -   Tokens: 423
2025-02-07 13:46:06,210 - INFO - Created chunk 971:
2025-02-07 13:46:06,211 - INFO -   Topics: policy, choose, console

2025-02-07 13:46:08,627 - INFO -   Topics: console, bucket, delete, demo, amzn
2025-02-07 13:46:08,628 - INFO -   Tokens: 287
2025-02-07 13:46:08,695 - INFO - Created chunk 1013:
2025-02-07 13:46:08,696 - INFO -   Topics: owner, bucket, objects, account, permissions
2025-02-07 13:46:08,697 - INFO -   Tokens: 409
2025-02-07 13:46:08,748 - INFO - Created chunk 1014:
2025-02-07 13:46:08,749 - INFO -   Topics: account, bucket, acls, owner, objects
2025-02-07 13:46:08,749 - INFO -   Tokens: 320
2025-02-07 13:46:08,809 - INFO - Created chunk 1015:
2025-02-07 13:46:08,810 - INFO -   Topics: account, iam, aws, user, permissions
2025-02-07 13:46:08,810 - INFO -   Tokens: 400
2025-02-07 13:46:08,865 - INFO - Created chunk 1016:
2025-02-07 13:46:08,866 - INFO -   Topics: policy, console, bucket, account, create
2025-02-07 13:46:08,867 - INFO -   Tokens: 384
2025-02-07 13:46:08,953 - INFO - Created chunk 1017:
2025-02-07 13:46:08,954 - INFO -   Topics: account, arn, s3, aws, demo
2025-02-07 13:46:

2025-02-07 13:46:11,454 - INFO - Created chunk 1058:
2025-02-07 13:46:11,455 - INFO -   Topics: policy, permissions, aws, policies, lambda
2025-02-07 13:46:11,456 - INFO -   Tokens: 360
2025-02-07 13:46:11,499 - INFO - Created chunk 1059:
2025-02-07 13:46:11,500 - INFO -   Topics: access, s3, permissions, points, lambda
2025-02-07 13:46:11,501 - INFO -   Tokens: 283
2025-02-07 13:46:11,550 - INFO - Created chunk 1060:
2025-02-07 13:46:11,551 - INFO -   Topics: access, points, point, s3, operations
2025-02-07 13:46:11,551 - INFO -   Tokens: 358
2025-02-07 13:46:11,624 - INFO - Created chunk 1061:
2025-02-07 13:46:11,624 - INFO -   Topics: access, point, points, requests, account
2025-02-07 13:46:11,625 - INFO -   Tokens: 516
2025-02-07 13:46:11,681 - INFO - Created chunk 1062:
2025-02-07 13:46:11,682 - INFO -   Topics: access, point, iam, policy, policies
2025-02-07 13:46:11,682 - INFO -   Tokens: 385
2025-02-07 13:46:11,733 - INFO - Created chunk 1063:
2025-02-07 13:46:11,734 - INFO - 

2025-02-07 13:46:13,967 - INFO -   Tokens: 426
2025-02-07 13:46:14,033 - INFO - Created chunk 1104:
2025-02-07 13:46:14,034 - INFO -   Topics: grants, access, s3, role, data
2025-02-07 13:46:14,035 - INFO -   Tokens: 460
2025-02-07 13:46:14,104 - INFO - Created chunk 1105:
2025-02-07 13:46:14,105 - INFO -   Topics: grants, access, data, s3, iam
2025-02-07 13:46:14,106 - INFO -   Tokens: 346
2025-02-07 13:46:14,170 - INFO - Created chunk 1106:
2025-02-07 13:46:14,171 - INFO -   Topics: grants, access, s3, identity, create
2025-02-07 13:46:14,172 - INFO -   Tokens: 337
2025-02-07 13:46:14,257 - INFO - Created chunk 1107:
2025-02-07 13:46:14,258 - INFO -   Topics: grants, identity, access, s3, directory
2025-02-07 13:46:14,258 - INFO -   Tokens: 463
2025-02-07 13:46:14,324 - INFO - Created chunk 1108:
2025-02-07 13:46:14,324 - INFO -   Topics: grants, access, identity, s3, create
2025-02-07 13:46:14,325 - INFO -   Tokens: 369
2025-02-07 13:46:14,366 - INFO - Created chunk 1109:
2025-02-07

2025-02-07 13:46:16,590 - INFO -   Topics: grants, iam, arn, s3, access
2025-02-07 13:46:16,591 - INFO -   Tokens: 296
2025-02-07 13:46:16,643 - INFO - Created chunk 1151:
2025-02-07 13:46:16,643 - INFO -   Topics: grants, grant, access, s3, delete
2025-02-07 13:46:16,644 - INFO -   Tokens: 346
2025-02-07 13:46:16,689 - INFO - Created chunk 1152:
2025-02-07 13:46:16,690 - INFO -   Topics: grants, grant, delete, choose, access
2025-02-07 13:46:16,690 - INFO -   Tokens: 298
2025-02-07 13:46:16,737 - INFO - Created chunk 1153:
2025-02-07 13:46:16,738 - INFO -   Topics: grants, access, data, s3, using
2025-02-07 13:46:16,738 - INFO -   Tokens: 292
2025-02-07 13:46:16,817 - INFO - Created chunk 1154:
2025-02-07 13:46:16,818 - INFO -   Topics: grant, access, grants, s3, amzn
2025-02-07 13:46:16,818 - INFO -   Tokens: 534
2025-02-07 13:46:16,859 - INFO - Created chunk 1155:
2025-02-07 13:46:16,860 - INFO -   Topics: amzn, demo, s3, access, key
2025-02-07 13:46:16,860 - INFO -   Tokens: 266
20

2025-02-07 13:46:18,866 - INFO -   Tokens: 117
2025-02-07 13:46:18,894 - INFO - Created chunk 1197:
2025-02-07 13:46:18,895 - INFO -   Topics: acl, access, grants, account, bucket
2025-02-07 13:46:18,896 - INFO -   Tokens: 129
2025-02-07 13:46:18,934 - INFO - Created chunk 1198:
2025-02-07 13:46:18,934 - INFO -   Topics: acl, owner, id, grants, resource
2025-02-07 13:46:18,935 - INFO -   Tokens: 229
2025-02-07 13:46:18,978 - INFO - Created chunk 1199:
2025-02-07 13:46:18,979 - INFO -   Topics: grant, id, amazonaws, com, user
2025-02-07 13:46:18,979 - INFO -   Tokens: 278
2025-02-07 13:46:19,025 - INFO - Created chunk 1200:
2025-02-07 13:46:19,026 - INFO -   Topics: acl, owner, bucket, access, set
2025-02-07 13:46:19,027 - INFO -   Tokens: 303
2025-02-07 13:46:19,081 - INFO - Created chunk 1201:
2025-02-07 13:46:19,082 - INFO -   Topics: acl, acls, permissions, bucket, grants
2025-02-07 13:46:19,083 - INFO -   Tokens: 392
2025-02-07 13:46:19,130 - INFO - Created chunk 1202:
2025-02-07 1

2025-02-07 13:46:21,266 - INFO -   Tokens: 384
2025-02-07 13:46:21,331 - INFO - Created chunk 1243:
2025-02-07 13:46:21,332 - INFO -   Topics: access, public, bucket, iam, s3
2025-02-07 13:46:21,333 - INFO -   Tokens: 470
2025-02-07 13:46:21,380 - INFO - Created chunk 1244:
2025-02-07 13:46:21,381 - INFO -   Topics: access, point, policy, choose, multi
2025-02-07 13:46:21,381 - INFO -   Tokens: 335
2025-02-07 13:46:21,432 - INFO - Created chunk 1245:
2025-02-07 13:46:21,433 - INFO -   Topics: bucket, access, public, choose, iam
2025-02-07 13:46:21,433 - INFO -   Tokens: 369
2025-02-07 13:46:21,479 - INFO - Created chunk 1246:
2025-02-07 13:46:21,480 - INFO -   Topics: iam, s3, access, console, choose
2025-02-07 13:46:21,480 - INFO -   Tokens: 318
2025-02-07 13:46:21,539 - INFO - Created chunk 1247:
2025-02-07 13:46:21,540 - INFO -   Topics: owner, bucket, operations, account, use
2025-02-07 13:46:21,541 - INFO -   Tokens: 392
2025-02-07 13:46:21,597 - INFO - Created chunk 1248:
2025-02

2025-02-07 13:46:23,724 - INFO -   Tokens: 377
2025-02-07 13:46:23,771 - INFO - Created chunk 1289:
2025-02-07 13:46:23,771 - INFO -   Topics: owner, bucket, control, acl, demo
2025-02-07 13:46:23,772 - INFO -   Tokens: 347
2025-02-07 13:46:23,831 - INFO - Created chunk 1290:
2025-02-07 13:46:23,832 - INFO -   Topics: acls, acl, owner, bucket, control
2025-02-07 13:46:23,833 - INFO -   Tokens: 428
2025-02-07 13:46:23,886 - INFO - Created chunk 1291:
2025-02-07 13:46:23,887 - INFO -   Topics: acl, bucket, owner, grants, acls
2025-02-07 13:46:23,887 - INFO -   Tokens: 361
2025-02-07 13:46:23,915 - INFO - Created chunk 1292:
2025-02-07 13:46:23,916 - INFO -   Topics: bucket, setting, acls, acl, amazonaws
2025-02-07 13:46:23,917 - INFO -   Tokens: 161
2025-02-07 13:46:23,979 - INFO - Created chunk 1293:
2025-02-07 13:46:23,980 - INFO -   Topics: aws, amazon, data, s3, use
2025-02-07 13:46:23,981 - INFO -   Tokens: 341
2025-02-07 13:46:24,010 - INFO - Created chunk 1294:
2025-02-07 13:46:24

2025-02-07 13:46:26,478 - INFO -   Tokens: 319
2025-02-07 13:46:26,521 - INFO - Created chunk 1335:
2025-02-07 13:46:26,522 - INFO -   Topics: using, object, example, string, encryption
2025-02-07 13:46:26,522 - INFO -   Tokens: 291
2025-02-07 13:46:26,566 - INFO - Created chunk 1336:
2025-02-07 13:46:26,568 - INFO -   Topics: encryption, server, upload, console, object
2025-02-07 13:46:26,568 - INFO -   Tokens: 304
2025-02-07 13:46:26,627 - INFO - Created chunk 1337:
2025-02-07 13:46:26,627 - INFO -   Topics: upload, encryption, multipart, server, operation
2025-02-07 13:46:26,628 - INFO -   Tokens: 413
2025-02-07 13:46:26,681 - INFO - Created chunk 1338:
2025-02-07 13:46:26,682 - INFO -   Topics: encryption, object, server, source, key
2025-02-07 13:46:26,682 - INFO -   Tokens: 369
2025-02-07 13:46:26,726 - INFO - Created chunk 1339:
2025-02-07 13:46:26,727 - INFO -   Topics: object, encryption, aws, s3, class
2025-02-07 13:46:26,727 - INFO -   Tokens: 289
2025-02-07 13:46:26,767 - I

2025-02-07 13:46:29,142 - INFO - Created chunk 1381:
2025-02-07 13:46:29,142 - INFO -   Topics: encryption, server, kms, object, request
2025-02-07 13:46:29,143 - INFO -   Tokens: 424
2025-02-07 13:46:29,192 - INFO - Created chunk 1382:
2025-02-07 13:46:29,193 - INFO -   Topics: encryption, server, kms, key, requests
2025-02-07 13:46:29,194 - INFO -   Tokens: 340
2025-02-07 13:46:29,266 - INFO - Created chunk 1383:
2025-02-07 13:46:29,267 - INFO -   Topics: kms, key, encryption, server, id
2025-02-07 13:46:29,268 - INFO -   Tokens: 556
2025-02-07 13:46:29,322 - INFO - Created chunk 1384:
2025-02-07 13:46:29,322 - INFO -   Topics: encryption, sse, key, data, object
2025-02-07 13:46:29,323 - INFO -   Tokens: 398
2025-02-07 13:46:29,370 - INFO - Created chunk 1385:
2025-02-07 13:46:29,371 - INFO -   Topics: encryption, sse, key, object, keys
2025-02-07 13:46:29,372 - INFO -   Tokens: 338
2025-02-07 13:46:29,422 - INFO - Created chunk 1386:
2025-02-07 13:46:29,423 - INFO -   Topics: encryp

2025-02-07 13:46:31,541 - INFO - Created chunk 1427:
2025-02-07 13:46:31,544 - INFO -   Topics: replication, status, encryption, data, objects
2025-02-07 13:46:31,553 - INFO -   Tokens: 51
2025-02-07 13:46:31,648 - INFO - Created chunk 1428:
2025-02-07 13:46:31,649 - INFO -   Topics: lifecycle, objects, object, s3, aws
2025-02-07 13:46:31,650 - INFO -   Tokens: 452
2025-02-07 13:46:31,686 - INFO - Created chunk 1429:
2025-02-07 13:46:31,686 - INFO -   Topics: s3, access, storage, data, amazon
2025-02-07 13:46:31,687 - INFO -   Tokens: 205
2025-02-07 13:46:31,713 - INFO - Created chunk 1430:
2025-02-07 13:46:31,714 - INFO -   Topics: encryption, default, kms, s3, set
2025-02-07 13:46:31,714 - INFO -   Tokens: 149
2025-02-07 13:46:31,761 - INFO - Created chunk 1431:
2025-02-07 13:46:31,761 - INFO -   Topics: access, aws, amazon, s3, policies
2025-02-07 13:46:31,762 - INFO -   Tokens: 303
2025-02-07 13:46:31,822 - INFO - Created chunk 1432:
2025-02-07 13:46:31,823 - INFO -   Topics: acces

2025-02-07 13:46:33,965 - INFO -   Topics: role, permissions, replication, destination, arn
2025-02-07 13:46:33,966 - INFO -   Tokens: 390
2025-02-07 13:46:34,020 - INFO - Created chunk 1472:
2025-02-07 13:46:34,020 - INFO -   Topics: role, destination, source, iam, s3
2025-02-07 13:46:34,021 - INFO -   Tokens: 368
2025-02-07 13:46:34,070 - INFO - Created chunk 1473:
2025-02-07 13:46:34,071 - INFO -   Topics: choose, source, bucket, policy, console
2025-02-07 13:46:34,072 - INFO -   Tokens: 334
2025-02-07 13:46:34,120 - INFO - Created chunk 1474:
2025-02-07 13:46:34,121 - INFO -   Topics: replication, batch, objects, conﬁguration, destination
2025-02-07 13:46:34,122 - INFO -   Tokens: 330
2025-02-07 13:46:34,175 - INFO - Created chunk 1475:
2025-02-07 13:46:34,176 - INFO -   Topics: replication, aws, buckets, objects, cli
2025-02-07 13:46:34,176 - INFO -   Tokens: 365
2025-02-07 13:46:34,244 - INFO - Created chunk 1476:
2025-02-07 13:46:34,245 - INFO -   Topics: rule, replication, dest

2025-02-07 13:46:36,463 - INFO -   Tokens: 328
2025-02-07 13:46:36,533 - INFO - Created chunk 1516:
2025-02-07 13:46:36,533 - INFO -   Topics: encryption, objects, kms, replication, keys
2025-02-07 13:46:36,534 - INFO -   Tokens: 429
2025-02-07 13:46:36,609 - INFO - Created chunk 1517:
2025-02-07 13:46:36,610 - INFO -   Topics: replication, sse, objects, encryption, conﬁguration
2025-02-07 13:46:36,611 - INFO -   Tokens: 492
2025-02-07 13:46:36,660 - INFO - Created chunk 1518:
2025-02-07 13:46:36,661 - INFO -   Topics: kms, replication, key, rule, destination
2025-02-07 13:46:36,662 - INFO -   Tokens: 287
2025-02-07 13:46:36,744 - INFO - Created chunk 1519:
2025-02-07 13:46:36,745 - INFO -   Topics: role, status, kms, sse, destination
2025-02-07 13:46:36,746 - INFO -   Tokens: 344
2025-02-07 13:46:36,850 - INFO - Created chunk 1520:
2025-02-07 13:46:36,851 - INFO -   Topics: kms, keys, key, replication, aws
2025-02-07 13:46:36,852 - INFO -   Tokens: 492
2025-02-07 13:46:36,946 - INFO -

2025-02-07 13:46:39,873 - INFO - Created chunk 1561:
2025-02-07 13:46:39,874 - INFO -   Topics: replication, batch, job, source, objects
2025-02-07 13:46:39,875 - INFO -   Tokens: 476
2025-02-07 13:46:39,946 - INFO - Created chunk 1562:
2025-02-07 13:46:39,947 - INFO -   Topics: replication, metrics, s3, lens, batch
2025-02-07 13:46:39,948 - INFO -   Tokens: 459
2025-02-07 13:46:39,997 - INFO - Created chunk 1563:
2025-02-07 13:46:39,998 - INFO -   Topics: replication, status, metrics, s3, batch
2025-02-07 13:46:39,999 - INFO -   Tokens: 349
2025-02-07 13:46:40,051 - INFO - Created chunk 1564:
2025-02-07 13:46:40,052 - INFO -   Topics: replication, metrics, s3, rule, region
2025-02-07 13:46:40,052 - INFO -   Tokens: 335
2025-02-07 13:46:40,082 - INFO - Created chunk 1565:
2025-02-07 13:46:40,082 - INFO -   Topics: replication, bucket, objects, rule, status
2025-02-07 13:46:40,083 - INFO -   Tokens: 123
2025-02-07 13:46:40,119 - INFO - Created chunk 1566:
2025-02-07 13:46:40,120 - INFO 

2025-02-07 13:46:42,370 - INFO - Created chunk 1606:
2025-02-07 13:46:42,371 - INFO -   Topics: multi, point, region, policy, aws
2025-02-07 13:46:42,372 - INFO -   Tokens: 401
2025-02-07 13:46:42,420 - INFO - Created chunk 1607:
2025-02-07 13:46:42,421 - INFO -   Topics: multi, point, region, access, policy
2025-02-07 13:46:42,421 - INFO -   Tokens: 346
2025-02-07 13:46:42,480 - INFO - Created chunk 1608:
2025-02-07 13:46:42,481 - INFO -   Topics: multi, point, access, region, policy
2025-02-07 13:46:42,482 - INFO -   Tokens: 397
2025-02-07 13:46:42,533 - INFO - Created chunk 1609:
2025-02-07 13:46:42,533 - INFO -   Topics: multi, access, region, point, policy
2025-02-07 13:46:42,534 - INFO -   Tokens: 367
2025-02-07 13:46:42,570 - INFO - Created chunk 1610:
2025-02-07 13:46:42,571 - INFO -   Topics: multi, region, point, access, action
2025-02-07 13:46:42,572 - INFO -   Tokens: 248
2025-02-07 13:46:42,630 - INFO - Created chunk 1611:
2025-02-07 13:46:42,631 - INFO -   Topics: multi, 

2025-02-07 13:46:44,864 - INFO -   Topics: objects, bucket, enabled, lifecycle, object
2025-02-07 13:46:44,864 - INFO -   Tokens: 410
2025-02-07 13:46:44,912 - INFO - Created chunk 1652:
2025-02-07 13:46:44,913 - INFO -   Topics: lifecycle, bucket, object, conﬁguration, s3
2025-02-07 13:46:44,914 - INFO -   Tokens: 346
2025-02-07 13:46:44,965 - INFO - Created chunk 1653:
2025-02-07 13:46:44,966 - INFO -   Topics: status, bucket, s3, conﬁguration, management
2025-02-07 13:46:44,967 - INFO -   Tokens: 356
2025-02-07 13:46:45,013 - INFO - Created chunk 1654:
2025-02-07 13:46:45,014 - INFO -   Topics: version, id, s3, object, value
2025-02-07 13:46:45,015 - INFO -   Tokens: 320
2025-02-07 13:46:45,057 - INFO - Created chunk 1655:
2025-02-07 13:46:45,057 - INFO -   Topics: object, version, delete, bucket, following
2025-02-07 13:46:45,058 - INFO -   Tokens: 290
2025-02-07 13:46:45,114 - INFO - Created chunk 1656:
2025-02-07 13:46:45,114 - INFO -   Topics: delete, object, bucket, new, s3
202

2025-02-07 13:46:47,003 - INFO -   Tokens: 354
2025-02-07 13:46:47,053 - INFO - Created chunk 1697:
2025-02-07 13:46:47,053 - INFO -   Topics: object, objects, version, bucket, id
2025-02-07 13:46:47,054 - INFO -   Tokens: 313
2025-02-07 13:46:47,080 - INFO - Created chunk 1698:
2025-02-07 13:46:47,080 - INFO -   Topics: version, object, bucket, objects, following
2025-02-07 13:46:47,081 - INFO -   Tokens: 145
2025-02-07 13:46:47,110 - INFO - Created chunk 1699:
2025-02-07 13:46:47,111 - INFO -   Topics: delete, id, version, object, bucket
2025-02-07 13:46:47,112 - INFO -   Tokens: 173
2025-02-07 13:46:47,141 - INFO - Created chunk 1700:
2025-02-07 13:46:47,141 - INFO -   Topics: delete, version, object, owner, bucket
2025-02-07 13:46:47,142 - INFO -   Tokens: 180
2025-02-07 13:46:47,198 - INFO - Created chunk 1701:
2025-02-07 13:46:47,199 - INFO -   Topics: delete, object, enabled, bucket, objects
2025-02-07 13:46:47,199 - INFO -   Tokens: 398
2025-02-07 13:46:47,260 - INFO - Created 

2025-02-07 13:46:49,398 - INFO -   Tokens: 226
2025-02-07 13:46:49,429 - INFO - Created chunk 1743:
2025-02-07 13:46:49,429 - INFO -   Topics: region, objects, storage, s3, api
2025-02-07 13:46:49,430 - INFO -   Tokens: 180
2025-02-07 13:46:49,466 - INFO - Created chunk 1744:
2025-02-07 13:46:49,467 - INFO -   Topics: region, objects, zone, s3, storage
2025-02-07 13:46:49,468 - INFO -   Tokens: 219
2025-02-07 13:46:49,505 - INFO - Created chunk 1745:
2025-02-07 13:46:49,506 - INFO -   Topics: region, multi, data, points, access
2025-02-07 13:46:49,507 - INFO -   Tokens: 204
2025-02-07 13:46:49,542 - INFO - Created chunk 1746:
2025-02-07 13:46:49,542 - INFO -   Topics: copy, region, operation, data, s3
2025-02-07 13:46:49,543 - INFO -   Tokens: 201
2025-02-07 13:46:49,603 - INFO - Created chunk 1747:
2025-02-07 13:46:49,604 - INFO -   Topics: operation, copy, region, data, storage
2025-02-07 13:46:49,605 - INFO -   Tokens: 182
2025-02-07 13:46:49,643 - INFO - Created chunk 1748:
2025-02

2025-02-07 13:46:51,140 - INFO -   Topics: access, data, objects, s3, using
2025-02-07 13:46:51,140 - INFO -   Tokens: 386
2025-02-07 13:46:51,200 - INFO - Created chunk 1789:
2025-02-07 13:46:51,200 - INFO -   Topics: s3, zone, objects, class, lifecycle
2025-02-07 13:46:51,201 - INFO -   Tokens: 415
2025-02-07 13:46:51,274 - INFO - Created chunk 1790:
2025-02-07 13:46:51,275 - INFO -   Topics: data, s3, zone, class, storage
2025-02-07 13:46:51,276 - INFO -   Tokens: 512
2025-02-07 13:46:51,340 - INFO - Created chunk 1791:
2025-02-07 13:46:51,341 - INFO -   Topics: s3, class, encryption, objects, sse
2025-02-07 13:46:51,341 - INFO -   Tokens: 466
2025-02-07 13:46:51,382 - INFO - Created chunk 1792:
2025-02-07 13:46:51,383 - INFO -   Topics: s3, data, storage, kms, amazon
2025-02-07 13:46:51,383 - INFO -   Tokens: 261
2025-02-07 13:46:51,447 - INFO - Created chunk 1793:
2025-02-07 13:46:51,448 - INFO -   Topics: class, s3, storage, zone, amazon
2025-02-07 13:46:51,449 - INFO -   Tokens:

2025-02-07 13:46:53,861 - INFO -   Topics: object, s3, amazon, lambda, using
2025-02-07 13:46:53,861 - INFO -   Tokens: 367
2025-02-07 13:46:53,914 - INFO - Created chunk 1835:
2025-02-07 13:46:53,915 - INFO -   Topics: lifecycle, choose, object, objects, status
2025-02-07 13:46:53,916 - INFO -   Tokens: 388
2025-02-07 13:46:53,975 - INFO - Created chunk 1836:
2025-02-07 13:46:53,975 - INFO -   Topics: lifecycle, objects, class, s3, conﬁguration
2025-02-07 13:46:53,976 - INFO -   Tokens: 404
2025-02-07 13:46:54,035 - INFO - Created chunk 1837:
2025-02-07 13:46:54,036 - INFO -   Topics: lifecycle, data, s3, objects, class
2025-02-07 13:46:54,036 - INFO -   Tokens: 437
2025-02-07 13:46:54,085 - INFO - Created chunk 1838:
2025-02-07 13:46:54,086 - INFO -   Topics: lifecycle, s3, objects, amazon, storage
2025-02-07 13:46:54,087 - INFO -   Tokens: 335
2025-02-07 13:46:54,114 - INFO - Created chunk 1839:
2025-02-07 13:46:54,115 - INFO -   Topics: lifecycle, storage, objects, class, s3
2025-0

2025-02-07 13:46:56,179 - INFO -   Tokens: 391
2025-02-07 13:46:56,244 - INFO - Created chunk 1880:
2025-02-07 13:46:56,245 - INFO -   Topics: delete, object, action, version, s3
2025-02-07 13:46:56,246 - INFO -   Tokens: 437
2025-02-07 13:46:56,309 - INFO - Created chunk 1881:
2025-02-07 13:46:56,310 - INFO -   Topics: lifecycle, action, id, delete, specify
2025-02-07 13:46:56,311 - INFO -   Tokens: 468
2025-02-07 13:46:56,366 - INFO - Created chunk 1882:
2025-02-07 13:46:56,366 - INFO -   Topics: lifecycle, object, multipart, delete, action
2025-02-07 13:46:56,367 - INFO -   Tokens: 405
2025-02-07 13:46:56,426 - INFO - Created chunk 1883:
2025-02-07 13:46:56,427 - INFO -   Topics: lifecycle, object, specify, rule, s3
2025-02-07 13:46:56,428 - INFO -   Tokens: 430
2025-02-07 13:46:56,492 - INFO - Created chunk 1884:
2025-02-07 13:46:56,493 - INFO -   Topics: action, lifecycle, rule, object, specify
2025-02-07 13:46:56,494 - INFO -   Tokens: 483
2025-02-07 13:46:56,544 - INFO - Created

2025-02-07 13:46:58,668 - INFO -   Topics: server, data, s3, amazon, access
2025-02-07 13:46:58,669 - INFO -   Tokens: 517
2025-02-07 13:46:58,706 - INFO - Created chunk 1926:
2025-02-07 13:46:58,706 - INFO -   Topics: amazon, s3, api, information, metrics
2025-02-07 13:46:58,707 - INFO -   Tokens: 228
2025-02-07 13:46:58,761 - INFO - Created chunk 1927:
2025-02-07 13:46:58,762 - INFO -   Topics: data, s3, encryption, amazon, aws
2025-02-07 13:46:58,763 - INFO -   Tokens: 380
2025-02-07 13:46:58,793 - INFO - Created chunk 1928:
2025-02-07 13:46:58,794 - INFO -   Topics: data, value, console, resource, s3
2025-02-07 13:46:58,794 - INFO -   Tokens: 171
2025-02-07 13:46:58,814 - INFO - Created chunk 1929:
2025-02-07 13:46:58,815 - INFO -   Topics: zone, data, console, aws, s3
2025-02-07 13:46:58,816 - INFO -   Tokens: 79
2025-02-07 13:46:58,847 - INFO - Created chunk 1930:
2025-02-07 13:46:58,848 - INFO -   Topics: point, data, console, access, region
2025-02-07 13:46:58,849 - INFO -   To

2025-02-07 13:47:01,042 - INFO - Created chunk 1971:
2025-02-07 13:47:01,043 - INFO -   Topics: string, server, s3, aws, arn
2025-02-07 13:47:01,043 - INFO -   Tokens: 336
2025-02-07 13:47:01,091 - INFO - Created chunk 1972:
2025-02-07 13:47:01,092 - INFO -   Topics: demo, amzn, bucket, buckets, s3
2025-02-07 13:47:01,093 - INFO -   Tokens: 329
2025-02-07 13:47:01,168 - INFO - Created chunk 1973:
2025-02-07 13:47:01,168 - INFO -   Topics: demo, amzn, bucket, s3, policy
2025-02-07 13:47:01,169 - INFO -   Tokens: 557
2025-02-07 13:47:01,217 - INFO - Created chunk 1974:
2025-02-07 13:47:01,217 - INFO -   Topics: destination, bucket, demo, amzn, s3
2025-02-07 13:47:01,218 - INFO -   Tokens: 355
2025-02-07 13:47:01,265 - INFO - Created chunk 1975:
2025-02-07 13:47:01,266 - INFO -   Topics: destination, grants, bucket, conﬁguration, owner
2025-02-07 13:47:01,267 - INFO -   Tokens: 334
2025-02-07 13:47:01,308 - INFO - Created chunk 1976:
2025-02-07 13:47:01,309 - INFO -   Topics: destination,

2025-02-07 13:47:03,762 - INFO -   Tokens: 251
2025-02-07 13:47:03,821 - INFO - Created chunk 2017:
2025-02-07 13:47:03,822 - INFO -   Topics: lambda, point, requests, request, access
2025-02-07 13:47:03,823 - INFO -   Tokens: 270
2025-02-07 13:47:03,884 - INFO - Created chunk 2018:
2025-02-07 13:47:03,884 - INFO -   Topics: lambda, requests, point, server, object
2025-02-07 13:47:03,885 - INFO -   Tokens: 291
2025-02-07 13:47:03,932 - INFO - Created chunk 2019:
2025-02-07 13:47:03,932 - INFO -   Topics: class, objects, access, storage, metrics
2025-02-07 13:47:03,933 - INFO -   Tokens: 275
2025-02-07 13:47:03,983 - INFO - Created chunk 2020:
2025-02-07 13:47:03,984 - INFO -   Topics: class, storage, object, s3, access
2025-02-07 13:47:03,985 - INFO -   Tokens: 334
2025-02-07 13:47:04,044 - INFO - Created chunk 2021:
2025-02-07 13:47:04,044 - INFO -   Topics: s3, class, storage, object, objects
2025-02-07 13:47:04,045 - INFO -   Tokens: 340
2025-02-07 13:47:04,087 - INFO - Created chun

2025-02-07 13:47:06,542 - INFO - Created chunk 2062:
2025-02-07 13:47:06,543 - INFO -   Topics: key, amazon, conﬁguration, object, using
2025-02-07 13:47:06,544 - INFO -   Tokens: 383
2025-02-07 13:47:06,603 - INFO - Created chunk 2063:
2025-02-07 13:47:06,604 - INFO -   Topics: value, id, arn, requests, conﬁguration
2025-02-07 13:47:06,605 - INFO -   Tokens: 295
2025-02-07 13:47:06,661 - INFO - Created chunk 2064:
2025-02-07 13:47:06,662 - INFO -   Topics: lambda, value, id, conﬁguration, string
2025-02-07 13:47:06,663 - INFO -   Tokens: 335
2025-02-07 13:47:06,715 - INFO - Created chunk 2065:
2025-02-07 13:47:06,716 - INFO -   Topics: value, lambda, id, arn, aws
2025-02-07 13:47:06,717 - INFO -   Tokens: 259
2025-02-07 13:47:06,808 - INFO - Created chunk 2066:
2025-02-07 13:47:06,809 - INFO -   Topics: conﬁguration, value, arn, example, use
2025-02-07 13:47:06,810 - INFO -   Tokens: 402
2025-02-07 13:47:06,879 - INFO - Created chunk 2067:
2025-02-07 13:47:06,880 - INFO -   Topics: va

2025-02-07 13:47:09,937 - INFO -   Tokens: 169
2025-02-07 13:47:09,979 - INFO - Created chunk 2108:
2025-02-07 13:47:09,980 - INFO -   Topics: source, objects, metrics, object, storage
2025-02-07 13:47:09,980 - INFO -   Tokens: 185
2025-02-07 13:47:10,032 - INFO - Created chunk 2109:
2025-02-07 13:47:10,033 - INFO -   Topics: destination, objects, source, metrics, replication
2025-02-07 13:47:10,034 - INFO -   Tokens: 201
2025-02-07 13:47:10,078 - INFO - Created chunk 2110:
2025-02-07 13:47:10,079 - INFO -   Topics: object, enabled, objects, metrics, storage
2025-02-07 13:47:10,080 - INFO -   Tokens: 172
2025-02-07 13:47:10,122 - INFO - Created chunk 2111:
2025-02-07 13:47:10,123 - INFO -   Topics: buckets, delete, enabled, multi, metrics
2025-02-07 13:47:10,124 - INFO -   Tokens: 179
2025-02-07 13:47:10,174 - INFO - Created chunk 2112:
2025-02-07 13:47:10,174 - INFO -   Topics: sse, kms, requests, buckets, enabled
2025-02-07 13:47:10,175 - INFO -   Tokens: 191
2025-02-07 13:47:10,243 

2025-02-07 13:47:12,537 - INFO -   Topics: tags, lens, resource, add, aws
2025-02-07 13:47:12,538 - INFO -   Tokens: 345
2025-02-07 13:47:12,587 - INFO - Created chunk 2154:
2025-02-07 13:47:12,588 - INFO -   Topics: import, amazonaws, lens, tags, com
2025-02-07 13:47:12,589 - INFO -   Tokens: 300
2025-02-07 13:47:12,636 - INFO - Created chunk 2155:
2025-02-07 13:47:12,637 - INFO -   Topics: string, new, import, tags, public
2025-02-07 13:47:12,638 - INFO -   Tokens: 226
2025-02-07 13:47:12,699 - INFO - Created chunk 2156:
2025-02-07 13:47:12,700 - INFO -   Topics: tags, lens, choose, aws, console
2025-02-07 13:47:12,701 - INFO -   Tokens: 347
2025-02-07 13:47:12,747 - INFO - Created chunk 2157:
2025-02-07 13:47:12,748 - INFO -   Topics: import, amazonaws, com, lens, string
2025-02-07 13:47:12,749 - INFO -   Tokens: 237
2025-02-07 13:47:12,802 - INFO - Created chunk 2158:
2025-02-07 13:47:12,803 - INFO -   Topics: lens, choose, tags, console, add
2025-02-07 13:47:12,804 - INFO -   Toke

2025-02-07 13:47:15,422 - INFO -   Topics: metrics, lens, use, storage, buckets
2025-02-07 13:47:15,423 - INFO -   Tokens: 402
2025-02-07 13:47:15,479 - INFO - Created chunk 2200:
2025-02-07 13:47:15,480 - INFO -   Topics: metrics, lens, s3, storage, lifecycle
2025-02-07 13:47:15,480 - INFO -   Tokens: 405
2025-02-07 13:47:15,537 - INFO - Created chunk 2201:
2025-02-07 13:47:15,537 - INFO -   Topics: buckets, lens, metrics, lifecycle, storage
2025-02-07 13:47:15,538 - INFO -   Tokens: 405
2025-02-07 13:47:15,593 - INFO - Created chunk 2202:
2025-02-07 13:47:15,594 - INFO -   Topics: metrics, buckets, bucket, storage, lens
2025-02-07 13:47:15,594 - INFO -   Tokens: 391
2025-02-07 13:47:15,657 - INFO - Created chunk 2203:
2025-02-07 13:47:15,658 - INFO -   Topics: metrics, choose, buckets, class, storage
2025-02-07 13:47:15,659 - INFO -   Tokens: 464
2025-02-07 13:47:15,729 - INFO - Created chunk 2204:
2025-02-07 13:47:15,729 - INFO -   Topics: multipart, upload, buckets, lifecycle, metr

2025-02-07 13:47:17,977 - INFO -   Tokens: 462
2025-02-07 13:47:18,028 - INFO - Created chunk 2245:
2025-02-07 13:47:18,029 - INFO -   Topics: import, lens, example, string, storage
2025-02-07 13:47:18,030 - INFO -   Tokens: 329
2025-02-07 13:47:18,080 - INFO - Created chunk 2246:
2025-02-07 13:47:18,081 - INFO -   Topics: import, lens, region, amazon, example
2025-02-07 13:47:18,082 - INFO -   Tokens: 282
2025-02-07 13:47:18,143 - INFO - Created chunk 2247:
2025-02-07 13:47:18,144 - INFO -   Topics: import, value, string, key, amazon
2025-02-07 13:47:18,145 - INFO -   Tokens: 285
2025-02-07 13:47:18,196 - INFO - Created chunk 2248:
2025-02-07 13:47:18,196 - INFO -   Topics: import, amazon, lens, region, example
2025-02-07 13:47:18,198 - INFO -   Tokens: 302
2025-02-07 13:47:18,240 - INFO - Created chunk 2249:
2025-02-07 13:47:18,240 - INFO -   Topics: string, public, amazon, region, import
2025-02-07 13:47:18,241 - INFO -   Tokens: 271
2025-02-07 13:47:18,286 - INFO - Created chunk 22

2025-02-07 13:47:20,428 - INFO -   Tokens: 317
2025-02-07 13:47:20,486 - INFO - Created chunk 2291:
2025-02-07 13:47:20,486 - INFO -   Topics: encryption, destination, choose, key, bucket
2025-02-07 13:47:20,488 - INFO -   Tokens: 415
2025-02-07 13:47:20,541 - INFO - Created chunk 2292:
2025-02-07 13:47:20,541 - INFO -   Topics: kms, key, sse, keys, aws
2025-02-07 13:47:20,542 - INFO -   Tokens: 361
2025-02-07 13:47:20,609 - INFO - Created chunk 2293:
2025-02-07 13:47:20,609 - INFO -   Topics: object, status, information, access, multipart
2025-02-07 13:47:20,610 - INFO -   Tokens: 496
2025-02-07 13:47:20,657 - INFO - Created chunk 2294:
2025-02-07 13:47:20,658 - INFO -   Topics: destination, object, s3, list, source
2025-02-07 13:47:20,658 - INFO -   Tokens: 309
2025-02-07 13:47:20,724 - INFO - Created chunk 2295:
2025-02-07 13:47:20,725 - INFO -   Topics: source, destination, bucket, demo, amzn
2025-02-07 13:47:20,725 - INFO -   Tokens: 466
2025-02-07 13:47:20,779 - INFO - Created ch

2025-02-07 13:47:23,048 - INFO -   Tokens: 435
2025-02-07 13:47:23,106 - INFO - Created chunk 2337:
2025-02-07 13:47:23,106 - INFO -   Topics: public, access, settings, bucket, account
2025-02-07 13:47:23,107 - INFO -   Tokens: 401
2025-02-07 13:47:23,136 - INFO - Created chunk 2338:
2025-02-07 13:47:23,137 - INFO -   Topics: public, settings, access, bucket, policy
2025-02-07 13:47:23,137 - INFO -   Tokens: 185
2025-02-07 13:47:23,184 - INFO - Created chunk 2339:
2025-02-07 13:47:23,185 - INFO -   Topics: bucket, policy, choose, public, access
2025-02-07 13:47:23,186 - INFO -   Tokens: 318
2025-02-07 13:47:23,244 - INFO - Created chunk 2340:
2025-02-07 13:47:23,244 - INFO -   Topics: bucket, acls, owner, control, access
2025-02-07 13:47:23,245 - INFO -   Tokens: 430
2025-02-07 13:47:23,300 - INFO - Created chunk 2341:
2025-02-07 13:47:23,301 - INFO -   Topics: server, access, grant, choose, bucket
2025-02-07 13:47:23,301 - INFO -   Tokens: 400
2025-02-07 13:47:23,347 - INFO - Created 

2025-02-07 13:47:25,398 - INFO - Created chunk 2382:
2025-02-07 13:47:25,399 - INFO -   Topics: choose, create, policy, bucket, default
2025-02-07 13:47:25,400 - INFO -   Tokens: 393
2025-02-07 13:47:25,452 - INFO - Created chunk 2383:
2025-02-07 13:47:25,453 - INFO -   Topics: policy, bucket, choose, console, aws
2025-02-07 13:47:25,454 - INFO -   Tokens: 362
2025-02-07 13:47:25,514 - INFO - Created chunk 2384:
2025-02-07 13:47:25,515 - INFO -   Topics: bucket, console, choose, value, s3
2025-02-07 13:47:25,515 - INFO -   Tokens: 443
2025-02-07 13:47:25,568 - INFO - Created chunk 2385:
2025-02-07 13:47:25,568 - INFO -   Topics: request, add, use, choose, console
2025-02-07 13:47:25,569 - INFO -   Tokens: 365
2025-02-07 13:47:25,624 - INFO - Created chunk 2386:
2025-02-07 13:47:25,625 - INFO -   Topics: choose, status, console, request, aws
2025-02-07 13:47:25,625 - INFO -   Tokens: 393
2025-02-07 13:47:25,682 - INFO - Created chunk 2387:
2025-02-07 13:47:25,682 - INFO -   Topics: choo

2025-02-07 13:47:27,969 - INFO -   Tokens: 415
2025-02-07 13:47:28,023 - INFO - Created chunk 2428:
2025-02-07 13:47:28,024 - INFO -   Topics: delete, zone, choose, console, amazon
2025-02-07 13:47:28,025 - INFO -   Tokens: 354
2025-02-07 13:47:28,060 - INFO - Created chunk 2429:
2025-02-07 13:47:28,061 - INFO -   Topics: choose, delete, bucket, zone, set
2025-02-07 13:47:28,062 - INFO -   Tokens: 226
2025-02-07 13:47:28,101 - INFO - Created chunk 2430:
2025-02-07 13:47:28,101 - INFO -   Topics: request, aws, service, cli, default
2025-02-07 13:47:28,103 - INFO -   Tokens: 238
2025-02-07 13:47:28,142 - INFO - Created chunk 2431:
2025-02-07 13:47:28,143 - INFO -   Topics: aws, command, cli, amazon, s3
2025-02-07 13:47:28,143 - INFO -   Tokens: 253
2025-02-07 13:47:28,185 - INFO - Created chunk 2432:
2025-02-07 13:47:28,186 - INFO -   Topics: kms, keys, sse, lifecycle, s3
2025-02-07 13:47:28,187 - INFO -   Tokens: 270
2025-02-07 13:47:28,215 - INFO - Created chunk 2433:
2025-02-07 13:47:

2025-02-07 13:47:29,894 - INFO -   Tokens: 309
2025-02-07 13:47:29,921 - INFO - Created chunk 2474:
2025-02-07 13:47:29,922 - INFO -   Topics: lifecycle, delete, s3, amazon, object
2025-02-07 13:47:29,923 - INFO -   Tokens: 146
2025-02-07 13:47:29,965 - INFO - Created chunk 2475:
2025-02-07 13:47:29,966 - INFO -   Topics: multipart, lifecycle, upload, action, rule
2025-02-07 13:47:29,967 - INFO -   Tokens: 288
2025-02-07 13:47:30,020 - INFO - Created chunk 2476:
2025-02-07 13:47:30,021 - INFO -   Topics: class, upload, region, multipart, storage
2025-02-07 13:47:30,021 - INFO -   Tokens: 371
2025-02-07 13:47:30,078 - INFO - Created chunk 2477:
2025-02-07 13:47:30,078 - INFO -   Topics: s3, aws, amazon, objects, bucket
2025-02-07 13:47:30,079 - INFO -   Tokens: 367
2025-02-07 13:47:30,131 - INFO - Created chunk 2478:
2025-02-07 13:47:30,132 - INFO -   Topics: policies, s3, metrics, amazon, bucket
2025-02-07 13:47:30,132 - INFO -   Tokens: 349
2025-02-07 13:47:30,218 - INFO - Created chu

In [20]:
# Usage:
chunker = SemanticChunker('s3-api.pdf')
chunker.debug_pdf_content()  # Run this before process_and_save to check content

chunker.process_and_save('NEW-s3-api-chunks.json')


2025-02-07 13:47:31,339 - INFO - Initializing SemanticChunker with PDF: s3-api.pdf
2025-02-07 13:47:32,009 - INFO - Running PDF content debug...
2025-02-07 13:47:33,296 - INFO - 
Page 1 Preview (first 200 chars):
2025-02-07 13:47:33,296 - INFO - --------------------------------------------------
2025-02-07 13:47:33,297 - INFO - API Reference
Amazon Simple Storage Service
API Version 2006-03-01
Copyright © 2024 Amazon Web Services, Inc. and/or its aﬃliates. All rights reserved.
2025-02-07 13:47:33,298 - INFO - --------------------------------------------------
2025-02-07 13:47:33,299 - INFO - Page 1 length: 151 characters
2025-02-07 13:47:33,324 - INFO - 
Page 2 Preview (first 200 chars):
2025-02-07 13:47:33,324 - INFO - --------------------------------------------------
2025-02-07 13:47:33,325 - INFO - Amazon Simple Storage Service API Reference
Amazon Simple Storage Service: API Reference
Copyright © 2024 Amazon Web Services, Inc. and/or its aﬃliates. All rights reserved.
Amazon's tra

KeyboardInterrupt: 

In [None]:
# Usage:
chunker = SemanticChunker('s3-outposts.pdf')
chunker.debug_pdf_content()  # Run this before process_and_save to check content

chunker.process_and_save('NEW-s3-outposts-chunks.json')


In [None]:
# Initialize and run the chunker
chunker = SemanticChunker('glacier-dg.pdf')
chunker.process_and_save('NEW-glacier-dg-chunks.json')