# This notebook is to generate summaries for the most common topics (extracted via the clustering script). These summaries can then be pre-loaded by the final model if needed, rather than having to generate them from scratch. Which ultimately saves time and efficiency.  

In [1]:
import os
import pickle
import json
import boto3
import logging
import yaml
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Dict, List, Any, Union
from tqdm import tqdm
from openai import OpenAI
from pinecone import Pinecone
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer

# ---------- Configure Logging ----------
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('topic_summarization.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# ---------- Config ----------
class Config:
    def __init__(self, config_file="config.yaml"):
        # Load configuration from YAML file
        self.config_data = self.load_config(config_file)
        
        # File paths
        self.CLUSTERED_VECTORS_PATH = "rizzbot_data/cleaned_clustered_vectors.pkl" # Path the the cleaned clustered vectors file
        self.TOPIC_MODEL_PATH = "rizzbot_data/bertopic_model"
        
        # S3 settings
        self.S3_BUCKET = "rizzbot-temp-storage"
        self.S3_PREFIX = "rizzbot/Summaries/"
        
        # Pinecone settings
        self.PINECONE_INDEX = "rizzbot-summaries"
        
        # Model settings
        self.EMBEDDING_MODEL = "all-MiniLM-L6-v2"
        self.SUMMARY_MODEL = "gpt-4o-mini"
        
        # Processing settings
        self.MAX_SUMMARY_WORDS = 500
        self.MAX_DOCS_PER_TOPIC = 50
        self.CHUNK_SIZE = 500
        
        # API Keys from config.yaml
        self.OPENAI_API_KEY = self.config_data.get('openai_api_key')
        self.PINECONE_API_KEY = self.config_data.get('pinecone_api_key')
        
        # Validate required keys
        self.validate_config()
    
    def load_config(self, config_file: str) -> dict:
        """Load configuration from YAML file"""
        try:
            with open(config_file, 'r') as f:
                config = yaml.safe_load(f)
            logger.info(f"Loaded configuration from {config_file}")
            return config
        except FileNotFoundError:
            logger.error(f"Configuration file {config_file} not found")
            raise
        except yaml.YAMLError as e:
            logger.error(f"Error parsing YAML file: {e}")
            raise
    
    def validate_config(self):
        """Validate that required API keys are present"""
        missing_keys = []
        
        if not self.OPENAI_API_KEY:
            missing_keys.append('openai_api_key')
        
        if not self.PINECONE_API_KEY:
            missing_keys.append('pinecone_api_key')
        
        if missing_keys:
            logger.error(f"Missing required API keys in config.yaml: {missing_keys}")
            raise ValueError(f"Missing required API keys: {', '.join(missing_keys)}")
        
        logger.info("All required API keys found in configuration")

class TopicSummarizer:
    def __init__(self, config_file="config.yaml"):
        self.config = Config(config_file)
        self.s3_client = None
        self.openai_client = None
        self.pinecone_client = None
        self.pinecone_index = None
        self.embedder = None
        self.topic_model = None
        self.clustered_vectors = None
        
    def initialize_clients(self):
        """Initialize all external service clients"""
        try:
            logger.info("Initializing clients...")
            
            # Initialize S3 client (uses AWS credentials from environment/AWS config)
            self.s3_client = boto3.client("s3")
            logger.info("S3 client initialized")
            
            # Initialize OpenAI client with API key from config
            self.openai_client = OpenAI(api_key=self.config.OPENAI_API_KEY)
            logger.info("OpenAI client initialized")
            
            # Test OpenAI connection
            try:
                test_response = self.openai_client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": "Hello"}],
                    max_tokens=5
                )
                logger.info("OpenAI API connection test successful")
            except Exception as e:
                logger.error(f"OpenAI API connection test failed: {e}")
                raise
            
            # Initialize Pinecone client with API key from config
            self.pinecone_client = Pinecone(api_key=self.config.PINECONE_API_KEY)
            self.pinecone_index = self.pinecone_client.Index(self.config.PINECONE_INDEX)
            logger.info("Pinecone client initialized")
            
            # Initialize sentence transformer (no API key needed)
            self.embedder = SentenceTransformer(self.config.EMBEDDING_MODEL)
            logger.info("Sentence transformer initialized")
            
            logger.info("All clients initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize clients: {e}")
            raise
    
    def load_data(self):
    #Load clustered data and BERTopic model"""
        try:
            logger.info("Loading clustered vectors...")
            with open(self.config.CLUSTERED_VECTORS_PATH, "rb") as f:
                self.clustered_vectors = pickle.load(f)
            logger.info(f"Loaded clustered vectors: {len(self.clustered_vectors)} items")
            
            if isinstance(self.clustered_vectors, pd.DataFrame):
                logger.info(f"DataFrame shape: {self.clustered_vectors.shape}, columns: {list(self.clustered_vectors.columns)}")
            else:
                logger.info(f"Data type: {type(self.clustered_vectors)}")

            self.load_topic_model_first()
        except Exception as e:
            logger.error(f"Error loading data: {e}")
            raise

    
    def load_topic_model_first(self):
    #Load BERTopic model with re-attached embedding model
        try:
            logger.info(f"Loading BERTopic model from: {self.config.TOPIC_MODEL_PATH}")
            embedding_model = SentenceTransformer(self.config.EMBEDDING_MODEL)
            self.topic_model = BERTopic.load(self.config.TOPIC_MODEL_PATH, embedding_model=embedding_model)
            topics = self.topic_model.get_topics()
            logger.info(f"Loaded BERTopic model with {len(topics)} topics")
        except Exception as e:
            logger.error(f"Error loading BERTopic model: {e}")
            raise

    
    def group_documents_by_topic(self) -> Dict[int, List[str]]:
        logger.info("Grouping documents by topic...")
        topic_to_docs = {}

        if isinstance(self.clustered_vectors, pd.DataFrame):
            df = self.clustered_vectors
            if 'topic_id' not in df.columns or 'text' not in df.columns:
                raise ValueError("clustered_vectors must include 'topic_id' and 'text'")
            for topic_id, group in df.dropna(subset=['topic_id', 'text']).groupby('topic_id'):
                if topic_id == -1:
                    continue
                topic_to_docs[int(topic_id)] = group['text'].tolist()
        else:
            for item in self.clustered_vectors:
                if not isinstance(item, dict): continue
                topic_id = item.get("topic_id")
                text = item.get("text")
                if topic_id is not None and text and topic_id != -1:
                    topic_to_docs.setdefault(topic_id, []).append(text)

        logger.info(f"Grouped documents into {len(topic_to_docs)} topics")
        return topic_to_docs

    
    def generate_summary(self, topic_id: int, docs: List[str]) -> str:
        """Generate summary for a single topic with enhanced error handling"""
        try:
            combined_text = "\n\n".join(docs[:self.config.MAX_DOCS_PER_TOPIC])
            
            logger.info(f"Generating summary for topic {topic_id} with {len(docs)} documents")
            logger.info(f"Combined text length: {len(combined_text)} characters")
            
            prompt = (
                f"You are a helpful assistant. Write a concise and clear {self.config.MAX_SUMMARY_WORDS}-word summary "
                f"on the following topic using the provided documents:\n\n{combined_text}\n\n"
                f"Summary:"
            )
            
            response = self.openai_client.chat.completions.create(
                model=self.config.SUMMARY_MODEL,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.5,
                max_tokens=1500
            )
            
            summary = response.choices[0].message.content.strip()
            logger.info(f"Successfully generated summary for topic {topic_id}: {len(summary)} characters")
            return summary
            
        except Exception as e:
            logger.error(f"Failed to generate summary for topic {topic_id}: {e}")
            raise
    
    def save_to_s3(self, topic_id: int, summary_text: str, run_number: int):
        """Save summary to S3 with run number"""
        try:
            s3_key = f"{self.config.S3_PREFIX}run_{run_number}/topic_{topic_id}.json"
            
            summary_data = {
                "topic_id": topic_id,
                "summary_text": summary_text,
                "source": "BERTopic",
                "run_number": run_number,
                "timestamp": datetime.now().isoformat(),
                "model_used": self.config.SUMMARY_MODEL
            }
            
            self.s3_client.put_object(
                Bucket=self.config.S3_BUCKET,
                Key=s3_key,
                Body=json.dumps(summary_data, indent=2),
                ContentType="application/json"
            )
            logger.info(f"Saved topic {topic_id} to S3: {s3_key}")
            
        except Exception as e:
            logger.error(f"Failed to save topic {topic_id} to S3: {e}")
            raise
    
    def save_to_pinecone(self, topic_id: int, summary_text: str, run_number: int):
        """Save summary chunks to Pinecone with run number"""
        try:
            chunks = [summary_text[i:i+self.config.CHUNK_SIZE] 
                     for i in range(0, len(summary_text), self.config.CHUNK_SIZE)]
            
            vectors_to_upsert = []
            for i, chunk in enumerate(chunks):
                embedding = self.embedder.encode(chunk).tolist()
                vector_id = f"summary-run{run_number}-{topic_id}-{i}"
                
                metadata = {
                    "type": "summary",
                    "topic_id": str(topic_id),
                    "chunk_id": i,
                    "source": "BERTopic",
                    "summary_quality": "v1.0",
                    "run_number": run_number,
                    "timestamp": datetime.now().isoformat(),
                    "model_used": self.config.SUMMARY_MODEL
                }
                
                vectors_to_upsert.append((vector_id, embedding, metadata))
            
            # Batch upsert for efficiency
            self.pinecone_index.upsert(vectors=vectors_to_upsert)
            logger.info(f"Saved {len(chunks)} chunks for topic {topic_id} to Pinecone")
            
        except Exception as e:
            logger.error(f"Failed to save topic {topic_id} to Pinecone: {e}")
            raise
    
    def save_summaries_locally(self, summaries: Dict[int, str], run_number: int):
        """Save summaries to local file for backup"""
        try:
            local_dir = f"rizzbot_data/summaries_run_{run_number}"
            os.makedirs(local_dir, exist_ok=True)
            
            # Save individual summaries
            for topic_id, summary_text in summaries.items():
                filename = os.path.join(local_dir, f"topic_{topic_id}.txt")
                with open(filename, 'w', encoding='utf-8') as f:
                    f.write(f"Topic ID: {topic_id}\n")
                    f.write(f"Run Number: {run_number}\n")
                    f.write(f"Timestamp: {datetime.now().isoformat()}\n")
                    f.write(f"Model: {self.config.SUMMARY_MODEL}\n")
                    f.write("-" * 50 + "\n")
                    f.write(summary_text)
            
            # Save consolidated file
            consolidated_file = os.path.join(local_dir, "all_summaries.json")
            with open(consolidated_file, 'w', encoding='utf-8') as f:
                summary_data = {
                    "run_number": run_number,
                    "timestamp": datetime.now().isoformat(),
                    "model_used": self.config.SUMMARY_MODEL,
                    "total_topics": len(summaries),
                    "summaries": summaries
                }
                json.dump(summary_data, f, indent=2, ensure_ascii=False)
            
            logger.info(f"Saved {len(summaries)} summaries locally in {local_dir}")
            
        except Exception as e:
            logger.error(f"Failed to save summaries locally: {e}")
            raise
    
    def run_full_summarization(self):
    #Run summarization for all topics once
        run_number = 1
        topic_to_docs = self.group_documents_by_topic()
        summaries = {}
        failed = []

        logger.info(f"Running summarization for {len(topic_to_docs)} topics...")

        for topic_id in tqdm(topic_to_docs, desc="Summarizing topics"):
            try:
                docs = topic_to_docs[topic_id]
                summary = self.generate_summary(topic_id, docs)
                summaries[topic_id] = summary
                self.save_to_s3(topic_id, summary, run_number)
                self.save_to_pinecone(topic_id, summary, run_number)
            except Exception as e:
                logger.error(f"Topic {topic_id} failed: {e}")
                failed.append(topic_id)

        self.save_summaries_locally(summaries, run_number)
        logger.info(f"Summary generation completed. Success: {len(summaries)} | Failed: {len(failed)}")

        return summaries

    
    def save_consolidated_results(self, all_results: Dict[int, Dict[int, str]]):
        """Save consolidated results from all runs"""
        try:
            consolidated_dir = "rizzbot_data/consolidated_results"
            os.makedirs(consolidated_dir, exist_ok=True)
            
            # Get total topics count properly
            topic_to_docs = self.group_documents_by_topic()
            total_topics = len(topic_to_docs)
            
            # Summary statistics
            stats = {
                "total_runs": self.config.NUM_TEST_RUNS,
                "timestamp": datetime.now().isoformat(),
                "model_used": self.config.SUMMARY_MODEL,
                "total_topics_available": total_topics,
                "run_statistics": {}
            }
            
            for run_number, summaries in all_results.items():
                stats["run_statistics"][run_number] = {
                    "topics_processed": len(summaries),
                    "success_rate": len(summaries) / total_topics if total_topics > 0 else 0
                }
            
            # Save statistics
            with open(os.path.join(consolidated_dir, "run_statistics.json"), 'w') as f:
                json.dump(stats, f, indent=2)
            
            # Save all results
            with open(os.path.join(consolidated_dir, "all_results.json"), 'w') as f:
                json.dump(all_results, f, indent=2, ensure_ascii=False)
            
            logger.info(f"Consolidated results saved in {consolidated_dir}")
            
        except Exception as e:
            logger.error(f"Failed to save consolidated results: {e}")
    
    def run(self):
        """Main execution method"""
        try:
            self.initialize_clients()
            self.load_data()
            return self.run_full_summarization()
        except Exception as e:
            logger.error(f"Fatal error during execution: {e}")
            raise


def main():
    """Main entry point"""
    try:
        # You can specify a different config file path if needed
        # summarizer = TopicSummarizer("path/to/your/config.yaml")
        summarizer = TopicSummarizer("config.yaml")  # Uses default "config.yaml" in current directory
        results = summarizer.run()
        
        # Print final summary
        total_successful = sum(len(summaries) for summaries in results.values())
        print(f"\n Final Results:")
        print(f"   Total summaries generated: {total_successful}")
        
    except Exception as e:
        logger.error(f"Script failed: {e}")
        raise

if __name__ == "__main__":
    main()




2025-06-30 18:16:09,513 - INFO - Loaded configuration from config.yaml
2025-06-30 18:16:09,515 - INFO - All required API keys found in configuration
2025-06-30 18:16:09,516 - INFO - Initializing clients...
2025-06-30 18:16:09,548 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2025-06-30 18:16:10,424 - INFO - S3 client initialized
2025-06-30 18:16:11,270 - INFO - OpenAI client initialized
2025-06-30 18:16:14,802 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:16:14,809 - INFO - OpenAI API connection test successful
2025-06-30 18:16:17,807 - INFO - Pinecone client initialized
2025-06-30 18:16:17,851 - INFO - Use pytorch device_name: cuda:0
2025-06-30 18:16:17,853 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2025-06-30 18:16:31,399 - INFO - Sentence transformer initialized
2025-06-30 18:16:31,400 - INFO - All clients initialized successfully
2025-06-30 18:16:31,401 - INFO - Loading clustere

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:16:46,983 - INFO - Saved 7 chunks for topic 0 to Pinecone
Summarizing topics:   2%|▏         | 1/45 [00:14<10:19, 14.07s/it]2025-06-30 18:16:46,986 - INFO - Generating summary for topic 3 with 45 documents
2025-06-30 18:16:46,987 - INFO - Combined text length: 161456 characters
2025-06-30 18:16:58,349 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:16:58,359 - INFO - Successfully generated summary for topic 3: 2507 characters
2025-06-30 18:16:58,920 - INFO - Saved topic 3 to S3: rizzbot/Summaries/run_1/topic_3.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:16:59,241 - INFO - Saved 6 chunks for topic 3 to Pinecone
Summarizing topics:   4%|▍         | 2/45 [00:26<09:19, 13.00s/it]2025-06-30 18:16:59,243 - INFO - Generating summary for topic 4 with 22 documents
2025-06-30 18:16:59,244 - INFO - Combined text length: 80309 characters
2025-06-30 18:17:12,197 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:17:12,202 - INFO - Successfully generated summary for topic 4: 3286 characters
2025-06-30 18:17:12,763 - INFO - Saved topic 4 to S3: rizzbot/Summaries/run_1/topic_4.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:17:13,138 - INFO - Saved 7 chunks for topic 4 to Pinecone
Summarizing topics:   7%|▋         | 3/45 [00:40<09:23, 13.41s/it]2025-06-30 18:17:13,141 - INFO - Generating summary for topic 5 with 18 documents
2025-06-30 18:17:13,142 - INFO - Combined text length: 67388 characters
2025-06-30 18:17:21,522 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:17:21,532 - INFO - Successfully generated summary for topic 5: 2695 characters
2025-06-30 18:17:22,177 - INFO - Saved topic 5 to S3: rizzbot/Summaries/run_1/topic_5.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:17:22,467 - INFO - Saved 6 chunks for topic 5 to Pinecone
Summarizing topics:   9%|▉         | 4/45 [00:49<08:03, 11.80s/it]2025-06-30 18:17:22,469 - INFO - Generating summary for topic 6 with 18 documents
2025-06-30 18:17:22,471 - INFO - Combined text length: 64600 characters
2025-06-30 18:17:33,423 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:17:33,426 - INFO - Successfully generated summary for topic 6: 3447 characters
2025-06-30 18:17:33,968 - INFO - Saved topic 6 to S3: rizzbot/Summaries/run_1/topic_6.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:17:34,293 - INFO - Saved 7 chunks for topic 6 to Pinecone
Summarizing topics:  11%|█         | 5/45 [01:01<07:52, 11.81s/it]2025-06-30 18:17:34,296 - INFO - Generating summary for topic 7 with 15 documents
2025-06-30 18:17:34,298 - INFO - Combined text length: 55562 characters
2025-06-30 18:17:41,689 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:17:41,694 - INFO - Successfully generated summary for topic 7: 2476 characters
2025-06-30 18:17:42,293 - INFO - Saved topic 7 to S3: rizzbot/Summaries/run_1/topic_7.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:17:42,576 - INFO - Saved 5 chunks for topic 7 to Pinecone
Summarizing topics:  13%|█▎        | 6/45 [01:09<06:53, 10.61s/it]2025-06-30 18:17:42,578 - INFO - Generating summary for topic 8 with 14 documents
2025-06-30 18:17:42,579 - INFO - Combined text length: 24371 characters
2025-06-30 18:17:54,939 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:17:54,943 - INFO - Successfully generated summary for topic 8: 2887 characters
2025-06-30 18:17:55,592 - INFO - Saved topic 8 to S3: rizzbot/Summaries/run_1/topic_8.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:17:55,963 - INFO - Saved 6 chunks for topic 8 to Pinecone
Summarizing topics:  16%|█▌        | 7/45 [01:23<07:17, 11.52s/it]2025-06-30 18:17:55,965 - INFO - Generating summary for topic 10 with 13 documents
2025-06-30 18:17:55,967 - INFO - Combined text length: 44147 characters
2025-06-30 18:18:06,783 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:18:06,797 - INFO - Successfully generated summary for topic 10: 3085 characters
2025-06-30 18:18:07,316 - INFO - Saved topic 10 to S3: rizzbot/Summaries/run_1/topic_10.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:18:07,650 - INFO - Saved 7 chunks for topic 10 to Pinecone
Summarizing topics:  18%|█▊        | 8/45 [01:34<07:08, 11.57s/it]2025-06-30 18:18:07,652 - INFO - Generating summary for topic 11 with 12 documents
2025-06-30 18:18:07,653 - INFO - Combined text length: 22787 characters
2025-06-30 18:18:14,181 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:18:14,184 - INFO - Successfully generated summary for topic 11: 2434 characters
2025-06-30 18:18:14,721 - INFO - Saved topic 11 to S3: rizzbot/Summaries/run_1/topic_11.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:18:15,048 - INFO - Saved 5 chunks for topic 11 to Pinecone
Summarizing topics:  20%|██        | 9/45 [01:42<06:09, 10.27s/it]2025-06-30 18:18:15,051 - INFO - Generating summary for topic 13 with 11 documents
2025-06-30 18:18:15,052 - INFO - Combined text length: 36413 characters
2025-06-30 18:18:25,906 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:18:25,914 - INFO - Successfully generated summary for topic 13: 2789 characters
2025-06-30 18:18:26,449 - INFO - Saved topic 13 to S3: rizzbot/Summaries/run_1/topic_13.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:18:26,757 - INFO - Saved 6 chunks for topic 13 to Pinecone
Summarizing topics:  22%|██▏       | 10/45 [01:53<06:14, 10.71s/it]2025-06-30 18:18:26,760 - INFO - Generating summary for topic 14 with 10 documents
2025-06-30 18:18:26,762 - INFO - Combined text length: 35004 characters
2025-06-30 18:18:38,692 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:18:38,697 - INFO - Successfully generated summary for topic 14: 2936 characters
2025-06-30 18:18:39,252 - INFO - Saved topic 14 to S3: rizzbot/Summaries/run_1/topic_14.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:18:39,551 - INFO - Saved 6 chunks for topic 14 to Pinecone
Summarizing topics:  24%|██▍       | 11/45 [02:06<06:25, 11.35s/it]2025-06-30 18:18:39,554 - INFO - Generating summary for topic 15 with 10 documents
2025-06-30 18:18:39,556 - INFO - Combined text length: 35164 characters
2025-06-30 18:18:46,769 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:18:46,784 - INFO - Successfully generated summary for topic 15: 2648 characters
2025-06-30 18:18:47,320 - INFO - Saved topic 15 to S3: rizzbot/Summaries/run_1/topic_15.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:18:47,647 - INFO - Saved 6 chunks for topic 15 to Pinecone
Summarizing topics:  27%|██▋       | 12/45 [02:14<05:41, 10.36s/it]2025-06-30 18:18:47,651 - INFO - Generating summary for topic 16 with 9 documents
2025-06-30 18:18:47,652 - INFO - Combined text length: 19626 characters
2025-06-30 18:18:57,918 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:18:57,922 - INFO - Successfully generated summary for topic 16: 2765 characters
2025-06-30 18:18:58,505 - INFO - Saved topic 16 to S3: rizzbot/Summaries/run_1/topic_16.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:18:58,795 - INFO - Saved 6 chunks for topic 16 to Pinecone
Summarizing topics:  29%|██▉       | 13/45 [02:25<05:39, 10.60s/it]2025-06-30 18:18:58,798 - INFO - Generating summary for topic 17 with 9 documents
2025-06-30 18:18:58,799 - INFO - Combined text length: 31391 characters
2025-06-30 18:19:06,080 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:19:06,082 - INFO - Successfully generated summary for topic 17: 2959 characters
2025-06-30 18:19:06,711 - INFO - Saved topic 17 to S3: rizzbot/Summaries/run_1/topic_17.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:19:07,023 - INFO - Saved 6 chunks for topic 17 to Pinecone
Summarizing topics:  31%|███       | 14/45 [02:34<05:06,  9.88s/it]2025-06-30 18:19:07,027 - INFO - Generating summary for topic 18 with 9 documents
2025-06-30 18:19:07,028 - INFO - Combined text length: 32047 characters
2025-06-30 18:19:17,073 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:19:17,076 - INFO - Successfully generated summary for topic 18: 2625 characters
2025-06-30 18:19:17,619 - INFO - Saved topic 18 to S3: rizzbot/Summaries/run_1/topic_18.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:19:17,910 - INFO - Saved 6 chunks for topic 18 to Pinecone
Summarizing topics:  33%|███▎      | 15/45 [02:44<05:05, 10.19s/it]2025-06-30 18:19:17,913 - INFO - Generating summary for topic 19 with 9 documents
2025-06-30 18:19:17,914 - INFO - Combined text length: 31748 characters
2025-06-30 18:19:28,292 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:19:28,297 - INFO - Successfully generated summary for topic 19: 2479 characters
2025-06-30 18:19:28,867 - INFO - Saved topic 19 to S3: rizzbot/Summaries/run_1/topic_19.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:19:29,129 - INFO - Saved 5 chunks for topic 19 to Pinecone
Summarizing topics:  36%|███▌      | 16/45 [02:56<05:04, 10.50s/it]2025-06-30 18:19:29,131 - INFO - Generating summary for topic 20 with 8 documents
2025-06-30 18:19:29,132 - INFO - Combined text length: 29209 characters
2025-06-30 18:19:38,692 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:19:38,696 - INFO - Successfully generated summary for topic 20: 2891 characters
2025-06-30 18:19:39,284 - INFO - Saved topic 20 to S3: rizzbot/Summaries/run_1/topic_20.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:19:39,935 - INFO - Saved 6 chunks for topic 20 to Pinecone
Summarizing topics:  38%|███▊      | 17/45 [03:07<04:56, 10.59s/it]2025-06-30 18:19:39,939 - INFO - Generating summary for topic 21 with 8 documents
2025-06-30 18:19:39,940 - INFO - Combined text length: 3929 characters
2025-06-30 18:19:44,989 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:19:44,994 - INFO - Successfully generated summary for topic 21: 1753 characters
2025-06-30 18:19:45,098 - INFO - Saved topic 21 to S3: rizzbot/Summaries/run_1/topic_21.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:19:45,372 - INFO - Saved 4 chunks for topic 21 to Pinecone
Summarizing topics:  40%|████      | 18/45 [03:12<04:04,  9.04s/it]2025-06-30 18:19:45,374 - INFO - Generating summary for topic 24 with 8 documents
2025-06-30 18:19:45,375 - INFO - Combined text length: 25502 characters
2025-06-30 18:19:57,424 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:19:57,430 - INFO - Successfully generated summary for topic 24: 2942 characters
2025-06-30 18:19:57,969 - INFO - Saved topic 24 to S3: rizzbot/Summaries/run_1/topic_24.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:19:58,282 - INFO - Saved 6 chunks for topic 24 to Pinecone
Summarizing topics:  42%|████▏     | 19/45 [03:25<04:25, 10.20s/it]2025-06-30 18:19:58,284 - INFO - Generating summary for topic 25 with 8 documents
2025-06-30 18:19:58,285 - INFO - Combined text length: 28739 characters
2025-06-30 18:20:10,801 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:20:10,805 - INFO - Successfully generated summary for topic 25: 3032 characters
2025-06-30 18:20:11,410 - INFO - Saved topic 25 to S3: rizzbot/Summaries/run_1/topic_25.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:20:11,739 - INFO - Saved 7 chunks for topic 25 to Pinecone
Summarizing topics:  44%|████▍     | 20/45 [03:38<04:39, 11.18s/it]2025-06-30 18:20:11,741 - INFO - Generating summary for topic 26 with 8 documents
2025-06-30 18:20:11,742 - INFO - Combined text length: 29870 characters
2025-06-30 18:20:21,772 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:20:21,779 - INFO - Successfully generated summary for topic 26: 3191 characters
2025-06-30 18:20:22,293 - INFO - Saved topic 26 to S3: rizzbot/Summaries/run_1/topic_26.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:20:22,647 - INFO - Saved 7 chunks for topic 26 to Pinecone
Summarizing topics:  47%|████▋     | 21/45 [03:49<04:26, 11.10s/it]2025-06-30 18:20:22,649 - INFO - Generating summary for topic 27 with 6 documents
2025-06-30 18:20:22,651 - INFO - Combined text length: 21976 characters
2025-06-30 18:20:34,785 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:20:34,788 - INFO - Successfully generated summary for topic 27: 3343 characters
2025-06-30 18:20:35,338 - INFO - Saved topic 27 to S3: rizzbot/Summaries/run_1/topic_27.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:20:35,773 - INFO - Saved 7 chunks for topic 27 to Pinecone
Summarizing topics:  49%|████▉     | 22/45 [04:02<04:29, 11.71s/it]2025-06-30 18:20:35,775 - INFO - Generating summary for topic 28 with 6 documents
2025-06-30 18:20:35,776 - INFO - Combined text length: 3237 characters
2025-06-30 18:20:42,222 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:20:42,225 - INFO - Successfully generated summary for topic 28: 1895 characters
2025-06-30 18:20:42,778 - INFO - Saved topic 28 to S3: rizzbot/Summaries/run_1/topic_28.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:20:43,034 - INFO - Saved 4 chunks for topic 28 to Pinecone
Summarizing topics:  51%|█████     | 23/45 [04:10<03:48, 10.37s/it]2025-06-30 18:20:43,036 - INFO - Generating summary for topic 30 with 6 documents
2025-06-30 18:20:43,038 - INFO - Combined text length: 22273 characters
2025-06-30 18:20:51,908 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:20:51,912 - INFO - Successfully generated summary for topic 30: 2655 characters
2025-06-30 18:20:52,442 - INFO - Saved topic 30 to S3: rizzbot/Summaries/run_1/topic_30.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:20:52,730 - INFO - Saved 6 chunks for topic 30 to Pinecone
Summarizing topics:  53%|█████▎    | 24/45 [04:19<03:33, 10.17s/it]2025-06-30 18:20:52,733 - INFO - Generating summary for topic 31 with 6 documents
2025-06-30 18:20:52,735 - INFO - Combined text length: 22427 characters
2025-06-30 18:21:05,342 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:21:05,346 - INFO - Successfully generated summary for topic 31: 3297 characters
2025-06-30 18:21:05,901 - INFO - Saved topic 31 to S3: rizzbot/Summaries/run_1/topic_31.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:21:06,242 - INFO - Saved 7 chunks for topic 31 to Pinecone
Summarizing topics:  56%|█████▌    | 25/45 [04:33<03:43, 11.17s/it]2025-06-30 18:21:06,245 - INFO - Generating summary for topic 32 with 6 documents
2025-06-30 18:21:06,245 - INFO - Combined text length: 23255 characters
2025-06-30 18:21:15,643 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:21:15,646 - INFO - Successfully generated summary for topic 32: 3251 characters
2025-06-30 18:21:16,346 - INFO - Saved topic 32 to S3: rizzbot/Summaries/run_1/topic_32.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:21:17,078 - INFO - Saved 7 chunks for topic 32 to Pinecone
Summarizing topics:  58%|█████▊    | 26/45 [04:44<03:30, 11.07s/it]2025-06-30 18:21:17,079 - INFO - Generating summary for topic 33 with 6 documents
2025-06-30 18:21:17,080 - INFO - Combined text length: 20170 characters
2025-06-30 18:21:26,051 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:21:26,060 - INFO - Successfully generated summary for topic 33: 2626 characters
2025-06-30 18:21:26,668 - INFO - Saved topic 33 to S3: rizzbot/Summaries/run_1/topic_33.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:21:26,993 - INFO - Saved 6 chunks for topic 33 to Pinecone
Summarizing topics:  60%|██████    | 27/45 [04:54<03:13, 10.72s/it]2025-06-30 18:21:26,995 - INFO - Generating summary for topic 34 with 6 documents
2025-06-30 18:21:26,996 - INFO - Combined text length: 22242 characters
2025-06-30 18:21:34,782 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:21:34,785 - INFO - Successfully generated summary for topic 34: 2720 characters
2025-06-30 18:21:35,334 - INFO - Saved topic 34 to S3: rizzbot/Summaries/run_1/topic_34.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:21:35,613 - INFO - Saved 6 chunks for topic 34 to Pinecone
Summarizing topics:  62%|██████▏   | 28/45 [05:02<02:51, 10.09s/it]2025-06-30 18:21:35,616 - INFO - Generating summary for topic 35 with 5 documents
2025-06-30 18:21:35,617 - INFO - Combined text length: 16334 characters
2025-06-30 18:21:43,269 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:21:43,271 - INFO - Successfully generated summary for topic 35: 2753 characters
2025-06-30 18:21:43,818 - INFO - Saved topic 35 to S3: rizzbot/Summaries/run_1/topic_35.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:21:44,116 - INFO - Saved 6 chunks for topic 35 to Pinecone
Summarizing topics:  64%|██████▍   | 29/45 [05:11<02:33,  9.62s/it]2025-06-30 18:21:44,118 - INFO - Generating summary for topic 36 with 5 documents
2025-06-30 18:21:44,119 - INFO - Combined text length: 18792 characters
2025-06-30 18:21:56,570 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:21:56,579 - INFO - Successfully generated summary for topic 36: 3101 characters
2025-06-30 18:21:57,089 - INFO - Saved topic 36 to S3: rizzbot/Summaries/run_1/topic_36.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:21:57,384 - INFO - Saved 7 chunks for topic 36 to Pinecone
Summarizing topics:  67%|██████▋   | 30/45 [05:24<02:40, 10.71s/it]2025-06-30 18:21:57,386 - INFO - Generating summary for topic 37 with 5 documents
2025-06-30 18:21:57,386 - INFO - Combined text length: 18866 characters
2025-06-30 18:22:06,813 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:22:06,820 - INFO - Successfully generated summary for topic 37: 3241 characters
2025-06-30 18:22:07,388 - INFO - Saved topic 37 to S3: rizzbot/Summaries/run_1/topic_37.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:22:08,149 - INFO - Saved 7 chunks for topic 37 to Pinecone
Summarizing topics:  69%|██████▉   | 31/45 [05:35<02:30, 10.73s/it]2025-06-30 18:22:08,151 - INFO - Generating summary for topic 38 with 5 documents
2025-06-30 18:22:08,153 - INFO - Combined text length: 1337 characters
2025-06-30 18:22:12,521 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:22:12,541 - INFO - Successfully generated summary for topic 38: 1405 characters
2025-06-30 18:22:12,641 - INFO - Saved topic 38 to S3: rizzbot/Summaries/run_1/topic_38.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:22:12,876 - INFO - Saved 3 chunks for topic 38 to Pinecone
Summarizing topics:  71%|███████   | 32/45 [05:39<01:56,  8.93s/it]2025-06-30 18:22:12,878 - INFO - Generating summary for topic 40 with 5 documents
2025-06-30 18:22:12,879 - INFO - Combined text length: 9337 characters
2025-06-30 18:22:21,830 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:22:21,833 - INFO - Successfully generated summary for topic 40: 2236 characters
2025-06-30 18:22:22,402 - INFO - Saved topic 40 to S3: rizzbot/Summaries/run_1/topic_40.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:22:22,782 - INFO - Saved 5 chunks for topic 40 to Pinecone
Summarizing topics:  73%|███████▎  | 33/45 [05:49<01:50,  9.22s/it]2025-06-30 18:22:22,786 - INFO - Generating summary for topic 41 with 5 documents
2025-06-30 18:22:22,788 - INFO - Combined text length: 18213 characters
2025-06-30 18:22:30,367 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:22:30,370 - INFO - Successfully generated summary for topic 41: 3324 characters
2025-06-30 18:22:30,926 - INFO - Saved topic 41 to S3: rizzbot/Summaries/run_1/topic_41.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:22:31,258 - INFO - Saved 7 chunks for topic 41 to Pinecone
Summarizing topics:  76%|███████▌  | 34/45 [05:58<01:38,  9.00s/it]2025-06-30 18:22:31,260 - INFO - Generating summary for topic 42 with 5 documents
2025-06-30 18:22:31,261 - INFO - Combined text length: 17199 characters
2025-06-30 18:22:37,846 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:22:37,850 - INFO - Successfully generated summary for topic 42: 3083 characters
2025-06-30 18:22:38,469 - INFO - Saved topic 42 to S3: rizzbot/Summaries/run_1/topic_42.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:22:38,802 - INFO - Saved 7 chunks for topic 42 to Pinecone
Summarizing topics:  78%|███████▊  | 35/45 [06:05<01:25,  8.56s/it]2025-06-30 18:22:38,804 - INFO - Generating summary for topic 43 with 4 documents
2025-06-30 18:22:38,805 - INFO - Combined text length: 15135 characters
2025-06-30 18:22:51,960 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:22:51,962 - INFO - Successfully generated summary for topic 43: 3242 characters
2025-06-30 18:22:52,512 - INFO - Saved topic 43 to S3: rizzbot/Summaries/run_1/topic_43.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:22:52,818 - INFO - Saved 7 chunks for topic 43 to Pinecone
Summarizing topics:  80%|████████  | 36/45 [06:19<01:31, 10.20s/it]2025-06-30 18:22:52,820 - INFO - Generating summary for topic 44 with 4 documents
2025-06-30 18:22:52,822 - INFO - Combined text length: 14060 characters
2025-06-30 18:23:01,445 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:23:01,450 - INFO - Successfully generated summary for topic 44: 3082 characters
2025-06-30 18:23:01,999 - INFO - Saved topic 44 to S3: rizzbot/Summaries/run_1/topic_44.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:23:02,389 - INFO - Saved 7 chunks for topic 44 to Pinecone
Summarizing topics:  82%|████████▏ | 37/45 [06:29<01:20, 10.01s/it]2025-06-30 18:23:02,391 - INFO - Generating summary for topic 45 with 4 documents
2025-06-30 18:23:02,393 - INFO - Combined text length: 14340 characters
2025-06-30 18:23:11,286 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:23:11,290 - INFO - Successfully generated summary for topic 45: 2837 characters
2025-06-30 18:23:11,813 - INFO - Saved topic 45 to S3: rizzbot/Summaries/run_1/topic_45.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:23:12,182 - INFO - Saved 6 chunks for topic 45 to Pinecone
Summarizing topics:  84%|████████▍ | 38/45 [06:39<01:09,  9.94s/it]2025-06-30 18:23:12,184 - INFO - Generating summary for topic 46 with 4 documents
2025-06-30 18:23:12,185 - INFO - Combined text length: 10618 characters
2025-06-30 18:23:22,438 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:23:22,440 - INFO - Successfully generated summary for topic 46: 3085 characters
2025-06-30 18:23:23,011 - INFO - Saved topic 46 to S3: rizzbot/Summaries/run_1/topic_46.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:23:23,345 - INFO - Saved 7 chunks for topic 46 to Pinecone
Summarizing topics:  87%|████████▋ | 39/45 [06:50<01:01, 10.31s/it]2025-06-30 18:23:23,348 - INFO - Generating summary for topic 48 with 4 documents
2025-06-30 18:23:23,349 - INFO - Combined text length: 14473 characters
2025-06-30 18:23:31,516 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:23:31,519 - INFO - Successfully generated summary for topic 48: 3023 characters
2025-06-30 18:23:32,076 - INFO - Saved topic 48 to S3: rizzbot/Summaries/run_1/topic_48.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:23:32,404 - INFO - Saved 7 chunks for topic 48 to Pinecone
Summarizing topics:  89%|████████▉ | 40/45 [06:59<00:49,  9.93s/it]2025-06-30 18:23:32,407 - INFO - Generating summary for topic 49 with 4 documents
2025-06-30 18:23:32,408 - INFO - Combined text length: 12764 characters
2025-06-30 18:23:44,896 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:23:44,900 - INFO - Successfully generated summary for topic 49: 3203 characters
2025-06-30 18:23:45,429 - INFO - Saved topic 49 to S3: rizzbot/Summaries/run_1/topic_49.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:23:45,721 - INFO - Saved 7 chunks for topic 49 to Pinecone
Summarizing topics:  91%|█████████ | 41/45 [07:12<00:43, 10.95s/it]2025-06-30 18:23:45,724 - INFO - Generating summary for topic 50 with 4 documents
2025-06-30 18:23:45,725 - INFO - Combined text length: 14257 characters
2025-06-30 18:23:57,488 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:23:57,490 - INFO - Successfully generated summary for topic 50: 3043 characters
2025-06-30 18:23:58,018 - INFO - Saved topic 50 to S3: rizzbot/Summaries/run_1/topic_50.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:23:58,335 - INFO - Saved 7 chunks for topic 50 to Pinecone
Summarizing topics:  93%|█████████▎| 42/45 [07:25<00:34, 11.45s/it]2025-06-30 18:23:58,337 - INFO - Generating summary for topic 51 with 4 documents
2025-06-30 18:23:58,338 - INFO - Combined text length: 14642 characters
2025-06-30 18:24:09,171 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:24:09,174 - INFO - Successfully generated summary for topic 51: 2987 characters
2025-06-30 18:24:09,719 - INFO - Saved topic 51 to S3: rizzbot/Summaries/run_1/topic_51.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:24:10,011 - INFO - Saved 6 chunks for topic 51 to Pinecone
Summarizing topics:  96%|█████████▌| 43/45 [07:37<00:23, 11.52s/it]2025-06-30 18:24:10,015 - INFO - Generating summary for topic 52 with 4 documents
2025-06-30 18:24:10,016 - INFO - Combined text length: 11770 characters
2025-06-30 18:24:20,228 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:24:20,232 - INFO - Successfully generated summary for topic 52: 3235 characters
2025-06-30 18:24:20,761 - INFO - Saved topic 52 to S3: rizzbot/Summaries/run_1/topic_52.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:24:21,080 - INFO - Saved 7 chunks for topic 52 to Pinecone
Summarizing topics:  98%|█████████▊| 44/45 [07:48<00:11, 11.38s/it]2025-06-30 18:24:21,083 - INFO - Generating summary for topic 53 with 3 documents
2025-06-30 18:24:21,084 - INFO - Combined text length: 11025 characters
2025-06-30 18:24:32,000 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-30 18:24:32,005 - INFO - Successfully generated summary for topic 53: 3328 characters
2025-06-30 18:24:32,543 - INFO - Saved topic 53 to S3: rizzbot/Summaries/run_1/topic_53.json


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-06-30 18:24:32,912 - INFO - Saved 7 chunks for topic 53 to Pinecone
Summarizing topics: 100%|██████████| 45/45 [07:59<00:00, 10.67s/it]
2025-06-30 18:24:32,929 - INFO - Saved 45 summaries locally in rizzbot_data/summaries_run_1
2025-06-30 18:24:32,930 - INFO - Summary generation completed. Success: 45 | Failed: 0



 Final Results:
   Total summaries generated: 129067


In [None]:
# Quick diagnostic script
import pickle

# Check your data structure
with open("rizzbot_data/full_vectors.pkl", "rb") as f:
    full_vectors = pickle.load(f)

print(f"Total items: {len(full_vectors)}")
print(f"full_vectors type: {type(full_vectors)}")
if isinstance(full_vectors, dict):
    first_item = next(iter(full_vectors.values()))
elif hasattr(full_vectors, "iloc"):  # DataFrame
    first_item = full_vectors.iloc[0]
else:
    first_item = full_vectors[0]
print(f"First item type: {type(first_item)}")
print(f"First item: {first_item}")

if hasattr(full_vectors, "columns"):
    print(f"Columns in DataFrame: {list(full_vectors.columns)}")
elif isinstance(full_vectors[0], dict):
    print(f"Keys in first item: {list(full_vectors[0].keys())}")

# Count items with topic_id
if hasattr(full_vectors, "topic_id"):
    count_with_topic_id = full_vectors["topic_id"].notnull().sum()
else:
    count_with_topic_id = sum(
        1 for item in full_vectors
        if isinstance(item, dict) and
        'topic_id' in item and
        item['topic_id'] is not None
    )
print(f"Items with valid topic_id: {count_with_topic_id}")
# ...existing code...

Total items: 741
full_vectors type: <class 'pandas.core.frame.DataFrame'>
First item type: <class 'pandas.core.series.Series'>
First item: id                                                    emb-0100
text          ,"Wait a second. You guys got stunt doubles? ...
embedding    [0.0493264534, 0.0287857763, -0.0159280337, 0....
Name: 0, dtype: object
Columns in DataFrame: ['id', 'text', 'embedding']
Items with valid topic_id: 0


In [None]:
# Quick diagnostic script for clustered_vectors.pkl

import pickle

# Check your data structure
with open("rizzbot_data/clustered_vectors.pkl", "rb") as f:
    clustered_vectors = pickle.load(f)

print(f"Total items: {len(clustered_vectors)}")
print(f"clustered_vectors type: {type(clustered_vectors)}")
if isinstance(clustered_vectors, dict):
    first_item = next(iter(clustered_vectors.values()))
elif hasattr(clustered_vectors, "iloc"):  # DataFrame
    first_item = clustered_vectors.iloc[0]
else:
    first_item = clustered_vectors[0]
print(f"First item type: {type(first_item)}")
print(f"First item: {first_item}")

if hasattr(clustered_vectors, "columns"):
    print(f"Columns in DataFrame: {list(clustered_vectors.columns)}")
elif isinstance(clustered_vectors[0], dict):
    print(f"Keys in first item: {list(clustered_vectors[0].keys())}")

# Count items with topic_id
if hasattr(clustered_vectors, "topic_id"):
    count_with_topic_id = clustered_vectors["topic_id"].notnull().sum()
else:
    count_with_topic_id = sum(
        1 for item in clustered_vectors
        if isinstance(item, dict) and
        'topic_id' in item and
        item['topic_id'] is not None
    )
print(f"Items with valid topic_id: {count_with_topic_id}")

Total items: 741
clustered_vectors type: <class 'pandas.core.frame.DataFrame'>
First item type: <class 'pandas.core.series.Series'>
First item: id                                                      emb-0100
text            ,"Wait a second. You guys got stunt doubles? ...
embedding      [0.0493264534, 0.0287857763, -0.0159280337, 0....
cluster                                                        0
x                                                       7.796501
y                                                       14.66375
topic_num                                                     -1
topic_score                                             0.331495
Name: 0, dtype: object
Columns in DataFrame: ['id', 'text', 'embedding', 'cluster', 'x', 'y', 'topic_num', 'topic_score']
Items with valid topic_id: 0
