In [22]:
import os
import numpy as np
import pandas as pd
import json
import faiss
from typing import List, Dict, Any, Union, Optional
import openai
from openai import OpenAI
from tqdm import tqdm
from dotenv import load_dotenv
import time


In [34]:
# Load environment variables from a .env file before reading the API key.
# NOTE(review): this is an absolute, machine-specific Windows path; on any
# other machine the file will not be found — confirm and adjust before running.
load_dotenv(dotenv_path=r"A:\UCSD\College\Rady TA\Shiny HyPE\Gen-AI\.env")
# Module-level copy of the key; os.getenv returns None when the variable is unset.
openai_api_key = os.getenv("OPENAI_API_KEY")


In [4]:
class ShinyDocHyPE:
    """
    HyPE (Hypothetical Prompt Embeddings) implementation for Shiny documentation.
    This class processes the output of ShinyDocCrawler to create a searchable index.
    """
    
    def __init__(
        self,
        docs_dir: str = "shiny_docs",
        embedding_model: str = "text-embedding-3-small",
        llm_model: str = "gpt-3.5-turbo",
        num_prompts_per_chunk: int = 5,
        temperature: float = 0.7,
        max_tokens: int = 200,
        api_key: Optional[str] = None,
        verbose: bool = False,
        batch_size: int = 1,
        embedding_batch_size: int = 20
    ):
        """
        Initialize the ShinyDocHyPE system.

        Stores the configuration, creates the OpenAI client, resets the
        in-memory index state and creates the ``shiny_hype_index`` output
        directories in the current working directory.

        Args:
            docs_dir: Directory containing the crawled Shiny documentation
            embedding_model: The OpenAI embedding model to use
            llm_model: The OpenAI language model to use for generating hypothetical prompts
            num_prompts_per_chunk: Number of hypothetical prompts to generate per document
            temperature: Temperature for generation (higher = more diversity)
            max_tokens: Maximum tokens for generated prompts
            api_key: OpenAI API key. When omitted, the client is built without
                an explicit key and relies on the library's own lookup
                (the OPENAI_API_KEY environment variable).
            verbose: Whether to print verbose output
            batch_size: Number of documents to process in each batch
            embedding_batch_size: Number of prompts to embed in each batch
        """
        self.docs_dir = docs_dir
        self.embedding_model = embedding_model
        self.llm_model = llm_model
        self.num_prompts_per_chunk = num_prompts_per_chunk
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.verbose = verbose
        self.batch_size = batch_size
        self.embedding_batch_size = embedding_batch_size

        # Set up OpenAI client.
        # The module-level ``openai.api_key`` is not consulted by ``OpenAI()``
        # client instances, so the key must be handed to the constructor for an
        # explicitly supplied key to take effect. The module attribute is still
        # set so code using the module-level API keeps working.
        if api_key:
            openai.api_key = api_key
            self.client = OpenAI(api_key=api_key)
        else:
            self.client = OpenAI()

        # Storage for the indexed data
        self.doc_structure = {}
        self.documents = []
        self.hypothetical_prompts = []   # one list of prompts per document
        self.prompt_embeddings = None    # 2-D array, one row per prompt
        self.faiss_index = None
        self.doc_indices = []            # document index for each embedding row

        # Create output directories
        os.makedirs("shiny_hype_index", exist_ok=True)
        os.makedirs("shiny_hype_index/embeddings", exist_ok=True)
        
    def load_doc_structure(self):
        """Load the document structure from crawled data"""
        structure_path = os.path.join(self.docs_dir, "doc_structure.json")
        
        if not os.path.exists(structure_path):
            raise FileNotFoundError(f"Document structure not found at {structure_path}. Run ShinyDocCrawler first.")
        
        with open(structure_path, 'r', encoding='utf-8') as f:
            self.doc_structure = json.load(f)
            
        if self.verbose:
            print(f"Loaded document structure with {len(self.doc_structure)} documents")
    
    def load_documents(self):
        """Load all documents from the content directory"""
        content_dir = os.path.join(self.docs_dir, "content")
        
        if not os.path.exists(content_dir):
            raise FileNotFoundError(f"Content directory not found at {content_dir}. Run ShinyDocCrawler first.")
        
        documents = []
        
        for doc_id in self.doc_structure:
            file_path = os.path.join(content_dir, f"{doc_id}.json")
            
            if os.path.exists(file_path):
                with open(file_path, 'r', encoding='utf-8') as f:
                    doc_data = json.load(f)
                    
                # Create a clean text representation of the content
                content_text = ""
                
                # Process content elements
                for element in doc_data.get("content", []):
                    if element.get("type") in ["p", "h1", "h2", "h3", "h4", "h5"]:
                        content_text += element.get("text", "") + "\n\n"
                    elif element.get("type") in ["ul", "ol"]:
                        for item in element.get("items", []):
                            content_text += f"- {item}\n"
                        content_text += "\n"
                
                # Create document record with metadata
                document = {
                    "id": doc_id,
                    "url": doc_data.get("metadata", {}).get("url", ""),
                    "title": doc_data.get("metadata", {}).get("title", ""),
                    "description": doc_data.get("metadata", {}).get("description", ""),
                    "content": content_text,
                    "code_examples": doc_data.get("code_examples", [])
                }
                
                documents.append(document)
            
        self.documents = documents
        
        if self.verbose:
            print(f"Loaded {len(self.documents)} documents")
    
    def generate_hypothetical_prompts(self, document):
        """
        Generate hypothetical prompts that would lead to this document as an answer.
        
        Args:
            document: Document dict with content and metadata
            
        Returns:
            List of hypothetical prompts
        """
        # Create a contextual representation of the document
        doc_context = f"Title: {document['title']}\n"
        if document['description']:
            doc_context += f"Description: {document['description']}\n"
        doc_context += f"\nContent:\n{document['content']}"
        
        # If we have code examples, add the first one (or a portion)
        if document['code_examples'] and len(document['code_examples']) > 0:
            code = document['code_examples'][0]
            # Limit code example length
            if len(code) > 500:
                code = code[:500] + "..."
            doc_context += f"\n\nCode Example:\n{code}"
        
        system_message = f"""You are an expert at generating questions that a Shiny Python developer might ask.
        For the provided Shiny documentation passage, generate {self.num_prompts_per_chunk} different, specific questions 
        that this documentation would be a good answer to.
        Generate diverse questions that cover different aspects of the document.
        Make sure the questions are directly answerable using ONLY the information in the passage.
        Return only the questions, one per line, with no additional text or numbering."""
        
        prompt = f"""
        Documentation passage:
        \"\"\"{doc_context}\"\"\"
        
        Generate {self.num_prompts_per_chunk} different questions that a Shiny user or developer might ask that would be answered by this documentation.
        """
        
        try:
            response = self.client.chat.completions.create(
                model=self.llm_model,
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": prompt}
                ],
                temperature=self.temperature,
                max_tokens=self.max_tokens
            )
            result = response.choices[0].message.content.strip()
            
            # Process the result into a list of questions
            prompts = [q.strip() for q in result.split('\n') if q.strip()]
            
            # Truncate or pad the list to the desired number of prompts
            if len(prompts) > self.num_prompts_per_chunk:
                prompts = prompts[:self.num_prompts_per_chunk]
            elif len(prompts) < self.num_prompts_per_chunk:
                # If we don't have enough prompts, duplicate some existing ones to reach the desired count
                prompts += prompts[:self.num_prompts_per_chunk - len(prompts)]
            
            return prompts
            
        except Exception as e:
            if self.verbose:
                print(f"Error generating prompts: {e}")
            # Return some default prompts
            return [
                f"How do I use {document['title']}?",
                f"What is {document['title']} in Shiny?",
                f"Can you explain {document['title']}?",
                f"What are the main features of {document['title']}?",
                f"How does {document['title']} work in Shiny?"
            ][:self.num_prompts_per_chunk]
    
    def get_embeddings(self, texts):
        """
        Get embeddings for a list of texts using OpenAI.
        
        Args:
            texts: List of text strings
            
        Returns:
            Numpy array of embeddings
        """
        if not texts:
            return np.array([])
            
        # Process in smaller batches to avoid API limits
        all_embeddings = []
        
        # Process in batches
        for i in range(0, len(texts), self.embedding_batch_size):
            batch = texts[i:i+self.embedding_batch_size]
            
            try:
                response = self.client.embeddings.create(
                    input=batch,
                    model=self.embedding_model
                )
                batch_embeddings = [item.embedding for item in response.data]
                all_embeddings.extend(batch_embeddings)
                
                # Rate limiting - sleep between batches
                if i + self.embedding_batch_size < len(texts):
                    time.sleep(1)
                    
            except Exception as e:
                if self.verbose:
                    print(f"Error getting embeddings for batch {i}:{i+self.embedding_batch_size}: {e}")
                
                # Try again with smaller batch size if we hit a limit error
                if self.embedding_batch_size > 1:
                    single_embeddings = []
                    for text in batch:
                        try:
                            # Try one at a time if batch fails
                            response = self.client.embeddings.create(
                                input=[text],
                                model=self.embedding_model
                            )
                            single_embeddings.append(response.data[0].embedding)
                            time.sleep(1)  # Be extra careful with rate limits
                        except Exception as e2:
                            if self.verbose:
                                print(f"Error getting embedding for single text: {e2}")
                            # Fill with zeros if we still can't get embeddings
                            # Should generally not happen, but just in case
                            single_embeddings.append([0] * 1536)  # Default OpenAI embedding size
                    
                    all_embeddings.extend(single_embeddings)
                else:
                    # If we're already processing one at a time, just append zeros
                    all_embeddings.extend([[0] * 1536 for _ in batch])
        
        return np.array(all_embeddings)
    
    def process_document_batch(self, batch, start_idx):
        """
        Generate, persist and embed the hypothetical prompts for one batch.

        For each document the prompts are written to
        ``shiny_hype_index/doc_<idx>_prompts.json``; afterwards the embeddings
        of all prompts in the batch are stored in
        ``shiny_hype_index/embeddings/batch_<start_idx>.npy`` and the matching
        document indices in ``shiny_hype_index/doc_indices_batch_<start_idx>.json``.

        Args:
            batch: List of documents to process
            start_idx: Index of the first document of this batch within the
                full document list

        Returns:
            Tuple of (prompt_embeddings, per-document prompt lists, doc_indices)
        """
        flat_prompts = []      # every prompt of the batch, in order
        owners = []            # document index for each entry of flat_prompts
        prompts_by_doc = []    # one list of prompts per document

        progress = tqdm(batch, disable=not self.verbose)
        for doc_idx, doc in enumerate(progress, start=start_idx):
            questions = self.generate_hypothetical_prompts(doc)

            flat_prompts += questions
            owners += [doc_idx] * len(questions)
            prompts_by_doc.append(questions)

            # Persist the prompts of this document
            prompt_file = f"shiny_hype_index/doc_{doc_idx}_prompts.json"
            with open(prompt_file, 'w', encoding='utf-8') as out:
                json.dump(questions, out, ensure_ascii=False, indent=2)

            # Pause between documents to avoid rate limiting
            time.sleep(0.5)

        # Embed all prompts of the batch and persist the vectors
        vectors = self.get_embeddings(flat_prompts)
        np.save(f"shiny_hype_index/embeddings/batch_{start_idx}.npy", vectors)

        # Persist the prompt -> document mapping
        indices_file = f"shiny_hype_index/doc_indices_batch_{start_idx}.json"
        with open(indices_file, 'w', encoding='utf-8') as out:
            json.dump(owners, out, ensure_ascii=False, indent=2)

        return vectors, prompts_by_doc, owners
    
    def index_documents(self):
        """Create a HyPE index from loaded documents"""
        if not self.documents:
            self.load_doc_structure()
            self.load_documents()
        
        if self.verbose:
            print(f"Indexing {len(self.documents)} documents with HyPE...")
        
        # Save the documents
        with open("shiny_hype_index/documents.json", 'w', encoding='utf-8') as f:
            json.dump(self.documents, f, ensure_ascii=False, indent=2)
        
        # Process in batches
        all_prompts = []
        all_embeddings = []
        all_doc_indices = []
        all_hypothetical_prompts = []
        
        for i in range(0, len(self.documents), self.batch_size):
            batch = self.documents[i:i+self.batch_size]
            
            if self.verbose:
                print(f"Processing batch {i//self.batch_size + 1}/{len(self.documents)//self.batch_size + 1}")
            
            # Process batch
            embeddings, doc_prompts, doc_indices = self.process_document_batch(batch, i)
            
            # Accumulate results
            all_embeddings.append(embeddings)
            all_hypothetical_prompts.extend(doc_prompts)
            all_doc_indices.extend(doc_indices)
        
        # Combine all embeddings
        if all_embeddings:
            self.prompt_embeddings = np.vstack(all_embeddings)
        else:
            # Create empty array with correct dimension if no embeddings
            self.prompt_embeddings = np.array([]).reshape(0, 1536)  # OpenAI default embedding size
        
        # Store the accumulated results
        self.hypothetical_prompts = all_hypothetical_prompts
        self.doc_indices = all_doc_indices
        
        # Create FAISS index for fast similarity search
        self.build_faiss_index()
        
        # Save final index
        self.save("shiny_hype_index")
        
        if self.verbose:
            print(f"Indexed {len(self.documents)} documents with {len(all_doc_indices)} hypothetical prompts")
    
    def build_faiss_index(self):
        """Build a FAISS index for fast similarity search"""
        if self.prompt_embeddings.size == 0:
            if self.verbose:
                print("No embeddings available to build index. Skipping.")
            return
            
        vector_dimension = self.prompt_embeddings.shape[1]
        self.faiss_index = faiss.IndexFlatL2(vector_dimension)
        self.faiss_index.add(self.prompt_embeddings.astype('float32'))
    
    def retrieve(self, query, top_k=5):
        """
        Retrieve documents for a query using HyPE.
        
        Args:
            query: User query
            top_k: Number of documents to retrieve
            
        Returns:
            List of document dicts with scores
        """
        if self.faiss_index is None:
            raise ValueError("No documents have been indexed or index not built. Call index_documents first.")
        
        if self.faiss_index.ntotal == 0:
            return []
        
        # Get query embedding
        query_embedding = self.get_embeddings([query])[0]
        
        # Get top_k * num_prompts_per_chunk or the maximum available
        k = min(top_k * self.num_prompts_per_chunk, self.faiss_index.ntotal)
        
        # Search the FAISS index
        distances, indices = self.faiss_index.search(
            query_embedding.reshape(1, -1).astype('float32'), 
            k
        )
        
        # Map indices back to document indices
        doc_indices = [self.doc_indices[idx] for idx in indices[0]]
        prompt_indices = [idx for idx in indices[0]]
        
        # Create a mapping from document index to its best score and matching prompt
        doc_scores = {}
        doc_prompts = {}
        for i, (doc_idx, prompt_idx) in enumerate(zip(doc_indices, prompt_indices)):
            if doc_idx not in doc_scores or distances[0][i] < doc_scores[doc_idx]:
                doc_scores[doc_idx] = distances[0][i]
                # The matched prompt is the one at prompt_idx in the flattened list
                doc_prompts[doc_idx] = prompt_idx
        
        # Get unique documents with their scores and prompts
        unique_docs = []
        for doc_idx, score in sorted(doc_scores.items(), key=lambda x: x[1])[:top_k]:
            if doc_idx < len(self.documents):
                doc_data = self.documents[doc_idx].copy()
                doc_data['score'] = float(score)
                
                # Get the matched prompt
                prompt_idx = doc_prompts[doc_idx]
                if prompt_idx < len(self.prompt_embeddings):
                    # Calculate which prompt within the document's set matched
                    prompt_count = 0
                    doc_prompt_idx = 0
                    for j in range(len(self.doc_indices)):
                        if self.doc_indices[j] == doc_idx:
                            if j == prompt_idx:
                                doc_prompt_idx = prompt_count
                                break
                            prompt_count += 1
                    
                    # Get the document's prompts
                    if doc_idx < len(self.hypothetical_prompts):
                        doc_data['all_prompts'] = self.hypothetical_prompts[doc_idx]
                        if doc_prompt_idx < len(self.hypothetical_prompts[doc_idx]):
                            doc_data['matched_prompt'] = self.hypothetical_prompts[doc_idx][doc_prompt_idx]
                        else:
                            doc_data['matched_prompt'] = "Unknown matched prompt"
                    else:
                        doc_data['all_prompts'] = []
                        doc_data['matched_prompt'] = "Unknown matched prompt"
                else:
                    doc_data['all_prompts'] = []
                    doc_data['matched_prompt'] = "Unknown matched prompt"
                
                unique_docs.append(doc_data)
        
        return unique_docs
    
    def answer_question(self, query, top_k=3):
        """
        Generate an answer to a question using retrieved documents.
        
        Args:
            query: User query
            top_k: Number of documents to retrieve
            
        Returns:
            Dict with answer, retrieved docs, and query
        """
        # Retrieve relevant documents
        results = self.retrieve(query, top_k=top_k)
        
        if not results:
            return {
                "query": query,
                "answer": "I couldn't find any relevant information in the Shiny documentation to answer your question.",
                "documents": [],
                "prompt_matches": []
            }
            
        # Prepare context from retrieved documents
        context = ""
        prompt_matches = []
        
        for i, doc in enumerate(results):
            context += f"\n\nDOCUMENT {i+1}:\nTitle: {doc['title']}\n"
            if doc['description']:
                context += f"Description: {doc['description']}\n"
            context += f"Content:\n{doc['content']}\n"
            
            # Add a code example if available
            if doc['code_examples'] and len(doc['code_examples']) > 0:
                code = doc['code_examples'][0]
                # Limit code example length
                if len(code) > 500:
                    code = code[:500] + "..."
                context += f"Code Example:\n{code}\n"
            
            # Track the matched prompt
            prompt_matches.append({
                "document_title": doc['title'],
                "document_url": doc['url'],
                "matched_prompt": doc.get('matched_prompt', "Unknown"),
                "all_prompts": doc.get('all_prompts', []),
                "score": doc['score']
            })
        
        # Generate answer using OpenAI
        system_message = """You are an expert Shiny Python developer assistant. 
        Answer the user's question using ONLY the provided Shiny documentation.
        If the documentation doesn't contain the answer, say so clearly.
        Include code examples when relevant, formatting them properly with Python syntax.
        Be concise but thorough, and reference specific parts of the documentation when possible."""
        
        user_message = f"""Question: {query}
        
        Use the following Shiny documentation to answer the question:
        {context}"""
        
        response = self.client.chat.completions.create(
            model=self.llm_model,
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message}
            ],
            temperature=0.3,  # Lower temperature for more factual responses
            max_tokens=1000
        )
        
        answer = response.choices[0].message.content
        
        return {
            "query": query,
            "answer": answer,
            "documents": results,
            "prompt_matches": prompt_matches
        }
    
    def save(self, path="shiny_hype_index"):
        """Save the indexed data to disk"""
        os.makedirs(path, exist_ok=True)
        
        # Save documents if not already saved
        documents_path = os.path.join(path, "documents.json")
        if not os.path.exists(documents_path):
            with open(documents_path, 'w', encoding='utf-8') as f:
                json.dump(self.documents, f, ensure_ascii=False, indent=2)
        
        # Save hypothetical prompts
        with open(os.path.join(path, "hypothetical_prompts.json"), 'w', encoding='utf-8') as f:
            json.dump(self.hypothetical_prompts, f, ensure_ascii=False, indent=2)
        
        # Save document indices
        with open(os.path.join(path, "doc_indices.json"), 'w', encoding='utf-8') as f:
            json.dump(self.doc_indices, f, ensure_ascii=False, indent=2)
        
        # Save embeddings
        np.save(os.path.join(path, "prompt_embeddings.npy"), self.prompt_embeddings)
        
        # Save FAISS index if it exists
        if self.faiss_index is not None:
            faiss.write_index(self.faiss_index, os.path.join(path, "faiss_index.bin"))
        
        # Save configuration
        config = {
            "embedding_model": self.embedding_model,
            "llm_model": self.llm_model,
            "num_prompts_per_chunk": self.num_prompts_per_chunk,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "batch_size": self.batch_size,
            "embedding_batch_size": self.embedding_batch_size
        }
        
        with open(os.path.join(path, "config.json"), 'w', encoding='utf-8') as f:
            json.dump(config, f, ensure_ascii=False, indent=2)
        
        if self.verbose:
            print(f"Saved index to {path}")
    
    @classmethod
    def load(cls, path="shiny_hype_index", api_key=None, verbose=False):
        """Load a saved index from disk"""
        # Load configuration
        config_path = os.path.join(path, "config.json")
        if not os.path.exists(config_path):
            raise FileNotFoundError(f"Config file not found at {config_path}")
            
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        
        # Add default values if not present in config
        config.setdefault("batch_size", 1)
        config.setdefault("embedding_batch_size", 20)
        
        # Create instance
        instance = cls(
            embedding_model=config["embedding_model"],
            llm_model=config["llm_model"],
            num_prompts_per_chunk=config["num_prompts_per_chunk"],
            temperature=config["temperature"],
            max_tokens=config["max_tokens"],
            batch_size=config["batch_size"],
            embedding_batch_size=config["embedding_batch_size"],
            api_key=api_key,
            verbose=verbose
        )
        
        # Load documents
        docs_path = os.path.join(path, "documents.json")
        if os.path.exists(docs_path):
            with open(docs_path, 'r', encoding='utf-8') as f:
                instance.documents = json.load(f)
        
        # Load hypothetical prompts
        prompts_path = os.path.join(path, "hypothetical_prompts.json")
        if os.path.exists(prompts_path):
            with open(prompts_path, 'r', encoding='utf-8') as f:
                instance.hypothetical_prompts = json.load(f)
        
        # Load document indices
        indices_path = os.path.join(path, "doc_indices.json")
        if os.path.exists(indices_path):
            with open(indices_path, 'r', encoding='utf-8') as f:
                instance.doc_indices = json.load(f)
        
        # Load embeddings
        embeddings_path = os.path.join(path, "prompt_embeddings.npy")
        if os.path.exists(embeddings_path):
            instance.prompt_embeddings = np.load(embeddings_path)
        
        # Load FAISS index
        index_path = os.path.join(path, "faiss_index.bin")
        if os.path.exists(index_path):
            try:
                instance.faiss_index = faiss.read_index(index_path)
            except Exception as e:
                if verbose:
                    print(f"Error loading FAISS index: {e}")
                # Rebuild index if loading fails
                if instance.prompt_embeddings is not None and instance.prompt_embeddings.size > 0:
                    instance.build_faiss_index()
        
        if verbose:
            print(f"Loaded index from {path} with {len(instance.documents)} documents")
        
        return instance


In [5]:
if __name__ == "__main__":
    # Smoke test: load the crawled docs, generate and embed the prompts for a
    # single document, and report whether it is safe to index everything.

    # Set your OpenAI API key
    api_key = os.getenv("OPENAI_API_KEY")

    # Initialize ShinyDocHyPE
    hype = ShinyDocHyPE(
        docs_dir="shiny_docs",  # Directory where ShinyDocCrawler saved the docs
        embedding_model="text-embedding-3-small",
        llm_model="gpt-3.5-turbo",
        num_prompts_per_chunk=5,
        api_key=api_key,
        verbose=True,
        batch_size=1,          # Process 1 document at a time
        embedding_batch_size=5  # Process 5 prompts at a time
    )

    hype.load_doc_structure()
    hype.load_documents()

    # Fail with a clear message instead of an IndexError on documents[0]
    if not hype.documents:
        raise RuntimeError("No documents were loaded from 'shiny_docs'; nothing to test.")

    # Save documents first
    with open("shiny_hype_index/documents.json", 'w', encoding='utf-8') as f:
        json.dump(hype.documents, f, ensure_ascii=False, indent=2)

    # Process one document for testing
    test_doc = hype.documents[0]
    print(f"Processing test document: {test_doc['title']}")

    # Generate prompts
    prompts = hype.generate_hypothetical_prompts(test_doc)
    print(f"Generated {len(prompts)} prompts:")
    for i, prompt in enumerate(prompts):
        print(f"{i+1}. {prompt}")

    # Save these prompts
    with open("shiny_hype_index/test_prompts.json", 'w', encoding='utf-8') as f:
        json.dump(prompts, f, ensure_ascii=False, indent=2)

    # Get embeddings one at a time
    failed = 0
    for i, prompt in enumerate(prompts):
        try:
            embedding = hype.get_embeddings([prompt])
            # get_embeddings does not raise on API errors; it substitutes
            # all-zero vectors, so an all-zero result means the call failed.
            if not np.any(embedding):
                raise RuntimeError("embedding request failed (zero-vector fallback returned)")
            print(f"Successfully embedded prompt {i+1}")
            # Save this embedding
            np.save(f"shiny_hype_index/embeddings/test_embedding_{i}.npy", embedding)
        except Exception as e:
            failed += 1
            print(f"Error embedding prompt {i+1}: {e}")

    if failed:
        print(f"\nTest processing finished with {failed} failed embedding(s). Fix the errors above before indexing all documents.")
    else:
        print("\nTest processing completed. You can now safely index all documents.")

Loaded document structure with 473 documents
Loaded 473 documents
Processing test document: Overview – Shiny for Python
Generated 5 prompts:
1. What are the basics of creating Shiny apps and how do input and output components interact?
2. How can I include dynamic plots, tables, and interactive widgets in my Shiny application?
3. What layout components does Shiny provide to help with arranging inputs and outputs?
4. How does Shiny handle reactivity and minimize re-rendering when dependencies change?
5. In what ways can I customize my Shiny app using custom HTML, CSS, and JavaScript?
Successfully embedded prompt 1
Successfully embedded prompt 2
Successfully embedded prompt 3
Successfully embedded prompt 4
Successfully embedded prompt 5

Test processing completed. You can now safely index all documents.


In [8]:
# Run the full indexing pipeline on the `hype` instance created in the
# previous cell: one chat request per document plus embedding requests, with
# results written under shiny_hype_index/.
hype.index_documents()  


Indexing 473 documents with HyPE...
Processing batch 1/474


  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:02<00:00,  2.30s/it]


Processing batch 2/474


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]


Processing batch 3/474


100%|██████████| 1/1 [00:02<00:00,  2.26s/it]


Processing batch 4/474


100%|██████████| 1/1 [00:01<00:00,  1.97s/it]


Processing batch 5/474


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Processing batch 6/474


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


Processing batch 7/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 8/474


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]


Processing batch 9/474


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]


Processing batch 10/474


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


Processing batch 11/474


100%|██████████| 1/1 [00:02<00:00,  2.24s/it]


Processing batch 12/474


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Processing batch 13/474


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Processing batch 14/474


100%|██████████| 1/1 [00:01<00:00,  1.91s/it]


Processing batch 15/474


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]


Processing batch 16/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 17/474


100%|██████████| 1/1 [00:02<00:00,  2.30s/it]


Processing batch 18/474


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]


Processing batch 19/474


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Processing batch 20/474


100%|██████████| 1/1 [00:01<00:00,  1.68s/it]


Processing batch 21/474


100%|██████████| 1/1 [00:02<00:00,  2.62s/it]


Processing batch 22/474


100%|██████████| 1/1 [00:02<00:00,  2.52s/it]


Processing batch 23/474


100%|██████████| 1/1 [00:01<00:00,  1.54s/it]


Processing batch 24/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 25/474


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Processing batch 26/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 27/474


100%|██████████| 1/1 [00:02<00:00,  2.54s/it]


Processing batch 28/474


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Processing batch 29/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 30/474


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


Processing batch 31/474


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


Processing batch 32/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 33/474


100%|██████████| 1/1 [00:02<00:00,  2.29s/it]


Processing batch 34/474


100%|██████████| 1/1 [00:01<00:00,  1.82s/it]


Processing batch 35/474


100%|██████████| 1/1 [00:02<00:00,  2.40s/it]


Processing batch 36/474


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]


Processing batch 37/474


100%|██████████| 1/1 [00:02<00:00,  2.37s/it]


Processing batch 38/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 39/474


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]


Processing batch 40/474


100%|██████████| 1/1 [00:02<00:00,  2.38s/it]


Processing batch 41/474


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Processing batch 42/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 43/474


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]


Processing batch 44/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 45/474


100%|██████████| 1/1 [00:01<00:00,  1.82s/it]


Processing batch 46/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 47/474


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]


Processing batch 48/474


100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


Processing batch 49/474


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


Processing batch 50/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 51/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 52/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 53/474


100%|██████████| 1/1 [00:02<00:00,  2.09s/it]


Processing batch 54/474


100%|██████████| 1/1 [00:02<00:00,  2.14s/it]


Processing batch 55/474


100%|██████████| 1/1 [00:02<00:00,  2.81s/it]


Processing batch 56/474


100%|██████████| 1/1 [00:01<00:00,  1.95s/it]


Processing batch 57/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 58/474


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Processing batch 59/474


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


Processing batch 60/474


100%|██████████| 1/1 [00:02<00:00,  2.32s/it]


Processing batch 61/474


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]


Processing batch 62/474


100%|██████████| 1/1 [00:02<00:00,  2.22s/it]


Processing batch 63/474


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


Processing batch 64/474


100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


Processing batch 65/474


100%|██████████| 1/1 [00:02<00:00,  2.39s/it]


Processing batch 66/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 67/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 68/474


100%|██████████| 1/1 [00:02<00:00,  2.23s/it]


Processing batch 69/474


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


Processing batch 70/474


100%|██████████| 1/1 [00:02<00:00,  2.13s/it]


Processing batch 71/474


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Processing batch 72/474


100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


Processing batch 73/474


100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


Processing batch 74/474


100%|██████████| 1/1 [00:02<00:00,  2.16s/it]


Processing batch 75/474


100%|██████████| 1/1 [00:02<00:00,  2.87s/it]


Processing batch 76/474


100%|██████████| 1/1 [00:01<00:00,  1.78s/it]


Processing batch 77/474


100%|██████████| 1/1 [00:02<00:00,  2.14s/it]


Processing batch 78/474


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]


Processing batch 79/474


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]


Processing batch 80/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 81/474


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Processing batch 82/474


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


Processing batch 83/474


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Processing batch 84/474


100%|██████████| 1/1 [00:02<00:00,  2.20s/it]


Processing batch 85/474


100%|██████████| 1/1 [00:02<00:00,  2.13s/it]


Processing batch 86/474


100%|██████████| 1/1 [00:01<00:00,  1.91s/it]


Processing batch 87/474


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Processing batch 88/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 89/474


100%|██████████| 1/1 [00:01<00:00,  1.82s/it]


Processing batch 90/474


100%|██████████| 1/1 [00:02<00:00,  2.49s/it]


Processing batch 91/474


100%|██████████| 1/1 [00:02<00:00,  2.45s/it]


Processing batch 92/474


100%|██████████| 1/1 [00:02<00:00,  2.19s/it]


Processing batch 93/474


100%|██████████| 1/1 [00:02<00:00,  2.28s/it]


Processing batch 94/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 95/474


100%|██████████| 1/1 [00:02<00:00,  2.30s/it]


Processing batch 96/474


100%|██████████| 1/1 [00:02<00:00,  2.66s/it]


Processing batch 97/474


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]


Processing batch 98/474


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Processing batch 99/474


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


Processing batch 100/474


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Processing batch 101/474


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]


Processing batch 102/474


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Processing batch 103/474


100%|██████████| 1/1 [00:02<00:00,  2.94s/it]


Processing batch 104/474


100%|██████████| 1/1 [00:02<00:00,  2.40s/it]


Processing batch 105/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 106/474


100%|██████████| 1/1 [00:02<00:00,  2.56s/it]


Processing batch 107/474


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing batch 108/474


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Processing batch 109/474


100%|██████████| 1/1 [00:02<00:00,  2.52s/it]


Processing batch 110/474


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


Processing batch 111/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 112/474


100%|██████████| 1/1 [00:01<00:00,  1.78s/it]


Processing batch 113/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 114/474


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]


Processing batch 115/474


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Processing batch 116/474


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Processing batch 117/474


100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


Processing batch 118/474


100%|██████████| 1/1 [00:02<00:00,  2.26s/it]


Processing batch 119/474


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]


Processing batch 120/474


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]


Processing batch 121/474


100%|██████████| 1/1 [00:02<00:00,  2.18s/it]


Processing batch 122/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 123/474


100%|██████████| 1/1 [00:01<00:00,  1.97s/it]


Processing batch 124/474


100%|██████████| 1/1 [00:01<00:00,  2.00s/it]


Processing batch 125/474


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Processing batch 126/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 127/474


100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


Processing batch 128/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 129/474


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]


Processing batch 130/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 131/474


100%|██████████| 1/1 [00:02<00:00,  2.07s/it]


Processing batch 132/474


100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


Processing batch 133/474


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


Processing batch 134/474


100%|██████████| 1/1 [00:01<00:00,  2.00s/it]


Processing batch 135/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 136/474


100%|██████████| 1/1 [00:02<00:00,  2.15s/it]


Processing batch 137/474


100%|██████████| 1/1 [00:02<00:00,  2.11s/it]


Processing batch 138/474


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]


Processing batch 139/474


100%|██████████| 1/1 [00:02<00:00,  2.32s/it]


Processing batch 140/474


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]


Processing batch 141/474


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]


Processing batch 142/474


100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


Processing batch 143/474


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Processing batch 144/474


100%|██████████| 1/1 [00:02<00:00,  2.05s/it]


Processing batch 145/474


100%|██████████| 1/1 [00:01<00:00,  1.97s/it]


Processing batch 146/474


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]


Processing batch 147/474


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


Processing batch 148/474


100%|██████████| 1/1 [00:01<00:00,  1.91s/it]


Processing batch 149/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 150/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 151/474


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Processing batch 152/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 153/474


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing batch 154/474


100%|██████████| 1/1 [00:01<00:00,  1.78s/it]


Processing batch 155/474


100%|██████████| 1/1 [00:02<00:00,  2.26s/it]


Processing batch 156/474


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


Processing batch 157/474


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


Processing batch 158/474


100%|██████████| 1/1 [00:02<00:00,  2.63s/it]


Processing batch 159/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 160/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 161/474


100%|██████████| 1/1 [00:02<00:00,  2.28s/it]


Processing batch 162/474


100%|██████████| 1/1 [00:02<00:00,  2.16s/it]


Processing batch 163/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 164/474


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


Processing batch 165/474


100%|██████████| 1/1 [00:13<00:00, 13.56s/it]


Processing batch 166/474


100%|██████████| 1/1 [00:02<00:00,  2.24s/it]


Processing batch 167/474


100%|██████████| 1/1 [00:01<00:00,  1.95s/it]


Processing batch 168/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 169/474


100%|██████████| 1/1 [00:02<00:00,  2.29s/it]


Processing batch 170/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 171/474


100%|██████████| 1/1 [00:01<00:00,  1.75s/it]


Processing batch 172/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 173/474


100%|██████████| 1/1 [00:01<00:00,  1.95s/it]


Processing batch 174/474


100%|██████████| 1/1 [00:01<00:00,  1.78s/it]


Processing batch 175/474


100%|██████████| 1/1 [00:02<00:00,  2.13s/it]


Processing batch 176/474


100%|██████████| 1/1 [00:02<00:00,  2.55s/it]


Processing batch 177/474


100%|██████████| 1/1 [00:02<00:00,  2.44s/it]


Processing batch 178/474


100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


Processing batch 179/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 180/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 181/474


100%|██████████| 1/1 [00:02<00:00,  2.32s/it]


Processing batch 182/474


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Processing batch 183/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 184/474


100%|██████████| 1/1 [00:02<00:00,  2.18s/it]


Processing batch 185/474


100%|██████████| 1/1 [00:02<00:00,  2.14s/it]


Processing batch 186/474


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


Processing batch 187/474


100%|██████████| 1/1 [00:02<00:00,  2.58s/it]


Processing batch 188/474


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]


Processing batch 189/474


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


Processing batch 190/474


100%|██████████| 1/1 [00:02<00:00,  2.46s/it]


Processing batch 191/474


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


Processing batch 192/474


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


Processing batch 193/474


100%|██████████| 1/1 [00:02<00:00,  2.76s/it]


Processing batch 194/474


100%|██████████| 1/1 [00:02<00:00,  2.26s/it]


Processing batch 195/474


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Processing batch 196/474


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


Processing batch 197/474


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]


Processing batch 198/474


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]


Processing batch 199/474


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]


Processing batch 200/474


100%|██████████| 1/1 [00:02<00:00,  2.06s/it]


Processing batch 201/474


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Processing batch 202/474


100%|██████████| 1/1 [00:01<00:00,  1.78s/it]


Processing batch 203/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 204/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 205/474


100%|██████████| 1/1 [00:02<00:00,  2.52s/it]


Processing batch 206/474


100%|██████████| 1/1 [00:02<00:00,  2.73s/it]


Processing batch 207/474


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing batch 208/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 209/474


100%|██████████| 1/1 [00:02<00:00,  2.09s/it]


Processing batch 210/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 211/474


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


Processing batch 212/474


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Processing batch 213/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 214/474


100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


Processing batch 215/474


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


Processing batch 216/474


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Processing batch 217/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 218/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 219/474


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


Processing batch 220/474


100%|██████████| 1/1 [00:02<00:00,  2.37s/it]


Processing batch 221/474


100%|██████████| 1/1 [00:01<00:00,  1.68s/it]


Processing batch 222/474


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


Processing batch 223/474


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Processing batch 224/474


100%|██████████| 1/1 [00:02<00:00,  2.23s/it]


Processing batch 225/474


100%|██████████| 1/1 [00:02<00:00,  2.15s/it]


Processing batch 226/474


100%|██████████| 1/1 [00:02<00:00,  2.03s/it]


Processing batch 227/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 228/474


100%|██████████| 1/1 [00:01<00:00,  1.91s/it]


Processing batch 229/474


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


Processing batch 230/474


100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


Processing batch 231/474


100%|██████████| 1/1 [00:02<00:00,  2.11s/it]


Processing batch 232/474


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


Processing batch 233/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 234/474


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Processing batch 235/474


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]


Processing batch 236/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 237/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 238/474


100%|██████████| 1/1 [00:02<00:00,  2.05s/it]


Processing batch 239/474


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


Processing batch 240/474


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing batch 241/474


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]


Processing batch 242/474


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Processing batch 243/474


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


Processing batch 244/474


100%|██████████| 1/1 [00:02<00:00,  2.44s/it]


Processing batch 245/474


100%|██████████| 1/1 [00:02<00:00,  2.05s/it]


Processing batch 246/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 247/474


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Processing batch 248/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 249/474


100%|██████████| 1/1 [00:01<00:00,  1.95s/it]


Processing batch 250/474


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]


Processing batch 251/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 252/474


100%|██████████| 1/1 [00:02<00:00,  2.26s/it]


Processing batch 253/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 254/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 255/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 256/474


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


Processing batch 257/474


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


Processing batch 258/474


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


Processing batch 259/474


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]


Processing batch 260/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 261/474


100%|██████████| 1/1 [00:02<00:00,  2.38s/it]


Processing batch 262/474


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing batch 263/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 264/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 265/474


100%|██████████| 1/1 [00:02<00:00,  2.09s/it]


Processing batch 266/474


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


Processing batch 267/474


100%|██████████| 1/1 [00:02<00:00,  2.38s/it]


Processing batch 268/474


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Processing batch 269/474


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]


Processing batch 270/474


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Processing batch 271/474


100%|██████████| 1/1 [00:02<00:00,  2.23s/it]


Processing batch 272/474


100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


Processing batch 273/474


100%|██████████| 1/1 [00:02<00:00,  2.50s/it]


Processing batch 274/474


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]


Processing batch 275/474


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]


Processing batch 276/474


100%|██████████| 1/1 [00:02<00:00,  2.11s/it]


Processing batch 277/474


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]


Processing batch 278/474


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]


Processing batch 279/474


100%|██████████| 1/1 [00:02<00:00,  2.04s/it]


Processing batch 280/474


100%|██████████| 1/1 [00:02<00:00,  2.35s/it]


Processing batch 281/474


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


Processing batch 282/474


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Processing batch 283/474


100%|██████████| 1/1 [00:01<00:00,  1.95s/it]


Processing batch 284/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 285/474


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]


Processing batch 286/474


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]


Processing batch 287/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 288/474


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


Processing batch 289/474


100%|██████████| 1/1 [00:02<00:00,  2.04s/it]


Processing batch 290/474


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]


Processing batch 291/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 292/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 293/474


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]


Processing batch 294/474


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]


Processing batch 295/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 296/474


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]


Processing batch 297/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 298/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 299/474


100%|██████████| 1/1 [00:02<00:00,  2.03s/it]


Processing batch 300/474


100%|██████████| 1/1 [00:01<00:00,  1.67s/it]


Processing batch 301/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 302/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 303/474


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]


Processing batch 304/474


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


Processing batch 305/474


100%|██████████| 1/1 [00:01<00:00,  1.95s/it]


Processing batch 306/474


100%|██████████| 1/1 [00:02<00:00,  2.31s/it]


Processing batch 307/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 308/474


100%|██████████| 1/1 [00:01<00:00,  1.99s/it]


Processing batch 309/474


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


Processing batch 310/474


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


Processing batch 311/474


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]


Processing batch 312/474


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


Processing batch 313/474


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Processing batch 314/474


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


Processing batch 315/474


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Processing batch 316/474


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


Processing batch 317/474


100%|██████████| 1/1 [00:02<00:00,  2.05s/it]


Processing batch 318/474


100%|██████████| 1/1 [00:02<00:00,  2.92s/it]


Processing batch 319/474


100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


Processing batch 320/474


100%|██████████| 1/1 [00:03<00:00,  3.45s/it]


Processing batch 321/474


100%|██████████| 1/1 [00:02<00:00,  2.36s/it]


Processing batch 322/474


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


Processing batch 323/474


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Processing batch 324/474


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]


Processing batch 325/474


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Processing batch 326/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 327/474


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


Processing batch 328/474


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


Processing batch 329/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 330/474


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


Processing batch 331/474


100%|██████████| 1/1 [00:02<00:00,  2.03s/it]


Processing batch 332/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 333/474


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Processing batch 334/474


100%|██████████| 1/1 [00:01<00:00,  1.75s/it]


Processing batch 335/474


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]


Processing batch 336/474


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


Processing batch 337/474


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]


Processing batch 338/474


100%|██████████| 1/1 [00:02<00:00,  2.18s/it]


Processing batch 339/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 340/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 341/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 342/474


100%|██████████| 1/1 [00:02<00:00,  2.16s/it]


Processing batch 343/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 344/474


100%|██████████| 1/1 [00:02<00:00,  2.43s/it]


Processing batch 345/474


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Processing batch 346/474


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]


Processing batch 347/474


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing batch 348/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 349/474


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]


Processing batch 350/474


100%|██████████| 1/1 [00:01<00:00,  1.91s/it]


Processing batch 351/474


100%|██████████| 1/1 [00:02<00:00,  2.26s/it]


Processing batch 352/474


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Processing batch 353/474


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]


Processing batch 354/474


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]


Processing batch 355/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 356/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 357/474


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


Processing batch 358/474


100%|██████████| 1/1 [00:02<00:00,  2.74s/it]


Processing batch 359/474


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]


Processing batch 360/474


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing batch 361/474


100%|██████████| 1/1 [00:01<00:00,  1.68s/it]


Processing batch 362/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 363/474


100%|██████████| 1/1 [00:02<00:00,  2.82s/it]


Processing batch 364/474


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]


Processing batch 365/474


100%|██████████| 1/1 [00:01<00:00,  1.81s/it]


Processing batch 366/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 367/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 368/474


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]


Processing batch 369/474


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


Processing batch 370/474


100%|██████████| 1/1 [00:02<00:00,  2.28s/it]


Processing batch 371/474


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]


Processing batch 372/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 373/474


100%|██████████| 1/1 [00:01<00:00,  1.54s/it]


Processing batch 374/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 375/474


100%|██████████| 1/1 [00:02<00:00,  2.23s/it]


Processing batch 376/474


100%|██████████| 1/1 [00:01<00:00,  1.82s/it]


Processing batch 377/474


100%|██████████| 1/1 [00:01<00:00,  1.70s/it]


Processing batch 378/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 379/474


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Processing batch 380/474


100%|██████████| 1/1 [00:01<00:00,  1.97s/it]


Processing batch 381/474


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


Processing batch 382/474


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]


Processing batch 383/474


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Processing batch 384/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 385/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 386/474


100%|██████████| 1/1 [00:01<00:00,  1.99s/it]


Processing batch 387/474


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Processing batch 388/474


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


Processing batch 389/474


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Processing batch 390/474


100%|██████████| 1/1 [00:02<00:00,  2.19s/it]


Processing batch 391/474


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Processing batch 392/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 393/474


100%|██████████| 1/1 [00:03<00:00,  3.66s/it]


Processing batch 394/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 395/474


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


Processing batch 396/474


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


Processing batch 397/474


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


Processing batch 398/474


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]


Processing batch 399/474


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]


Processing batch 400/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 401/474


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Processing batch 402/474


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Processing batch 403/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 404/474


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Processing batch 405/474


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]


Processing batch 406/474


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


Processing batch 407/474


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Processing batch 408/474


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]


Processing batch 409/474


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]


Processing batch 410/474


100%|██████████| 1/1 [00:02<00:00,  2.44s/it]


Processing batch 411/474


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]


Processing batch 412/474


100%|██████████| 1/1 [00:02<00:00,  2.42s/it]


Processing batch 413/474


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]


Processing batch 414/474


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


Processing batch 415/474


100%|██████████| 1/1 [00:02<00:00,  2.33s/it]


Processing batch 416/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 417/474


100%|██████████| 1/1 [00:01<00:00,  1.54s/it]


Processing batch 418/474


100%|██████████| 1/1 [00:01<00:00,  1.97s/it]


Processing batch 419/474


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


Processing batch 420/474


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]


Processing batch 421/474


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]


Processing batch 422/474


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]


Processing batch 423/474


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]


Processing batch 424/474


100%|██████████| 1/1 [00:02<00:00,  2.34s/it]


Processing batch 425/474


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Processing batch 426/474


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]


Processing batch 427/474


100%|██████████| 1/1 [00:02<00:00,  2.41s/it]


Processing batch 428/474


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Processing batch 429/474


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Processing batch 430/474


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


Processing batch 431/474


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]


Processing batch 432/474


100%|██████████| 1/1 [00:02<00:00,  2.04s/it]


Processing batch 433/474


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


Processing batch 434/474


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Processing batch 435/474


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]


Processing batch 436/474


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


Processing batch 437/474


100%|██████████| 1/1 [00:02<00:00,  2.15s/it]


Processing batch 438/474


100%|██████████| 1/1 [00:01<00:00,  1.96s/it]


Processing batch 439/474


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


Processing batch 440/474


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Processing batch 441/474


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


Processing batch 442/474


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


Processing batch 443/474


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


Processing batch 444/474


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


Processing batch 445/474


100%|██████████| 1/1 [00:01<00:00,  1.99s/it]


Processing batch 446/474


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]


Processing batch 447/474


100%|██████████| 1/1 [00:02<00:00,  2.16s/it]


Processing batch 448/474


100%|██████████| 1/1 [00:02<00:00,  2.15s/it]


Processing batch 449/474


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Processing batch 450/474


100%|██████████| 1/1 [00:02<00:00,  2.18s/it]


Processing batch 451/474


100%|██████████| 1/1 [00:02<00:00,  2.12s/it]


Processing batch 452/474


100%|██████████| 1/1 [00:02<00:00,  2.14s/it]


Processing batch 453/474


100%|██████████| 1/1 [00:02<00:00,  2.05s/it]


Processing batch 454/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Processing batch 455/474


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]


Processing batch 456/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 457/474


100%|██████████| 1/1 [00:02<00:00,  2.13s/it]


Processing batch 458/474


100%|██████████| 1/1 [00:01<00:00,  1.95s/it]


Processing batch 459/474


100%|██████████| 1/1 [00:01<00:00,  1.93s/it]


Processing batch 460/474


100%|██████████| 1/1 [00:02<00:00,  2.06s/it]


Processing batch 461/474


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


Processing batch 462/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 463/474


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]


Processing batch 464/474


100%|██████████| 1/1 [00:01<00:00,  1.99s/it]


Processing batch 465/474


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


Processing batch 466/474


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]


Processing batch 467/474


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Processing batch 468/474


100%|██████████| 1/1 [00:02<00:00,  2.01s/it]


Processing batch 469/474


100%|██████████| 1/1 [00:01<00:00,  2.00s/it]


Processing batch 470/474


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Processing batch 471/474


100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


Processing batch 472/474


100%|██████████| 1/1 [00:02<00:00,  2.24s/it]


Processing batch 473/474


100%|██████████| 1/1 [00:01<00:00,  1.83s/it]


Saved index to shiny_hype_index
Indexed 473 documents with 2365 hypothetical prompts


In [31]:
    
# Restore the searcher from the "shiny_hype_index" directory written earlier.
hype = ShinyDocHyPE.load("shiny_hype_index", api_key=api_key, verbose=True)

# Ask one sample question, passing top_k=3 to answer_question.
query = "Tell me about Chatbot"
result = hype.answer_question(query, top_k=3)

# Echo the question and the generated answer.
print(f"\nQuery: {result['query']}")
print(f"\nAnswer: {result['answer']}")

# List each matched prompt with a 1-based rank and its document title.
print("\nMatched prompts:")
for rank, hit in enumerate(result['prompt_matches'], start=1):
    print(f"\n{rank}. Document: {hit['document_title']}")
    print(f"   Matched prompt: \"{hit['matched_prompt']}\"")

Loaded index from shiny_hype_index with 473 documents
Error getting embeddings for batch 0:5: Error code: 401 - {'error': {'message': 'Incorrect API key provided: <your_op*********key>. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
Error getting embedding for single text: Error code: 401 - {'error': {'message': 'Incorrect API key provided: <your_op*********key>. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: <your_op*********key>. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [11]:
# Load from saved index
hype = ShinyDocHyPE.load("shiny_hype_index", api_key=api_key, verbose=True)

# Test with a query
query = "How do I create a reactive UI in Shiny?"
result = hype.answer_question(query, top_k=3)

# Documents returned alongside the prompt matches, if the result has any.
# The same 'documents' key is used for both the ID and the content preview;
# the ID lookup previously tested a misspelled 'document' key, so it fell
# back to "Unknown" even when the document was available.
# NOTE(review): assumes result['documents'][i] corresponds to
# result['prompt_matches'][i] — confirm against answer_question.
documents = result['documents'] if 'documents' in result else []

# Print results
print(f"\nQuery: {result['query']}")
print(f"\nAnswer: {result['answer']}")
print("\nMatched documents and prompts:")
for i, match in enumerate(result['prompt_matches']):
    # Document at the same position as this match, or None when the
    # documents list is shorter than the list of matches.
    document = documents[i] if i < len(documents) else None

    print(f"\n{i+1}. Document: {match['document_title']}")
    print(f"   Document URL: {match['document_url']}")

    # The document ID comes from the document's 'id' field (not the URL).
    doc_id = "Unknown"
    if document is not None:
        doc_id = document.get('id', 'Unknown')

    print(f"   Document ID: {doc_id}")
    print(f"   Matched prompt: \"{match['matched_prompt']}\"")
    print(f"   Score: {match['score']:.4f}")

    # Print snippet of document content
    if document is not None:
        content = document.get('content', '')
        # Show at most the first 150 characters, with an ellipsis only
        # when the content was actually truncated.
        content_preview = (content[:150] + "...") if len(content) > 150 else content
        print(f"   Content preview: \"{content_preview}\"")

    print("\n   All hypothetical prompts for this document:")
    for j, prompt in enumerate(match['all_prompts']):
        print(f"     {j+1}. {prompt}")

Loaded index from shiny_hype_index with 473 documents

Query: How do I create a reactive UI in Shiny?

Answer: To create a reactive UI in Shiny, you can use the `@render.express` decorator along with `ui.hold()` for reactive rendering. Here's a step-by-step guide based on the provided Shiny documentation:

1. Define a function that generates the UI elements you want to display reactively.
2. Decorate the function with `@render.express` to make it reactively render the UI.
3. Use `ui.hold()` to collect the UI code into a variable within the function.

Here's an example code snippet demonstrating how to create a reactive UI in Shiny using `@render.express` and `ui.hold()`:

```python
from shiny.express import ui, render

# Define the UI function with @render.express decorator
@render.express
def reactive_ui():
    with ui.card(class_="mt-3"):
        ui.h3("Socrates")
        "470-399 BC"

# Call the UI function to display the reactive UI
reactive_ui()
```

By following these steps and u