# In [None]:
import os
import PyPDF2
import docx
import openai
import time
from typing import List, Dict
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

class BrownEmulator:
    """Retrieval-augmented question answering over a folder of documents.

    Text is extracted from the PDF, DOCX and TXT files found in
    ``folder_path``, split into chunks, embedded and stored in a Chroma
    vector store.  ``query`` retrieves the chunks most similar to a question
    and sends them, together with the question, to the chat model.
    """

    def __init__(self, api_key: str, folder_path: str):
        """Store the settings and configure the ``openai`` module.

        Args:
            api_key: OpenAI API key. It is also assigned to the module-level
                ``openai.api_key``, so it applies process-wide.
            folder_path: Directory that is scanned for source documents.
        """
        self.api_key = api_key
        self.folder_path = folder_path
        openai.api_key = api_key
        # Populated by create_index(); None means "no index built yet".
        self.vectorstore = None

    def extract_from_pdf(self, file_path: str) -> str:
        """Return the text of every page of a PDF, joined by single spaces.

        Any failure is reported on stdout and an empty string is returned so
        that one unreadable file does not abort a whole folder scan.
        """
        try:
            with open(file_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                # Guard against a page yielding None instead of a string,
                # which would otherwise make join() fail and discard the
                # text of the entire document.
                return ' '.join(
                    page.extract_text() or "" for page in reader.pages
                )
        except Exception as e:
            print(f"Error processing PDF {file_path}: {e}")
            return ""

    def extract_from_docx(self, file_path: str) -> str:
        """Return the text of every paragraph of a DOCX, joined by spaces.

        Any failure is reported on stdout and an empty string is returned.
        """
        try:
            doc = docx.Document(file_path)
            return ' '.join(paragraph.text for paragraph in doc.paragraphs)
        except Exception as e:
            print(f"Error processing DOCX {file_path}: {e}")
            return ""

    def extract_from_txt(self, file_path: str) -> str:
        """Return the contents of a text file decoded as UTF-8.

        Any failure (missing file, decoding error, ...) is reported on stdout
        and an empty string is returned.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except Exception as e:
            print(f"Error processing TXT {file_path}: {e}")
            return ""

    def extract_all_texts(self) -> str:
        """Extract and concatenate the text of all supported files.

        Files are selected by extension (case-insensitive): ``.pdf``,
        ``.docx`` and ``.txt``.  Everything else in the folder is ignored.

        Returns:
            The extracted texts, each preceded by a newline, in file-name
            order.  An empty string when no supported file is present.
        """
        file_handlers = {
            '.pdf': self.extract_from_pdf,
            '.docx': self.extract_from_docx,
            '.txt': self.extract_from_txt
        }

        pieces = []
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # combined text (and therefore the chunks) reproducible across runs.
        for filename in sorted(os.listdir(self.folder_path)):
            ext = os.path.splitext(filename)[1].lower()
            handler = file_handlers.get(ext)
            if handler is None:
                continue

            print(f"Processing {filename}...")
            text = handler(os.path.join(self.folder_path, filename))
            pieces.append(f"\n{text}")

        return "".join(pieces)

    def create_index(self, chunk_size: int = 1000, chunk_overlap: int = 100) -> None:
        """Build the vector store from the documents in ``folder_path``.

        Args:
            chunk_size: Chunk size passed to the text splitter.
            chunk_overlap: Chunk overlap passed to the text splitter.

        Raises:
            ValueError: If no text could be extracted from the folder.
            Exception: Any error raised while splitting, embedding or
                storing is reported on stdout and re-raised unchanged.
        """
        try:
            text = self.extract_all_texts()
            if not text.strip():
                # Fail early with an actionable message instead of handing
                # an empty document set to the embedding/vector-store layer.
                raise ValueError(
                    f"No text could be extracted from {self.folder_path!r}"
                )

            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap
            )
            docs = text_splitter.create_documents([text])

            # Hand the key over explicitly so the embeddings client uses the
            # key given to this instance rather than relying on the
            # OPENAI_API_KEY environment variable being set.
            embeddings = OpenAIEmbeddings(
                model="text-embedding-ada-002",
                openai_api_key=self.api_key
            )
            self.vectorstore = Chroma.from_documents(docs, embeddings)
            print("Index created successfully!")

        except Exception as e:
            print(f"Error creating index: {e}")
            raise

    def get_context(self, query: str, top_k: int = 3) -> str:
        """Return the ``top_k`` most similar chunks joined by newlines.

        Raises:
            ValueError: If ``create_index()`` has not been called yet.
        """
        if self.vectorstore is None:
            raise ValueError("Please create an index first using create_index()")

        docs = self.vectorstore.similarity_search(query, k=top_k)
        return "\n".join(doc.page_content for doc in docs)

    def query(self, user_query: str, max_retries: int = 3) -> str:
        """Answer ``user_query`` using retrieved context and the chat model.

        Args:
            user_query: The question to answer.
            max_retries: Maximum number of attempts when the API reports a
                rate limit.  Values below 1 are treated as 1 so that a
                request is always made.

        Returns:
            The model's answer, or a string starting with
            ``"Error generating response: "`` if anything goes wrong
            (missing index, API failure, rate limit on the final attempt).
        """
        try:
            context = self.get_context(user_query)
            
            prompt = f"""
            You are emulating Jonathan A. C. Brown, a scholar of Islamic thought and Hadith studies. 
            Use the following context from Brown's writings to inform your response.
            
            Context:
            {context}
            
            Question:
            {user_query}
            
            Respond in Brown's academic style, drawing from the provided context and your understanding 
            of Islamic scholarship. Include relevant technical terms and conceptual frameworks when appropriate.
            """

            # Without this floor, max_retries <= 0 would skip the loop and
            # the method would fall through returning None.
            attempts = max(1, max_retries)
            for attempt in range(attempts):
                try:
                    response = openai.ChatCompletion.create(
                        model="gpt-4",
                        messages=[
                            {"role": "system", "content": prompt}
                        ],
                        temperature=0.7
                    )
                    return response.choices[0].message.content

                except openai.error.RateLimitError:
                    if attempt == attempts - 1:
                        # Out of attempts: let the outer handler report it.
                        raise
                    # Linear backoff: wait 20s, 40s, 60s, ... between tries.
                    time.sleep(20 * (attempt + 1))

        except Exception as e:
            return f"Error generating response: {e}"

# Usage example:
if __name__ == "__main__":
    # Initialize the emulator.
    # The key is read from the OPENAI_API_KEY environment variable when it is
    # set, so a real secret never has to be written into this file; the
    # placeholder below is only a fallback that must be replaced otherwise.
    emulator = BrownEmulator(
        api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"),
        folder_path="path/to/your/documents"
    )

    # Create the index (extracts, chunks and embeds every supported document)
    emulator.create_index()

    # Example query; query() returns either the answer or an error string
    response = emulator.query("What is Brown's perspective on the prevalence of matn criticism in the Sunni tradition prior to the canonization of Bukhari and Muslim's Sahihayn?")
    print(response)