In [None]:
!pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain pypdf gradio

In [4]:
# Configure LangSmith tracing from the environment.
#
# The API key is a secret and must never be stored in the notebook: export
# LANGCHAIN_API_KEY in the shell that starts Jupyter (or load it from a
# secrets manager) before running this cell.
# NOTE(review): a key was previously committed in this cell; treat it as
# compromised and rotate it.
import os

# Keep an endpoint the user already configured; otherwise use the public one.
os.environ.setdefault('LANGCHAIN_ENDPOINT', 'https://api.smith.langchain.com')

# Only turn tracing on when there is a key to authenticate with.
if os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_TRACING_V2'] = 'true'
else:
    os.environ['LANGCHAIN_TRACING_V2'] = 'false'
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing is disabled.")

In [9]:
import gradio as gr
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

class InsuranceAnalyzer:
    """Retrieval-based question answering over a set of insurer 10-K PDFs.

    On construction the filings are loaded, split into chunks, embedded into a
    Chroma vector store and wrapped in a ``RetrievalQA`` chain. Questions are
    then answered through :meth:`process_question`.
    """

    # Company name -> path of that company's 10-K PDF. Used by
    # `load_documents` when no explicit mapping is supplied.
    DEFAULT_FILINGS = {
        "Travelers": "./10KFilings/travelers_10k_2024.pdf",
        "Progressive": "./10KFilings/progressive_10k_2024.pdf",
        "Chubb": "./10KFilings/chubb_10k_2024.pdf",
        "Allstate": "./10KFilings/allstate_10k_2024.pdf"
    }

    def __init__(self, api_key=None):
        """Initialize the insurance analysis system with LangChain components.

        Args:
            api_key: Optional OpenAI API key. When given it is exported as
                ``OPENAI_API_KEY`` for this process. When omitted, the key
                must already be present in the environment.

        Raises:
            RuntimeError: If no OpenAI API key is available from either the
                argument or the environment.
        """
        # Secrets are never stored in the notebook: take the key from the
        # caller or from the environment only.
        if api_key:
            os.environ['OPENAI_API_KEY'] = api_key
        if not os.environ.get('OPENAI_API_KEY'):
            raise RuntimeError(
                "OPENAI_API_KEY is not set. Export it before starting Jupyter, "
                "or pass api_key=... (e.g. read with getpass.getpass()) when "
                "creating InsuranceAnalyzer."
            )

        # Initialize embedding model and text splitter
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        self.vectorstore = None
        self.qa_chain = None

        # Load documents on initialization. A failure is reported by
        # `load_documents` itself and leaves `qa_chain` as None, which
        # `process_question` checks for.
        self.load_documents()

    def load_documents(self, filings=None):
        """Load and process insurance company filings.

        Args:
            filings: Optional mapping of company name to PDF path. Defaults to
                ``DEFAULT_FILINGS``.

        Returns:
            bool: True when the vector store and QA chain were built, False
            when any step raised (the error is printed, not re-raised).
        """
        if filings is None:
            filings = self.DEFAULT_FILINGS

        try:
            documents = []
            for company, path in filings.items():
                pages = PyPDFLoader(path).load()
                chunks = self.text_splitter.split_documents(pages)

                # Tag every chunk with the company it came from
                for chunk in chunks:
                    chunk.metadata['company'] = company

                documents.extend(chunks)

            # Create vector store and QA chain
            self.vectorstore = Chroma.from_documents(documents, self.embeddings)

            llm = ChatOpenAI(temperature=0)
            self.qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=self.vectorstore.as_retriever()
            )
            return True
        except Exception as e:
            print(f"Error loading documents: {str(e)}")
            return False

    def process_question(self, question):
        """Process a question about the insurance filings.

        Args:
            question: The user's question; it is embedded in a fixed
                instruction prompt before being sent to the QA chain.

        Returns:
            str: The chain's answer, a fixed "not initialized" message when no
            chain has been built, or an error message if the chain raised.
        """
        if not self.qa_chain:
            return "System is not initialized. Please try again."

        try:
            # Create a prompt that encourages detailed answers
            prompt = f"""
            Question: {question}
            
            Please provide a detailed answer based on the 10-K filings. 
            If the question mentions specific companies, focus on those companies.
            If no companies are specified, consider information from all companies.
            Include relevant quotes or data points from the filings when possible.
            If you don't have the information the topic the question mention, suggest the user to add financial statement from prior years
            """

            # `Chain.run` is deprecated; `invoke` takes the chain's input key
            # ("query") and returns a dict whose "result" entry is the answer.
            response = self.qa_chain.invoke({"query": prompt})
            return response["result"]
        except Exception as e:
            return f"Error processing question: {str(e)}"

# Initialize the analyzer
# Module-level instance used by the Gradio callback. Constructing it runs
# InsuranceAnalyzer.__init__, which calls load_documents() immediately.
analyzer = InsuranceAnalyzer()

def analyze_insurance_query(question):
    """Gradio interface function to process insurance queries.

    Args:
        question: Text entered by the user. ``None`` and empty or
            whitespace-only values are treated as "no question".

    Returns:
        str: A request to enter a question when the input is empty, otherwise
        whatever ``analyzer.process_question`` returns.
    """
    # Check for None/empty before calling `.strip()` so a cleared input does
    # not raise AttributeError inside the callback.
    if not question or not question.strip():
        return "Please enter a question."
    return analyzer.process_question(question)

# Assemble the Gradio app: one multi-line question box feeding
# analyze_insurance_query, one multi-line box for the answer, and three
# clickable example questions.
demo = gr.Interface(
    title="Insurance Competitor Analysis",
    description=(
        "Analyze competitors' financial statements and strategic initiatives.\n"
        "    Available companies: Travelers, Progressive, Chubb, Allstate"
    ),
    fn=analyze_insurance_query,
    inputs=[
        gr.Textbox(
            lines=3,
            label="Ask a question about insurance companies",
            placeholder="E.g., What are the biggest strategic initiatives for Allstate?",
        ),
    ],
    outputs=[
        gr.Textbox(lines=10, label="Analysis Result"),
    ],
    examples=[
        ["What are the causes driving the largest amount of losses across all carriers?"],
        ["What are the biggest strategic initiatives for Allstate?"],
        ["Compare Traveler's Strategic initiatives with Chubb's"],
    ],
    theme=gr.themes.Soft(),
)

# Start serving the interface
demo.launch()

* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.


