In [None]:
import streamlit as st
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
import chromadb
import re

# API Key Setup
# The key is read from a local `api_key` module (not part of this file) and
# exported into this process's environment under OPENAI_API_KEY.
# NOTE(review): presumably this is so OpenAI client libraries that look up the
# environment variable can find the key -- confirm; the key is also passed
# explicitly to MyApp further down.
from api_key import OPENAI_API_KEY
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY


class MyApp:
    """State and helpers for a single document Q&A session.

    Attributes set in ``__init__``:
        OPENAI_API_KEY: key handed to the embedding model and the chat model.
        chain: the retrieval chain; ``None`` until a caller assigns one.
        chat_history: list the caller fills with (question, answer) pairs.
        chroma_client: a ``chromadb.Client()`` instance; it is created here but
            not referenced by the methods of this class.
        count: integer counter initialised to 0 for the caller to maintain.
    """

    # Length bounds Chroma enforces on collection names.
    _MIN_COLLECTION_NAME_LEN = 3
    _MAX_COLLECTION_NAME_LEN = 63

    def __init__(self, openai_api_key: str = None) -> None:
        self.OPENAI_API_KEY = openai_api_key
        self.chain = None
        self.chat_history = []
        self.chroma_client = chromadb.Client()
        self.count = 0

    @classmethod
    def _sanitize_collection_name(cls, name: str) -> str:
        """Turn an arbitrary file name into a valid Chroma collection name.

        Chroma requires 3-63 characters drawn from ``[A-Za-z0-9._-]``, an
        alphanumeric first and last character, no ``..`` sequence, and a name
        that is not an IPv4 address. Names that already satisfy these rules
        are returned unchanged.
        """
        cleaned = re.sub(r"[^A-Za-z0-9._-]", "_", name or "")
        cleaned = re.sub(r"\.{2,}", ".", cleaned)
        cleaned = cleaned[:cls._MAX_COLLECTION_NAME_LEN]
        # Strip after truncating so the cut cannot leave a trailing separator.
        cleaned = re.sub(r"^[^A-Za-z0-9]+|[^A-Za-z0-9]+$", "", cleaned)
        if re.fullmatch(r"\d{1,3}(?:\.\d{1,3}){3}", cleaned):
            cleaned = cleaned.replace(".", "_")
        if len(cleaned) < cls._MIN_COLLECTION_NAME_LEN:
            cleaned = f"doc_{cleaned}" if cleaned else "doc"
        return cleaned

    def process_file(self, file_path: str):
        """
        Load and process the PDF file. Modify this to support other file formats if needed.

        Args:
            file_path: path of the PDF to load.

        Returns:
            A ``(documents, file_name)`` tuple: whatever ``PyPDFLoader.load()``
            returns, and the base name of ``file_path``.
        """
        loader = PyPDFLoader(file_path)
        documents = loader.load()
        file_name = os.path.basename(file_path)
        return documents, file_name

    def build_chain(self, documents, file_name):
        """
        Build the retrieval chain for semantic search and Q&A.

        Args:
            documents: documents to embed and index (as returned by
                ``process_file``).
            file_name: name used to derive the Chroma collection name; it is
                sanitised first because raw file names often contain
                characters Chroma rejects.

        Returns:
            A ``ConversationalRetrievalChain`` whose retriever fetches the top
            3 matches and which returns its source documents with each answer.
        """
        embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
        collection_name = self._sanitize_collection_name(file_name)
        pdf_search = Chroma.from_documents(
            documents, embeddings, collection_name=collection_name
        )
        chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
            retriever=pdf_search.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True,
        )
        return chain


# Streamlit App
st.title("📚 Generalized Conversational AI Assistant")
st.subheader("Upload a document and ask your questions!")

# Streamlit re-executes this script from the top on every widget interaction.
# Keeping the MyApp instance in session_state lets the chain and the chat
# history survive those reruns, and guarantees `app` exists before any widget
# below (including the clear button) refers to it.
if "app" not in st.session_state:
    st.session_state["app"] = MyApp(OPENAI_API_KEY)
app = st.session_state["app"]

# File Upload
uploaded_file = st.file_uploader("Upload your document (PDF format only):", type=["pdf"])
if uploaded_file:
    # Only re-index when a different file arrives; otherwise every query would
    # re-embed the whole document.
    file_key = (uploaded_file.name, uploaded_file.size)
    if app.chain is None or st.session_state.get("indexed_file") != file_key:
        os.makedirs("uploads", exist_ok=True)
        # basename() keeps a crafted file name from escaping the uploads directory.
        file_path = os.path.join("uploads", os.path.basename(uploaded_file.name))

        # Save the uploaded file temporarily
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Process the file and build the chain
        documents, file_name = app.process_file(file_path)
        app.chain = app.build_chain(documents, file_name)
        # History from a previous document does not apply to the new one.
        app.chat_history = []
        app.count += 1
        st.session_state["indexed_file"] = file_key

    # User Query
    user_query = st.text_input("Enter your query:")
    if user_query:
        result = app.chain(
            {"question": user_query, "chat_history": app.chat_history}, return_only_outputs=True
        )
        app.chat_history.append((user_query, result["answer"]))
        st.write(f"Response: {result['answer']}")
        if result["source_documents"]:
            st.write("Source(s):")
            for doc in result["source_documents"]:
                st.write(f"- Page {doc.metadata['page']}: {doc.metadata['source']}")

# Cleanup (Optional)
if st.button("Clear Chat History"):
    app.chat_history = []
    st.write("Chat history cleared.")
