In [None]:
!pip install langchain chromadb sentence-transformers PyPDF2 streamlit pyngrok --quiet


In [None]:
import os
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.llms import Groq
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from pyngrok import ngrok


In [None]:
# Groq credentials.
# Either replace the placeholder below with your own key, or leave it as-is
# and export GROQ_API_KEY before starting the kernel / Streamlit process.
groq_api_key = "<YOUR_GROQ_API_KEY>"

# The value still looks like the "<...>" template if nobody filled it in.
groq_key_is_placeholder = groq_api_key.startswith("<") and groq_api_key.endswith(">")

# Write to the environment when a real key was filled in, or when nothing is
# set yet (original behaviour). An unedited placeholder must never overwrite a
# real key that is already exported.
if not groq_key_is_placeholder or "GROQ_API_KEY" not in os.environ:
    os.environ["GROQ_API_KEY"] = groq_api_key

# NOTE(review): `langchain.llms` may not export `Groq`, and the model id below
# does not look like a Groq-hosted model name — confirm both against the
# LangChain/Groq integration docs before relying on this cell.
llm = Groq(model="groq-gpt-3.5-turbo", api_key=os.getenv("GROQ_API_KEY"))


In [None]:
# Embedding model, selected by name ("all-MiniLM-L6-v2"); handed to the
# vector store below as its embedding function.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Chroma vector store that persists to the local "db" directory.
vectorstore = Chroma(
    embedding_function=embedding_model,
    persist_directory="db",
)


In [None]:
def _load_uploaded_pdf(uploaded_file):
    """Turn one Streamlit upload into LangChain documents.

    ``PyPDFLoader`` expects a path on disk, whereas ``st.file_uploader`` yields
    an in-memory file object, so the bytes are written to a temporary ``.pdf``
    file that is removed again once loading finishes.

    Args:
        uploaded_file: One item returned by ``st.file_uploader``.

    Returns:
        The documents returned by ``PyPDFLoader.load()``, with their
        ``source`` metadata set to the uploaded file's name instead of the
        (already deleted) temporary path.
    """
    import tempfile  # local import so this block does not depend on the import cell

    # delete=False: the file must be closed before another reader opens it
    # (required on Windows), so cleanup is done manually in `finally`.
    tmp_file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        with tmp_file:
            tmp_file.write(uploaded_file.getvalue())
        documents = PyPDFLoader(tmp_file.name).load()
    finally:
        os.remove(tmp_file.name)

    for document in documents:
        document.metadata["source"] = uploaded_file.name
    return documents


def main():
    """Render the chatbot page: ingest uploaded PDFs, then answer a question.

    Uploaded PDFs are split into 500-character chunks (50 overlap) and added to
    the module-level ``vectorstore``. A ``RetrievalQA`` "stuff" chain over the
    top-3 retrieved chunks answers whatever is typed into the question box.

    Streamlit re-executes the script on every widget interaction, so files that
    were already ingested in this session are remembered in
    ``st.session_state`` and skipped; otherwise every question asked would add
    the same chunks to the store again.
    """
    st.title("RAG PDF Chatbot")

    # PDF upload
    uploaded_files = st.file_uploader("Upload PDF(s)", type="pdf", accept_multiple_files=True)
    if uploaded_files:
        if "ingested_pdfs" not in st.session_state:
            st.session_state["ingested_pdfs"] = set()
        ingested_pdfs = st.session_state["ingested_pdfs"]

        # The splitter is loop-invariant, so build it once.
        text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)

        for uploaded_file in uploaded_files:
            # Name + size identifies an upload well enough to skip repeats on rerun.
            file_key = (uploaded_file.name, uploaded_file.size)
            if file_key in ingested_pdfs:
                continue

            texts = text_splitter.split_documents(_load_uploaded_pdf(uploaded_file))

            # A PDF with no extractable text yields no chunks; don't hand the
            # store an empty batch.
            if texts:
                vectorstore.add_documents(texts)
                vectorstore.persist()
            ingested_pdfs.add(file_key)
        st.success("PDF(s) processed and added to vector database!")

    # Create retriever & QA chain
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    # Query input
    user_input = st.text_input("Ask a question:")
    if user_input:
        response = qa_chain.run(user_input)
        st.write(response)

if __name__ == "__main__":
    main()


In [None]:
!streamlit run app.py &>/dev/null&
public_url = ngrok.connect(port="8501")
public_url
