In [None]:
!pip install langchain langchain-community langchainhub chromadb pypdf sentence-transformers langchain-groq


In [None]:
import os
import uuid
from getpass import getpass

from google.colab import files
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq

In [None]:
# Never store API keys in the notebook: they end up in version control and in
# shared copies. Any key that was previously hard-coded here should be treated
# as compromised and revoked.
# Reuse a key already present in the environment; otherwise prompt for it
# without echoing the value into the cell output.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your GROQ API key: ")

In [None]:
def load_and_process_document():
    """
    Upload a PDF or TXT file and build the retrieval Q&A chain for it.

    Prompts for an upload through Colab's file picker, splits the document
    into overlapping chunks, embeds the chunks into a Chroma collection and
    connects a Groq chat model to a retriever over that collection. The
    resulting chain is stored in the module-level ``qa_chain``.

    On any failure the error is printed and the upload prompt is shown again;
    the function only returns once a document has been processed.

    Returns:
        None. The chain is published through the global ``qa_chain``.
    """
    global qa_chain

    while True:
        try:
            print("📤 Please upload a .pdf or .txt file:")
            uploaded = files.upload()
            # An empty result (presumably a cancelled upload — confirm) would
            # otherwise surface as an opaque "list index out of range".
            if not uploaded:
                raise ValueError("❌ No file was uploaded.")
            # Only the first uploaded file is used.
            file_path = next(iter(uploaded))
            print(f"📁 Uploaded: {file_path}")

            # Validate file extension (case-insensitive, so "REPORT.PDF" works)
            extension = os.path.splitext(file_path)[1].lower()
            if extension == ".pdf":
                loader = PyPDFLoader(file_path)
            elif extension == ".txt":
                loader = TextLoader(file_path)
            else:
                raise ValueError("❌ Invalid file type. Only .pdf and .txt are supported.")

            # Load and chunk
            documents = loader.load()
            splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
            chunks = splitter.split_documents(documents)
            if not chunks:
                # e.g. an empty file or a PDF without a text layer; there is
                # nothing to embed, so fail with a clear message.
                raise ValueError("❌ No text could be extracted from this file.")
            print(f"📄 Loaded and split into {len(chunks)} chunks.")

            # Embed
            embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
            # Use a fresh collection per upload. With the default collection
            # name, every upload is appended to the same collection in ./db,
            # so answers would mix in chunks from previously loaded documents.
            vectordb = Chroma.from_documents(
                chunks,
                embedding=embedding_model,
                collection_name=f"doc-{uuid.uuid4().hex}",
                persist_directory="./db",
            )
            retriever = vectordb.as_retriever()

            # LLM (reads GROQ_API_KEY from the environment)
            llm = ChatGroq(model="llama3-70b-8192")

            # Chain: "stuff" places all retrieved chunks into a single prompt.
            qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=retriever,
                return_source_documents=True
            )

            print("✅ Document processed and ready for Q&A.")
            break  # exit loop on success

        except ValueError as ve:
            print(str(ve))
            print("🔁 Let's try uploading again...\n")
        except Exception as e:
            # Best-effort retry: report the problem and ask for another file.
            print("⚠️ Unexpected error:", str(e))
            print("🔁 Try uploading a different file.\n")



In [None]:
# Initial upload: sets the global `qa_chain` that the Q&A loop relies on.
load_and_process_document()

In [None]:
# Interactive Q&A loop over the currently loaded document.
# Commands: 'exit' quits, 'new' uploads a replacement document; any other
# non-empty input is sent to `qa_chain` as a question.
while True:
    q = input("Ask a question ('new' to upload new file, 'exit' to quit): ").strip()
    if not q:
        # Nothing to ask; do not spend an LLM call on an empty query.
        continue

    command = q.lower()
    if command == "exit":
        print("👋 Exiting. Goodbye!")
        break
    elif command == "new":
        load_and_process_document()
    else:
        try:
            # `invoke` replaces the deprecated direct call `qa_chain({...})`.
            result = qa_chain.invoke({"query": q})
        except Exception as e:
            # Keep the session alive on API/network failures instead of
            # aborting the whole loop; the user can simply ask again.
            print("⚠️ Could not answer the question:", str(e))
            continue
        print("📌 Answer:", result["result"])
