In [1]:
def read_large_file(file_path, encoding="utf-8"):
    """Lazily yield each line of a text file with surrounding whitespace removed.

    The file is iterated line by line, so only one line is held in memory at
    a time regardless of the file's size. The file is closed when the
    generator is exhausted or garbage-collected.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Passed explicitly so
            decoding does not depend on the platform's locale default.

    Yields:
        Each line as a string, stripped of leading and trailing whitespace
        (this includes the newline and any indentation). Blank lines are
        yielded as empty strings.
    """
    with open(file_path, "r", encoding=encoding) as file:
        for line in file:
            # strip() removes whitespace on both sides, not just the newline.
            yield line.strip()

# Stream the file line by line: read_large_file is a generator, so only the
# current line is held in memory instead of the entire file's contents.
for line in read_large_file("large_text_file.txt"):
    print(line)

Documents vectorized and saved successfully!
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Result 1:
The Rise of Artificial Intelligence

Artificial Intelligence (AI) is revolutionizing various industries by enhancing automation, decision-making, and data analysis. With advancements in deep learning models and natural language processing, AI is now integrated into everyday applications.

Key Benefits of AI:
1. Improved efficiency in repetitive tasks.
2. Enhanced decision-making using data-driven insights.
3. Automation of complex processes in industries like healthcare, finance, and aerospace.

Result 2:
Challenges in AI:
- Ethical concerns regarding data privacy.
- Potential job displacement due to automation.
- Ensuring AI models are fair and unbiased.

The future of AI holds promise for personalized services, smarter automation, and enhanced human-computer interactions.


In [None]:
import logging
import os

from langchain import PromptTemplate, LLMChain
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GroqAPI

class QAChain:
    """Question answering over a persisted Chroma vector store.

    On construction the class loads a HuggingFace sentence-embedding model,
    opens the Chroma store found at ``chroma_path``, creates the Groq-backed
    language model and wires them together in a ``RetrievalQA`` chain that
    uses a fixed context/question prompt.

    Attributes:
        logger: Logger used for progress and error messages.
        model_name: The ``model_name`` constructor argument, kept for
            inspection only.
        embeddings: Embedding function shared with the vector store.
        vectorstore: Chroma store that documents are retrieved from.
        llm: Language model that generates the answers.
        prompt_template: Prompt combining retrieved context and question.
        chain: The assembled ``RetrievalQA`` chain.
    """

    # Returned by query() when the chain raises, so callers always get a string.
    _FALLBACK_ANSWER = "I'm sorry, but I couldn't retrieve the information you requested."
    # Environment variable consulted when no api_key argument is given.
    _API_KEY_ENV_VAR = "GROQ_API_KEY"

    def __init__(self, chroma_path: str, model_name: str = "groq-api", api_key: "str | None" = None):
        """Build the embeddings, vector store, language model and QA chain.

        Args:
            chroma_path: Directory of the persisted Chroma collection.
            model_name: Stored on the instance but not forwarded to the LLM.
            api_key: Groq API key. When omitted, the ``GROQ_API_KEY``
                environment variable is used instead.

        Raises:
            ValueError: If no API key is passed and the environment variable
                is unset or empty.
        """
        # Logging setup. basicConfig does nothing when the root logger
        # already has handlers, so repeated construction is harmless.
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        # Resolve the credential before any expensive model loading so a
        # missing key fails fast instead of surfacing later as a failed query.
        resolved_key = api_key or os.environ.get(self._API_KEY_ENV_VAR)
        if not resolved_key:
            raise ValueError(
                "No Groq API key supplied: pass api_key=... or set the "
                f"{self._API_KEY_ENV_VAR} environment variable."
            )

        # NOTE(review): model_name is not passed to GroqAPI because that
        # constructor's signature is not visible here - confirm whether it
        # should select the model.
        self.model_name = model_name

        # Load embeddings and vectorstore
        self.logger.info("Loading embeddings and vectorstore...")
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.vectorstore = Chroma(persist_directory=chroma_path, embedding_function=self.embeddings)

        # Load language model
        # NOTE(review): confirm that the installed langchain version exports
        # GroqAPI from langchain.llms.
        self.logger.info("Initializing language model...")
        self.llm = GroqAPI(api_key=resolved_key)

        # Prompt template: the chain fills in {context} with the retrieved
        # documents and {question} with the user's query.
        self.prompt_template = PromptTemplate(
            template=(
                "You are an AI assistant. Answer the following question accurately based on the provided context:\n"
                "Context: {context}\n"
                "Question: {question}\n"
                "Answer:"
            ),
            input_variables=["context", "question"],
        )

        # QA Chain
        self.chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=self.vectorstore.as_retriever(),
            # Presumably 'stuff' places all retrieved documents into a single
            # prompt - verify against the LangChain documentation.
            chain_type="stuff",
            chain_type_kwargs={"prompt": self.prompt_template},
        )

    def query(self, question: str) -> str:
        """Answer a single question with the QA chain.

        Args:
            question: The natural-language question to answer.

        Returns:
            The chain's answer, or a fixed apology message if the chain
            raised. The exception is logged with its traceback and is not
            propagated.
        """
        self.logger.info("Received question: %s", question)
        try:
            result = self.chain.run(question)
        except Exception as e:
            # Deliberate best-effort boundary: log the traceback and degrade
            # to a user-facing message instead of raising.
            self.logger.exception("Error during QAChain query: %s", e)
            return self._FALLBACK_ANSWER
        self.logger.info("Generated answer: %s", result)
        return result

    def add_document(self, doc_text: str, doc_id: str) -> bool:
        """Add a new document to the vectorstore and persist it.

        Args:
            doc_text: Text content to embed and store.
            doc_id: Identifier saved in the document's metadata under the
                ``"doc_id"`` key.

        Returns:
            True if the document was added and persisted, False if either
            step raised. Failures are logged with their traceback and are
            not propagated.
        """
        try:
            self.vectorstore.add_texts([doc_text], metadatas=[{"doc_id": doc_id}])
            # NOTE(review): some Chroma releases persist automatically and
            # deprecate persist() - confirm against the installed version.
            self.vectorstore.persist()
        except Exception as e:
            self.logger.exception("Error adding document: %s", e)
            return False
        self.logger.info("Document %s added successfully.", doc_id)
        return True

    def batch_query(self, questions: list[str]) -> list[str]:
        """Answer several questions one after another.

        The questions are processed sequentially through query(), so the
        result has one entry per question in the same order. A question
        whose query failed contributes the fixed apology message.

        Args:
            questions: Questions to answer.

        Returns:
            The answers, aligned index-by-index with ``questions``.
        """
        return [self.query(question) for question in questions]