# Background Setup

In [1]:
%%capture

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import initialize_agent, AgentType
from dotenv import load_dotenv
import os
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import fitz  # PyMuPDF for extracting text from PDFs
import chromadb
from chromadb.utils import embedding_functions
from llama_cpp import Llama

# Load variables from a local .env file (if one exists) into the process
# environment so that secrets never have to live in the source code.
load_dotenv()

# SECURITY: credentials must not be hard-coded. The key that was previously
# committed in this file has to be treated as leaked and revoked/rotated.
# Provide the replacement via the GOOGLE_API_KEY environment variable or .env.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in a .env file."
    )

# Gemini chat model used both for the retrieval sufficiency check and as the
# reasoning engine of the web-search agent. temperature=0 is requested for
# that model; max_retries=2 is passed as the retry setting.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=api_key,
)

# Web search tool (DuckDuckGo) that the agent can call to look things up online.
search = DuckDuckGoSearchRun()

# Initialize the LangChain agent with the search tool and the Gemini model.
tools = [search]
agent = initialize_agent(
    tools=tools,
    llm=llm,
    # `initialize_agent` selects the agent implementation through its `agent`
    # parameter; `agent_type` is not part of its signature.
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Function to get context from the web based on a prompt
def get_context_from_web(prompt):
    """Run the search-enabled agent on ``prompt`` and return its answer.

    The module-level ``agent`` is handed the prompt as-is; whatever
    ``agent.run`` produces is passed straight back to the caller.
    """
    return agent.run(prompt)

# Directory in which the persistent Chroma database is stored.
persist_directory = "./chroma_db"

# Chroma's default embedding function. It is invoked directly as
# `embedding_model(list_of_texts)` when PDFs are indexed.
# NOTE: a HuggingFaceEmbeddings instance used to be bound to this name first,
# but it was overwritten on the very next assignment and never used, so
# constructing it only added start-up cost.
embedding_model = embedding_functions.DefaultEmbeddingFunction()

# Persistent client plus the single collection that holds the PDF texts.
vector_db = chromadb.PersistentClient(path=persist_directory)
collection = vector_db.get_or_create_collection(name="pdf_documents")

def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of the PDF at ``pdf_path``.

    Each page's text is followed by a newline, and the combined result is
    stripped of leading/trailing whitespace. Any exception raised while
    opening or reading the document is printed rather than propagated; the
    text collected up to that point (possibly an empty string) is returned.
    """
    page_chunks = []
    try:
        with fitz.open(pdf_path) as document:
            for current_page in document:
                page_chunks.append(current_page.get_text("text") + "\n")
    except Exception as error:
        print(f"Error extracting text from {pdf_path}: {error}")
    return "".join(page_chunks).strip()

def populate_vector_db_from_pdfs(pdf_directory):
    """Index every PDF located directly inside ``pdf_directory`` into ChromaDB.

    For each file whose name ends in ``.pdf`` (case-insensitive) the text is
    extracted with ``extract_text_from_pdf``, embedded as a single vector with
    the module-level ``embedding_model`` and written to the module-level
    ``collection`` under the file name as its ID.

    Records are written with ``upsert`` so that running the function again
    refreshes a document whose ID is already stored instead of leaving the
    old record in place.

    Args:
        pdf_directory: Path of the directory to scan (not recursive).

    Raises:
        OSError: If ``pdf_directory`` cannot be listed (e.g. it does not exist).
    """
    # Sorted so files are processed in a reproducible order.
    pdf_files = sorted(
        name for name in os.listdir(pdf_directory)
        if name.lower().endswith(".pdf")
    )

    for pdf_file in pdf_files:
        pdf_path = os.path.join(pdf_directory, pdf_file)
        text_content = extract_text_from_pdf(pdf_path)

        if not text_content:
            # Extraction failed or the PDF has no text layer; nothing to index.
            print(f"Skipping {pdf_file}: no text could be extracted.")
            continue

        # The embedding function takes a list of texts and returns one
        # embedding per text; only one text is sent, so take the first.
        embedding = embedding_model([text_content])[0]

        collection.upsert(
            ids=[pdf_file],  # Unique identifier (the file name)
            embeddings=[embedding],
            metadatas=[{"source": pdf_file}],
            documents=[text_content],
        )
        print(f"Added {pdf_file} to the vector database.")

# Usage: index every PDF found in the "dataset" directory. This call executes
# as soon as the cell/module runs, and the directory is listed with
# os.listdir, so it must exist at that point.
populate_vector_db_from_pdfs("dataset")

def check_vector_db_with_llm(query, top_k=3):
    """Retrieve context for ``query`` and let the LLM judge whether it suffices.

    The ``pdf_documents`` collection is queried for the ``top_k`` most similar
    documents. Their texts are joined with newlines and handed to the
    module-level ``llm`` together with the query; the model is asked to answer
    "YES" or "NO".

    Args:
        query: The user's question.
        top_k: Number of documents to request from the vector database.

    Returns:
        A ``(decision, retrieved_context)`` tuple. ``decision`` is always
        exactly ``"YES"`` or ``"NO"``. ``retrieved_context`` is the joined
        document text, or ``"No relevant context found."`` when the query
        returned no documents (in which case ``decision`` is ``"NO"`` and the
        LLM is not called).
    """
    # Get the collection from the vector database
    collection = vector_db.get_collection(name="pdf_documents")

    # Perform similarity search
    results = collection.query(
        query_texts=[query],
        n_results=top_k
    )

    # `documents` holds one inner list per query text. Guard against the key
    # being absent or None, and drop empty entries before joining.
    documents_per_query = results.get("documents") or [[]]
    retrieved_contexts = [doc for doc in documents_per_query[0] if doc]

    if not retrieved_contexts:
        # Nothing was retrieved, so there is nothing for the LLM to assess;
        # answer "NO" directly and save the API round-trip.
        return "NO", "No relevant context found."

    retrieved_context = "\n".join(retrieved_contexts)

    # Define the evaluation prompt
    eval_prompt = f"""
    You are an AI assistant. Determine if the retrieved context contains enough information to fully answer the question.

    Query:
    {query}

    Retrieved Context:
    {retrieved_context}

    Answer with "YES" if the retrieved context is sufficient, otherwise answer with "NO".
    """

    # Invoke LLM and extract response text
    decision_message = llm.invoke(eval_prompt)
    raw_decision = (
        decision_message.content
        if hasattr(decision_message, "content")
        else decision_message
    )
    if not isinstance(raw_decision, str):
        raw_decision = str(raw_decision)

    # Models often add punctuation or an explanation ("YES.", "Yes, because
    # ..."). Normalise so callers can compare against a fixed value.
    decision = "YES" if raw_decision.strip().upper().startswith("YES") else "NO"

    return decision, retrieved_context

def compress_context(context, max_length=1024):
    """Truncate ``context`` so that it is at most ``max_length`` characters.

    Text that already fits is returned unchanged. Longer text is cut and the
    last three characters of the result are replaced by ``"..."`` to signal
    the truncation, so the returned string is exactly ``max_length`` long.
    When ``max_length`` is too small to hold the ellipsis, the text is simply
    cut to ``max_length`` characters.

    Args:
        context: The text to shorten.
        max_length: Maximum length of the result in characters (default 1024).

    Returns:
        The possibly truncated text.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        raise ValueError("max_length must be non-negative")

    if len(context) <= max_length:
        return context

    ellipsis = "..."
    if max_length <= len(ellipsis):
        # No room for any text plus the marker; plain cut.
        return context[:max_length]

    return context[:max_length - len(ellipsis)] + ellipsis

# Load the GGUF model from a local file through llama-cpp-python's Llama class.
# The resulting object is used as the default `model` of generate() below.
model_path = "qwenfinal.gguf"  # Replace with your GGUF file path
# n_ctx=4096 is passed as the context-size setting for the loaded model.
qwen = Llama(model_path=model_path, n_ctx=4096) 

def generate(prompt, model=qwen, max_tokens=100, temperature=0.7):
    """Answer ``prompt`` with retrieval-augmented generation.

    Steps:
      1. Query the vector database and let the LLM judge whether the
         retrieved context is sufficient.
      2. If it is not, obtain context from the web-search agent instead.
      3. Shorten the context with ``compress_context``, insert question and
         context into the prompt template and run the local model.

    Args:
        prompt: The user's question.
        model: Callable llama-cpp model; defaults to the module-level ``qwen``.
        max_tokens: Upper bound on generated tokens (default 100).
        temperature: Sampling temperature (default 0.7).

    Returns:
        The generated text of the first completion choice.
    """

    # Step 1: Retrieve context from Vector Database
    decision, context = check_vector_db_with_llm(prompt)

    print(decision)

    # Step 2: If vector DB lacks information, use Web Search.
    # The verdict comes from an LLM and may carry punctuation or extra words
    # ("YES.", "Yes, the context ..."); an exact comparison with "YES" would
    # wrongly send those cases to the web search.
    context_is_sufficient = decision.strip().upper().startswith("YES")
    if not context_is_sufficient:
        context = get_context_from_web(prompt)

    deep_prompt = """Below is a question related to AI related researches. Write an answer that appropriately completes the request.

    ### Question:
    {}

    ### Context:
    {}

    ### Answer:
    {}"""

    context = compress_context(context)

    # Step 3: Construct Augmented Prompt (the answer slot is left empty for
    # the model to complete).
    deep_prompt = deep_prompt.format(prompt, context, "")

    response = model(deep_prompt, max_tokens=max_tokens, temperature=temperature)

    return response["choices"][0]["text"]

# Inference

In [2]:
%%capture

# Run the full pipeline (vector DB check, optional web search, local model)
# for a sample question and keep the generated text.
response = generate("What is Deepseek")

In [3]:
# Display the answer produced in the previous cell.
print(response)

 Deepseek's V3 model is a reasoning model that shows better or equal performance to competitors while achieving it with a fraction of the training and inference cost. The model was downloaded more than ChatGPT, leading to market concerns about AI investments. Deepseek's approach to improving algorithms instead of hardware has made them a disruptor in the industry.
