<a href="https://colab.research.google.com/github/Amangithubs/nsk-project/blob/main/NSK_AI_final_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Document Ingestion

This section covers loading document content and preparing it for processing, including installing necessary libraries and defining the function to load different file types.

In [None]:
!pip install PyPDF2 --quiet # NOTE(review): likely redundant - PyPDFLoader relies on pypdf, which is installed below
!pip install langchain --quiet
!pip install langchain-community --quiet # Provides langchain_community.document_loaders, imported below
!pip install pypdf --quiet # Install pypdf explicitly as required by PyPDFLoader
!pip install docx2txt --quiet # Required for Docx2txtLoader

from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
import os

def load_document(file_path):
    """
    Read a document from disk and return its text content.

    The Langchain loader is chosen from the file extension (compared
    case-insensitively): ``.pdf`` uses PyPDFLoader, ``.txt`` uses TextLoader
    and ``.docx`` uses Docx2txtLoader.

    Args:
        file_path (str): The path to the document file.

    Returns:
        list[str]: The ``page_content`` of every Document object the loader
        produced. An empty list is returned, after printing a message, when
        the extension is not supported or when the loader raises.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    try:
        if suffix == ".pdf":
            loaded = PyPDFLoader(file_path).load()
        elif suffix == ".txt":
            loaded = TextLoader(file_path).load()
        elif suffix == ".docx":
            loaded = Docx2txtLoader(file_path).load()
        else:
            # Nothing to load for an extension we have no loader for
            print(f"Unsupported file type: {suffix}")
            return []
    except Exception as err:
        # Any loader failure is reported and turned into "no content"
        print(f"Error loading document {file_path}: {err}")
        return []

    # Keep only the text of each Langchain Document object
    texts = []
    for document in loaded:
        texts.append(document.page_content)
    return texts

# Note: The process_document function (defined later in this notebook) calls
# load_document with the path of the uploaded file and works on the list of
# page texts it returns.

## Language Model (Ollama) Setup

This section covers downloading, installing, and running the Ollama server and downloading the necessary language models (LLMs) that the RAG pipeline will use for text generation.

In [None]:
# Download the Ollama install script from ollama.com and pipe it straight into sh.
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Start the Ollama server as a background process so that later cells can talk to it.
# In Colab the process has to be started from Python to keep it running alongside the notebook.
import subprocess
import time

print("Starting Ollama server...")
# Send the server's output to a log file instead of subprocess.PIPE: nothing in
# this notebook ever reads those pipes, so a long-running server could fill the
# OS pipe buffer and then block on its next write.
with open("ollama_server.log", "ab") as ollama_log:
    process = subprocess.Popen(
        ["ollama", "serve"],
        stdout=ollama_log,
        stderr=subprocess.STDOUT,
    )
time.sleep(5) # Give the server a few seconds to start
# poll() returns None while the child is still running; any other value means
# the server process has already exited, so "started" would be misleading.
if process.poll() is None:
    print("Ollama server process started.")
else:
    print(f"Ollama server exited early (exit code {process.returncode}); see ollama_server.log.")
# Note: The server will run in the background as long as this Colab runtime is active.

In [None]:
# Fetch the 'llama2' model with the Ollama CLI.
# Swap 'llama2' for another model name here if you prefer a different one.
print("Downloading Ollama model 'llama2'...")
# subprocess.run blocks until the pull has finished, and both output streams are captured as text
result = subprocess.run(
    ["ollama", "pull", "llama2"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
print("Ollama pull output:")
for captured in (result.stdout, result.stderr):
    print(captured)

# A zero exit status from the CLI is treated as a successful download
if result.returncode == 0:
    print("Model 'llama2' downloaded successfully.")
else:
    print("Error downloading model. Please check the model name and try again.")

In [None]:
!pip install ollama --quiet
import ollama

# Configure the Ollama client used by generate_answer_with_ollama below.
# The host is the default Ollama host/port; replace it with the actual URL of your server if it differs.
# If the server runs on a different machine than this notebook (e.g. locally while the notebook
# runs in Colab), the notebook needs a reachable URL for it, for example via ngrok or similar.
client = ollama.Client(host='http://localhost:11434')

# Direct (non-Langchain) generation helper. In the code shown in this notebook, answer_question generates through rag_chain rather than through this function.
def generate_answer_with_ollama(prompt, model="llama2"):
    """
    Ask an Ollama model for a single, non-streamed completion.

    Args:
        prompt (str): Text sent to the model.
        model (str): Name of the Ollama model to query (e.g., "llama2", "mistral").

    Returns:
        str: The model's reply. If the call fails, the error is printed and a
        readable error message is returned in place of the reply.
    """
    try:
        # stream=False requests one complete reply instead of incremental pieces
        completion = client.generate(model=model, prompt=prompt, stream=False)
        answer = completion['response']
    except ollama.ResponseError as api_error:
        print(f"Ollama API error: {api_error}")
        answer = f"Error generating answer with Ollama: {api_error}"
    except Exception as unexpected:
        print(f"An unexpected error occurred during Ollama generation: {unexpected}")
        answer = f"An unexpected error occurred: {unexpected}"
    return answer

# Note: The answer_question function (defined later in this notebook) invokes rag_chain rather than this function, so this helper is not called by the code shown here.

In [None]:
# Sanity check: ask the Ollama CLI which models are installed and look for 'llama2'.
print("Checking Ollama status and model list...")
result = subprocess.run(
    ["ollama", "list"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
print("Ollama list output:")
print(result.stdout)
print(result.stderr)

# The model counts as available when its name appears anywhere in the CLI's standard output
status_message = (
    "Ollama is running and 'llama2' model is available."
    if "llama2" in result.stdout
    else "Ollama or 'llama2' model not found. Check previous steps."
)
print(status_message)

## Indexing and Storage

This section covers generating numerical representations (embeddings) of the document content and storing them in a searchable vector database.

In [None]:
!pip install faiss-cpu --quiet
import faiss
import numpy as np

class VectorStore:
    """In-memory FAISS L2 index paired with the text each vector was built from."""

    def __init__(self, dim=768):
        """
        Create an empty store.

        Args:
            dim (int): Dimensionality of the embeddings that will be added.
                The default of 768 matches the embedding model used in this
                notebook (sentence-transformers/multi-qa-mpnet-base-dot-v1).
        """
        self.index = faiss.IndexFlatL2(dim)
        # texts[i] is the text whose embedding is stored at position i of the index
        self.texts = []

    def add(self, text, embedding):
        """
        Add one embedding to the index and remember the text it belongs to.

        Args:
            text (str): The chunk of text the embedding was computed from.
            embedding: A sequence of floats of length ``dim``.
        """
        # FAISS expects a 2-D float32 array with one row per vector
        self.index.add(np.array([embedding]).astype('float32'))
        self.texts.append(text)

    def search(self, query_embedding, top_k=3):
        """
        Return the stored texts for the nearest neighbours of query_embedding.

        Args:
            query_embedding: A sequence of floats of length ``dim``.
            top_k (int): Maximum number of texts to return.

        Returns:
            list[str]: Up to ``top_k`` texts, in the order FAISS returned
            their ids. Fewer (possibly none) when the index holds fewer
            than ``top_k`` vectors.
        """
        D, I = self.index.search(np.array([query_embedding]).astype('float32'), top_k)
        # FAISS fills missing result slots with id -1 when the index holds fewer
        # than top_k vectors. Without this filter, texts[-1] would silently
        # return the last stored text for every missing slot.
        return [self.texts[i] for i in I[0] if i != -1]

# Note: This class is used by process_document and answer_question.

In [None]:
!pip install sentence-transformers --quiet
from sentence_transformers import SentenceTransformer

# Load the sentence transformer model once, when this cell runs; get_embedding below reuses it.
# Its embeddings must have the dimension the FAISS index is created with (768 in VectorStore),
# so changing the model name here may require changing that dimension as well.
embedding_model = SentenceTransformer("sentence-transformers/multi-qa-mpnet-base-dot-v1")

def get_embedding(text):
    """Encode text with the module-level sentence-transformer model and return the vector as a plain list."""
    # .tolist() converts the encoder's array result into a regular Python list
    return embedding_model.encode(text).tolist()

# Note: This function is called by process_document and answer_question.

In [None]:
# Module-level vector store: process_document replaces it with a fresh VectorStore
# for every processed document, and answer_question searches it.
store = VectorStore()

def process_document(file_input): # Renamed and changed parameter name for clarity
    """
    Load an uploaded document, chunk it, embed the chunks and index them.

    The module-level ``store`` is replaced with a fresh VectorStore once the
    document has been loaded, so only the most recently processed document
    is searchable afterwards.

    Args:
        file_input: The value delivered by the Gradio File component. Either
            an object exposing the file path as ``.name`` or the path string
            itself; ``None`` (or an empty value) means nothing was uploaded.

    Returns:
        str: "Document processed and ready!" when at least one chunk was
        indexed, otherwise a message starting with "Error" that says what
        went wrong.
    """
    global store

    print("--- Starting process_document function ---") # Debug print

    # Accept both a file wrapper (path in .name) and a plain path string, so a
    # string input no longer fails with AttributeError.
    file_path = getattr(file_input, "name", file_input) if file_input else None

    if not file_path:
        print("Error: No file uploaded.")
        return "Error: No file uploaded."

    try:
        print(f"--- Calling load_document for {file_path} ---") # Debug print
        pages = load_document(file_path)
    except Exception as e:
        print(f"Error during load_document for {file_path}: {e}")
        print("-" * 20)
        return f"Error during document loading: {e}"

    print(f"--- load_document returned {len(pages) if pages else 0} pages ---") # Debug print
    if not pages:
        print("Error: No text extracted from the document by load_document.")
        return "Error: No text extracted from the document by load_document."
    print(f"Successfully extracted text from {len(pages)} pages.")

    # Start from an empty store so chunks of a previous document are not mixed in
    print("--- Clearing/Initializing VectorStore ---") # Debug print
    store = VectorStore()
    print("--- VectorStore cleared/initialized ---") # Debug print

    indexed_chunks = 0
    failed_chunks = 0
    print("--- Starting chunking and embedding loop ---") # Debug print
    for i, text in enumerate(pages):
        print(f"--- Processing page {i+1}/{len(pages)} ---") # Debug print
        chunks = chunk_text(text)
        print(f"--- Page {i+1} chunked into {len(chunks)} chunks ---") # Debug print
        for chunk in chunks:
            try:
                store.add(chunk, get_embedding(chunk))
            except Exception as e:
                # Best effort: one bad chunk must not abort the whole document,
                # but it is counted so total failure can be reported below.
                failed_chunks += 1
                print(f"Error embedding or adding chunk: {e}")
            else:
                indexed_chunks += 1

    print("--- Chunking and embedding loop finished ---") # Debug print
    print(f"--- Indexed {indexed_chunks} chunks ({failed_chunks} failed) ---") # Debug print

    # Do not claim success when nothing ended up in the index
    if indexed_chunks == 0:
        print("Error: No chunks could be indexed from the document.")
        return "Error: No chunks could be indexed from the document."
    return "Document processed and ready!"

## RAG Pipeline Core (Retrieval & Generation)

This section covers both the **retrieval** of relevant document chunks from the vector store (using the VectorStore's search method) and the **generation** of the final answer. The generation uses the retrieved context and the user query to create a grounded answer using a language model (Ollama), implemented with Langchain Runnables and Prompt Templates.

In [None]:
def chunk_text(text, chunk_size=1000, overlap=200):
    """
    Split text into fixed-size character windows that overlap.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so the
    last ``overlap`` characters of one chunk are repeated at the start of the
    next. Chunking stops with the first chunk that reaches the end of the
    text, so no chunk is entirely contained in the one before it.

    Args:
        text (str): The text to split.
        chunk_size (int): Maximum number of characters per chunk. Must be positive.
        overlap (int): Number of characters shared by neighbouring chunks.
            Must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        list[str]: The chunks in document order; an empty list for empty text.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. An
            overlap of at least ``chunk_size`` would never advance through
            the text.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    total = len(text)
    chunks = []
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        chunks.append(text[start:end])
        if end == total:
            # The text is fully covered; another window would only repeat
            # the tail of this chunk.
            break
        start += step
    return chunks

class VectorStore:
    """FAISS L2 index plus a parallel list holding the text behind each stored vector."""

    def __init__(self):
        # texts[i] is the text whose embedding is stored at position i of the index
        self.texts = []
        # 768 is the embedding size assumed for the model used in this notebook
        # (sentence-transformers/multi-qa-mpnet-base-dot-v1); adjust if needed
        self.index = faiss.IndexFlatL2(768)

    def add(self, text, embedding):
        """Store one embedding together with the text it was computed from."""
        # FAISS takes a 2-D float32 array with one row per vector
        row = np.asarray([embedding]).astype('float32')
        self.index.add(row)
        self.texts.append(text)

    def search(self, query_embedding, top_k=5):
        """
        Look up the texts of the nearest stored vectors.

        Returns a list of at most ``top_k`` texts in the order the index
        returned their ids; slots reported as id -1 are left out.
        """
        query = np.array([query_embedding]).astype('float32')
        _, neighbour_ids = self.index.search(query, top_k)
        hits = []
        for idx in neighbour_ids[0]:
            if idx == -1:
                # -1 marks an empty result slot, not a valid position in texts
                continue
            hits.append(self.texts[idx])
        return hits

# Note: The get_embedding function is defined in cell rpOLYhSCzY7M
# The ollama client is initialized in cell 25286156
# The generate_answer_with_ollama function is defined in cell 25286156

# Import Langchain components for generation
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# Import ChatOllama from the new package
!pip install -U langchain-ollama --quiet # Already installed in previous step
from langchain_ollama import ChatOllama


# Langchain building blocks for the RAG chain.
# The chat model talks to the Ollama server at localhost:11434 and expects the
# 'llama2' model to have been pulled already; change both values here if your
# setup differs.
llm = ChatOllama(
    base_url="http://localhost:11434",
    model="llama2",
)

# Prompt template: the retrieved context comes first, followed by the user's question
_RAG_PROMPT_TEMPLATE = """Answer the following question based only on the provided context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(_RAG_PROMPT_TEMPLATE)

# Create a RAG chain using Langchain Runnables.
# The chain is invoked with a dict that already carries the "context" and
# "question" keys the prompt template expects, so that dict is fed straight
# into the prompt. (Mapping both keys through RunnablePassthrough() would hand
# the *entire* input dict to each template variable instead of the individual
# strings, so the model would see the dict's repr twice.)
# Flow: input dict -> formatted prompt -> LLM -> plain string answer
rag_chain = prompt | llm | StrOutputParser()


def answer_question(question):
    """
    Answer a question from the content of the currently indexed document.

    The question is embedded, the closest chunks are retrieved from the
    module-level ``store`` and passed, together with the question, to
    ``rag_chain``, which formats the prompt and queries the LLM.

    Args:
        question (str): The user's question.

    Returns:
        str: The model's answer, or a readable message when the question is
        empty, when nothing could be retrieved, or when any step raised.
    """
    print("--- Starting answer_question function (Langchain Runnables) ---")

    # Nothing to embed or ask for an empty or whitespace-only question
    if not question or not question.strip():
        print("Error: Empty question.")
        return "Please enter a question."

    try:
        print("--- Getting question embedding ---")
        q_emb = get_embedding(question)
        print("--- Question embedding obtained ---")

        print("--- Searching vector store ---")
        context_docs = store.search(q_emb, top_k=5)
        print("--- Vector store search completed ---")

        # Without retrieved chunks the model would answer with no grounding at
        # all, so tell the user what is missing instead.
        if not context_docs:
            print("Error: No context retrieved from the vector store.")
            return "No document content is available to answer from. Please upload and process a document first."

        # Join the retrieved text chunks
        context_text = "\n\n".join(context_docs)

        # Debugging prints
        print("Retrieved Context:")
        print(context_text)
        print("-" * 20)

        # The prompt itself is built inside rag_chain from its template, so no
        # prompt string is assembled (or printed) here.
        print("--- Invoking Langchain RAG chain ---")
        response = rag_chain.invoke({"context": context_text, "question": question})
        print("--- Langchain RAG chain invocation completed ---")

        # Debugging print for successful response
        print("Model Response:")
        print(response)
        print("-" * 20)
        return response

    except Exception as outer_e:
        # UI boundary: report the failure as text instead of raising into Gradio
        print(f"An unexpected error occurred in answer_question: {outer_e}")
        print("-" * 20)
        return f"An unexpected error occurred: {outer_e}"

## User Interaction

This section covers setting up the user interface using Gradio to allow users to upload documents, process them, ask questions, and view answers.

In [None]:
!pip install gradio --quiet
import gradio as gr

# Build the Gradio UI: an upload row (file, button, status) followed by a question box and an answer box.
with gr.Blocks() as demo:
    with gr.Row():
        # Despite the variable name, the label invites any document; load_document only handles .pdf, .txt and .docx
        pdf_input = gr.File(label="Upload Document") # Changed label
        upload_btn = gr.Button("Process Document") # Changed label
        status = gr.Textbox(label="Status")

    # Clicking the button runs process_document on the uploaded file and shows its return message in the status box
    upload_btn.click(process_document, inputs=[pdf_input], outputs=[status])

    question = gr.Textbox(label="Ask a question about the Document") # Changed label
    answer = gr.Textbox(label="Answer")

    # Submitting the question textbox runs answer_question and shows the result in the answer box
    question.submit(answer_question, inputs=[question], outputs=[answer])

# Start the Gradio app
demo.launch()