In [1]:
!pip install googlesearch-python lxml deepspeed sentence-transformers

Defaulting to user installation because normal site-packages is not writeable


In [2]:
# ============== AGENT 2: INFORMATION RETRIEVAL ==============

# --- Imports ---
import os
import socket
from typing import List
import subprocess
import time
import re

# LangChain and related libraries
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama.llms import OllamaLLM
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Search library
try:
    from googlesearch import search
except ImportError:
    print("Error: 'googlesearch-python' is not installed. Please run 'pip install googlesearch-python'")

# It's good practice to set a user agent for web requests
os.environ["USER_AGENT"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"

# --- Function 1: Dynamic Search (Corrected Version 2.0) ---
def dynamic_search(query: str, num_results: int = 4) -> list[str]:
    """
    Searches the web for a given query, focusing on recent and relevant
    technical blogs and tutorials related to NVIDIA GPU acceleration.
    """
    print(f"-> Starting dynamic search for: '{query}'")
    enhanced_query = f"{query} site:developer.nvidia.com OR site:medium.com/rapids-ai OR site:medium.com/cupy-team"
    print(f"-> Enhanced query: '{enhanced_query}'")
    try:
        # THE FIX IS HERE: Removed the 'query=' keyword argument.
        search_results = list(search(enhanced_query, num_results=num_results))
        
        print(f"-> Found URLs: {search_results}")
        return search_results
    except Exception as e:
        print(f"An error occurred during search: {e}")
        # Fallback to a static list if the search fails
        return [
            "https://medium.com/cupy-team/announcing-cupy-v13-66979ee7fab0",
            "https://www.unum.cloud/blog/2022-01-26-cupy",
        ]

# --- Function 2: Main RAG Pipeline ---
def process_with_rag(query: str) -> str:
    """
    Processes a user query using a Retrieval-Augmented Generation (RAG) pipeline.
    """
    print("--- Running RAG Pipeline ---")
    # Step 1: Get URLs from the dynamic search function
    urls = dynamic_search(query)
    if not urls:
        return "Sorry, I couldn't find any relevant articles online for your query."

    # Step 2: Load the documents from the web
    print("-> Loading documents from URLs...")
    docs = [WebBaseLoader(url).load() for url in urls]
    docs_list = [item for sublist in docs for item in sublist]

    # Step 3: Split the documents into chunks
    print("-> Splitting documents into chunks...")
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=100
    )
    doc_splits = text_splitter.split_documents(docs_list)

    # Step 4: Create embeddings and a vector store retriever
    print("-> Creating embeddings and vector store...")
    embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") #
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        embedding=embedding_model,
        collection_name="rag-chroma"
    )
    retriever = vectorstore.as_retriever()

    # Step 5: Generate an answer using the LLM and the retrieved context
    print("-> Retrieving context and generating answer...")
    GENERATE_PROMPT = PromptTemplate(
        template="""
        You are an AI assistant for data scientists. Answer the user's question based ONLY on the context provided.
        Focus on practical code examples and clear explanations from the context.
        If the context doesn't contain enough information, just say that you couldn't find a specific answer in the provided documents.

        CONTEXT:
        {context}

        QUESTION:
        {question}

        YOUR ANSWER:
        """,
        input_variables=["context", "question"],
    )

    retrieved_docs = retriever.invoke(query)
    context_text = "\\n\\n---\\n\\n".join([doc.page_content for doc in retrieved_docs])

    # Connect to the local LLM on Sol
    host_node = socket.gethostname()
    llm = OllamaLLM(model="qwen3:14b", base_url=f"http://kvinod@{host_node}:11434/") # We'll need to update the user ID

    rag_chain = GENERATE_PROMPT | llm
    answer = rag_chain.invoke({"context": context_text, "question": query})

    # Clean up the vector store
    vectorstore.delete_collection()
    
    print("--- RAG Pipeline Complete ---")
    return answer

# --- Example Usage ---
if __name__ == '__main__':
    test_query = "I want to do an array multiplication of the dimension 10x10. How can I do that ion Python?"
    final_answer = process_with_rag(test_query)
    print("\\n======= FINAL ANSWER =======\\n")
    print(final_answer)

USER_AGENT environment variable not set, consider setting it to identify your requests.


--- Running RAG Pipeline ---
-> Starting dynamic search for: 'I want to do an array multiplication of the dimension 10x10. How can I do that ion Python?'
-> Enhanced query: 'I want to do an array multiplication of the dimension 10x10. How can I do that ion Python? site:developer.nvidia.com OR site:medium.com/rapids-ai OR site:medium.com/cupy-team'
-> Found URLs: ['/search?num=6', '', '', '']
-> Loading documents from URLs...


MissingSchema: Invalid URL '/search?num=6': No scheme supplied. Perhaps you meant https:///search?num=6?

In [None]:
# =================================================================
# AI TUTOR AGENT (DEFINITIVE, DYNAMIC & FULLY ABSTRACT FINAL)
# This version contains ZERO hardcoded library/language names in the prompts.
# =================================================================

import os
import socket
import json
from typing import List

# LangChain and related libraries
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pydantic import BaseModel, Field
from langchain_ollama.chat_models import ChatOllama

# Search library
try:
    from googlesearch import search
except ImportError:
    print("Error: 'googlesearch-python' is not installed. Please run 'pip install googlesearch-python'")

os.environ["USER_AGENT"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"

class SearchQueryGenerator(BaseModel):
    queries: List[str] = Field(description="A list of targeted, keyword-focused search queries.")

# --- Part 1: Fully Abstracted Search Query Generation ---
def generate_search_queries(query: str, llm) -> List[str]:
    """
    Uses a dynamic prompt to generate language-and-library-specific search queries.
    """
    print("-> Using LLM with FULLY ABSTRACTED prompt to generate search queries...")
    
    # DYNAMIC PROMPT: The LLM now identifies the correct libraries and language on its own.
    prompt_template = PromptTemplate(
        template="""
        You are an expert at generating web search queries for a technical audience.
        Analyze the user's question to identify the core technical task and the programming language.
        Based on your knowledge, generate 5 concise, targeted search queries. Two queries should be for the standard, CPU-based library for that task/language. Three queries should be for potential GPU-accelerated libraries for that task/language, prioritizing NVIDIA-based solutions if they exist.

        User Question: "{question}"
        
        Generate a JSON list of 5 search query strings.
        """,
        input_variables=["question"],
    )
    
    query_generation_chain = prompt_template | llm.with_structured_output(SearchQueryGenerator)

    try:
        response_model = query_generation_chain.invoke({"question": query})
        print(f"-> Generated queries: {response_model.queries}")
        return response_model.queries
    except Exception as e:
        print(f"-> LLM failed to generate structured output: {e}")
        return []

# --- Part 2: The Main Search Function (Unchanged) ---
def dynamic_search_agentic(queries: List[str]) -> list[str]:
    print("-> Executing dynamic search...")
    all_urls = set()
    for q in queries:
        try:
            enhanced_query = f"{q} site:developer.nvidia.com OR site:medium.com/rapids-ai OR site:medium.com/cupy-team"
            search_results = list(search(enhanced_query, num_results=2))
            for url in search_results:
                all_urls.add(url)
        except Exception as e:
            print(f"An error occurred during search for query '{q}': {e}")
            continue
    final_urls = [url for url in list(all_urls) if url]
    print(f"-> Found {len(final_urls)} unique URLs: {final_urls}")
    return final_urls

# --- Part 3: The Definitive Dynamic RAG Pipeline ---
def process_with_rag(query: str) -> str:
    print("--- Running FINAL DYNAMIC RAG Pipeline ---")

    host_node = socket.gethostname()
    llm = ChatOllama(model="qwen3:14b", base_url=f"http://jgarc111@{host_node}:11434/") # Placeholder ID

    search_queries = generate_search_queries(query, llm)
    urls = []
    if search_queries:
        urls = dynamic_search_agentic(search_queries)

    context_text = ""
    if urls:
        print("-> Found documents. Loading and processing context...")
        docs = [WebBaseLoader(url).load() for url in urls]
        docs_list = [item for sublist in docs for item in sublist]
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap=100)
        doc_splits = text_splitter.split_documents(docs_list)
        embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(documents=doc_splits, embedding=embedding_model, collection_name="rag-chroma")
        retriever = vectorstore.as_retriever()
        retrieved_docs = retriever.invoke(query)
        context_text = "\n\n---\n\n".join([doc.page_content for doc in retrieved_docs])
        vectorstore.delete_collection()

    # THE DEFINITIVE, FULLY ABSTRACTED "NVIDIA-FIRST EDUCATIONAL PRIORITY" PROMPT
    final_prompt_template = PromptTemplate(
        template="""
        You are a friendly and knowledgeable AI Tutor for a project focused on data science acceleration. Your primary mission is to educate users on leveraging NVIDIA-based GPU libraries.

        First, analyze the user's QUESTION to identify the core task and programming language. Based on your knowledge, determine if a common NVIDIA-based GPU-accelerated library exists for that specific task and language.

        Then, follow the appropriate path below to structure your conversational and helpful answer:

        **PATH 1: An NVIDIA-based GPU-accelerated library EXISTS for this task.**
        1.  Identify the standard CPU library and the NVIDIA GPU library for the user's language and task.
        2.  Start with a friendly opening that explains you will show both the GPU-accelerated and standard methods.
        3.  Provide a heading for the GPU solution, dynamically inserting the library name (e.g., `### Recommended GPU Solution (with [GPU Library Name])`).
        4.  Write the code example for the GPU solution.
        5.  Add a "Performance Note" section. Explain the benefits of the GPU approach (e.g., for large datasets) and the trade-offs (e.g., performance on small data vs. large data, data transfer overhead).
        6.  Provide a heading for the CPU solution, dynamically inserting the library name (e.g., `### Standard CPU Solution (with [CPU Library Name])`).
        7.  Write the CPU-based code for comparison.
        8.  Do NOT add any disclaimer note at the end.

        **PATH 2: An NVIDIA-based GPU-accelerated library DOES NOT EXIST for this task.**
        1.  Identify the standard library for the user's specified language and task.
        2.  Start with a friendly opening explaining the standard approach.
        3.  Provide a heading for the standard solution, dynamically inserting the library name (e.g., `### Standard Solution (with [Library Name])`).
        4.  Write the code example using the identified standard library.
        5.  End your entire response with the exact sentence: "Note: The provided solution is the standard method for this task, as a direct NVIDIA-based GPU library for it is not common."

        Use the CONTEXT below to inform your answer if it is relevant, but your primary instruction is to follow the mission and logic paths described above.

        CONTEXT:
        {context}

        QUESTION:
        {question}

        YOUR FINAL ANSWER:
        """,
        input_variables=["context", "question"],
    )
    
    final_chain = final_prompt_template | llm
    answer = final_chain.invoke({"context": context_text, "question": query}).content

    print(answer)

    print("--- Pipeline Complete ---")
    return answer

In [None]:
# =================================================================
# GRADIO CHAT APPLICATION FOR AI TUTOR (FINAL)
#
# This version fixes the UserWarning by specifying the modern 'messages'
# format for the chatbot component.
# =================================================================

import gradio as gr
import re
import time

# This wrapper function connects our backend logic to the Gradio UI.
# It assumes 'process_with_rag' is defined and available in the notebook environment.
def tutor_chat_interface(user_message, history):
    # The 'history' is now a list of dictionaries. Append the new user message.
    history.append({"role": "user", "content": user_message})
    
    # Show a "thinking..." message while processing
    yield history + [{"role": "assistant", "content": "Thinking..."}], "Thinking..."

    # Call our existing RAG pipeline function
    full_response = process_with_rag(user_message)

    # Use regex to separate the <think> block from the final answer
    think_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
    think_match = think_pattern.search(full_response)
    
    thought_process = "No thought process was found in the response."
    if think_match:
        # Extract the thought process and clean it up
        thought_process = think_match.group(1).strip()
        # Remove the <think> block from the final answer shown to the user
        final_answer = think_pattern.sub("", full_response).strip()
    else:
        # If no <think> block, the whole response is the answer
        final_answer = full_response

    # Update the chatbot history with the AI's clean answer
    history.append({"role": "assistant", "content": final_answer})
    
    # Return the final history and the extracted thought process
    yield history, thought_process


# Build the Gradio UI using Blocks for more control
with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {background-color: #f5f5f5;}") as demo:
    gr.Markdown("# 🤖 AI Accelerated Data Science Tutor")
    gr.Markdown("Ask a question about a data science task. The tutor will provide an explanation and code, prioritizing NVIDIA GPU-accelerated solutions where possible.")

    # THE FIX IS HERE: Added 'type="messages"' to the chatbot component.
    chatbot = gr.Chatbot(label="Conversation", height=450, bubble_full_width=False, type="messages")
    
    with gr.Accordion("🔎 Show Agent's Thought Process", open=False):
        cot_output = gr.Markdown("The agent's reasoning will appear here after it responds.")

    with gr.Row():
        msg_textbox = gr.Textbox(
            label="Your Question",
            placeholder="e.g., How do I multiply two 10x10 arrays in Python?",
            scale=4,
            autofocus=True,
            container=False # This makes the textbox look cleaner
        )
        submit_btn = gr.Button("Ask", variant="primary", scale=1, min_width=150)

    # Main function to handle the chat logic
    def handle_submit(user_message, chat_history):
        response_generator = tutor_chat_interface(user_message, chat_history)
        
        # The generator now yields the full message history and the thought process
        for history_state, thought_process in response_generator:
             yield history_state, thought_process


    # Connect the submit button and textbox to the handler function
    submit_btn.click(
        handle_submit, 
        [msg_textbox, chatbot], 
        [chatbot, cot_output]
    ).then(lambda: gr.update(value=""), None, [msg_textbox], queue=False)

    msg_textbox.submit(
        handle_submit, 
        [msg_textbox, chatbot], 
        [chatbot, cot_output]
    ).then(lambda: gr.update(value=""), None, [msg_textbox], queue=False)


# Launch the application
# Set share=True if you need a public link from the Sol jupyter notebook
demo.launch(share=True, debug=True)