## Imports

In [1]:
# !pip install googlesearch-python lxml deepspeed sentence-transformers 
# !pip install feedparser sentence-transformers

In [2]:
#  
# Cell 2: Imports
#
# Description: All required libraries for the application are imported here.
#

import os
import json
import socket
from typing import List, Dict
import subprocess
import time
import re
import gradio as gr

# LangChain and related libraries
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pydantic import BaseModel, Field
from langchain_ollama.chat_models import ChatOllama
from langchain_ollama.embeddings import OllamaEmbeddings

# Search library
try:
    from googlesearch import search
except ImportError:
    print("Error: 'googlesearch-python' is not installed. Please run 'pip install googlesearch-python'")

# Set a user agent to avoid being blocked by Google search
os.environ["USER_AGENT"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"

USER_AGENT environment variable not set, consider setting it to identify your requests.


## Agent 1 - Profiler

In [3]:
#
# Cell 3: Agent 1 - User Profiler
#
# Description: This agent is responsible for understanding the user's knowledge level.
# It dynamically generates a questionnaire, analyzes the answers, and creates a
# profile that will be used by Agent 3 to tailor its responses.
#

class Questionnaire(BaseModel):
    questions: List[str] = Field(description="A list of 4-5 questions for the user.")

class UserProfilerAgent_V3:
    """
    Agent 1 (V3): Guarantees the user's name is collected first before
    using an LLM to dynamically generate the rest of the questionnaire.
    **ADAPTED FOR MODULAR, GRADIO-FRIENDLY USE.**
    """
    def __init__(self, profiles_dir: str = "user_profiles"):
        self.profiles_dir = profiles_dir
        if not os.path.exists(self.profiles_dir):
            os.makedirs(self.profiles_dir)

        try:
            host_node = socket.gethostname()
            # NOTE: The ASURITE ID should be that of the user running the Ollama server.
            # This is specified as a hackathon resource.
            asurite_id = "apoojar4"
            self.llm = ChatOllama(model="qwen3:14b", base_url=f"http://{asurite_id}@{host_node}:11434/")
            self.structured_llm = self.llm.with_structured_output(Questionnaire)
            print("✅ [Agent 1] Successfully connected to Ollama LLM.")
        except Exception as e:
            print(f"❌ [Agent 1] Error connecting to Ollama: {e}")
            self.llm = None

    def get_user_list(self) -> List[str]:
        """Scans the profiles directory and returns a list of user names."""
        if not os.path.exists(self.profiles_dir):
            return []
        files = [f for f in os.listdir(self.profiles_dir) if f.endswith('.txt')]
        # Convert 'first_last.txt' to 'First Last'
        names = [" ".join(f.replace('.txt', '').split('_')).title() for f in files]
        return names

    def _generate_questions_with_llm(self) -> List[str]:
        """Uses an LLM to dynamically generate a user questionnaire."""
        if not self.llm: # Fallback if LLM is not available
            return [
                "On a scale of 1-5, how comfortable are you with Python?",
                "Which Python data science libraries (like Pandas or NumPy) have you used before?",
                "Have you ever heard of using GPUs to speed up data analysis?",
                "What's the first tool you'd reach for to do a large matrix multiplication in Python?"
            ]
            
        print("\n🤖 [Agent 1] Generating a personalized questionnaire...")
        prompt = PromptTemplate(
            template="""
            You are a helpful assistant for an AI Data Science Tutor. Your goal is to create a short questionnaire (4-5 questions) to understand a user's knowledge level.
            The questions should gently probe their experience with:
            1. The Python programming language.
            2. Common CPU-based data science libraries (like NumPy, Pandas).
            3. Their awareness of GPU computing and hardware acceleration.
            4. Their familiarity with any NVIDIA-specific GPU libraries (like CuPy or RAPIDS).
            IMPORTANT: Do NOT ask for the user's name, as it will be collected separately.
            Return the questions as a JSON list. Be conversational and friendly.
            """,
            input_variables=[],
        )
        query_generation_chain = prompt | self.structured_llm
        try:
            response_model = query_generation_chain.invoke({})
            return response_model.questions
        except Exception as e:
            print(f"-> [Agent 1] LLM failed to generate questions, falling back to default. Error: {e}")
            return [
                "On a scale of 1-5, how comfortable are you with Python?",
                "Which Python data science libraries (like Pandas or NumPy) have you used before?",
                "Have you ever heard of using GPUs to speed up data analysis?",
                "What's the first tool you'd reach for to do a large matrix multiplication in Python?"
            ]

    def generate_and_save_report(self, user_name: str, answers_dict: dict) -> str:
        """
        Takes a user name and a dictionary of answers, generates a report with an LLM,
        and saves it to a file. Returns the path to the saved file.
        """
        if not self.llm:
            return "Error: LLM not connected."

        print(f"\n🤖 [Agent 1] Analyzing responses for {user_name} and creating a profile...")
        answers_str = "\n".join([f"- {q}: {a}" for q, a in answers_dict.items()])
        prompt = PromptTemplate(
            template="""
            You are an expert AI analyst. A user named {user_name} has answered a questionnaire about their data science skills.
            Your task is to analyze their answers and generate a "TUTORING STRATEGY" report for our AI Tutor.
            **User's Answers:**
            {answers}
            **Your Task:**
            1.  Determine the user's knowledge level: 'Beginner', 'Intermediate', or 'Advanced'.
            2.  Write a concise report following the correct strategy format below. This report will be given to another AI, so the instructions must be clear.
            ---
            **STRATEGY FORMATS (Choose ONE):**
            **If 'Beginner':**
            Start with `Knowledge Level: Beginner`. On the next line, start with `TUTORING STRATEGY: The user is a beginner.` Then, explain that the tutor should use high-level concepts, explain the 'why' of GPU acceleration, and introduce NVIDIA libraries (like CuPy) as a simple, powerful alternative.
            **If 'Intermediate':**
            Start with `Knowledge Level: Intermediate`. On the next line, start with `TUTORING STRATEGY: The user is at an intermediate level.` Then, explain that the tutor should provide direct code comparisons (e.g., NumPy vs. CuPy), focus on performance benefits, and show clear benchmarking examples.
            **If 'Advanced':**
            Start with `Knowledge Level: Advanced`. On the next line, start with `TUTORING STRATEGY: The user is advanced.` Then, explain that the tutor can provide nuanced advice, discuss the broader NVIDIA RAPIDS ecosystem, and cover specific benchmarking methodologies on the Sol supercomputer.
            ---
            Now, generate the complete report.
            """,
            input_variables=["user_name", "answers"],
        )
        report_generation_chain = prompt | self.llm
        response_message = report_generation_chain.invoke({"user_name": user_name, "answers": answers_str})
        report_content = response_message.content
        full_report = f"--- User Profile for {user_name} ---\n{report_content}\n--- End of Profile ---"

        # Save the report to a text file
        filename = "_".join(user_name.lower().split()) + ".txt"
        filepath = os.path.join(self.profiles_dir, filename)
        with open(filepath, "w") as f:
            f.write(full_report)
        print(f"\n✅ [Agent 1] User profile report saved successfully to: {filepath}")
        return filepath

## Agent 4 - Benchmarker

In [4]:
class SolBenchmarker:
    def __init__(self, user: str, python_env: str = "rapids25.02"):
        if not user or user == "YOUR_ASURITE_ID":
            raise ValueError("A valid ASURITE username is required for SolBenchmarker.")
        self.user = user
        self.python_env = python_env

    def _generate_sbatch_script(self, script_dir: str, script_name: str, job_type: str) -> str:
        # This function now generates a specific script for either a CPU or GPU job
        if job_type == "gpu":
            # The GPU job requests a GPU resource
            resources = "#SBATCH -G 1"
        else: # cpu
            # The CPU job requests a standard node without a GPU
            resources = "#SBATCH --nodes=1" 

        return f"""#!/bin/bash
#SBATCH -p general
#SBATCH -q public
{resources}
#SBATCH -A grp_hackathon2025
#SBATCH --reservation=hackathon2025
#SBATCH -t 0-00:10:00
#SBATCH -c 1
#SBATCH -o {script_dir}/slurm-{job_type}-%j.out
#SBATCH -e {script_dir}/slurm-{job_type}-%j.err

module load mamba/latest
source activate {self.python_env}

echo "--- STARTING {job_type.upper()} BENCHMARK ---"
/usr/bin/time -p python3 {script_dir}/{script_name} 2>&1
echo "--- FINISHED {job_type.upper()} BENCHMARK ---"
"""

    def _submit_job(self, code: str, job_type: str, benchmark_dir: str) -> str:
        # Helper to submit a single job and return its ID
        script_name = f"{job_type}_benchmark.py"
        script_path = os.path.join(benchmark_dir, script_name)
        sbatch_path = os.path.join(benchmark_dir, f"{job_type}_job.sh")
        
        with open(script_path, "w") as f: f.write(code)
        sbatch_content = self._generate_sbatch_script(benchmark_dir, script_name, job_type)
        with open(sbatch_path, "w") as f: f.write(sbatch_content)

        process = subprocess.run(f"sbatch {sbatch_path}", shell=True, capture_output=True, text=True)
        if process.returncode != 0: raise RuntimeError(f"sbatch submission for {job_type} failed: {process.stderr}")
        
        job_id_match = re.search(r"Submitted batch job (\\d+)", process.stdout.strip())
        if not job_id_match: raise RuntimeError(f"Could not parse Job ID for {job_type}: {process.stdout}")
        
        job_id = job_id_match.group(1)
        print(f"--> [Agent 4] Submitted {job_type.upper()} job with ID: {job_id}")
        return job_id

    def _check_job_completion(self, job_id: str, job_type: str, benchmark_dir: str) -> dict:
        # Helper to check if a job is done and parse its output
        queue_process = subprocess.run(f"squeue -u {self.user} -j {job_id}", shell=True, capture_output=True, text=True)
        if job_id not in queue_process.stdout:
            print(f"--> [Agent 4] {job_type.upper()} Job {job_id} completed.")
            output_file_path = os.path.join(benchmark_dir, f"slurm-{job_type}-{job_id}.out")
            if os.path.exists(output_file_path):
                with open(output_file_path, "r") as f:
                    output_content = f.read()
                real_time_match = re.search(r"real\s+([\d.]+)", output_content)
                time_val = float(real_time_match.group(1)) if real_time_match else None
                return {"status": "complete", "job_type": job_type, "time": time_val}
            else:
                return {"status": "error", "job_type": job_type, "message": "Output file not found."}
        return {"status": "running"}

    def run_benchmark_parallel(self, cpu_code: str, gpu_code: str):
        # The main method, now a generator that yields results as they complete.
        benchmark_dir = os.path.join(os.getcwd(), "benchmark_files")
        os.makedirs(benchmark_dir, exist_ok=True)
        
        try:
            gpu_job_id = self._submit_job(gpu_code, "gpu", benchmark_dir)
            cpu_job_id = self._submit_job(cpu_code, "cpu", benchmark_dir)

            active_jobs = {"gpu": gpu_job_id, "cpu": cpu_job_id}

            while active_jobs:
                # Check the status of each active job
                for job_type, job_id in list(active_jobs.items()):
                    result = self._check_job_completion(job_id, job_type, benchmark_dir)
                    if result["status"] != "running":
                        yield result # Yield the result as soon as a job finishes
                        del active_jobs[job_type] # Remove from active monitoring
                
                if active_jobs:
                    time.sleep(5) # Wait before polling again

        except Exception as e:
            yield {"status": "error", "message": str(e)}

In [5]:
# #
# # Cell 4: Agent 4 - Sol Benchmarker
# #
# # Description: This agent is responsible for executing code on the Sol supercomputer
# # to benchmark the performance difference between CPU and GPU implementations. It writes
# # and submits a SLURM batch script.
# #

# class SolBenchmarker:
#     def __init__(self, user: str, python_env: str = "rapids25.02"): # Note: Kernel name from docs
#         if not user or user == "YOUR_ASURITE_ID":
#             raise ValueError("A valid ASURITE username is required for SolBenchmarker.")
#         self.user = user
#         self.python_env = python_env

#     def _generate_sbatch_script(self, script_dir: str, cpu_script_name: str, gpu_script_name: str) -> str:
#         # This SLURM script is configured according to the hackathon's resources 
#         return f"""#!/bin/bash
# #SBATCH -p general
# #SBATCH -q public
# #SBATCH -G 1
# #SBATCH -A grp_hackathon2025
# #SBATCH --reservation=hackathon2025
# #SBATCH -t 0-00:10:00
# #SBATCH -c 1
# #SBATCH -o {script_dir}/slurm-%j.out
# #SBATCH -e {script_dir}/slurm-%j.err

# module load mamba/latest
# source activate {self.python_env}

# echo "--- STARTING CPU BENCHMARK ---"
# /usr/bin/time -p python3 {script_dir}/{cpu_script_name} 2>&1
# echo "--- FINISHED CPU BENCHMARK ---"

# echo ""
# echo "--- STARTING GPU BENCHMARK ---"
# /usr/bin/time -p python3 {script_dir}/{gpu_script_name} 2>&1
# echo "--- FINISHED GPU BENCHMARK ---"
# """
#     def _parse_output(self, output_content: str) -> dict:
#         try:
#             # --- FIX 1: Removed erroneous backslash before the string literal ---
#             # This regex finds the execution time from the '/usr/bin/time' command output.
#             real_times = re.findall(r"real\s+([\d.]+)", output_content)
            
#             cpu_time = float(real_times[0]) if len(real_times) > 0 else None
#             gpu_time = float(real_times[1]) if len(real_times) > 1 else None
#             print("cpu_time_seconds", cpu_time, "gpu_time_seconds", gpu_time)
#             return {"status": "success", "cpu_time_seconds": cpu_time, "gpu_time_seconds": gpu_time}
#         except (IndexError, ValueError) as e:
#             return {"status": "error", "message": f"Failed to parse benchmark times. Error: {e}", "raw_log": output_content}

#     def run_benchmark(self, cpu_code: str, gpu_code: str) -> dict:
#         benchmark_dir = os.path.join(os.getcwd(), "benchmark_files")
#         os.makedirs(benchmark_dir, exist_ok=True)
        
#         cpu_script_path = os.path.join(benchmark_dir, "cpu_benchmark.py")
#         gpu_script_path = os.path.join(benchmark_dir, "gpu_benchmark.py")
#         sbatch_path = os.path.join(benchmark_dir, "benchmark_job.sh")

#         try:
#             with open(cpu_script_path, "w") as f: f.write(cpu_code)
#             with open(gpu_script_path, "w") as f: f.write(gpu_code)
#             sbatch_script = self._generate_sbatch_script(benchmark_dir, "cpu_benchmark.py", "gpu_benchmark.py")
#             with open(sbatch_path, "w") as f: f.write(sbatch_script)

#             process = subprocess.run(f"sbatch {sbatch_path}", shell=True, capture_output=True, text=True)
#             if process.returncode != 0: raise RuntimeError(f"sbatch submission failed: {process.stderr}")

#             # --- FIX 2: Removed erroneous backslash before the string literal ---
#             # This regex finds the Job ID from the sbatch submission output.
#             job_id_match = re.search(r"Submitted batch job (\d+)", process.stdout.strip())

#             if not job_id_match: raise RuntimeError(f"Could not parse Job ID from sbatch output: {process.stdout}")
#             job_id = job_id_match.group(1)
#             print(f"--> [Agent 4] Submitted benchmark job to SLURM with ID: {job_id}")

#             print("--> [Agent 4] Waiting for job to complete...")
#             while True:
#                 queue_process = subprocess.run(f"squeue -u {self.user} -j {job_id}", shell=True, capture_output=True, text=True)
#                 if job_id not in queue_process.stdout: break
#                 time.sleep(10)

#             print(f"--> [Agent 4] Job {job_id} completed.")
#             output_file_path = os.path.join(benchmark_dir, f"slurm-{job_id}.out")
            
#             if not os.path.exists(output_file_path):
#                  err_file_path = os.path.join(benchmark_dir, f"slurm-{job_id}.err")
#                  if os.path.exists(err_file_path):
#                      with open(err_file_path, "r") as f: error_content = f.read()
#                      return {"status": "error", "message": f"Job failed. See error log: {error_content}"}
#                  return {"status": "error", "message": f"Output file not found."}

#             with open(output_file_path, "r") as f: output_content = f.read()
#             return self._parse_output(output_content)
#         except Exception as e:
#             return {"status": "error", "message": str(e)}

In [6]:
# --- NEW: Code Upscaler Function with Strict Constraints ---
def _upscale_code_agentic(original_code: str, llm, library_string: str) -> str:
    print("--> [Agent 3] Invoking LLM with STRICT CONSTRAINTS to upscale code...")
    
    # This class is used to ensure the LLM returns only a code string.
    class CodeOutput(BaseModel):
        code: str = Field(description="The complete, upscaled Python code.")
        
    # THE NEW, MORE SPECIFIC PROMPT
    prompt_template = PromptTemplate(
        template="""
        You are a benchmark preparation assistant. Your task is to analyze the following Python script and make it suitable for a meaningful performance benchmark.

        **Your instructions are very strict:**
        1.  **DO NOT** change any of the `import` statements.
        2.  **ONLY** find the variable that defines the primary data size (e.g., `size = 10`, `n_rows = 1000`).
        3.  Change the value of that single variable to a much larger number to ensure the benchmark is meaningful. For example, change matrix dimensions to at least 8000 or DataFrame rows to over 5 million.
        4.  Return the complete, modified Python script and nothing else.

        **Constraint:** The final code must only use libraries from this list: {available_libraries}

        Original Code:
        ```python
        {original_code}
        ```

        Upscaled Code:
        """,
        input_variables=["original_code", "available_libraries"],
    )
    
    # The chain now uses the new, stricter prompt
    structured_chain = prompt_template | llm.with_structured_output(CodeOutput)
    
    try:
        response_model = structured_chain.invoke({
            "original_code": original_code,
            "available_libraries": library_string
        })
        upscaled_code = response_model.code
        print("--> [Agent 3] Code successfully upscaled.")
        return upscaled_code
    except Exception as e:
        print(f"--> [Agent 3] Could not upscale code, using original. Error: {e}")
        # Fallback to original code if upscaling fails
        return original_code

## Agent 2 - Retriever

In [7]:
# =================================================================
# AGENTS 2 & 3 HELPER FUNCTIONS
# This cell contains all the supporting logic for retrieval and code processing.
# =================================================================
from sentence_transformers import util
import feedparser

class SearchQueryGenerator(BaseModel):
    queries: List[str] = Field(description="A list of targeted, keyword-focused search queries.")

def generate_search_queries(query: str, llm) -> List[str]:
    print("-> [Agent 2] Using LLM to generate search queries...")
    prompt_template = PromptTemplate(
        template="""
        You are an expert at generating web search queries. Analyze the user's question to identify the core technical task and programming language.
        Generate 3 concise, targeted search queries. One query should be for the standard, CPU-based approach. Two queries should be for GPU-accelerated approaches, prioritizing NVIDIA-based solutions if they exist.
        User Question: "{question}"
        Generate a JSON list of 3 search query strings.
        """,
        input_variables=["question"],
    )
    query_generation_chain = prompt_template | llm.with_structured_output(SearchQueryGenerator)
    try:
        response_model = query_generation_chain.invoke({"question": query})
        print(f"-> [Agent 2] Generated queries: {response_model.queries}")
        return response_model.queries
    except Exception as e:
        print(f"-> [Agent 2] LLM failed to generate structured output: {e}")
        return []

# --- NEW: Hybrid Retriever Functions (Tier 1, 2, 3) ---

def fetch_articles_from_rss(query: str, embedding_model) -> list[str]:
    print("-> [Tier 1] Trying Semantic Search on RSS feeds...")
    RSS_FEEDS = {
        "NVIDIA Developer Blog": "https://developer.nvidia.com/blog/feed/",
        "RAPIDS AI (Medium)": "https://medium.com/feed/rapids-ai",
        "CuPy (Medium)": "https://medium.com/feed/cupy-team"
    }
    query_embedding = embedding_model.embed_query(query)
    found_articles = []
    for name, url in RSS_FEEDS.items():
        feed = feedparser.parse(url)
        for entry in feed.entries:
            entry_text = entry.title + ". " + entry.summary
            entry_embedding = embedding_model.embed_query(entry_text)
            similarity = util.pytorch_cos_sim(query_embedding, entry_embedding)
            if similarity.item() > 0.35: # Keep a minimal threshold
                found_articles.append({'link': entry.link, 'similarity': similarity.item()})
    
    found_articles.sort(key=lambda x: x['similarity'], reverse=True)
    final_urls = [article['link'] for article in found_articles[:5]]
    print(f"-> [Tier 1] Found {len(final_urls)} semantically relevant URLs from RSS.")
    return final_urls

def dynamic_web_search(queries: List[str]) -> list[str]:
    print("-> [Tier 2] Falling back to polite Web Search...")
    all_urls = set()
    for q in queries:
        try:
            search_results = list(search(q, num_results=2))
            all_urls.update(url for url in search_results if url)
            time.sleep(2)
        except Exception as e:
            print(f"An error occurred during web search: {e}")
            continue
    return list(all_urls)

def hybrid_retriever(query: str, llm, embedding_model) -> list[str]:
    # 1. First, try the high-quality RSS feeds
    urls = fetch_articles_from_rss(query, embedding_model)

    # 2. If RSS finds nothing, fallback to a general web search
    if not urls:
        print("--> RSS search found no relevant articles. Falling back to web search.")
        search_queries = generate_search_queries(query, llm)
        if search_queries:
            urls = dynamic_web_search(search_queries)

    # 3. If both searches fail, use a final safety net of default links
    if not urls:
        print("--> All searches failed. Using default fallback links.")
        urls = [
            "https://developer.nvidia.com/blog/icymi-leveraging-the-power-of-gpus-with-cupy-in-python/",
            "https://medium.com/rapids-ai/rapids-23-08-release-23db51c255f0"
        ]
        
    print(f"-> [Hybrid Retriever] Found {len(urls)} final URLs to use for context.")
    return urls

# --- Other Helper Functions ---
def _get_available_libraries(filepath: str) -> List[str]:
    if not os.path.exists(filepath):
        print(f"--> Library file not found at {filepath}. Using a default list.")
        return ['numpy', 'cupy', 'pandas', 'cudf', 'cuml', 'scipy', 'sklearn']
    with open(filepath, 'r') as f:
        lines = f.readlines()
    libraries = []
    for line in lines:
        if not line.startswith('#'):
            package_name = line.split()[0]
            libraries.append(package_name)
    print(f"--> Successfully loaded {len(libraries)} available libraries from file.")
    return libraries

# DEFINITIVE, "FIND ALL PAIRS" CODE EXTRACTOR
def _extract_python_code(markdown_text: str) -> Dict[str, str]:
    """
    The definitive helper function to parse CPU and GPU code blocks.
    It uses a robust regex with findall to capture all heading-and-code pairs
    and then assigns them correctly.
    """
    print("--> [Agent 3] Extracting code blocks using definitive 'Find All Pairs' parser...")
    
    cpu_code = ""
    gpu_code = ""
    
    # This pattern looks for a line that starts with hashes (optional) and contains "Solution",
    # then captures that heading line (group 1) and the subsequent python code block (group 2).
    pattern = re.compile(
        r"(###?\s*.*?Solution.*?)\n*```python\n(.*?)\n```", 
        re.DOTALL | re.IGNORECASE
    )
    
    matches = pattern.findall(markdown_text)
    print(matches)
    
    for heading, code in matches:
        heading_lower = heading.lower()
        if "gpu" in heading_lower:
            gpu_code = code.strip()
        elif "cpu" in heading_lower:
            cpu_code = code.strip()
            
    # Your helpful debugging prints
    print("################################################")
    print("GPU Code Found:\n", gpu_code)
    print("################################################")
    print("CPU Code Found:\n", cpu_code)
    print("################################################")

    return {"cpu_code": cpu_code, "gpu_code": gpu_code}

In [8]:
# resp = """Recommended GPU Solution (Using CuPy)
# For large matrices like 500x500, CuPy (a GPU-accelerated library) can significantly speed up computations by leveraging the parallel power of GPUs. Here's how to multiply two matrices with CuPy:


# import cupy as cp

# # Create two 500x500 matrices on the GPU
# a = cp.random.rand(500, 500)
# b = cp.random.rand(500, 500)

# # Matrix multiplication using CuPy
# result = a @ b  # or cp.dot(a, b)
# print(result)
# Key Benefits:

# Speed: GPU parallelism can make this operation up to 10x faster than CPU-based NumPy for large matrices.
# Scalability: Easily handles larger matrices (e.g., 10,000x10,000) with minimal changes to code.
# Standard CPU Solution (Using NumPy)
# If you're working on a CPU or don't have access to a GPU, NumPy is the standard tool for matrix operations:


# import numpy as np

# # Create two 500x500 matrices on the CPU
# a = np.random.rand(500, 500)
# b = np.random.rand(500, 500)

# # Matrix multiplication using NumPy
# result = a @ b  # or np.dot(a, b)
# print(result)
# Performance Note:

# For small matrices (like 500x500), the CPU solution is efficient and sufficient.
# For larger matrices or repeated operations, the GPU solution with CuPy becomes significantly more efficient.
# GPU Acceleration Note
# Matrix multiplication is a perfect use case for GPUs due to its high parallelism. CuPy's implementation is optimized for NVIDIA GPUs, making it ideal for scientific computing, machine learning, and data analysis workflows. If you're working on larger-scale problems (e.g., 10,000x10,000 matrices), consider switching to CuPy for dramatic speedups!"""
# extracted_code = _extract_python_code(resp)
# cpu_code, gpu_code = extracted_code["cpu_code"], extracted_code["gpu_code"]

## Agent 3 - Synthesizer

In [9]:
def process_with_rag(query: str, user_profile_path: str = None) -> str:
    print("\n--- Running FINAL Integrated RAG Pipeline ---")
    host_node = socket.gethostname()
    asurite_id = "apoojar4"
    llm = ChatOllama(model="qwen3:14b", base_url=f"http://{asurite_id}@{host_node}:11434/")
    embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    # Load available libraries from file
    available_libs = _get_available_libraries("installed_libraries.txt")
    library_string = ", ".join(available_libs)

    # Call the hybrid retriever to get URLs
    # Note: hybrid_retriever is assumed to be defined in the cell above (Cell 5)
    urls = hybrid_retriever(query, llm, embedding_model)

    context_text = ""
    if urls:
        print("-> Found documents. Loading and processing context...")
        docs = [WebBaseLoader(url).load() for url in urls]
        docs_list = [item for sublist in docs for item in sublist]
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap=100)
        doc_splits = text_splitter.split_documents(docs_list)
        vectorstore = Chroma.from_documents(documents=doc_splits, embedding=embedding_model, collection_name="rag-chroma")
        retriever = vectorstore.as_retriever()
        retrieved_docs = retriever.invoke(query)
        context_text = "\n\n---\n\n".join([doc.page_content for doc in retrieved_docs])
        vectorstore.delete_collection()

    user_profile_content = "Knowledge Level: Intermediate\nTUTORING STRATEGY: The user is at an intermediate level. Provide direct code comparisons (e.g., NumPy vs. CuPy), focus on performance benefits, and show clear benchmarking examples."
    if user_profile_path and os.path.exists(user_profile_path):
        print("\n--- Loading the user data:", user_profile_path)
        with open(user_profile_path, 'r') as f:
            user_profile_content = f.read()

    # THE DEFINITIVE PROMPT, COMBINING ALL LOGIC: USER PROFILE, 3-PATH REASONING, AND LIBRARY CONSTRAINTS
    # In Cell 6, replace the final_prompt_template string with this:

    # THE ULTIMATE, BALANCED PROMPT (LOGIC + PERSONA)
    final_prompt_template = PromptTemplate(
        template="""
        You are an expert AI Tutor and an enthusiastic advocate for NVIDIA technology. Your primary mission is to provide an encouraging, insightful, and clear answer that is perfectly tailored to the user's knowledge level. Your tone should be that of a patient expert who is excited to help users discover the power of GPU acceleration.

        **STEP 1: ANALYZE YOUR INSTRUCTIONS**
        - First, read the **TUTORING STRATEGY DOCUMENT**. This is your most important instruction. You MUST tailor the tone, depth, and complexity of your final answer to this level.
        - Second, read the user's **QUESTION**. Determine if its intent is **theoretical** (what/why) or **procedural** (how-to).
        - Third, note the **CODE GENERATION CONSTRAINT**.

        **TUTORING STRATEGY DOCUMENT:**
        {user_profile}

        **CODE GENERATION CONSTRAINT:**
        You MUST only use libraries from the following list: `{available_libraries}`.

        **STEP 2: CHOOSE YOUR RESPONSE PATH AND APPLY THE TUTOR PERSONA**
        Based on the user's intent, follow ONE of the three paths below.

        ---
        **PATH 1: The question is THEORETICAL.**
        - Provide a clear, insightful, and conversational explanation.
        - **TAILOR YOUR TONE:** For a Beginner, use simple analogies and focus on the 'wow factor' of the technology. For an Intermediate user, be technically precise. For an Advanced user, discuss deeper architectural concepts and trade-offs.
        - You may include ONE concise code snippet ONLY if it is essential to illustrate a key concept. Do not provide comparative code blocks.

        ---
        **PATH 2: The question is PROCEDURAL and a common NVIDIA-based GPU library EXISTS for the task.**
        - Your answer MUST frame the GPU solution as the modern, high-performance standard.
        - **TAILOR YOUR TONE:** For a Beginner, start with an encouraging tone, explaining what the code is doing in simple terms and focusing on how accessible GPU power can be. For an Intermediate user, focus on the performance benefits with clear code comparisons. For an Advanced user, provide more optimized code and add notes about advanced features like CUDA streams or memory management.
        - Present the `### Recommended GPU Solution` first, followed by the `### Standard CPU Solution` clearly labeled as a "baseline for comparison."
        - Always add a "Performance Note" or "Key Benefits" section to explain the trade-offs and advantages.

        ---
        **PATH 3: The question is PROCEDURAL and a common NVIDIA-based GPU library DOES NOT EXIST for the task.**
        - **TAILOR YOUR TONE:** Explain the standard solution at a level appropriate for the user.
        - Provide the `### Standard Solution` with a clear code example.
        - After the solution, add a `### GPU Acceleration Note`. In this section, explain *why* this specific task is not typically GPU-accelerated. Then, proactively pivot to an encouraging and helpful suggestion. Frame it as an exciting next step, explaining where the user *could* apply GPU acceleration in a related part of their workflow.
        ---

        **STEP 3: GENERATE THE FINAL, PERSONALIZED ANSWER**
        Use the **RELEVANT CONTEXT** below to find supporting facts or code if helpful, but always follow your primary logic and persona from STEP 1 and STEP 2.

        **RELEVANT CONTEXT FROM WEB SEARCH:**
        {context}

        **USER'S QUESTION:**
        {question}

        YOUR FINAL, TAILORED ANSWER:
        """,
        input_variables=["user_profile", "question", "context", "available_libraries"],
    )
    
    final_chain = final_prompt_template | llm
    llm_response_text = final_chain.invoke({
        "user_profile": user_profile_content,
        "question": query,
        "context": context_text,
        "available_libraries": library_string
    }).content
    
    print("--> [Agent 3] Generated conversational answer.")
    print(llm_response_text)

    yield {"status": "llm_complete", "content": llm_response_text}

    # Code extraction, upscaling, and benchmarking logic remains the same
    extracted_code = _extract_python_code(llm_response_text)
    cpu_code, gpu_code = extracted_code["cpu_code"], extracted_code["gpu_code"]

    if cpu_code and gpu_code:
        print("--> [Agent 3] Both CPU and GPU code found. Proceeding to upscale and benchmark in parallel.")
        
        upscaled_cpu_code = _upscale_code_agentic(cpu_code, llm, library_string)
        upscaled_gpu_code = _upscale_code_agentic(gpu_code, llm, library_string)
        
        yield {"status": "benchmark_running", "content": "\n\n---\n*⏳ Submitting parallel benchmark jobs to Sol...*"}

        try:
            benchmarker = SolBenchmarker(user=asurite_id)
            # Loop through the results as they come in from the parallel jobs
            for result in benchmarker.run_benchmark_parallel(upscaled_cpu_code, upscaled_gpu_code):
                yield result # Pass the result directly up to the UI handler

        except ValueError as e:
            yield {"status": "error", "message": f"Benchmark configuration error: {e}"}
    else:
        print("--> [Agent 3] Did not find both code types. Skipping benchmark.")

    # if cpu_code and gpu_code:
    #     print("--> [Agent 3] Both CPU and GPU code found. Proceeding to upscale and benchmark.")
    #     yield {"status": "benchmark_running", "content": "\n\n---\n*⏳ Benchmark in progress on the Sol Supercomputer...*"}
    #     upscaled_cpu_code = _upscale_code_agentic(cpu_code, llm, library_string)
    #     upscaled_gpu_code = _upscale_code_agentic(gpu_code, llm, library_string)
    #     try:
    #         benchmarker = SolBenchmarker(user=asurite_id)
    #         benchmark_results = benchmarker.run_benchmark(upscaled_cpu_code, upscaled_gpu_code)
    #         if benchmark_results.get("status") == "success":
    #             cpu_time = benchmark_results.get('cpu_time_seconds')
    #             gpu_time = benchmark_results.get('gpu_time_seconds')
    #             cpu_time_str = f"{cpu_time:.4f} seconds" if cpu_time is not None else "N/A"
    #             gpu_time_str = f"{gpu_time:.4f} seconds" if gpu_time is not None else "N/A"
                
    #             benchmark_md = "\n\n---\n### 📊 Real-World Benchmark Results (from ASU's Sol Supercomputer)\n"
    #             benchmark_md += "| Metric | Result |\n|---|---|\n"
    #             benchmark_md += f"| CPU Time | {cpu_time_str} |\n"
    #             benchmark_md += f"| GPU Time | {gpu_time_str} |\n"

    #             if cpu_time and gpu_time and gpu_time > 0:
    #                 speedup = cpu_time / gpu_time
    #                 benchmark_md += f"| **Speedup** | **{speedup:.2f}x faster on GPU!** |\\n"
                
    #             # llm_response_text += benchmark_md
    #         else:
    #             benchmark_md = f"\n\n---\n### ⚠️ Benchmark Failed\n{benchmark_results.get('message')}"
    #             # llm_response_text += f"\n\n---\n### ⚠️ Benchmark Failed\\n{benchmark_results.get('message')}"

    #         # --- YIELD 3: Send the final benchmark table back to the UI ---
    #         yield {"status": "benchmark_complete", "content": benchmark_md}
    #     except ValueError as e:
    #         # llm_response_text += f"\n\n---\n### ⚠️ Benchmark Skipped\\n{e}"
    #         yield {"status": "benchmark_complete", "content": f"\\n\\n---\n### ⚠️ Benchmark Skipped\\n{e}"}
    # else:
    #     print("--> [Agent 3] Did not find both code types. Skipping benchmark.")
        
    print("--- Pipeline Complete ---")
    # return llm_response_text

# Gradio

In [None]:
#
# Cell 6: Gradio User Interface
#
# Description: This cell builds the complete Gradio app, integrating all agents.
# It includes controls for creating and selecting user profiles, a chat interface for
# posing questions to the tutor, and logic to connect the selected user profile to
# the RAG pipeline.
#

# --- Instantiate Agent 1 for use in the UI ---
profiler_agent = UserProfilerAgent_V3()

with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {background-color: #f5f5f5;}") as demo:
    
    # --- State Management for user profiles ---
    user_state = gr.State({
        "all_users": profiler_agent.get_user_list(),
        "current_user": None
    })

    gr.Markdown("# 🤖 AI Accelerated Data Science Tutor")
    
    with gr.Row():
        with gr.Column(scale=4):
            gr.Markdown("Ask a question about a data science task. For a personalized response, create or select a user profile.")
        with gr.Column(scale=2, min_width=300):
            # --- User Management UI ---
            user_dropdown = gr.Dropdown(
                label="Current User",
                choices=user_state.value["all_users"],
                interactive=True
            )
            with gr.Row():
                new_user_btn = gr.Button("✚ New / Update User")

    gr.Markdown("---")

    # --- Main Chat UI ---
    chatbot = gr.Chatbot(label="Conversation", height=500, type="messages")

    with gr.Accordion("🔎 Show Agent's Thought Process", open=False):
        cot_output = gr.Markdown("The agent's reasoning will appear here after it responds.")
    
    with gr.Row():
        msg_textbox = gr.Textbox(
            label="Your Question",
            placeholder="e.g., How do I multiply two 10x10 arrays in Python?",
            scale=4,
            lines=2,
            container=False
        )
        submit_btn = gr.Button("Ask Tutor", variant="primary", scale=1, min_width=150)

    # --- Questionnaire Modal (Hidden by default) ---
    with gr.Group(visible=False) as profiler_ui_group:
        with gr.Blocks() as profiler_modal:
            gr.Markdown("### User Profile Questionnaire")
            gr.Markdown("Please answer the following questions to help me tailor my explanations to your knowledge level.")
            profiler_name_input = gr.Textbox(label="First and Last Name")
            
            q_inputs = [gr.Textbox(label=f"Question {i+1}", visible=False) for i in range(5)]

            submit_profile_btn = gr.Button("Submit Profile")
            cancel_profile_btn = gr.Button("Cancel")

    # ==================================
    # GRADIO HANDLER FUNCTIONS
    # ==================================

    # --- NEW: Function 1 - Instant UI Update ---
    def on_user_submit(user_message, chat_history):
        chat_history = chat_history or []
        # Append the user's message and a "Thinking..." placeholder immediately
        chat_history.append({"role": "user", "content": user_message})
        chat_history.append({"role": "assistant", "content": "🧠 Thinking..."})
        # This function returns instantly, guaranteeing the UI updates
        return gr.update(value=""), chat_history

    # --- NEW: Function 2 - Backend Processing ---
    # def get_bot_response(chat_history, selected_user):
    #     user_message = chat_history[-2]["content"] # Get the user's latest message

    #     # Initial check for selected user
    #     if not selected_user:
    #         chat_history[-1]["content"] = "⚠️ **Warning:** No user profile selected. Please create or select a profile first."
    #         yield chat_history, "No user selected."
    #         return

    #     user_profile_path = os.path.join(profiler_agent.profiles_dir, "_".join(selected_user.lower().split()) + ".txt")
        
    #     # Call the backend generator
    #     response_generator = process_with_rag(user_message, user_profile_path)
        
    #     # Loop through streaming updates
    #     for update in response_generator:
    #         status = update.get("status")
    #         content = update.get("content")
            
    #         think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
    #         thought_process = think_match.group(1).strip() if think_match else "No thought process was found."
    #         clean_content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
            
    #         if status == "llm_complete":
    #             chat_history[-1]["content"] = clean_content
    #             yield chat_history, thought_process
    #         elif status == "benchmark_running":
    #             chat_history.append({"role": "assistant", "content": clean_content})
    #             yield chat_history, thought_process
    #         elif status == "benchmark_complete":
    #             chat_history[-1]["content"] = clean_content
    #             yield chat_history, thought_process

    # REPLACE THE get_bot_response FUNCTION IN CELL 10

    def get_bot_response(chat_history, selected_user):
        user_message = chat_history[-2]["content"]

        if not selected_user:
            chat_history[-1]["content"] = "⚠️ **Warning:** No user profile selected."
            yield chat_history, "No user selected."
            return

        user_profile_path = os.path.join(profiler_agent.profiles_dir, "_".join(selected_user.lower().split()) + ".txt")
        response_generator = process_with_rag(user_message, user_profile_path)
        
        # This dictionary will hold the results as they stream in
        benchmark_data = {"cpu_time": None, "gpu_time": None}
        benchmark_message_index = -1

        for update in response_generator:
            status = update.get("status")
            
            if status == "llm_complete":
                content = update.get("content")
                think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
                thought_process = think_match.group(1).strip() if think_match else "No thought process found."
                clean_content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
                chat_history[-1]["content"] = clean_content
                yield chat_history, thought_process
            
            elif status == "benchmark_running":
                # Append a new message bubble for the benchmark status
                chat_history.append({"role": "assistant", "content": update.get("content")})
                benchmark_message_index = len(chat_history) - 1 # Remember the position of this message
                yield chat_history, "Benchmark jobs submitted..."

            elif status == "complete":
                # A job (CPU or GPU) has finished!
                job_type = update.get("job_type")
                time_val = update.get("time")
                if job_type == "gpu": benchmark_data["gpu_time"] = time_val
                elif job_type == "cpu": benchmark_data["cpu_time"] = time_val
                
                # --- Re-create the results table with whatever data we have so far ---
                gpu_time_str = f"{benchmark_data['gpu_time']:.4f}s" if benchmark_data["gpu_time"] is not None else "⏳ In Progress..."
                cpu_time_str = f"{benchmark_data['cpu_time']:.4f}s" if benchmark_data["cpu_time"] is not None else "⏳ In Progress..."

                benchmark_md = "\n\n---\n### 📊 Live Benchmark Results (from ASU's Sol Supercomputer)\n"
                benchmark_md += "| Metric | Result |\n|---|---|\n"
                benchmark_md += f"| CPU Time | {cpu_time_str} |\n"
                benchmark_md += f"| GPU Time | {gpu_time_str} |\n"
                if benchmark_data["cpu_time"] and benchmark_data["gpu_time"]:
                    speedup = benchmark_data["cpu_time"] / benchmark_data["gpu_time"]
                    benchmark_md += f"| **Speedup** | **{speedup:.2f}x faster on GPU!** |\\n"

                # Update the benchmark status message with the new table
                if benchmark_message_index != -1:
                    chat_history[benchmark_message_index]["content"] = benchmark_md
                yield chat_history, "Benchmark update received..."

            elif status == "error":
                 chat_history.append({"role": "assistant", "content": f"⚠️ Benchmark Error: {update.get('message')}"})
                 yield chat_history, "Benchmark error."

    # --- User Profile Logic ---
    def start_profiling_flow(current_user):
        """Called when 'New / Update User' is clicked."""
        questions = profiler_agent._generate_questions_with_llm()
        
        updates = [gr.update(visible=True)]
        updates.append(gr.update(value=current_user if current_user else "", visible=True))
        
        for i in range(5):
            if i < len(questions):
                updates.append(gr.update(label=questions[i], value="", visible=True))
            else:
                updates.append(gr.update(visible=False))
        
        return *updates, questions

    # --- FIX 1: Add 'questions' to the function signature to receive it from the state component. ---
    def process_profile_submission(user_name, state, questions, *answers):
        """Called when 'Submit Profile' is clicked."""
        
        if not user_name:
            gr.Warning("User name cannot be empty!")
            return state, gr.update(choices=state["all_users"], value=state["current_user"]), gr.update(visible=True)

        answers_dict = {q: a for q, a in zip(questions, answers) if q and a}
        profiler_agent.generate_and_save_report(user_name, answers_dict)
        
        gr.Info(f"Profile for {user_name} has been saved!")

        updated_users = profiler_agent.get_user_list()
        state["all_users"] = updated_users
        state["current_user"] = user_name
        
        return state, gr.update(choices=updated_users, value=user_name), gr.update(visible=False)

    def cancel_profiling():
        return gr.update(visible=False)

    def update_current_user(selected_user, state):
        state["current_user"] = selected_user
        gr.Info(f"Switched to user: {selected_user}")
        return state

    # ==================================
    # WIRING UP THE UI EVENTS
    # ==================================
    
    question_state = gr.State([])

    submit_btn.click(
        on_user_submit,
        [msg_textbox, chatbot],
        [msg_textbox, chatbot]
    ).then(
        get_bot_response,
        [chatbot, user_dropdown],
        [chatbot, cot_output],
        show_progress="hidden"
    )

    msg_textbox.submit(
        on_user_submit,
        [msg_textbox, chatbot],
        [msg_textbox, chatbot]
    ).then(
        get_bot_response,
        [chatbot, user_dropdown],
        [chatbot, cot_output],
        show_progress="hidden"
    )

    new_user_btn.click(
        start_profiling_flow,
        inputs=[user_dropdown],
        outputs=[profiler_ui_group, profiler_name_input] + q_inputs + [question_state]
    )
    
    submit_profile_btn.click(
        process_profile_submission,
        # --- FIX 2: Pass the 'question_state' component as an input. ---
        inputs=[profiler_name_input, user_state, question_state, *q_inputs],
        outputs=[user_state, user_dropdown, profiler_ui_group]
    )

    cancel_profile_btn.click(cancel_profiling, outputs=[profiler_ui_group])

    user_dropdown.change(
        update_current_user,
        inputs=[user_dropdown, user_state],
        outputs=[user_state]
    )

# --- Launch the Application ---
demo.queue().launch(share=True, debug=True)

✅ [Agent 1] Successfully connected to Ollama LLM.
* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://a36a3a1b0f804b3b06.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)



--- Running FINAL Integrated RAG Pipeline ---
--> Successfully loaded 575 available libraries from file.
-> [Tier 1] Trying Semantic Search on RSS feeds...
-> [Tier 1] Found 5 semantically relevant URLs from RSS.
-> [Hybrid Retriever] Found 5 final URLs to use for context.
-> Found documents. Loading and processing context...

--- Loading the user data: user_profiles/ashwith.txt
