In [1]:
# ============== AGENT 1: LLM-POWERED USER PROFILER (V3 - Corrected) =============="

import os
import json
import socket
from typing import List

# As per the deprecation warning, we should import from Pydantic V2.
# This future-proofs our code.
from pydantic import BaseModel, Field
from langchain_core.prompts import PromptTemplate
from langchain_ollama.chat_models import ChatOllama

# --- Pydantic model for structured LLM output ---
class Questionnaire(BaseModel):
    questions: List[str] = Field(description="A list of 4-5 questions for the user.")

class UserProfilerAgent_V3:
    """
    Agent 1 (V3): Guarantees the user's name is collected first before
    using an LLM to dynamically generate the rest of the questionnaire.
    """

    def __init__(self, profiles_dir: str = "user_profiles"):
        self.profiles_dir = profiles_dir
        if not os.path.exists(self.profiles_dir):
            os.makedirs(self.profiles_dir)

        try:
            host_node = socket.gethostname()
            # NOTE: Replace 'jgarc111' with the ASURITE ID of the user running the Ollama server
            self.llm = ChatOllama(model="qwen3:14b", base_url=f"http://jgarc111@{host_node}:11434/")
            self.structured_llm = self.llm.with_structured_output(Questionnaire)
            print("✅ Successfully connected to Ollama LLM.")
        except Exception as e:
            print(f"❌ Error connecting to Ollama: {e}")
            self.llm = None

    def _generate_questions_with_llm(self) -> List[str]:
        """
        Uses an LLM to dynamically generate a user questionnaire.
        **Correction:** The prompt now explicitly tells the LLM to NOT ask for a name.
        """
        print("\n🤖 Generating a personalized questionnaire for you...")
        
        prompt = PromptTemplate(
            template="""
            You are a helpful assistant for an AI Data Science Tutor. Your goal is to create a short questionnaire (4-5 questions) to understand a user's knowledge level.
            
            The questions should gently probe their experience with:
            1. The Python programming language.
            2. Common CPU-based data science libraries (like NumPy, Pandas).
            3. Their awareness of GPU computing and hardware acceleration.
            4. Their familiarity with any NVIDIA-specific GPU libraries (like CuPy or RAPIDS).

            IMPORTANT: Do NOT ask for the user's name, as it has already been collected.
            Return the questions as a JSON list. Be conversational and friendly.
            """,
            input_variables=[],
        )
        
        query_generation_chain = prompt | self.structured_llm
        
        try:
            response_model = query_generation_chain.invoke({})
            return response_model.questions
        except Exception as e:
            print(f"-> LLM failed to generate questions, falling back to default. Error: {e}")
            return [
                "On a scale of 1-5, how comfortable are you with Python?",
                "Which Python data science libraries (like Pandas or NumPy) have you used before?",
                "Have you ever heard of using GPUs to speed up data analysis?",
                "What's the first tool you'd reach for to do a large matrix multiplication in Python?"
            ]

    def _ask_dynamic_questions(self, questions: List[str]) -> dict:
        """
        Asks the dynamically generated questions and records the answers.
        """
        answers = {}
        for i, question in enumerate(questions, start=1):
            answer = input(f"{i+1}. {question} ") # Start numbering from 2
            answers[question] = answer
        return answers

    def _generate_report_with_llm(self, user_name: str, answers: dict) -> str:
        """
        Sends the user's answers to the LLM to generate a profile report.
        """
        print("\n🤖 Analyzing your responses and creating a profile...")
        
        answers_str = "\n".join([f"- {q}: {a}" for q, a in answers.items()])

        prompt = PromptTemplate(
            template="""
            You are an expert AI analyst. A user named {user_name} has answered a questionnaire about their data science skills.
            Your task is to analyze their answers and generate a "TUTORING STRATEGY" report for our AI Tutor.

            **User's Answers:**
            {answers}

            **Your Task:**
            1.  Determine the user's knowledge level: 'Beginner', 'Intermediate', or 'Advanced'.
            2.  Write a concise report following the correct strategy format below. This report will be given to another AI, so the instructions must be clear.

            ---
            **STRATEGY FORMATS (Choose ONE):**

            **If 'Beginner':**
            Start with `Knowledge Level: Beginner` on one line. On the next line, start with the exact phrase `TUTORING STRATEGY: The user is a beginner.` Then, explain that the tutor should use high-level concepts, explain the 'why' of GPU acceleration, and introduce NVIDIA libraries (like CuPy) as a simple, powerful alternative to what they already know.

            **If 'Intermediate':**
            Start with `Knowledge Level: Intermediate` on one line. On the next line, start with the exact phrase `TUTORING STRATEGY: The user is at an intermediate level.` Then, explain that the tutor should provide direct code comparisons (e.g., NumPy vs. CuPy), focus on performance benefits, and show clear benchmarking examples.

            **If 'Advanced':**
            Start with `Knowledge Level: Advanced` on one line. On the next line, start with the exact phrase `TUTORING STRATEGY: The user is advanced.` Then, explain that the tutor can provide nuanced advice, discuss the broader NVIDIA RAPIDS ecosystem, and cover specific benchmarking methodologies on the Sol supercomputer.
            ---

            Now, generate the complete report, including the "Knowledge Level" and the "TUTORING STRATEGY".
            """,
            input_variables=["user_name", "answers"],
        )

        report_generation_chain = prompt | self.llm
        response_message = report_generation_chain.invoke({"user_name": user_name, "answers": answers_str})
        report_content = response_message.content
        
        return report_content

    def _save_report(self, report: str, user_name: str):
        """
        Saves the generated report to a text file.
        """
        filename = "_".join(user_name.lower().split()) + ".txt"
        filepath = os.path.join(self.profiles_dir, filename)
        
        with open(filepath, "w") as f:
            f.write(report)
            
        print(f"\n✅ User profile report saved successfully to: {filepath}")

    def run(self):
        """
        The main method to run the agent's full workflow.
        **Correction:** Asks for name first to ensure correct file naming.
        """
        if not self.llm:
            return

        print("🤖 Hello! I'm your AI Accelerated Data Science Tutor.")
        user_name = input("1. To get started, what is your first and last name? ")

        questions = self._generate_questions_with_llm()
        answers = self._ask_dynamic_questions(questions)
        report_content = self._generate_report_with_llm(user_name, answers)
        
        # Add the final header to the report
        full_report = f"--- User Profile for {user_name} ---\n{report_content}\n--- End of Profile ---"
        
        print("\n--- Generated Profile Report ---")
        print(full_report)
        
        self._save_report(full_report, user_name)

# --- Example Usage ---
if __name__ == '__main__':
    profiler_agent = UserProfilerAgent_V3()
    profiler_agent.run()


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


✅ Successfully connected to Ollama LLM.

🤖 Generating a personalized questionnaire for you...
🤖 Let's get started. Please answer the following:



1. Have you used Python before, and if so, how long have you been programming in it?  2 years
2. Are you familiar with libraries like NumPy or Pandas for data manipulation and analysis?  yes
3. Have you worked with GPU computing or heard about using GPUs to accelerate data processing tasks?  no 
4. Are you aware of any NVIDIA libraries designed for GPU acceleration, such as CuPy or RAPIDS?  no
5. Have you used any libraries or tools for data science that leverage GPU computing, and if so, which ones?  no



🤖 Analyzing your responses and creating a profile...

--- Generated Profile Report ---
--- User Profile for 2 years ---
<think>
Okay, let's start by analyzing the user's answers. They mentioned having 2 years of experience with Python, which is a good start. They are familiar with NumPy and Pandas, so that's intermediate knowledge in data manipulation. However, they haven't worked with GPU computing or NVIDIA libraries like CuPy or RAPIDS. They also haven't used any libraries that leverage GPU computing.

So, their knowledge level is probably intermediate. They have some experience with data science tools but no experience with GPU acceleration. The next step is to check the strategy format for intermediate users. The strategy should involve code comparisons between NumPy and CuPy, focus on performance benefits, and include benchmarking examples. That makes sense because they already know the basics but need to see how GPU can enhance their current workflows. I need to make sure the r