In [1]:
# Install dependencies
!pip install -q langchain-groq langchain-huggingface langchain-community faiss-cpu sentence-transformers python-dotenv pylint

### Imports and setting API key 

In [None]:
# Import libraries
import os
import json
import tempfile
import subprocess
from pathlib import Path
from datetime import datetime

from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Configure the Groq API key without clobbering a real key that is already
# exported in the environment; the placeholder below is only a fallback and
# must be replaced (or GROQ_API_KEY exported) before any model call is made.
os.environ.setdefault("GROQ_API_KEY", "YOUR_GROQ_API_KEY")
# Groq model identifier used by this notebook.
MODEL = "openai/gpt-oss-120b"

### Define Tools (Read, Lint, Run)

In [4]:
from langchain.tools import tool
from pathlib import Path
import tempfile
import subprocess
import sys

@tool
def read_code_file(filename: str) -> str:
    """Read a code file and return its text.

    Args:
        filename: Path of the file to read (decoded as UTF-8).

    Returns:
        The file contents, or a message starting with "Error:" when the file
        cannot be read.
    """
    try:
        return Path(filename).read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        # Report the failure as text, like the other tools do, so the agent
        # can react to it instead of the whole run aborting.
        return f"Error: could not read {filename}: {exc}"

@tool
def analyze_with_linter(code: str, language: str = "python") -> str:
    """Lint Python code using pylint.

    Args:
        code: Source code to lint.
        language: Language of the code; only "python" is supported.

    Returns:
        Pylint's report, "No lint issues." when pylint reports nothing, or a
        message starting with "Linter error:" when pylint could not be run.
    """
    if language.strip().lower() != "python":
        return "Linter only supports Python."
    # pylint works on files, so the snippet goes into a temporary .py file.
    # UTF-8 is forced because that is how Python decodes source files.
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8')
    path = tmp.name
    try:
        # Close the file before pylint opens it; writing inside the try block
        # guarantees the file is removed even if the write fails.
        with tmp:
            tmp.write(code)
        result = subprocess.run([sys.executable, '-m', 'pylint', '--reports=no', path],
                                capture_output=True, text=True, timeout=10)
        report = result.stdout.strip()
        if report:
            return report
        if result.returncode != 0:
            # Nothing on stdout plus a failing exit status means pylint itself
            # did not run (e.g. it is not installed), not that the code is clean.
            detail = result.stderr.strip() or f"pylint exited with status {result.returncode}"
            return f"Linter error: {detail}"
        return "No lint issues."
    except Exception as e:
        return f"Linter error: {e}"
    finally:
        Path(path).unlink(missing_ok=True)

@tool
def execute_code_safely(code: str, language: str = "python") -> str:
    """Run Python code in a separate process with a 5-second timeout.

    Args:
        code: Source code to execute.
        language: Language of the code; only "python" is supported.

    Returns:
        The program's standard output when it exits with status 0, otherwise
        a message starting with "Error:" (stderr text, or a timeout notice).
    """
    # NOTE(security): despite the name, nothing here sandboxes the code. It runs
    # with the same interpreter and permissions as the notebook; only the run
    # time is limited. Do not feed it untrusted code.
    if language.strip().lower() != "python":
        return "Execution only for Python."
    # UTF-8 is forced because that is how Python decodes source files.
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8')
    path = tmp.name
    try:
        # Close the file before the child process opens it; writing inside the
        # try block guarantees the file is removed even if the write fails.
        with tmp:
            tmp.write(code)
        result = subprocess.run([sys.executable, path], capture_output=True, text=True, timeout=5)
        if result.returncode == 0:
            return result.stdout.strip()
        return f"Error: {result.stderr.strip()}"
    except subprocess.TimeoutExpired:
        return "Error: Code timed out."
    finally:
        Path(path).unlink(missing_ok=True)

### Vector Store Manager (Local FAISS)

This creates a local folder (`code_index/` by default) and stores the embeddings there.

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from pathlib import Path

class VectorStoreManager:
    """Persistent FAISS store of previously analysed code ("experiences").

    The index lives in the ``index_path`` folder and is re-saved after every
    addition. Texts are embedded with ``sentence-transformers/all-MiniLM-L6-v2``.
    """

    def __init__(self, index_path="code_index"):
        """Load the index stored at ``index_path``, creating it if it is missing."""
        self.index_path = index_path
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        self.vectorstore = self._load_or_create()

    def _load_or_create(self):
        """Return the saved index, or create and save a new one."""
        if Path(self.index_path).exists():
            # NOTE(security): allow_dangerous_deserialization=True opts in to
            # deserializing whatever is stored in the folder. Only point
            # index_path at a folder this notebook wrote itself.
            return FAISS.load_local(
                self.index_path,
                self.embeddings,
                allow_dangerous_deserialization=True
            )
        # The new index is seeded with a single blank text; retrieve() filters
        # that placeholder out of its results.
        empty = FAISS.from_texts([""], self.embeddings)
        empty.save_local(self.index_path)
        return empty

    def add_experience(self, text):
        """Add ``text`` to the index and save the index to disk.

        Blank text is ignored so it cannot pollute later retrievals.
        """
        if not text or not text.strip():
            return
        self.vectorstore.add_texts([text])
        self.vectorstore.save_local(self.index_path)

    def retrieve(self, query, k=2):
        """Return up to ``k`` stored texts most similar to ``query``.

        The texts are joined with blank lines; an empty string is returned
        when nothing useful is stored.
        """
        # Ask for one extra hit because the blank seed text may occupy a slot.
        hits = self.vectorstore.similarity_search(query, k=k + 1)
        texts = [hit.page_content for hit in hits if hit.page_content.strip()]
        return "\n\n".join(texts[:k])

### Agent + Chains

In [None]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain.agents import create_tool_calling_agent, AgentExecutor

# Create the tool-calling agent
def create_code_agent():
    """Build the agent that analyses code with the notebook's three tools.

    Returns:
        An AgentExecutor (verbose) wrapping a tool-calling agent that uses the
        Groq chat model at temperature 0 and the read / lint / execute tools.
        It is invoked with a dict containing the key "input".
    """
    llm = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
    tools = [read_code_file, analyze_with_linter, execute_code_safely]
    prompt = ChatPromptTemplate.from_messages([
        # Every entry must be a (role, template) tuple. Without the comma the
        # two adjacent string literals merge into one plain string and the
        # instruction is no longer sent as the system message.
        ("system", "You are a helpful code assistant. You MUST use the provided tools to analyze code. Do not answer from memory."),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ])
    agent = create_tool_calling_agent(llm, tools, prompt)
    return AgentExecutor(agent=agent, tools=tools, verbose=True)

# Chain that reshapes the agent's free-text answer into JSON
def create_json_formatter():
    """Return a chain: prompt -> Groq chat model -> JSON parser.

    The chain is invoked with a dict containing the key "analysis" and asks
    the model for the keys summary, steps, issues_found and refactored_code.
    """
    template = (
        "Convert to JSON with keys: summary, steps, issues_found, refactored_code.\n"
        "Text: {analysis}\n"
        "JSON:"
    )
    formatting_prompt = ChatPromptTemplate.from_template(template)
    chat_model = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
    return formatting_prompt | chat_model | JsonOutputParser()

# Chain that answers a question from retrieved context
def create_qa_chain():
    """Return a chain: prompt -> Groq chat model -> plain-string parser.

    The chain is invoked with a dict containing the keys "context" and
    "question".
    """
    template = (
        "Answer based on context.\n"
        "Context:\n{context}\n"
        "Question: {question}\n"
        "Answer:"
    )
    qa_prompt = ChatPromptTemplate.from_template(template)
    chat_model = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
    return qa_prompt | chat_model | StrOutputParser()

### Create Sample Code File

In [None]:
from pathlib import Path

# Create the sample directory and a deliberately buggy file for the assistant to analyse
Path("sample_code").mkdir(exist_ok=True)
buggy_code = '''
def find_max(numbers):
    max_val = 0
    for num in numbers:
        if num > max_val:
            max_val = num
    return max_val

print(find_max([-5, -2, -10]))
'''
# Write as UTF-8 explicitly: the notebook reads this file back as UTF-8,
# so the writer must not depend on the platform's default encoding.
Path("sample_code/buggy.py").write_text(buggy_code, encoding='utf-8')
print("Created sample_code/buggy.py")

Created sample_code/buggy.py


###  Main Code Assistant Function

In [None]:
def code_assistant(file_path, task="explain", max_chars=1000):
    """Run the agent on a code file and return a structured result.

    Args:
        file_path: Path of the code file to analyse (read as UTF-8).
        task: Verb placed at the start of the agent prompt, e.g. "explain",
            "debug" or "refactor".
        max_chars: Number of leading characters of the file sent to the agent,
            to stay within token limits.

    Returns:
        The dict produced by the JSON formatter chain, or a fallback dict with
        the keys summary, steps, issues_found and refactored_code when the
        agent output could not be turned into a JSON object.

    Side effects:
        Adds a task/code/summary record to the vector store and overwrites
        "output.json" in the working directory with the result.
    """
    code = Path(file_path).read_text(encoding='utf-8')
    snippet = code[:max_chars]

    # Run agent
    agent = create_code_agent()
    agent_response = agent.invoke({
        "input": f"{task.capitalize()} this code:\n\n{snippet}"
    })

    # Result used when the agent's answer cannot be formatted
    fallback = {
        "summary": "Agent completed analysis",
        "steps": ["Used tools"],
        "issues_found": ["Could not parse details"],
        "refactored_code": snippet
    }

    # Format to JSON
    formatter = create_json_formatter()
    try:
        result = formatter.invoke({"analysis": agent_response["output"]})
    except Exception as exc:
        # Best effort: keep the notebook running, but say why the fallback is
        # used (and let KeyboardInterrupt / SystemExit propagate).
        print(f"Could not format agent output as JSON ({exc}); using fallback result.")
        result = fallback
    else:
        # The parser may hand back valid JSON that is not an object
        if not isinstance(result, dict):
            result = fallback

    # Save to vector DB; the model may omit the "summary" key
    summary = result.get("summary", "")
    vsm = VectorStoreManager()
    vsm.add_experience(f"Task: {task}\nCode:\n{snippet}\nSummary:\n{summary}")

    # Save output
    with open("output.json", "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)

    return result

### Ask Question (RetrievalQA)

In [None]:
def ask_question(query, file_path):
    """Answer a question about a code file using retrieved context.

    Args:
        query: The question to ask.
        file_path: Path of the code file the question is about (read as UTF-8).

    Returns:
        The model's answer as a string. The question and answer are also
        printed.
    """
    code = Path(file_path).read_text(encoding='utf-8')
    vsm = VectorStoreManager()

    # The index is saved to disk and reused between calls, so only add the file
    # when its exact text is not already the closest stored entry. Otherwise
    # every call would append another copy and duplicates would crowd the context.
    document = f"Code:\n{code}"
    if vsm.retrieve(document, k=1) != document:
        vsm.add_experience(document)

    # Retrieve relevant context
    context = vsm.retrieve(query)

    # Ask LLM
    qa_chain = create_qa_chain()
    answer = qa_chain.invoke({"context": context, "question": query})

    print(f" {query}\n💡 {answer}")
    return answer

### Run Debug Task


In [15]:
# Debug run: the agent analyses the sample file, then the result is shown as JSON
result = code_assistant("sample_code/buggy.py", task="debug")
print("\n DEBUG RESULT:", json.dumps(result, indent=2), sep="\n")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `execute_code_safely` with `{'code': 'def find_max(numbers):\n    max_val = 0\n    for num in numbers:\n        if num > max_val:\n            max_val = num\n    return max_val\n\nprint(find_max([-5, -2, -10]))'}`


[0m[38;5;200m[1;3m0[0m[32;1m[1;3m**Problem**

The function `find_max` is intended to return the largest number in a list.  
It initializes `max_val` to `0`:

```python
max_val = 0
```

When the list contains only negative numbers (e.g., `[-5, -2, -10]`), every element is **less** than `0`, so the `if` condition never becomes true and the function returns the initial value `0`. This is incorrect – the maximum of that list should be `-2`.

**Fix**

1. **Initialize `max_val` with the first element of the list** (or raise an error for an empty list).  
2. Optionally, handle the empty‑list case explicitly.

**Corrected code**

```python
def find_max(numbers):
    """
    Return the maximum value in a n

### Explain Task


In [16]:
# Explain run: same sample file, with the default-style "explain" task
result2 = code_assistant("sample_code/buggy.py", task="explain")
print("\n EXPLAIN RESULT:", json.dumps(result2, indent=2), sep="\n")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m**What the code does (intended behavior)**  

```python
def find_max(numbers):
    max_val = 0                # 1️⃣ start with a “current maximum”
    for num in numbers:        # 2️⃣ look at each element in the list
        if num > max_val:      # 3️⃣ if the element is larger than the current maximum …
            max_val = num      # … make it the new maximum
    return max_val             # 4️⃣ after the loop, return the biggest value we found

print(find_max([-5, -2, -10]))
```

1. **Initialize a variable** `max_val` to `0`.  
2. **Iterate** over every number in the supplied iterable `numbers`.  
3. **Compare** each `num` with the current `max_val`.  
4. If `num` is larger, **update** `max_val` to that `num`.  
5. After the loop finishes, **return** the largest value that was seen.  

The `print` statement calls the function with the list `[-5, -2, -10]` and prints the result.

---

### Why the output is surprising

Runn

### Refactor Task

In [17]:
# Refactor run: ask the agent for a cleaned-up version of the sample file
result3 = code_assistant("sample_code/buggy.py", task="refactor")
print("\n REFACTOR RESULT:", json.dumps(result3, indent=2), sep="\n")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mHere’s a clean, safe, and Pythonic refactor of the original function. It:

* Handles empty input gracefully (raises a clear `ValueError`).
* Works correctly for lists that contain negative numbers by initializing the maximum with the first element instead of `0`.
* Uses type hints and a docstring for better readability and IDE support.
* Leverages Python’s built‑in `max` for clarity, but also shows an explicit loop version if you prefer to avoid the built‑in.

```python
from typing import Iterable, List, Union

Number = Union[int, float]


def find_max(numbers: Iterable[Number]) -> Number:
    """
    Return the maximum value from an iterable of numbers.

    Parameters
    ----------
    numbers : Iterable[Number]
        An iterable containing numeric values (int or float).

    Returns
    -------
    Number
        The largest number in the iterable.

    Raises
    ------
    ValueError
        If the iterable is empty.


###  RetrievalQA: Ask a Question

In [18]:
# ask_question() prints the answer and also returns it, so as the last
# expression of the cell the returned string is displayed a second time.
ask_question(
    query="Why does find_max return 0 for negative numbers?",
    file_path="sample_code/buggy.py",
)

❓ Why does find_max return 0 for negative numbers?
💡 `find_max` returns 0 for a list that contains only negative numbers because of the way the variable that holds the “current maximum” is initialized.

```python
def find_max(numbers):
    max_val = 0          # <-- start value
    for num in numbers:
        if num > max_val:
            max_val = num
    return max_val
```

1. **Initial value is 0** – Before the loop begins, `max_val` is set to `0`.  
2. **Comparison logic** – Inside the loop the code only updates `max_val` when `num > max_val`.  
3. **All numbers are negative** – Any negative `num` (e.g., –5, –2, –10) is **not** greater than `0`. Therefore the condition `num > max_val` is never true, the body of the `if` never executes, and `max_val` stays at its initial value of `0`.  

So the function ends up returning the initial placeholder (`0`) instead of the true maximum of the list.

### What’s happening with an empty list?
If `numbers` is empty, the loop never runs either, 

'`find_max` returns\u202f0 for a list that contains only negative numbers because of the way the variable that holds the “current maximum” is initialized.\n\n```python\ndef find_max(numbers):\n    max_val = 0          # <-- start value\n    for num in numbers:\n        if num > max_val:\n            max_val = num\n    return max_val\n```\n\n1. **Initial value is 0** – Before the loop begins, `max_val` is set to `0`.  \n2. **Comparison logic** – Inside the loop the code only updates `max_val` when `num > max_val`.  \n3. **All numbers are negative** – Any negative `num` (e.g., –5, –2, –10) is **not** greater than `0`. Therefore the condition `num > max_val` is never true, the body of the `if` never executes, and `max_val` stays at its initial value of `0`.  \n\nSo the function ends up returning the initial placeholder (`0`) instead of the true maximum of the list.\n\n### What’s happening with an empty list?\nIf `numbers` is empty, the loop never runs either, and the function still return