<a href="https://colab.research.google.com/github/AshameTheDestroyer/DevScribe/blob/main/DevScribe1_0_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q langchain langchain-openai langchain-community pypdf faiss-cpu sentence-transformers chromadb python-dotenv langchain-experimental langchain-google-genai ragas

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.3/53.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.7/84.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.6/329.6 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m82.5 MB/s[0m eta [36m0:00:0

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Initialization

In [3]:
import os
from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

os.environ["LANGSMITH_API_KEY"] = userdata.get("LANGSMITH_API_KEY")
os.environ["LANGSMITH_TRACING"] = "true"

os.environ["LANGSMITH_PROJECT"] = "DevScribe"

os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")
model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.4)
deterministic_model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

# Defining Company's Knowledge Base

In [4]:
from langchain_core.documents import Document

knowledge_base = [
    Document(
        page_content="""# Style Guide\n\n## Variable Naming\n\nAll variables must be snake_case. For constants, use SCREAMING_SNAKE_CASE. Avoid using camelCase or PascalCase for variable names.\n\n## Docstrings\n\nDocstrings must use Google Format. Every function and method should have a docstring describing its purpose, arguments, and return values. Examples should be included where appropriate.\n\n## Code Formatting\n\nUse a maximum line length of 100 characters. Adhere to PEP 8 guidelines for all Python code. Run an auto-formatter like Black before committing code.""",
        metadata={"source": "style_guide.md"}
    ),
    Document(
        page_content="""# Security Policy\n\n## Logging\n\nNever use `print()` for debugging or logging in production code. Always use `app_logger` for all logging purposes. Configure `app_logger` to output to a centralized logging system.\n\n## API Keys\n\nAPI keys and other sensitive credentials must always be loaded from environment variables (e.g., `.env` files or platform-specific secrets management systems) and never hardcoded in the codebase. Ensure `.env` files are not committed to version control.\n\n## Input Validation\n\nAll user inputs must be thoroughly validated on the server-side to prevent common vulnerabilities like SQL injection, XSS, and command injection.""",
        metadata={"source": "security_policy.md"}
    ),
    Document(
        page_content="""# Legacy Deprecation\n\n## Data Handling\n\nDo not use `pandas` for data manipulation in new projects; use `polars` instead for its performance benefits and modern API. Migrate existing `pandas` code to `polars` where feasible.\n\n## Old Authentication System\n\nThe `LegacyAuth` module is deprecated and will be removed by Q4 2024. All new authentication flows must use `OAuth2Client`. Migrate existing services to `OAuth2Client` as soon as possible.""",
        metadata={"source": "legacy_deprecation.md"}
    )
]

# Reading Company's Knowledge Base

In [5]:
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import MarkdownHeaderTextSplitter

headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]

markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

chunks = []
for document in knowledge_base:
  md_header_splits = markdown_splitter.split_text(document.page_content)
  for i, chunk in enumerate(md_header_splits):
    chunks.append(chunk)
    print(f"--- Chunk {i+1} ---")
    print(f"Content: '{chunk.page_content.strip()}'")
    print(f"Metadata: {chunk.metadata}\n")

vectorstore = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)



--- Chunk 1 ---
Content: 'All variables must be snake_case. For constants, use SCREAMING_SNAKE_CASE. Avoid using camelCase or PascalCase for variable names.'
Metadata: {'Header 1': 'Style Guide', 'Header 2': 'Variable Naming'}

--- Chunk 2 ---
Content: 'Docstrings must use Google Format. Every function and method should have a docstring describing its purpose, arguments, and return values. Examples should be included where appropriate.'
Metadata: {'Header 1': 'Style Guide', 'Header 2': 'Docstrings'}

--- Chunk 3 ---
Content: 'Use a maximum line length of 100 characters. Adhere to PEP 8 guidelines for all Python code. Run an auto-formatter like Black before committing code.'
Metadata: {'Header 1': 'Style Guide', 'Header 2': 'Code Formatting'}

--- Chunk 1 ---
Content: 'Never use `print()` for debugging or logging in production code. Always use `app_logger` for all logging purposes. Configure `app_logger` to output to a centralized logging system.'
Metadata: {'Header 1': 'Security Policy

# Creating Multiquery Retriever

In [6]:
from langchain_core.prompts import PromptTemplate
from langchain_classic.retrievers.multi_query import MultiQueryRetriever

retriever = vectorstore.as_retriever(search_kwargs={"k": 3, "search_type": "mmr"})

mq_prompt = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is
    to generate 3 different versions of search queries based on the provided code snippet.
    The goal is to identify potential flaws, security vulnerabilities, style guide violations,
    or uses of deprecated features within the code, or any other policy violations, according
    to company policies. Provide these alternative queries separated by newlines.
    Original code snippet: {question}""".
)

multiquery_retriever = MultiQueryRetriever.from_llm(
    retriever=retriever, llm=deterministic_model, prompt=mq_prompt
)

# Defining Refactorer

In [7]:
from typing import List
from pydantic import BaseModel, Field

class RAGRefactorSuggestion(BaseModel):
    critique: str = Field(description="Critique of the original code.")
    refactored_code: str = Field(description="The refactored code.")
    changes_made: str = Field(description="Explanation of changes made.")
    policy_citations: List[str] = Field(description="List of policy documents that justify the refactoring.")

print("RAGRefactorSuggestion Pydantic schema defined.")

RAGRefactorSuggestion Pydantic schema defined.


In [8]:
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import JsonOutputParser

refactor_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""You are an expert code refactorer. Your task is to identify potential flaws,
    security vulnerabilities, style guide violations, or uses of deprecated features within the provided code snippet,
    or any other policy violations, based ONLY on the provided context.

    Critique the code, suggest a refactored version, explain the changes made,
    and cite the specific policy documents from the context that justify your refactoring.

    Context: {context}

    Code Snippet: {question}

    Your output MUST be a JSON object conforming to the following schema:
    {{
        "critique": "<your critique here>",
        "refactored_code": "<your refactored code here>",
        "changes_made": "<explanation of changes>",
        "policy_citations": [
            "<policy document citation 1 (e.g., style_guide.md, Security Policy > API Keys, Legacy Deprecation > Data Handling)>",
            "<policy document citation 2>"
        ]
    }}
    Ensure you provide at least one policy citation if a refactoring is suggested. If no refactoring is needed, just provide a positive critique and empty lists for changes and citations. Answer based ONLY on the provided context.
    """,
)

refactor_chain = (
    {
        "context": multiquery_retriever,
        "question": RunnablePassthrough(),
    }
    | refactor_prompt
    | deterministic_model
    | JsonOutputParser(pydantic_object=RAGRefactorSuggestion)
)

# Example Query

In [9]:
question = """def get_user_data(user_id):
    print(f'Fetching data for user: {user_id}')
    # Simulate fetching data from a database
    if user_id == 1:
        return {'id': 1, 'name': 'Alice', 'email': 'alice@example.com'}
    else:
        return None
  refactor this code please"""
commands = refactor_chain.invoke({"question": question})
print(commands)



# Evaluating Faithfulness & Recall

In [10]:
import pandas as pd

ground_truth_answer_list = [
    "The code uses print() for logging instead of app_logger, lacks a docstring, and does not perform input validation. It should use app_logger for all logging, include a Google-format docstring, and validate user_id to prevent vulnerabilities.",
    "import logging\n\napp_logger = logging.getLogger(__name__)\napp_logger.setLevel(logging.INFO)\n\nif not app_logger.handlers:\n    handler = logging.StreamHandler()\n    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')\n    handler.setFormatter(formatter)\n    app_logger.addHandler(handler)\n\ndef get_user_data(user_id):\n    \"\"\"Fetches user data from a simulated database.\n\n    Validates the user_id and logs the fetching attempt before returning\n    simulated user data.\n\n    Args:\n        user_id: The unique identifier of the user to fetch. Must be a positive integer.\n\n    Returns:\n        A dictionary containing user data (id, name, email) if found and valid,\n        otherwise None.\n    \"\"\"\n    if not isinstance(user_id, int) or user_id <= 0:\n        app_logger.warning(f\"Invalid user_id provided: {user_id}. Must be a positive integer.\")\n        return None\n\n    app_logger.info(f'Fetching data for user: {user_id}')\n    if user_id == 1:\n        return {'id': 1, 'name': 'Alice', 'email': 'alice@example.com'}\n    else:\n        return None"
]
ground_truth_answer = "\n---\n".join(ground_truth_answer_list)

retrieved_docs = multiquery_retriever.invoke(question)
contexts = [doc.page_content for doc in retrieved_docs]

generated_output = refactor_chain.invoke({"question": question})
answer_critique = generated_output["critique"]
answer_refactored_code = generated_output["refactored_code"]

generated_answer = f"Critique: {answer_critique}\n\nRefactored Code: {answer_refactored_code}"

dataset_df = pd.DataFrame({
    'question': [question],
    'contexts': [contexts],
    'answer': [generated_answer],
    'ground_truths': [ground_truth_answer]
})

print("Evaluation dataset_df created successfully:")
print(dataset_df.head())

Evaluation dataset_df created successfully:
                                            question  \
0  def get_user_data(user_id):\n    print(f'Fetch...   

                                            contexts  \
0  [All user inputs must be thoroughly validated ...   

                                              answer  \
0  Critique: The provided code snippet has severa...   

                                       ground_truths  
0  The code uses print() for logging instead of a...  


In [11]:
from ragas import evaluate
from datasets import Dataset
from ragas.metrics import faithfulness, context_recall

ragas_dataset = Dataset.from_pandas(dataset_df)

result = evaluate(
    ragas_dataset,
    llm=model,
    embeddings=embeddings,
    metrics=[ faithfulness, context_recall ],
    column_map={ "reference": "ground_truths" },
)

print("Ragas Evaluation Results:")
print(result)

  from ragas.metrics import faithfulness, context_recall
  from ragas.metrics import faithfulness, context_recall


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Ragas Evaluation Results:
{'faithfulness': 0.3889, 'context_recall': 1.0000}


# Building up The Agent

In [12]:
style_guide_retriever = vectorstore.as_retriever(search_kwargs={"filter": {"Header 1": "Style Guide"}})
security_policy_retriever = vectorstore.as_retriever(search_kwargs={"filter": {"Header 1": "Security Policy"}})

print("Specialized retrievers 'style_guide_retriever' and 'security_policy_retriever' created.")

Specialized retrievers 'style_guide_retriever' and 'security_policy_retriever' created.


In [23]:
from langchain_core.tools import Tool

style_guide_tool = Tool(
    name="style_guide_retriever",
    description="Useful for retrieving information about the company's coding style guide.",
    func=style_guide_retriever.invoke,
)

security_policy_tool = Tool(
    name="security_policy_retriever",
    description="Useful for retrieving information about the company's security policies.",
    func=security_policy_retriever.invoke,
)

print("Specialized tools 'style_guide_tool' and 'security_policy_tool' created.")

Specialized tools 'style_guide_tool' and 'security_policy_tool' created.


In [24]:
from langchain.agents import create_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents.structured_output import ToolStrategy

tools = [style_guide_tool, security_policy_tool]
agent = create_agent(
    deterministic_model,
    tools=tools,
    response_format=ToolStrategy(RAGRefactorSuggestion),
    system_prompt="You are a helpful assistant that can answer questions about coding style guides and security policies.",
)

print("Agent and AgentExecutor created with specialized tools.")

Agent and AgentExecutor created with specialized tools.


In [25]:
question = """
def calculateResult(some_value):
    # This function lacks a proper docstring.
    print(f"Calculating result for {some_value}")
    if some_value > 10:
        secret_api_key = "my_hardcoded_key" # This is a hardcoded secret.
        print("Using API key for calculation")
    else:
        print("No API key needed")
    return some_value * 2

refactor this code snippet to adhere to company style guidelines and security policies.
"""

result = agent.invoke({
    "messages": [{"role": "user", "content": question}]
})

print("Agent's Refactoring Suggestion:")
print(result["structured_response"])
print("\nTo view the detailed 'Think' -> 'Act' -> 'Observe' flow and tool usage, please check the LangSmith console. The trace for this execution will be available there.")

Agent's Refactoring Suggestion:

To view the detailed 'Think' -> 'Act' -> 'Observe' flow and tool usage, please check the LangSmith console. The trace for this execution will be available there.


# Test

In [26]:
question = """
# Function to load data
def Load_Data(filename):
print(f"Loading {filename}...")
import pandas as pd
df = pd.read_csv(filename)
return df
refactor this code please
"""

result = agent.invoke({
    "messages": [{"role": "user", "content": question}]
})

print("Agent's Refactoring Suggestion:")
print(result["structured_response"])

Agent's Refactoring Suggestion:
critique="The original code has several issues:\n1.  **Function Naming:** The function `Load_Data` uses `PascalCase` with an underscore, which does not conform to the company's `snake_case` convention for variables (and implicitly functions in Python).\n2.  **Import Statement Placement:** The `import pandas as pd` statement is placed inside the function, which is inefficient as it re-imports the module every time the function is called. Imports should generally be at the top of the file.\n3.  **Logging:** It uses `print()` for logging, which is explicitly prohibited by the company's security policy for production code.\n4.  **Error Handling:** It lacks robust error handling for common file-related issues like `FileNotFoundError`, `EmptyDataError`, or parsing errors.\n5.  **Docstring:** The function lacks a comprehensive docstring describing its purpose, arguments, and return value." refactored_code='import pandas as pd\n\n# Assume \'app_logger\' is confi