In [10]:
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator

# Ollama model shared by every node in the graph
OLLAMA_MODEL = "gpt-oss:20b"
# Lower temperature for more deterministic, focused outputs
OLLAMA_TEMPERATURE = 0.1

llm = OllamaLLM(temperature=OLLAMA_TEMPERATURE, model=OLLAMA_MODEL)

# ============================================================================
# STATE DEFINITION
# ============================================================================
class ProjectAnalysisState(TypedDict):
    """State passed between nodes in the graph

    ``analyze_project`` seeds every field; each node then fills in the field
    it is responsible for.
    """
    # Project files/code to analyze, supplied by the caller; read by start_node
    project_code: str
    # Output of start_node; the input handed to summary_node and security_node
    structured_analysis: str
    # Output of summary_node
    module_summaries: str
    # Output of security_node
    security_report: str
    # Execution log. operator.add is the reducer, so the lists returned by
    # individual nodes are concatenated instead of overwriting each other.
    messages: Annotated[list, operator.add]

# ============================================================================
# PROMPT TEMPLATES WITH STRICT PROMPTING
# ============================================================================

# Prompt text for the first graph node. The only placeholder is
# {project_code}; everything else is sent to the model verbatim.
_START_NODE_TEMPLATE = """You are a senior software engineer with 15+ years of experience in code analysis and architecture review.

YOUR TASK: Analyze the provided project files and output a STRICTLY STRUCTURED summary.

CRITICAL REQUIREMENTS:
1. Follow the EXACT output format specified below
2. Do NOT add conversational text, explanations, or preambles
3. Do NOT skip any section
4. Be precise and factual - no assumptions or speculation
5. If information is unavailable, explicitly state "NOT FOUND"

INPUT:
{project_code}

OUTPUT FORMAT (Use this exact structure):

## FILE INVENTORY
[List each file with full path, type, and LOC]
- File: <path>
  Type: <language/framework>
  Lines of Code: <number>

## FUNCTIONS AND CLASSES
[For each file, list all functions/classes with signatures]
- File: <path>
  - Function: <name>(<parameters>) -> <return_type>
    Purpose: <one-line description>
  - Class: <name>
    Methods: [<method1>, <method2>, ...]
    Purpose: <one-line description>

## DEPENDENCIES
[List all external and internal dependencies]
External Libraries:
- <library_name> (version: <version>) - Used in: [<file1>, <file2>]

Internal Modules:
- <module_name> - Imported by: [<file1>, <file2>]

## KEY MODULES
[Identify core modules and their responsibilities]
- Module: <name>
  Responsibility: <specific role>
  Entry Points: [<function/class names>]
  
## MODULE RELATIONSHIPS
[Document how modules interact]
- <Module A> --[<relationship type>]--> <Module B>
  Details: <specific interaction description>

VALIDATION CHECKLIST:
☐ All files documented
☐ All functions/classes listed
☐ All dependencies captured
☐ Relationships mapped
☐ No conversational text included

BEGIN ANALYSIS NOW."""

START_NODE_PROMPT = PromptTemplate(
    template=_START_NODE_TEMPLATE,
    input_variables=["project_code"],
)

# Prompt text for the module-summary node. The only placeholder is
# {structured_analysis}; everything else is sent to the model verbatim.
_SUMMARY_NODE_TEMPLATE = """You are an expert technical documentation specialist with expertise in creating clear, actionable module summaries.

YOUR TASK: Generate CONCISE, STRUCTURED summaries for each module/page in the project.

STRICT RULES:
1. Each summary must be 3-5 sentences maximum
2. Follow the EXACT format specified below
3. Focus on WHAT the module does, not HOW it's implemented
4. No redundant information from input
5. Use active voice and present tense

INPUT (Structured Project Data):
{structured_analysis}

OUTPUT FORMAT (Use this exact structure for EACH module):

### MODULE: <module_name>

**Functionality**: <1-2 sentence description of primary purpose>

**Inputs**: 
- <input1>: <type> - <description>
- <input2>: <type> - <description>
[If no inputs: "None"]

**Outputs**:
- <output1>: <type> - <description>
- <output2>: <type> - <description>
[If no outputs: "None"]

**Dependencies**: <comma-separated list of direct dependencies>
[If no dependencies: "None"]

**Integration Points**: <how this module connects to others>

---

QUALITY CRITERIA:
☐ Each summary is 3-5 sentences
☐ All modules covered
☐ Inputs/outputs clearly specified
☐ Dependencies listed
☐ No implementation details
☐ No conversational filler

BEGIN SUMMARY GENERATION NOW."""

SUMMARY_NODE_PROMPT = PromptTemplate(
    template=_SUMMARY_NODE_TEMPLATE,
    input_variables=["structured_analysis"],
)

# Prompt text for the security/code-quality node. The only placeholder is
# {structured_analysis}; everything else is sent to the model verbatim.
_SECURITY_NODE_TEMPLATE = """You are a senior security architect and code quality expert with OWASP Top 10 and SANS Top 25 expertise.

YOUR TASK: Perform a COMPREHENSIVE security and code quality analysis.

MANDATORY ANALYSIS AREAS:
1. OWASP Top 10 vulnerabilities (2021)
2. Code smells and anti-patterns
3. Dependency vulnerabilities
4. Authentication/Authorization flaws
5. Input validation issues
6. Data exposure risks

STRICT RULES:
1. Follow the EXACT output format below
2. Assign severity: CRITICAL | HIGH | MEDIUM | LOW
3. Each finding must include: Location, Issue, Impact, Mitigation
4. Rank findings by severity (CRITICAL first)
5. Provide ACTIONABLE mitigation steps (no generic advice)
6. If no issues found in a category, state "NO ISSUES DETECTED"

INPUT (Structured Project Data):
{structured_analysis}

OUTPUT FORMAT:

## SECURITY VULNERABILITIES

### [SEVERITY] <Vulnerability Type>
**Location**: <file>:<line_number> or <module>:<function>
**Finding**: <precise description of the vulnerability>
**Impact**: <specific security impact and potential exploit scenario>
**CWE ID**: <CWE-XXX> (if applicable)
**Mitigation Steps**:
1. <specific action with code example if applicable>
2. <specific action>
3. <specific action>

---

## CODE SMELLS

### [SEVERITY] <Code Smell Type>
**Location**: <file>:<line_number> or <module>:<function>
**Issue**: <precise description of the code smell>
**Impact**: <effect on maintainability, performance, or reliability>
**Refactoring Suggestion**:
1. <specific refactoring step>
2. <specific refactoring step>

---

## DEPENDENCY RISKS

### <Library Name> (version <X.Y.Z>)
**Severity**: [CRITICAL | HIGH | MEDIUM | LOW]
**Known CVEs**: <CVE-XXXX-XXXXX> or "None known"
**Risk**: <specific risk description>
**Remediation**: <upgrade command or alternative library>

---

## SUMMARY METRICS
- Total Issues Found: <number>
- Critical: <number>
- High: <number>
- Medium: <number>
- Low: <number>

PRIORITY REMEDIATION ORDER:
1. <issue reference> - <reason>
2. <issue reference> - <reason>
3. <issue reference> - <reason>

VALIDATION CHECKLIST:
☐ All severity levels assigned
☐ All locations specified
☐ All mitigations are actionable
☐ Issues ranked by severity
☐ No generic security advice
☐ Code examples provided where applicable

BEGIN SECURITY ANALYSIS NOW."""

SECURITY_NODE_PROMPT = PromptTemplate(
    template=_SECURITY_NODE_TEMPLATE,
    input_variables=["structured_analysis"],
)

# ============================================================================
# NODE FUNCTIONS
# ============================================================================

def start_node(state: ProjectAnalysisState) -> ProjectAnalysisState:
    """
    Analyzes project code and produces structured representation.

    Args:
        state: Current graph state; only ``project_code`` is read.

    Returns:
        A partial state update: ``structured_analysis`` holds the LLM output
        and ``messages`` holds a single log entry for this node.
    """
    chain = START_NODE_PROMPT | llm
    result = chain.invoke({"project_code": state["project_code"]})

    # Return only the keys this node owns; LangGraph merges the update into
    # the existing state, so re-emitting the untouched fields is unnecessary.
    return {
        "structured_analysis": result,
        "messages": ["✓ Start Node: Structured analysis completed"],
    }

def summary_node(state: ProjectAnalysisState) -> ProjectAnalysisState:
    """
    Generates concise summaries of each module.

    Args:
        state: Current graph state; only ``structured_analysis`` is read.

    Returns:
        A partial state update: ``module_summaries`` holds the LLM output
        and ``messages`` holds a single log entry for this node.
    """
    chain = SUMMARY_NODE_PROMPT | llm
    result = chain.invoke({"structured_analysis": state["structured_analysis"]})

    # This node is scheduled alongside security_node (both are direct
    # successors of start_node). Echoing the whole state back with **state
    # would make both nodes write the same reducer-less string fields in one
    # step, which LangGraph rejects. Only ``messages`` may be written by both,
    # because it is declared with an operator.add reducer.
    return {
        "module_summaries": result,
        "messages": ["✓ Summary Node: Module summaries generated"],
    }

def security_node(state: ProjectAnalysisState) -> ProjectAnalysisState:
    """
    Analyzes code for security vulnerabilities and code smells.

    Args:
        state: Current graph state; only ``structured_analysis`` is read.

    Returns:
        A partial state update: ``security_report`` holds the LLM output
        and ``messages`` holds a single log entry for this node.
    """
    # NOTE(review): the prompt is given the structured analysis only, not the
    # raw ``project_code``, so findings can only be as detailed as what
    # start_node preserved -- confirm this is intended.
    chain = SECURITY_NODE_PROMPT | llm
    result = chain.invoke({"structured_analysis": state["structured_analysis"]})

    # Partial update for the same reason as summary_node: the two nodes run
    # in the same step and must not both write the plain string fields.
    return {
        "security_report": result,
        "messages": ["✓ Security Node: Security analysis completed"],
    }

# ============================================================================
# GRAPH CONSTRUCTION
# ============================================================================

def create_analysis_graph():
    """
    Builds the three-node LangGraph workflow and returns it compiled.

    Topology: ``start_node`` is the entry point and fans out to
    ``summary_node`` and ``security_node``; each of those leads to END.
    """
    builder = StateGraph(ProjectAnalysisState)

    # Register the nodes (dict order == registration order)
    node_table = {
        "start_node": start_node,
        "summary_node": summary_node,
        "security_node": security_node,
    }
    for node_name, node_fn in node_table.items():
        builder.add_node(node_name, node_fn)

    builder.set_entry_point("start_node")

    # start_node feeds both branches ...
    branches = ("summary_node", "security_node")
    for branch in branches:
        builder.add_edge("start_node", branch)

    # ... and each branch terminates the graph
    for branch in branches:
        builder.add_edge(branch, END)

    return builder.compile()

# ============================================================================
# EXECUTION HELPER
# ============================================================================

def analyze_project(project_code: str) -> dict:
    """
    Run the full analysis graph over ``project_code``.

    Args:
        project_code: String containing the project files/code to analyze

    Returns:
        Dictionary with the keys ``structure``, ``summaries``,
        ``security_report`` and ``messages``, taken from the final graph state
    """
    pipeline = create_analysis_graph()

    # Seed every state field so each node finds the keys it reads
    final_state = pipeline.invoke(
        {
            "project_code": project_code,
            **dict.fromkeys(
                ("structured_analysis", "module_summaries", "security_report"), ""
            ),
            "messages": [],
        }
    )

    # Public result key -> internal state key
    exported = {
        "structure": "structured_analysis",
        "summaries": "module_summaries",
        "security_report": "security_report",
        "messages": "messages",
    }
    return {public: final_state[internal] for public, internal in exported.items()}

# ============================================================================
# EXAMPLE USAGE
# ============================================================================


In [None]:
# Deliberately vulnerable multi-file sample handed to the pipeline as one string
example_project = """
# file: main.py
import sqlite3
import os

def get_user(user_id):
conn = sqlite3.connect('users.db')
cursor = conn.cursor()
# SQL Injection vulnerability
query = f"SELECT * FROM users WHERE id = {user_id}"
cursor.execute(query)
return cursor.fetchone()

def authenticate(username, password):
# Weak authentication
if username == "admin" and password == "admin123":
    return True
return False

# file: utils.py
def save_file(filename, content):
# Path traversal vulnerability
with open(filename, 'w') as f:
    f.write(content)

def log_error(error_msg):
# Information disclosure
print(f"Error: {error_msg}")

# file: api.py
from flask import Flask, request
app = Flask(__name__)

@app.route('/search')
def search():
# XSS vulnerability
query = request.args.get('q')
return f"<h1>Results for: {query}</h1>"
"""

SECTION_RULE = "=" * 80

print(SECTION_RULE, "STARTING PROJECT ANALYSIS", SECTION_RULE, sep="\n")

results = analyze_project(example_project)

# Each report section: banner first, then the matching entry from `results`
for heading, result_key in (
    ("STRUCTURED ANALYSIS", "structure"),
    ("MODULE SUMMARIES", "summaries"),
    ("SECURITY REPORT", "security_report"),
):
    print("\n" + SECTION_RULE, heading, SECTION_RULE, sep="\n")
    print(results[result_key])

# Per-node progress messages collected during the run
print("\n" + SECTION_RULE, "EXECUTION LOG", SECTION_RULE, sep="\n")
for msg in results["messages"]:
    print(msg)

STARTING PROJECT ANALYSIS


In [4]:

# Minimal single-file sample (one function that builds its SQL with an f-string)
example_project = """
# file: main.py
import sqlite3

def get_user(user_id):
    conn = sqlite3.connect('users.db')
    cursor = conn.cursor()
    query = f"SELECT * FROM users WHERE id = {user_id}"
    cursor.execute(query)
    return cursor.fetchone()
"""

results = analyze_project(example_project)

SECTION_RULE = "=" * 80

# The first banner has no leading blank line; every later one does
banner_prefix = ""
for heading, result_key in (
    ("STRUCTURED ANALYSIS", "structure"),
    ("MODULE SUMMARIES", "summaries"),
    ("SECURITY REPORT", "security_report"),
):
    print(banner_prefix + SECTION_RULE, heading, SECTION_RULE, sep="\n")
    print(results[result_key])
    banner_prefix = "\n"

NameError: name 'analyze_project' is not defined