In [1]:
import os
import shutil
import ast
from pathlib import Path
from typing import List
from git import Repo, GitCommandError
from langchain.chat_models import ChatOpenAI
from langchain_core.documents import Document

# ----------------------------------------------------------------
# Architecture Generator Script
# ----------------------------------------------------------------

# --- Utility: Clone the repository shallowly using subprocess to avoid GitPython issues ---
import subprocess

def clone_repo(repo_url: str, clone_dir: str = "tmp_repo_arch") -> str:
    """
    Performs a shallow clone of the given GitHub repository (latest commit only) using subprocess.
    Returns the local path to the cloned repository.
    """
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir, ignore_errors=True)
    try:
        subprocess.run(["git", "clone", "--depth", "1", repo_url, clone_dir], check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Git clone failed: {e}")
    return clone_dir

# --- Utility: Extract Python components via AST ---
def extract_python_components(repo_path: str) -> List[Document]:
    """
    Walks through .py files in the repo, parses AST, and extracts
    all FunctionDef and ClassDef code segments as Documents.
    """
    components: List[Document] = []
    for py_file in Path(repo_path).rglob("*.py"):
        try:
            source = py_file.read_text(encoding="utf-8")
            tree = ast.parse(source)
            for node in tree.body:
                if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
                    snippet = ast.get_source_segment(source, node)
                    if snippet and len(snippet) > 0:
                        metadata = {
                            "source": str(py_file),
                            "type": type(node).__name__,
                            "name": node.name
                        }
                        components.append(Document(page_content=snippet, metadata=metadata))
        except Exception:
            # Skip files that cannot be parsed
            continue
    return components

# --- Core: Generate system architecture using an LLM ---
def generate_system_architecture(components: List[Document]) -> str:
    """
    Given a list of code components, prompts the LLM to
    produce a Mermaid diagram and explanatory narrative.
    """
    # Prepare a simple list of component identifiers
    comp_list = [f"{doc.metadata['type']} {doc.metadata['name']}" for doc in components]
    comp_text = "\n".join(comp_list)

    # Initialize the LLM client
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

    print()
    # Compose the prompt
    prompt = f"""
You are a software architect. Your job is to understand the architecture of a codebase based on its components and generate a high-level 
overview and user guide based on the overview:
{comp_text}

Please:
1. Draw a Mermaid flow diagram that shows how these components interact also add the explanation of the function in top down manner.
2. Provide a concise narrative explaining the overall architecture and data flow.
3. Ensure the diagram is clear and easy to understand, with proper labels for each component.
4. Create a proper user guide that explains how to use the system, including any prerequisites or setup steps.
5. Use clear and professional language suitable for technical documentation.
"""

    # Invoke the LLM and return its response
    result = llm.invoke(prompt).content.strip()
    return result

# --- Script entry point ---
def main(repo_url: str):
    # Step 1: Clone repository
    repo_path = clone_repo(repo_url)

    # Step 2: Extract code components
    components = extract_python_components(repo_path)
    if not components:
        print("No Python classes or functions found. Cannot generate architecture.")
        return

    # Step 3: Generate and display architecture
    architecture = generate_system_architecture(components)
    print("\n=== System Architecture Output ===\n")
    print(architecture)

if __name__ == "__main__":
    import sys
    # If a valid GitHub URL is passed as the first arg, use it; otherwise prompt
    if len(sys.argv) >= 2 and sys.argv[1].startswith("http"):  
        repo_url = sys.argv[1]
    else:
        repo_url = input("Enter GitHub repository URL: ")
    main(repo_url)


RuntimeError: Git clone failed: Command '['git', 'clone', '--depth', '1', '', 'tmp_repo_arch']' returned non-zero exit status 128.