In [11]:
from langgraph.graph import StateGraph, END
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
import os

In [12]:
# Shared chat model used by every summarization call in this notebook.
# NOTE(review): no API key is passed here — presumably the client picks it up
# from the environment; confirm before running on a fresh kernel.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [13]:
class RepoState(dict):
    """Dictionary-based state passed to ``StateGraph`` as the graph's schema.

    The annotations name the keys that the node functions in this notebook
    read and write; no extra behavior is added on top of ``dict``.
    """

    # Source files to summarize, as (path, content) pairs.
    files: list  # [(path, content)]
    # Per-file summaries keyed by file path; written by ``summarize_file``.
    file_summaries: dict
    # Overall project description; written by ``summarize_project``.
    project_summary: str

In [14]:
def summarize_file(state: RepoState):
    """Summarize every file in ``state["files"]`` with the shared ``llm``.

    Args:
        state: Graph state whose ``"files"`` entry is an iterable of
            ``(path, content)`` pairs.

    Returns:
        The same ``state`` object, with ``"file_summaries"`` set to a dict
        mapping each path to the model's summary text.
    """
    # Only the first ``max_chars`` characters of a file are sent to the model.
    max_chars = 5000
    template = PromptTemplate(
        input_variables=["filename", "code"],
        template="""
        You are analyzing a code file.
        Filename: {filename}
        Code:
        {code}

        Provide a clear summary of what this file does.
        """
    )
    file_summaries = {}
    for fname, code in state["files"]:
        prompt = template.format(filename=fname, code=code[:max_chars])
        # ``invoke`` replaces the deprecated ``predict``; on a chat model it
        # returns a message whose ``content`` carries the reply text.
        file_summaries[fname] = llm.invoke(prompt).content
    state["file_summaries"] = file_summaries
    return state

In [15]:
def summarize_project(state: RepoState):
    """Combine the per-file summaries into one project-level description.

    Args:
        state: Graph state whose ``"file_summaries"`` entry maps file path
            to summary text.

    Returns:
        The same ``state`` object, with ``"project_summary"`` set to the
        model's reply.
    """
    # One "path: summary" line per file, in the dict's insertion order.
    all_summaries = "\n".join(
        f"{fname}: {summary}" for fname, summary in state["file_summaries"].items()
    )
    prompt = f"""
    Here are the summaries of each file in a project:

    {all_summaries}

    Please generate an overall description of the project:
    - What it does
    - Its architecture
    - Its purpose
    """
    # ``invoke`` replaces the deprecated ``predict``; on a chat model it
    # returns a message whose ``content`` carries the reply text.
    state["project_summary"] = llm.invoke(prompt).content
    return state

In [16]:
# ---------- STEP 4: Graph ----------
# Linear two-step pipeline: summarize_file -> summarize_project -> END.
graph = StateGraph(RepoState)

# Register each node under the same name as the function that implements it.
for node_name, node_fn in (
    ("summarize_file", summarize_file),
    ("summarize_project", summarize_project),
):
    graph.add_node(node_name, node_fn)

graph.set_entry_point("summarize_file")

# Wire the nodes in execution order.
for source_node, target_node in (
    ("summarize_file", "summarize_project"),
    ("summarize_project", END),
):
    graph.add_edge(source_node, target_node)

# Compiled, runnable form of the graph.
app = graph.compile()


In [18]:
# ---------- STEP 5: Run with cleaned batching ----------
def collect_repo_files(base_dir, skip_dirs=(".git",)):
    """Read every file under ``base_dir`` as text.

    Args:
        base_dir: Directory to walk.
        skip_dirs: Directory names pruned from the walk, so nothing beneath
            them is visited.

    Returns:
        A list of ``(relative_path, content)`` tuples, with paths relative to
        ``base_dir``, in sorted walk order. Files that cannot be opened or
        read are reported with ``print`` and left out.
    """
    collected = []
    for root, dirs, files in os.walk(base_dir):
        # Prune in place so os.walk never descends into skipped directories.
        # Comparing whole directory names keeps look-alikes such as ".github",
        # which a substring test on the full path would also drop.
        dirs[:] = sorted(d for d in dirs if d not in skip_dirs)
        for fname in sorted(files):
            file_path = os.path.join(root, fname)
            try:
                # errors="ignore" drops undecodable bytes instead of raising.
                with open(file_path, "r", encoding="utf-8", errors="ignore") as fh:
                    content = fh.read()
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Skipping {file_path}: {e}")
                continue
            collected.append((os.path.relpath(file_path, base_dir), content))
    return collected


local_dir = "./repo_clone2"  # path where you cloned the repo

# Collect repo files (skip the .git folder)
repo_files = collect_repo_files(local_dir)

print(f"Collected {len(repo_files)} files (excluding .git)")

# Split files by size
SMALL_FILE_MAX_CHARS = 3000      # files shorter than this are sent whole
LARGE_FILE_EXCERPT_CHARS = 5000  # longer files are cut to this many characters

small_files = [(name, code) for name, code in repo_files if len(code) < SMALL_FILE_MAX_CHARS]
large_files = [(name, code) for name, code in repo_files if len(code) >= SMALL_FILE_MAX_CHARS]

file_summaries = {}

# ---------- Summarize small files one-by-one ----------
for name, code in small_files:
    prompt = f"""
    Summarize this file's purpose and role in the project.

    File: {name}
    Code:
    {code}
    """
    # ``invoke`` replaces the deprecated ``predict``; the reply text is in ``content``.
    file_summaries[name] = llm.invoke(prompt).content.strip()

# ---------- Summarize large files (truncate if too big) ----------
for name, code in large_files:
    # Truncate to avoid hitting the token limit. This note used to sit inside
    # the f-string, so it was sent to the model as part of the prompt.
    excerpt = code[:LARGE_FILE_EXCERPT_CHARS]
    prompt = f"""
    Summarize this LARGE file's purpose and role in the project.
    Only focus on the main components, functions, or logic.

    File: {name}
    Code:
    {excerpt}
    """
    file_summaries[name] = llm.invoke(prompt).content.strip()

# ---------- Global project summary ----------
# One "path: summary" line per file, in the order the summaries were produced.
all_summaries_text = "\n".join(f"{fname}: {summary}" for fname, summary in file_summaries.items())
project_summary_prompt = f"""
Here are the file summaries from a code repository:

{all_summaries_text}

Now, provide a **global project summary** that explains:
- What the project is
- Its architecture
- Its purpose
- Main components and how they fit together
"""
# ``invoke`` replaces the deprecated ``predict``; the reply text is in ``content``.
project_summary = llm.invoke(project_summary_prompt).content

Collected 10 files (excluding .git)


In [None]:
# ---------- Output ----------
# Project-level description first, then one section per summarized file.
print("\n=== Project Summary ===")
print(project_summary)

print("\n=== File Summaries ===")
for path, summary_text in file_summaries.items():
    print(f"\n{path}:\n{summary_text}")



=== Project Summary ===
## To-Do List Project Summary

This project is a simple **To-Do List web application** built using React. Its purpose is to allow users to create, manage, and track tasks they need to complete. Users can add tasks, mark them as complete, and delete them.

**Architecture:**

The application follows a component-based architecture, typical of React projects. It has a clear separation of concerns between data management (state) and UI rendering. The application uses a unidirectional data flow, where data is passed down from parent components to child components via props, and updates are triggered by events handled in the parent component and passed down to child components via props.

**Main Components and Their Interaction:**

*   **`index.js` (Entry Point):** This file bootstraps the React application. It finds the root DOM element (`<div id="root">`) in `public/index.html` and renders the `App` component within it.

*   **`App.js` (Root Component):** This is th

In [None]:
# Output generated for 2 files.
# NOTE(review): `result` is not assigned in any cell visible here — presumably
# it is the value returned by running the compiled graph (`app`); confirm it is
# defined on a fresh Restart & Run All.
print("Project Summary:")
print(result["project_summary"])
print("\nFile Summaries:")
for path, summary_text in result["file_summaries"].items():
    print(f"\n{path}:\n{summary_text}")

Project Summary:
## Project Description: A Simple To-Do List Application

This project is a basic to-do list application built using React. It allows users to view a list of tasks, mark them as complete or incomplete, and delete them.

**What it does:**

The application provides the following functionality:

*   **Displays a list of tasks:** Each task shows its name and completion status.
*   **Toggles task completion:** Users can mark tasks as complete or incomplete with a button. Completed tasks are visually distinguished (likely with a strikethrough).
*   **Deletes tasks:** Users can remove tasks from the list.

**Architecture:**

The application follows a component-based architecture, a common pattern in React development:

*   **`App` Component (./public/index.html):** This is the main component and serves as the root of the application. It:
    *   Manages the application's state, specifically the list of tasks.
    *   Provides functions to update the state: `toggleComplete` and