# Importing the needed libraries and global variables

In [1]:
from dotenv import load_dotenv
import os
import subprocess
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import PydanticOutputParser ,StrOutputParser
from pydantic import BaseModel
from pydantic import Field
from langchain_core.prompts import PromptTemplate

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Read the Gemini API key from the environment; os.getenv returns None when it is unset.
# NOTE(review): this constant is not referenced again in the visible notebook —
# presumably the chat model reads its credentials from the environment itself; confirm.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")



# agents setup

In [2]:

# Shared chat model instance; each generator function below uses it as the
# default value of its `llm` parameter.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=1.0,  # sampling temperature passed to the model
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

small test

In [3]:
# Smoke test: send a single prompt to check that the model can be called.
response = model.invoke("Hello, how are you?")
# Bare last expression so the notebook displays the reply text.
response.text

"Hello! I'm doing well, thank you for asking.\n\nHow may I help you today?"

## pydantic response structure


In [4]:

# Structured reply schema handed to the output parser in generate_unit_test.
class UnitTestResponse(BaseModel):
    # File name under which the generated tests are meant to be saved.
    test_file_name : str = Field(description="The name of the test file.")
    # Source text of the generated pytest module.
    tests: str = Field(description="Pytest test file content as plain code only.")


# Structured reply schema handed to the output parser built in generate_readme.
class ReadmeResponse(BaseModel):
    # Full README text, expected as Markdown.
    readme: str = Field(
        description="Complete README.md content as plain Markdown."
    )
# Structured reply schema handed to the output parser in generate_file_documentation.
class FiledocumentationResponse(BaseModel):
    # Documentation text for one source file; the description asks the model
    # to mention the file name inside it.
    file_documentation: str = Field(
        description="Technical documentation for the file. MUST explicitly include the file name."
    )


## agents

In [5]:


def generate_unit_test(file_name : str ,content: str , llm=model) -> tuple[str, str]:
    """Ask the LLM to write a pytest module for one Python source file.

    Args:
        file_name: Name (or path) of the file under test; inserted into the
            prompt so the generated tests can refer to the module.
        content: Full source text of the file under test.
        llm: Chat model used to run the prompt. Defaults to the notebook-level
            ``model`` (bound when this function is defined).

    Returns:
        A ``(test_file_name, tests)`` tuple: the file name suggested by the
        model and the generated test code.

    Note:
        The reply is parsed into ``UnitTestResponse``; presumably the parser
        raises when the reply does not match that schema — confirm against the
        LangChain version in use.
    """
    parser = PydanticOutputParser(pydantic_object=UnitTestResponse)

    prompt = PromptTemplate(
        template=(
            "You are a senior Python test engineer. Generate pytest unit tests for the Python code I provide.\n"
            "Constraints:\n"
            "- Output only the test filename and the test code (no explanations).\n"
            "- Deterministic: no network, no sleeps, no randomness unless seeded.\n"
            "- No real filesystem side effects: use tmp_path.\n"
            "- Prefer unit tests over integration tests.\n"
            "Coverage goals:\n"
            "- Test main public functions/classes.\n"
            "- Include  edge cases, and failure cases (assert exceptions) only if needed.\n"
            "- Hit important branches.\n\n"
            "{format_instructions}\n\n"
            "python file name {file_name}\n"
            "Python code to test:\n"
            "```python\n{content}\n```"
        ),
        input_variables=["content" , "file_name"],
        # The schema instructions are fixed per parser, so they are bound once here.
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    chain = prompt | llm | parser
    resp: UnitTestResponse = chain.invoke({"content": content , "file_name": file_name})
    return resp.test_file_name ,resp.tests


# Minimal sample module used to try the test generator by hand.
example_content = """
def addition(a, b):
    return a + b
"""

# Calls the LLM and prints the (test_file_name, tests) tuple it returns.
print(generate_unit_test("exemple.py" ,example_content))

('test_addition.py', 'import pytest\nfrom exemple import addition\n\n@pytest.mark.parametrize(\n    "a, b, expected",\n    [\n        (1, 2, 3),            # Positive integers\n        (-1, -2, -3),         # Negative integers\n        (1, -2, -1),          # Mixed sign integers\n        (0, 5, 5),            # Zero with positive\n        (-5, 0, -5),          # Zero with negative\n        (0, 0, 0),            # Both zeros\n        (1.5, 2.5, 4.0),      # Floating point numbers\n        (1, 2.5, 3.5),        # Mixed integer and float\n        (1000000, 2000000, 3000000), # Large integers\n        (-100, 100, 0),       # Inverse numbers\n        (3.14159, 2.71828, 5.85987), # More complex floats\n    ],\n)\ndef test_addition_valid_inputs(a, b, expected):\n    """Test addition with various valid numerical inputs."""\n    assert addition(a, b) == pytest.approx(expected)\n\n# Although the original function is simple and doesn\'t explicitly raise\n# exceptions for non-numeric types (it wou

In [6]:
def generate_file_documentation(file_name: str, content: str, llm=model) -> str:
    """Produce technical documentation for a single Python file via the LLM.

    Args:
        file_name: Name (or path) of the file; the prompt asks the model to
            mention it explicitly.
        content: Full source text of the file.
        llm: Chat model used to run the prompt. Defaults to the notebook-level
            ``model``.

    Returns:
        The ``file_documentation`` field of the parsed
        ``FiledocumentationResponse``.
    """
    doc_parser = PydanticOutputParser(pydantic_object=FiledocumentationResponse)

    # Prompt text is kept separate from the template object for readability.
    template_text = (
        "You are a senior Python engineer.\n"
        "Write clear technical documentation for the following Python file.\n\n"
        "Rules:\n"
        "- Explicitly mention the file name: {file_name}\n"
        "- Explain the purpose of this file\n"
        "- Describe main functions/classes\n"
        "- Mention key behaviors and assumptions\n\n"
        "{format_instructions}\n\n"
        "File name: {file_name}\n\n"
        "Python file content:\n"
        "```python\n{content}\n```"
    )
    doc_prompt = PromptTemplate(
        template=template_text,
        input_variables=["file_name", "content"],
        partial_variables={"format_instructions": doc_parser.get_format_instructions()},
    )

    pipeline = doc_prompt | llm | doc_parser
    payload = {"file_name": file_name, "content": content}
    parsed: FiledocumentationResponse = pipeline.invoke(payload)
    return parsed.file_documentation

# Sample inputs used to try the documentation generator by hand.
exemple_file_name = "exemple.py"
example_content = """
def addition(a, b):
    return a + b
"""
# Calls the LLM and prints the documentation text it returns.
print(generate_file_documentation(exemple_file_name, example_content))

## Technical Documentation for `exemple.py`

### File Name
`exemple.py`

### Purpose of this File
This file serves as a minimalist module providing a fundamental utility function for performing basic arithmetic addition. Its primary purpose is to encapsulate a single, straightforward operation, demonstrating the definition of a simple, reusable function in Python.

### Main Functions

#### `addition(a, b)`
*   **Description**: This function is designed to take two arguments, `a` and `b`, and return their arithmetic sum.
*   **Parameters**:
    *   `a`: The first operand for the addition. Expected to be a numeric type (e.g., `int`, `float`) or any type that supports the `+` operator.
    *   `b`: The second operand for the addition. Expected to be a numeric type (e.g., `int`, `float`) or any type that supports the `+` operator.
*   **Returns**:
    *   The result of `a + b`. The return type will typically be consistent with the input types (e.g., `int` if both `a` and `b` are `int`, `fl

In [7]:


def generate_readme(
    project_structure: str,
    file_docs: list[str],
    llm=model,
) -> str:
    """Generate the README.md text for the project via the LLM.

    The prompt carries the JSON format instructions for ``ReadmeResponse``, so
    a compliant reply is a JSON object whose ``readme`` field holds the
    Markdown. That field is extracted here; previously the raw reply (JSON
    envelope included) was returned and written to README.md as-is.

    Args:
        project_structure: Text rendering of the repository tree.
        file_docs: Per-file documentation strings; each becomes one bullet in
            the prompt.
        llm: Chat model used to run the prompt. Defaults to the notebook-level
            ``model``.

    Returns:
        The README Markdown. If the reply cannot be parsed into
        ``ReadmeResponse`` (for example the model answered with plain
        Markdown), the raw reply text is returned unchanged.
    """
    # Local import: only this function needs the exception type.
    from langchain_core.exceptions import OutputParserException

    parser = PydanticOutputParser(pydantic_object=ReadmeResponse)

    joined_file_docs = "\n\n".join(
        f"- {doc}" for doc in file_docs
    )

    prompt = PromptTemplate(
        template=(
            "You are a senior software engineer.\n"
            "Generate a complete README.md for a Python project.\n\n"
            "README must include:\n"
            "- Project overview\n"
            "- Project structure\n"
            "- Setup / installation\n"
            "- Usage\n\n"
            "{format_instructions}\n\n"
            "Project structure:\n"
            "{project_structure}\n\n"
            "Files documentation:\n"
            "{file_docs}\n"
            "ALL IN MARKDOWN"
        ),
        input_variables=["project_structure", "file_docs"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    # Fetch the reply as plain text first so it is still available when the
    # structured parse fails.
    chain = prompt | llm | StrOutputParser()
    raw_reply = chain.invoke(
        {
            "project_structure": project_structure,
            "file_docs": joined_file_docs,
        }
    )

    try:
        return parser.parse(raw_reply).readme
    except OutputParserException:
        # Best effort: the model ignored the JSON instructions; use its text directly.
        return raw_reply


# helper functions 

get latest repo updates 

In [8]:

def get_latest_repo_updates():
    """Run ``git pull origin main`` in the current directory and report the outcome.

    Prints the command's captured stdout, then its stderr, then its return
    code. Nothing is returned and a non-zero return code is not raised.
    """
    pull_command = ['git', 'pull', 'origin', 'main']
    completed = subprocess.run(pull_command, capture_output=True, text=True)

    # One print per item, in the same order: stdout, stderr, exit status.
    for report in (completed.stdout, completed.stderr, completed.returncode):
        print(report)




push updates to github

In [9]:
def push_updates_to_github(comments = ""):
    """Stage everything, commit with ``comments`` and push to ``origin main``.

    Each git command is run with its output captured, and its stdout, stderr
    and return code are printed. Previously the output was not captured, so
    ``result.stdout`` / ``result.stderr`` were always ``None`` and only the
    last command was reported.

    All three commands are always attempted, in order; a failing step (for
    example "nothing to commit") does not stop the following ones.

    Args:
        comments: Commit message passed to ``git commit -m``.

    Returns:
        The captured stdout of the final ``git push`` command.
    """
    steps = (
        ['git', 'add', '.'],
        ['git', 'commit', '-m', comments],
        ['git', 'push', 'origin', 'main'],
    )

    result = None
    for command in steps:
        result = subprocess.run(command, capture_output=True, text=True)
        print(result.stdout)
        print(result.stderr)
        print(result.returncode)

    return result.stdout


get files to read and those to ignore

In [10]:


def files_to_read():
    """Collect the ``.py`` files under the current directory that are not ignored.

    A path is ignored when any of its components equals an entry returned by
    ``files_dirs_to_ignore()`` (exact name match, no glob expansion). Ignored
    directories are pruned from the walk so their contents (e.g. ``.git`` or a
    virtualenv) are never traversed.

    Returns:
        List of paths, each relative to ``'.'`` (e.g. ``./pkg/mod.py``), in
        the order ``os.walk`` yields them.
    """
    ignore = set(files_dirs_to_ignore())

    def should_ignore(path):
        return any(part in ignore for part in path.split(os.sep))

    py_files = []
    for root, dirs, files in os.walk('.'):
        if should_ignore(root):
            # Nothing below an ignored root can be kept; stop descending.
            dirs[:] = []
            continue

        # In-place edit of `dirs` tells os.walk (top-down) to skip those subtrees.
        dirs[:] = [d for d in dirs if d not in ignore]

        for file in files:
            if file in ignore:
                continue
            if file.endswith('.py'):
                py_files.append(os.path.join(root, file))
    return py_files
def files_dirs_to_ignore():
    """Return the file and directory names the agent should skip.

    Entries come from ``.gitignore`` in the current directory, followed by the
    always-ignored ``.git`` and ``agent.ipynb``. A missing ``.gitignore`` is
    treated as empty instead of raising.

    Callers compare entries against single path components by equality, so:
      * blank lines and ``#`` comment lines are dropped;
      * leading/trailing ``/`` are removed (``venv/`` -> ``venv``), otherwise a
        directory pattern could never equal a directory name.

    Glob patterns (``*.pyc``), negations (``!keep``) and nested paths
    (``docs/_build``) are kept verbatim but are not expanded by callers.

    Returns:
        List of names to ignore, in file order, ending with ``.git`` and
        ``agent.ipynb``.
    """
    try:
        with open(".gitignore", "r", encoding="utf-8") as f:
            raw_lines = f.read().splitlines()
    except FileNotFoundError:
        raw_lines = []

    ignore = []
    for line in raw_lines:
        entry = line.strip()
        if not entry or entry.startswith("#"):
            continue
        entry = entry.strip("/")
        if entry:
            ignore.append(entry)

    ignore.extend([".git", "agent.ipynb"])
    return ignore


get repo architecture for documentation later on

In [11]:
def get_git_structure():
    """Render the current directory as an ASCII tree, skipping ignored entries.

    Directories are listed before files, each group sorted by name. A directory
    is skipped when any component of its path is in ``files_dirs_to_ignore()``;
    a file is skipped when its name is in that list or starts with ``'.'``.

    Entries are filtered *before* the branch connectors are chosen, so the last
    visible entry of every directory gets ``└── `` even when hidden or ignored
    entries sort after it.

    Returns:
        The tree as a newline-joined string whose first line is ``"."``.
    """
    ignore = files_dirs_to_ignore()

    def should_ignore(path):
        return any(part in ignore for part in path.split(os.sep))

    structure = ["."]

    def process_dir(current_dir, prefix=""):
        dirs = []
        files = []
        # Sorting once keeps each group in name order after the split below.
        for name in sorted(os.listdir(current_dir)):
            full_path = os.path.join(current_dir, name)
            if os.path.isdir(full_path):
                if not should_ignore(full_path):
                    dirs.append(name)
            elif os.path.isfile(full_path):
                if name not in ignore and not name.startswith('.'):
                    files.append(name)

        for i, dir_name in enumerate(dirs):
            # A directory closes the branch only if no visible file follows it.
            is_last_dir = i == len(dirs) - 1 and not files
            structure.append(f"{prefix}{'└── ' if is_last_dir else '├── '}{dir_name}/")

            extension = "    " if is_last_dir else "│   "
            process_dir(os.path.join(current_dir, dir_name), prefix + extension)

        for i, file_name in enumerate(files):
            is_last = i == len(files) - 1
            structure.append(f"{prefix}{'└── ' if is_last else '├── '}{file_name}")

    process_dir(".")

    return '\n'.join(structure)


# Preview the tree rendered for the current working directory.
print(get_git_structure())


.
├── test_files/
│   └── test_bank_simulator.py
├── Bank_Simulator.py
└── minibank.json


# Assemble the agent pipeline

In [13]:


def agent_pipeline():
    """Document and test every Python source file, then write the README and push.

    Steps:
      1. Collect the source files with ``files_to_read()``, leaving out
         anything inside the ``test_files`` output directory so tests generated
         by an earlier run are not treated as sources.
      2. For each file, generate its documentation and a pytest module; the
         tests are written into ``test_files/``.
      3. Generate ``README.md`` from the project tree and the per-file docs.
      4. Stage, commit and push the result via ``push_updates_to_github``.

    The test file name comes from the LLM, so only its final path component is
    used; a reply such as ``../x.py`` cannot write outside ``test_files``.
    """
    test_dir = "test_files"

    # get latest repo updates
    print("fetching the latest updates...")
    # NOTE(review): the pull itself is disabled below, so nothing is fetched here.
    #get_latest_repo_updates()

    # get files to read
    print("getting the files to read...")
    files = [
        path for path in files_to_read()
        if test_dir not in os.path.normpath(path).split(os.sep)
    ]
    list_doc = []

    # make test files dir
    print("making test files dir...")
    os.makedirs(test_dir, exist_ok=True)
    print("creating test files and documentation...")
    for file in files:

        # Python sources are UTF-8 by default; do not depend on the locale encoding.
        with open(file, "r", encoding="utf-8") as f:
            content = f.read()
        doc = generate_file_documentation(file, content)
        list_doc.append(doc)
        test_file_name , test_code = generate_unit_test(file, content)

        # Keep only the final component (either separator style) of the LLM's name.
        safe_name = os.path.basename(test_file_name.replace("\\", "/"))
        if not safe_name:
            safe_name = "test_" + os.path.basename(file)

        with open(os.path.join(test_dir, safe_name), "w", encoding="utf-8") as f:
            f.write(test_code)
    print("generating readme file")
    proj_structure = get_git_structure()
    readme = generate_readme(proj_structure , list_doc)
    with open("README.md", "w", encoding="utf-8", newline="\n") as f:
        f.write(readme)

    print("pushing updates to github")
    push_updates_to_github("adding test files and documentation")
    
        
# Run the whole pipeline: calls the LLM for every source file and for the README,
# overwrites README.md, and commits/pushes to origin main.
agent_pipeline()

fetching the latest updates...
getting the files to read...
making test files dir...
creating test files and documentation...
generating readme file
