# SETUP

In [425]:
from dotenv import load_dotenv
import os
from openai import OpenAI

# Load API KEYS
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')

# API Urls compatibles wit openai
groq_url = "https://api.groq.com/openai/v1"

# Client instances
openai_client = OpenAI(api_key=openai_api_key)
groq_client = OpenAI(api_key=groq_api_key, base_url=groq_url)

# Clients models and client dictionary
models = ["gpt-4o-mini",
          "meta-llama/llama-4-scout-17b-16e-instruct",
          "openai/gpt-oss-20b",
          "openai/gpt-oss-120b" ]

clients = {
    "gpt-4o-mini": openai_client,                                   # OpenAI model                $0.15/$0.60
    "meta-llama/llama-4-scout-17b-16e-instruct" : groq_client,      # Groq Llama model            $0.11/$0.34
    "openai/gpt-oss-20b": groq_client,                              # Groq GPT OSS 20B - cheaper  $0.075/$0.30
    "openai/gpt-oss-120b": groq_client                              # Groq GPT OSS 120B powerful  $0.15/$0.60
}



In [467]:
SYSTEM_PROMPT = """
You are a Python expert. Analyze Python functions and improve documentation.
- Generate or improve Python docstrings following best practices updated in 2025:
  - Include a brief description of what the function does.
  - Include Args with type annotations and descriptions.
  - Include Returns with type and description if applicable.
  - Include Raises ONLY if the function can raise exceptions.
  - If the function doensn't return anything, don't include the return secton. 
  - Keep lines <= 79 characters.
- ONLY return a valid JSON array (list of dictionaries). Each string must be enclosed
  in a single pair of double quotes. Use '\n' for line breaks inside strings.
- Correct any formatting errors in existing docstrings.
- Do NOT add explanations, code, or anything else outside the JSON.
- Do NOT use Markdown syntax (no ```python).
- Write all text in English.
- If there is nothing to improve, return the original docstring.
- Each dictionary must have keys:
  - "name": function name
  - "docstring": improved or generated docstring (without triple quotes)
- Do not break strings into multiple quoted segments.
"""

PROMPT_BASE = """
Generate or improve Python docstrings for the following functions.
Return a JSON array (list of dicts), each dict with keys:
- "name": function name
- "docstring": improved or generated docstring (without triple quotes)

Functions:

"""


# STEP 1: Extract the functions info from a file

In [427]:
import ast

#############################################################################
# Function to extract the info of the functons from the path of a python file 
#############################################################################

def extract_functions(file_path):

    with open(file_path, "r", encoding="utf-8") as f:
        code = f.read()

    # Parse the code into an Abstract Syntax Tree (AST)
    tree = ast.parse(code)

    funcs = []
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            # Extract data of each function
            func_info = {
                "name": node.name,
                "args": [arg.arg for arg in node.args.args],
                "docstring": ast.get_docstring(node),
                "source": ast.get_source_segment(code, node)
            }
            funcs.append(func_info)

    return funcs

In [472]:

file_path = "examples/example.py" 

functions = extract_functions(file_path)

for func in functions:
    print(f"\n🔹 Name: {func['name']}")
    print(f"   Arguments: {func['args']}")
    print(f"   Docstring: {func['docstring']}")
    print(f"   Source: {func['source']}")

    print("-" * 50)



🔹 Name: greet
   Arguments: ['name']
   Docstring: Return a friendly greeting for the given name.

Args:
    name (str): The name of the person to greet.

Returns:
    str: A greeting message containing the given name.
   Source: def greet(name):
    """
    Return a friendly greeting for the given name.
    
    Args:
        name (str): The name of the person to greet.
    
    Returns:
        str: A greeting message containing the given name.
    """
    return f'Hello, {name}!'
--------------------------------------------------

🔹 Name: add
   Arguments: ['a', 'b']
   Docstring: Return the sum of two numeric values.

Args:
    a (int | float): The first value to add.
    b (int | float): The second value to add.

Returns:
    int | float: The sum of a and b.
   Source: def add(a, b):
    """
    Return the sum of two numeric values.
    
    Args:
        a (int | float): The first value to add.
        b (int | float): The second value to add.
    
    Returns:
        int | flo

# STEP 2: Generate docstrings with openai library

In [473]:
functions

[{'name': 'greet',
  'args': ['name'],
  'docstring': 'Return a friendly greeting for the given name.\n\nArgs:\n    name (str): The name of the person to greet.\n\nReturns:\n    str: A greeting message containing the given name.',
  'source': 'def greet(name):\n    """\n    Return a friendly greeting for the given name.\n    \n    Args:\n        name (str): The name of the person to greet.\n    \n    Returns:\n        str: A greeting message containing the given name.\n    """\n    return f\'Hello, {name}!\''},
 {'name': 'add',
  'args': ['a', 'b'],
  'docstring': 'Return the sum of two numeric values.\n\nArgs:\n    a (int | float): The first value to add.\n    b (int | float): The second value to add.\n\nReturns:\n    int | float: The sum of a and b.',
  'source': 'def add(a, b):\n    """\n    Return the sum of two numeric values.\n    \n    Args:\n        a (int | float): The first value to add.\n        b (int | float): The second value to add.\n    \n    Returns:\n        int | flo

In [474]:
def make_prompt(prompt_base, functions):
  functions_code = "\n\n".join([f['source'] for f in functions])
  prompt = prompt_base + functions_code

  return prompt


In [475]:
print(make_prompt(PROMPT_BASE, functions))


Generate or improve Python docstrings for the following functions.
Return a JSON array (list of dicts), each dict with keys:
- "name": function name
- "docstring": improved or generated docstring (without triple quotes)

Functions:

def greet(name):
    """
    Return a friendly greeting for the given name.
    
    Args:
        name (str): The name of the person to greet.
    
    Returns:
        str: A greeting message containing the given name.
    """
    return f'Hello, {name}!'

def add(a, b):
    """
    Return the sum of two numeric values.
    
    Args:
        a (int | float): The first value to add.
        b (int | float): The second value to add.
    
    Returns:
        int | float: The sum of a and b.
    """
    return a + b

def multiply(a, b, c=1):
    """
 
    """
    return a * b * c


In [433]:
import json

def generate_docstrings(functions, prompt_base, system_prompt=None, model="openai/gpt-oss-20b"):
    """
    Generate docstrings for multiple functions in a single LLM call.

    Args:
        functions: list of dicts with keys 'name' and 'source'
        model: str, model name to use

    Returns:
        list of dicts with keys 'name' and 'docstring'
    """
    prompt = make_prompt(prompt_base, functions)
    
    if model not in clients:
        raise ValueError(f"Model '{model}' not found in clients dictionary.")
    client = clients[model]

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        max_tokens=2000, 
        response_format={"type": "text"}
    )
    print("DEBUG: response:", response)
    raw_text = response.choices[0].message.content.strip()

    try:
        # Convierte el JSON generado por el modelo en una lista de dicts de Python
        return json.loads(raw_text)
    except json.JSONDecodeError as e:
        print("Error parsing JSON from model output:", e)
        print("Raw output was:")
        print(response.choices[0].message.content)
        return []


In [476]:
suggested_docstrings = generate_docstrings(
    functions=functions, 
    model="openai/gpt-oss-20b", 
    prompt_base=PROMPT_BASE,
    system_prompt=SYSTEM_PROMPT
)

DEBUG: response: ChatCompletion(id='chatcmpl-9c6c7c13-53dd-4969-9430-7e2adc40e5a2', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='[\n  {\n    "name": "greet",\n    "docstring": "Return a friendly greeting for the given name.\\n\\nArgs:\\n    name (str): The name of the person to greet.\\n\\nReturns:\\n    str: A greeting message containing the given name."\n  },\n  {\n    "name": "add",\n    "docstring": "Return the sum of two numeric values.\\n\\nArgs:\\n    a (int, float): The first value to add.\\n    b (int, float): The second value to add.\\n\\nReturns:\\n    int, float: The sum of a and b."\n  },\n  {\n    "name": "multiply",\n    "docstring": "Return the product of two or three numeric values.\\n\\nArgs:\\n    a (int, float): The first multiplier.\\n    b (int, float): The second multiplier.\\n    c (int, float, optional): The optional third multiplier. Defaults to 1.\\n\\nReturns:\\n    int, float: The product of a, b, and 

In [477]:
suggested_docstrings

[{'name': 'greet',
  'docstring': 'Return a friendly greeting for the given name.\n\nArgs:\n    name (str): The name of the person to greet.\n\nReturns:\n    str: A greeting message containing the given name.'},
 {'name': 'add',
  'docstring': 'Return the sum of two numeric values.\n\nArgs:\n    a (int, float): The first value to add.\n    b (int, float): The second value to add.\n\nReturns:\n    int, float: The sum of a and b.'},
 {'name': 'multiply',
  'docstring': 'Return the product of two or three numeric values.\n\nArgs:\n    a (int, float): The first multiplier.\n    b (int, float): The second multiplier.\n    c (int, float, optional): The optional third multiplier. Defaults to 1.\n\nReturns:\n    int, float: The product of a, b, and c.'}]

# STEP 3: Write docstring in the file

In [451]:
import ast
from pathlib import Path

def update_docstring(file_path, func_data):
    """
    Update or insert docstrings for specific functions in a Python file.

    Args:
        file_path (str | Path): Path to the Python file.
        func_data (dict): Dictionary with:
            - "name": function name.
            - "docstring": new docstring (string with \n for line breaks).
    """
    file_path = Path(file_path)
    lines = file_path.read_text(encoding="utf-8").splitlines()
    source = "\n".join(lines)
    tree = ast.parse(source)

    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef) and node.name == func_data["name"]:
            # --- Compute indentation ---
            func_indent = len(lines[node.lineno - 1]) - len(lines[node.lineno - 1].lstrip())
            body_indent = " " * (func_indent + 4)

            # --- Prepare the new docstring block ---
            doc_lines = func_data["docstring"].split("\n")
            new_doc_block = (
                [body_indent + '"""']
                + [body_indent + line for line in doc_lines]
                + [body_indent + '"""']
            )

            # --- If function already has a docstring, replace it ---
            if (
                node.body
                and isinstance(node.body[0], ast.Expr)
                and isinstance(getattr(node.body[0], "value", None), ast.Constant)
                and isinstance(node.body[0].value.value, str)
            ):
                doc_start_idx = node.body[0].lineno - 1

                # Find where the existing triple quotes end
                doc_end_idx = doc_start_idx + 1
                open_quote = None
                if lines[doc_start_idx].strip().startswith('"""'):
                    open_quote = '"""'
                elif lines[doc_start_idx].strip().startswith("'''"):
                    open_quote = "'''"

                while doc_end_idx < len(lines):
                    if lines[doc_end_idx].strip().endswith(open_quote):
                        doc_end_idx += 1
                        break
                    doc_end_idx += 1

                # Replace the old docstring block
                lines[doc_start_idx:doc_end_idx] = new_doc_block

            else:
                # --- No existing docstring: insert it right after def line ---
                insert_idx = node.body[0].lineno - 1
                lines[insert_idx:insert_idx] = new_doc_block

    # --- Save the modified file ---
    file_path.write_text("\n".join(lines), encoding="utf-8")


In [478]:
for func in suggested_docstrings:
  update_docstring(file_path, func)

# MORE COMPLEX FILE

In [402]:
models

['gpt-4o-mini',
 'meta-llama/llama-4-scout-17b-16e-instruct',
 'openai/gpt-oss-20b',
 'openai/gpt-oss-120b']

In [468]:
file_path_0 = "examples/train_engine.py"
functions = extract_functions(file_path_0)


In [469]:
suggested = generate_docstrings(
    functions=functions, 
    model="gpt-4o-mini", 
    prompt_base=PROMPT_BASE,
    system_prompt=SYSTEM_PROMPT
)

DEBUG: response: ChatCompletion(id='chatcmpl-CVKXGECUOCdZwnCudT3QEGBbq54tK', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='[\n    {\n        "name": "train_step",\n        "docstring": "Perform a single training epoch over the provided dataloader. It updates the model parameters via backpropagation, computes the average loss, and evaluates the specified metrics. Returns a dictionary mapping metric names to their computed values, including the average loss.\\n\\nArgs:\\n    model (torch.nn.Module): The model to train.\\n    dataloader (DataLoader): DataLoader yielding input tensors and target labels.\\n    loss_fn (callable): Callable that computes the loss between model outputs and targets.\\n    optimizer (torch.optim.Optimizer): Optimizer used to update the model.\\n    device (torch.device): Device on which computations occur.\\n    metrics_list (list, optional): List of metric names to compute; defaults to [\'accuracy\']. \\n\\

In [470]:
suggested

[{'name': 'train_step',
  'docstring': "Perform a single training epoch over the provided dataloader. It updates the model parameters via backpropagation, computes the average loss, and evaluates the specified metrics. Returns a dictionary mapping metric names to their computed values, including the average loss.\n\nArgs:\n    model (torch.nn.Module): The model to train.\n    dataloader (DataLoader): DataLoader yielding input tensors and target labels.\n    loss_fn (callable): Callable that computes the loss between model outputs and targets.\n    optimizer (torch.optim.Optimizer): Optimizer used to update the model.\n    device (torch.device): Device on which computations occur.\n    metrics_list (list, optional): List of metric names to compute; defaults to ['accuracy']. \n\nReturns:\n    dict: Metric names mapped to their computed float values.\n\nRaises:\n    RuntimeError: If there is an error during the training step."},
 {'name': 'train_mlflow',
  'docstring': 'Train a model with

In [471]:
for func_data in suggested:
  update_docstring(file_path_0, func_data=func_data )