# SETUP

In [None]:
from dotenv import load_dotenv
import os
from openai import OpenAI

# Load API KEYS
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')

# API Urls
groq_url = "https://api.groq.com/openai/v1"

# Client instances
openai_client = OpenAI(api_key=openai_api_key)
groq_client = OpenAI(api_key=groq_api_key, base_url=groq_url)

# Clients dictionary with models
clients = {
    "gpt-4o-mini": openai_client,                                   # OpenAI model
    "meta-llama/llama-4-scout-17b-16e-instruct" : groq_client,      # Groq cheap model for testing
    "openai/gpt-oss-20b": groq_client,                              # Groq GPT OSS 20B
    "openai/gpt-oss-120b": groq_client                              # Groq GPT OSS 120B powerful
}


In [66]:
# System prompt 
SYSTEM_PROMPT = """
You are a Python expert. Generate Python docstrings following best practices updated in 2025:
- Include a brief description of what the function does.
- Include Args with type annotations and descriptions.
- Include Returns with type and description if applicable.
- Include Raises if the function can raise exceptions.
- Keep lines <= 79 characters.
- ONLY return the docstring text, without triple quotes or any additional formatting.
- Correct any formatting errors in existing docstrings.
- Do NOT add explanations, comments, code, or anything else.
- Do NOT include Markdown syntax (e.g., ```python) or any code formatting.
- Write all the docstring in English.
"""

# Prompt base template
PROMPT_TEMPLATE = """Write or improve the Python docstring for the following function.
- If the function already has a docstring, improve it following best practices.
- If it has no docstring, generate a new one.
- Process the code exactly as provided.
"""


# STEP 1: Extract the functions info from a file

In [None]:
import ast

#############################################################################
# Function to extract the info of the functons from the path of a python file 
#############################################################################

def extract_functions(file_path):

    with open(file_path, "r", encoding="utf-8") as f:
        code = f.read()

    # Parse the code into an Abstract Syntax Tree (AST)
    tree = ast.parse(code)

    funcs = []
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            # Extract data of each function
            func_info = {
                "name": node.name,
                "args": [arg.arg for arg in node.args.args],
                "docstring": ast.get_docstring(node),
                "source": ast.get_source_segment(code, node)
            }
            funcs.append(func_info)

    return funcs

In [8]:

file_path = "examples/example.py" 

functions = extract_functions(file_path)

for func in functions:
    print(f"\n🔹 Name: {func['name']}")
    print(f"   Arguments: {func['args']}")
    print(f"   Docstring: {func['docstring']}")
    print(f"   Source: {func['source']}")

    print("-" * 50)



🔹 Name: greet
   Arguments: ['name']
   Docstring: Return a greeting message
   Source: def greet(name):
    """Return a greeting message"""
    return f'Hello, {name}!'
--------------------------------------------------

🔹 Name: add
   Arguments: ['a', 'b']
   Docstring: None
   Source: def add(a, b):
    return a + b
--------------------------------------------------

🔹 Name: multiply
   Arguments: ['a', 'b', 'c']
   Docstring: Multiplies three numbers
   Source: def multiply(a, b, c=1):
    """Multiplies three numbers"""
    return a * b * c
--------------------------------------------------


# STEP 2: Generate docstring with openai library

In [79]:
##################################
# Function to generate docstring
##################################

def generate_docstring_from_source(
    func_source, 
    model="meta-llama/llama-4-scout-17b-16e-instruct", 
    prompt_template=None,
    system_prompt=None
):
    """
    Generate a Python docstring for a given function source code using an LLM.

    Args:
        func_source: str, the full source code of the function
        model: str, name of the model to use
        backend: str, "openai" or "groq"
        prompt_template: str, base user prompt to prepend before the function code
        system_prompt: str, system prompt to guide the LLM behavior

    Returns:
        str: generated docstring
    """
    if prompt_template is None:
        raise ValueError("prompt_template must be provided")
    if system_prompt is None:
        raise ValueError("system_prompt must be provided")

    prompt = prompt_template + "\nFunction:\n" + func_source
    
    if model not in clients:
        raise ValueError(f"Model '{model}' not found in clients dictionary.")
    
    client = clients[model]

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": system_prompt},
                  {"role": "user", "content": prompt}],
        max_tokens=200
    )

    return response.choices[0].message.content.strip()


In [82]:
suggested_docstring = generate_docstring_from_source(
    func_source=functions[0]['source'], 
    model="meta-llama/llama-4-scout-17b-16e-instruct", 
    prompt_template=PROMPT_TEMPLATE,
    system_prompt=SYSTEM_PROMPT
)

In [83]:
suggested_docstring

"Returns a personalized greeting message.\n\nArgs:\n    name (str): The name of the person to be greeted. \n                Cannot be empty.\n\nReturns:\n    str: A greeting message with the name inserted, \n         formatted as 'Hello, <name>!'.\n\nRaises:\n    TypeError: If the input 'name' is not a string.\n    ValueError: If the input 'name' is an empty string."

# STEP 3: Compare and write docstring in the file 

In [38]:
def update_docstring_in_file(file_path, func_name, new_docstring):
    with open(file_path, "r", encoding="utf-8") as f:
        code = f.read()

    tree = ast.parse(code)

    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef) and node.name == func_name:
            doc_node = ast.Expr(value=ast.Constant(value=new_docstring))
            
            if ast.get_docstring(node):  # si ya tiene docstring
                node.body[0] = doc_node
            else:  # si no tiene docstring, insertar al inicio
                node.body.insert(0, doc_node)

    # convertir AST a código
    new_code = astor.to_source(tree)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(new_code)


# STEP 4: Test the workflow

In [85]:
file_path = "examples/example.py"
# Extract the first function
functions = extract_functions(file_path)
func = functions[1]

# Print original source
print(f"Original source: \n {func["source"]}")

#Suggest docstring  
suggested_docstring = generate_docstring_from_source(func['source'], 
                                                     prompt_template=PROMPT_TEMPLATE, 
                                                     system_prompt=SYSTEM_PROMPT,
                                                     model="meta-llama/llama-4-scout-17b-16e-instruct"
                                                     )
print(f"Suggested docstring: {suggested_docstring}")
update_docstring_in_file(file_path, func['name'], suggested_docstring)

# Print modified source
functions = extract_functions(file_path)
func = functions[1]
print(f"New source: \n {func["source"]}")

Original source: 
 def add(a, b):
    return a + b
Suggested docstring: Add two numbers.

Args:
    a (int or float): The first number to add.
    b (int or float): The second number to add.

Returns:
    int or float: The sum of a and b.
New source: 
 def add(a, b):
    """Add two numbers.

Args:
    a (int or float): The first number to add.
    b (int or float): The second number to add.

Returns:
    int or float: The sum of a and b."""
    return a + b
