In [52]:
# The 'os' module provides a way to use operating system-dependent functionality in Python.
import os

# Imports the load_dotenv function from the dotenv module to manage environment variables.
from dotenv import load_dotenv

# The 'ast' module in Python allows for the manipulation and analysis of Python abstract syntax trees.
import ast

# Importing the ChatOpenAI class from the langchain_openai module for use in the program.
from langchain_openai import ChatOpenAI

# Importing ChatPromptTemplate from langchain_core.prompts for creating chat-based prompts in Python.
from langchain_core.prompts import ChatPromptTemplate

# Importing StrOutputParser from langchain's schema output_parser for handling string outputs.
from langchain.schema.output_parser import StrOutputParser

# Importing the Gradio library for building user interfaces in Python.
import gradio as gr

In [53]:
load_dotenv()

True

In [54]:
# Chat model object referenced by the prompt chains defined in later cells.
# The API key is taken from the OPENAI_API_KEY environment variable
# (os.getenv returns None when the variable is not set).
chat_model = ChatOpenAI(model="gpt-4o-mini-2024-07-18",
                        max_completion_tokens=2048,
                        api_key=os.getenv("OPENAI_API_KEY"),
                        temperature=0.0)

In [119]:
def extract_module_docstring(filepath):
    """
    Ask the chat model for the module-level docstring of a Python file.

    The file is read as text and handed to the model together with
    instructions to return only the leading module docstring (including
    leading '#' comment lines) wrapped in triple double quotes, or None
    when there is none. The reply is returned unchanged; nothing is parsed
    or validated locally.

    Args:
        filepath (str): Path of the Python source file to inspect.

    Returns:
        str: The raw text produced by the chat model.
    """

    # Python source files are UTF-8 by default, so read them as UTF-8
    # instead of relying on the platform locale (e.g. cp1252 on Windows).
    with open(filepath, "r", encoding="utf-8") as file:
        file_content = file.read()

    prompt = ChatPromptTemplate.from_template("""
                                              Extract the module docstring only at the beginning of the Python file.
                                              Ignore docstring within a function or class.
                                              Include comments at the beginning of the module in which lines
                                              start with '#'.
                                              Enclose the module docstring by triple double quotes only 
                                              and remove # at the beggining of each line within the triple double quotes.
                                              Do not have backquotes at the beginning or the end of the module docstring.
                                              Do not include any part of the executable code in your response.
                                              If there is no module docstring, return None.
                                              Python code: {content}.
                                              """)

    chat_chain = prompt | chat_model | StrOutputParser()
    response = chat_chain.invoke({"content": file_content})

    return response

In [124]:
testing001 = extract_module_docstring(r'c:\Users\JimYi\PycharmProjects\ProjectEuler\Problem100.py')

In [125]:
print(testing001)

"""
Created on March 10, 2015
@author: Jim Yin
"""


In [5]:
def strip_double_quotes(s1):
    """
    Remove every triple-quote delimiter from a string.

    Both the triple double-quote sequence and the triple single-quote
    sequence (''') are deleted wherever they occur, so that text returned
    by the chat model can be wrapped in fresh docstring delimiters.

    Args:
        s1 (str): The text to clean.

    Returns:
        str: A copy of s1 without any triple-quote sequences.

    Example:
        >>> strip_double_quotes("'''Adds two numbers.'''")
        'Adds two numbers.'
    """

    # The original body referenced an undefined name `s`; the parameter
    # is `s1`.
    return s1.replace('"""', '').replace("'''", '')

In [6]:
def code_block_comment(code_block):
    """
    Ask the chat model for '#' comments describing a block of Python code.

    The code is substituted into a fixed prompt that requests comment
    lines only (each starting with '#', under 75 characters, no code and
    no triple quotes or backquotes). The model's reply is returned as-is.

    Args:
        code_block (str): Source text of the code to be commented on.

    Returns:
        str: The text returned by the chat model.
    """

    template = ChatPromptTemplate.from_template("""
                                              Provide only comments on the following block of Python codes. 
                                              Do not include any part of the code in your response.
                                              Each line should have less than 75 characters.
                                              Start each line of comments with '#'. 
                                              Do not include triple single, double, or backquotes.
                                              Python code: {raw_text}.
                                              """)

    # prompt -> model -> plain string
    pipeline = template | chat_model | StrOutputParser()
    return pipeline.invoke({"raw_text": code_block})

In [7]:
def import_comment(import_line):
    """
    Ask the chat model for a one-line '#' comment about an import statement.

    The import text is substituted into a fixed prompt that requests a
    single comment line per import (starting with '#', fewer than 75
    characters, no code and no quote fences). The reply is returned as-is.

    Args:
        import_line (str): Source text of the import statement(s).

    Returns:
        str: The text returned by the chat model.
    """

    template = ChatPromptTemplate.from_template("""
                                              Provide just one-line comment on each import in Python.
                                              Start the line with '#' and it should have fewer than 75 characters.
                                              Do not include any part of the code or triple single, double, 
                                              or backquote in your response.
                                              Python imports: {raw_text}.
                                              """)

    # prompt -> model -> plain string
    pipeline = template | chat_model | StrOutputParser()
    return pipeline.invoke({"raw_text": import_line})

In [8]:
def get_docstring(function_text):
    """
    Ask the chat model to write a docstring for a Python function.

    Args:
        function_text (str): Source code of the function.

    Returns:
        str: The model's reply after it has been passed through
            strip_double_quotes.
    """

    template = ChatPromptTemplate.from_template("""
                                              Create a high quality docstring for the given python function.
                                              Break up any line more than 80 characters into multiple
                                              lines separated by '\n'. Do not have any '\t' in the output.
                                              Instead have four spaces '    '.
                                              Do not include the function in your response.
                                              Python function: {raw_text}.
                                              """)

    # prompt -> model -> plain string, then drop quote delimiters
    pipeline = template | chat_model | StrOutputParser()
    return strip_double_quotes(pipeline.invoke({"raw_text": function_text}))

In [9]:
def get_class_docstring(function_text):
    """
    Ask the chat model to write a concise docstring for a Python class.

    Args:
        function_text (str): Source code of the class.

    Returns:
        str: The model's reply after it has been passed through
            strip_double_quotes.
    """

    template = ChatPromptTemplate.from_template("""
                                              Create a concise, high quality docstring for the given python class.
                                              Break up any line more than 80 characters into multiple
                                              lines separated by '\n'. Do not have any '\t' in the output.
                                              Instead have four spaces '    '.
                                              Do not include the class in your response.
                                              Python class: {raw_text}.
                                              """)

    # prompt -> model -> plain string, then drop quote delimiters
    pipeline = template | chat_model | StrOutputParser()
    return strip_double_quotes(pipeline.invoke({"raw_text": function_text}))

In [10]:
def merge_docstring_and_function(original_function, docstring):
    """
    Insert a docstring right after the header of a function or class.

    The definition is cut at the first colon that ends a line; the
    docstring is placed after that header, wrapped in triple double
    quotes and indented by four spaces, followed by the original body.

    Args:
        original_function (str): Source text of the definition, starting
            at its def/class line.
        docstring (str): Docstring text without quote delimiters.

    Returns:
        str: The definition with the docstring inserted. When the text
            contains no colon at the end of a line (for example a
            one-line definition such as "def f(): return 1") there is no
            place to insert the docstring and the text is returned
            unchanged.
    """

    header, separator, body = original_function.partition(":\n")
    if not separator:
        # Previously this case raised IndexError.
        return original_function

    # A body whose first character is a tab gets that tab replaced by a
    # blank line plus a four-space indent (only the first line is touched).
    if body.startswith('\t'):
        body = '\n    ' + body[1:]

    # Make sure the closing quotes land on their own line instead of being
    # appended to the last line of the docstring text.
    if not docstring.endswith('\n'):
        docstring += '\n'

    # Indent every line but the first; the first line directly follows the
    # opening quotes, which carry the indent themselves.
    indented = '    '.join(docstring.splitlines(True))

    # Keep exactly one blank line between the docstring and the body.
    gap = '' if body.startswith('\n') else '\n'

    return header + ":\n" + '    """' + indented + '    """\n' + gap + body

In [11]:
class ParentNodeVisitor(ast.NodeVisitor):
    """
    AST visitor that remembers, for every node it reaches, the node it
    was reached from.

    After visit(tree) has returned, parent_map maps each visited
    descendant to its parent node. The node the traversal started on has
    no entry.
    """

    def __init__(self):
        # Node currently being expanded; None outside of a traversal.
        self.current_parent = None
        # child node -> parent node
        self.parent_map = {}

    def visit(self, node):
        """Record the parent of node, then descend into its children."""
        enclosing = self.current_parent
        if enclosing:
            self.parent_map[node] = enclosing

        # While the children are visited, this node is their parent.
        self.current_parent = node
        self.generic_visit(node)

        # Step back up to whatever is recorded as this node's parent.
        self.current_parent = self.parent_map.get(node)

In [12]:
def node_examine(file_path):
    """
    Print every node of a Python file's AST followed by its direct children.

    For each node yielded by ast.walk a "NODE: ..." line is printed,
    followed by one "CHILD: ..." line per direct child node. This is a
    debugging aid; nothing is returned.

    Args:
        file_path (str): Path of the Python source file to parse.
    """

    # Python source files are UTF-8 by default; do not depend on the
    # platform locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        file_content = file.read()

    # Parse the file content into an Abstract Syntax Tree
    tree = ast.parse(file_content)

    # The unused filename/extension parsing was removed: it raised
    # IndexError for file names without a dot.
    for node in ast.walk(tree):
        print(f'NODE: {node}')
        for child in ast.iter_child_nodes(node):
            print(f'CHILD: {child}')

In [13]:
def node_examine2(file_path):
    """
    Map every function and class definition in a file to its parent node.

    Args:
        file_path (str): Path of the Python source file to parse.

    Returns:
        dict: Each ast.FunctionDef / ast.ClassDef node found by ast.walk,
            mapped to the parent recorded by ParentNodeVisitor (None when
            no parent was recorded).
    """

    # Python source files are UTF-8 by default; do not depend on the
    # platform locale encoding. The unused filename/extension locals of
    # the earlier version were dropped.
    with open(file_path, 'r', encoding='utf-8') as file:
        file_content = file.read()

    tree = ast.parse(file_content)

    # Record child -> parent links for the whole tree
    visitor = ParentNodeVisitor()
    visitor.visit(tree)

    # Get the nodes of interest
    nodes = [node for node in ast.walk(tree) if isinstance(node, (ast.FunctionDef, ast.ClassDef))]

    # Create a dictionary to hold the parent nodes for each node
    return {node: visitor.parent_map.get(node) for node in nodes}

In [14]:
def node_examine3(file_path):
    """
    Collect selected statement nodes of a file, overall and at module level.

    The node types of interest are FunctionDef, ClassDef, Import, Assign,
    If and ImportFrom.

    Args:
        file_path (str): Path of the Python source file to parse.

    Returns:
        tuple: (nodes, nodes2) where nodes lists every matching node in
            ast.walk order and nodes2 keeps only those whose recorded
            parent is the ast.Module node.
    """

    # Python source files are UTF-8 by default; do not depend on the
    # platform locale encoding. The unused filename/extension parsing was
    # removed: it raised IndexError for file names without a dot.
    with open(file_path, "r", encoding="utf-8") as file:
        file_content = file.read()

    # Parse the file content into an Abstract Syntax Tree
    tree = ast.parse(file_content)

    # Record child -> parent links for the whole tree
    visitor = ParentNodeVisitor()
    visitor.visit(tree)

    # Get the nodes of interest
    wanted_types = (ast.FunctionDef,
                    ast.ClassDef,
                    ast.Import,
                    ast.Assign,
                    ast.If,
                    ast.ImportFrom)
    nodes = [node for node in ast.walk(tree) if isinstance(node, wanted_types)]

    # Create a dictionary to hold the parent nodes for each node
    parent_nodes = {node: visitor.parent_map.get(node) for node in nodes}

    # Keep only the statements that sit directly in the module body
    nodes2 = [n for n in nodes if isinstance(parent_nodes[n], ast.Module)]

    return nodes, nodes2

In [14]:
parent_nodes = node_examine3(r'c:\Users\JimYi\PycharmProjects\HuggingFace_models\PythonDocstringGenerativeAI\Problem054_withdocstring.py')

FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\JimYi\\PycharmProjects\\HuggingFace_models\\PythonDocstringGenerativeAI\\Problem054_withdocstring.py'

In [15]:
def _statement_source(source_lines, node):
    """Return the original source text of one statement node."""
    end_line = getattr(node, 'end_lineno', None)  # None slices to the end
    return "\n".join(source_lines[node.lineno - 1:end_line])  # lineno is 1-based


def _decorator_source(source_lines, node):
    """Return the source lines of all decorators of a def/class, or ''."""
    if not node.decorator_list:
        return ""
    first_line = node.decorator_list[0].lineno
    return "\n".join(source_lines[first_line - 1:node.lineno - 1])


def docstring_generator(file_path):
    """
    Write a copy of a Python file with generated docstrings and comments.

    Every top-level statement of the file is processed in order:

    - functions and classes get a docstring from get_docstring /
      get_class_docstring, inserted by merge_docstring_and_function;
      all of their decorators are kept as written in the source;
    - import statements get a comment from import_comment;
    - assignments, expression statements, try and with blocks, and the
      "if __name__ == '__main__':" guard get comments from
      code_block_comment;
    - every other top-level statement (other if blocks, loops, augmented
      or annotated assignments, async functions, ...) is copied verbatim.

    Comments that stand between top-level statements in the input are
    not carried over.

    Args:
        file_path (str): Path of the Python source file to document.

    Returns:
        str: Name of the file that was written. It is created in the
            current working directory and named after the input file with
            "_withdocstring" inserted before the extension.
    """
    # Accept both '\\' and '/' as separators and split the extension at the
    # LAST dot, so names such as "my.module.py" or names without any
    # extension are handled.
    base_name = os.path.basename(os.fspath(file_path).replace('\\', '/'))
    stem, extension = os.path.splitext(base_name)

    # Python source files are UTF-8 by default; do not depend on the
    # platform locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        file_content = file.read()

    # Parse the file content into an Abstract Syntax Tree
    tree = ast.parse(file_content)
    source_lines = file_content.splitlines()

    main_guards = ("if __name__ == '__main__':", 'if __name__ == "__main__":')

    result = ""
    # Walk the module body directly. Filtering by node type beforehand
    # (as the earlier version did) silently dropped every statement kind
    # that was not listed and made the verbatim fallback unreachable.
    for node in tree.body:
        code = _statement_source(source_lines, node)

        if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
            if isinstance(node, ast.FunctionDef):
                generated = get_docstring(code)
            else:
                generated = get_class_docstring(code)
            merged = merge_docstring_and_function(code, generated)

            # node.lineno points at the def/class line, so decorators have
            # to be fetched separately.
            decorators = _decorator_source(source_lines, node)
            if decorators:
                merged = decorators + "\n" + merged

            # Two blank lines before a definition
            lead = "\n" if result.endswith('\n\n') else "\n\n"
            result += lead + merged + "\n"

        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            result += import_comment(code) + '\n' + code + '\n\n'

        elif isinstance(node, ast.Expr):
            result += '\n' + code_block_comment(code) + '\n' + code + '\n'

        elif isinstance(node, (ast.Assign, ast.Try, ast.With)):
            result += '\n\n' + code_block_comment(code) + '\n' + code + '\n'

        elif isinstance(node, ast.If):
            # Only the script entry-point guard gets a generated comment
            if code.split('\n')[0] in main_guards:
                result += '\n\n' + code_block_comment(code) + '\n' + code + '\n'
            else:
                result += '\n\n' + code + '\n'

        else:
            # Any other statement is copied unchanged, separated from the
            # preceding text by two blank lines.
            if result.endswith('\n\n\n'):
                result += code + "\n"
            elif result.endswith('\n\n'):
                result += "\n" + code + "\n"
            else:
                result += "\n\n" + code + "\n"

    output_filename = f"{stem}_withdocstring{extension}"
    with open(output_filename, "w", encoding="utf-8") as file:
        file.write(result)

    return output_filename

In [12]:
# Gradio front end: one uploaded file goes in as a file path, is passed to
# docstring_generator, and the path it returns is offered back as a file.
demo = gr.Interface(fn=docstring_generator,
                    inputs=[gr.File(type='filepath')],
                    outputs=[gr.File(type='filepath')],
                    title="Python Docstring Generative AI",
                    description="This app automatically generates docstrings for functions and classes, "
                                "as well as comments for import and assignment statements. "
                                "Simply upload your Python file, and click on Submit. "
                                "You can download the updated version complete with enhanced documentation. "
                                "Please note that any existing comment outside a function or class will not show up.")

In [13]:
# Start the Gradio server only when this code runs as the main module
# (which is also the case inside a notebook kernel).
if __name__ == '__main__':
    demo.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


In [19]:
demo.close()

Closing server running on port: 7860


In [16]:
class LimitedDepthVisitor(ast.NodeVisitor):
    """
    AST visitor that prints node type names down to a maximum depth.

    The node passed to visit() is at depth 0. Each printed line is
    indented by two spaces per depth level. Nodes deeper than max_depth
    are neither printed nor expanded.
    """

    def __init__(self, max_depth):
        # Deepest level (inclusive) that is still printed
        self.max_depth = max_depth

    def visit(self, node, current_depth=0):
        """Print node and recurse into its children, one level deeper."""
        if current_depth > self.max_depth:
            return  # Stop traversal if the depth exceeds the limit
        print(f"{'  ' * current_depth}Visiting: {type(node).__name__}")

        # Recurse explicitly so the depth is carried along. Delegating to
        # NodeVisitor.visit() calls self.visit(child) without a depth, which
        # reset it to 0 for every node and disabled the limit.
        for child in ast.iter_child_nodes(node):
            self.visit(child, current_depth + 1)

In [17]:
def tree_view_depth(file_path):
    """
    Print the AST of a Python file using LimitedDepthVisitor(max_depth=2).

    Args:
        file_path (str): Path of the Python source file to parse.
    """

    # Python source files are UTF-8 by default; do not depend on the
    # platform locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        file_content = file.read()

    # Parse the file content into an Abstract Syntax Tree
    tree = ast.parse(file_content)

    # Hand the tree to the depth-limited printer
    visitor = LimitedDepthVisitor(max_depth=2)
    visitor.visit(tree)

In [18]:
def tree_view(file_path):
    """
    Print the full AST of a Python file as an indented ast.dump().

    Args:
        file_path (str): Path of the Python source file to parse.
    """

    # Python source files are UTF-8 by default; do not depend on the
    # platform locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        file_content = file.read()

    tree = ast.parse(file_content)
    print(ast.dump(tree, indent=4))

In [19]:
tree_view(r'c:/Users/JimYi/Downloads/app.py')

Module(
    body=[
        Import(
            names=[
                alias(name='random')]),
        Import(
            names=[
                alias(name='numpy', asname='np')]),
        Import(
            names=[
                alias(name='torch')]),
        ImportFrom(
            module='chatterbox.src.chatterbox.tts',
            names=[
                alias(name='ChatterboxTTS')],
            level=0),
        Import(
            names=[
                alias(name='gradio', asname='gr')]),
        Import(
            names=[
                alias(name='spaces')]),
        Assign(
            targets=[
                Name(id='DEVICE', ctx=Store())],
            value=IfExp(
                test=Call(
                    func=Attribute(
                        value=Attribute(
                            value=Name(id='torch', ctx=Load()),
                            attr='cuda',
                            ctx=Load()),
                        attr='is_available',
          

In [14]:
# The 'os' module provides a way to use operating system-dependent functionality in Python.
import os

# Imports the load_dotenv function from the dotenv module to manage environment variables.
from dotenv import load_dotenv

# The 'ast' module in Python allows for the manipulation and analysis of Python abstract syntax trees.
import ast

# Importing the ChatOpenAI class from the langchain_openai module for use in the program.
from langchain_openai import ChatOpenAI

# Importing ChatPromptTemplate from langchain_core.prompts for creating chat-based prompt templates.
from langchain_core.prompts import ChatPromptTemplate

# Importing StrOutputParser from langchain.schema.output_parser for output parsing functionality.
from langchain.schema.output_parser import StrOutputParser

# Importing the Gradio library for building user interfaces in Python.
import gradio as gr


load_dotenv()


# Chat model object referenced by the prompt chains defined below.
# The API key is taken from the OPENAI_API_KEY environment variable
# (os.getenv returns None when the variable is not set).
chat_model = ChatOpenAI(model="gpt-4o-mini-2024-07-18",
                        max_completion_tokens=1024,
                        api_key=os.getenv("OPENAI_API_KEY"),
                        temperature=0.0)


def strip_double_quotes(s1):
    """
    Delete every run of three consecutive double-quote characters.

    Single double quotes, pairs of double quotes and single-quote
    characters are left untouched.

    Args:
        s1 (str): The text to clean.

    Returns:
        str: A copy of s1 without any triple double-quote sequences.

    Example:
        >>> strip_double_quotes('say "hi"')
        'say "hi"'
    """

    triple_quote = '"' * 3
    # Cutting at the delimiter and gluing the pieces back together removes
    # every occurrence of it.
    pieces = s1.split(triple_quote)
    return ''.join(pieces)


def code_block_comment(code_block):
    """
    Ask the chat model for '#' comments describing a block of Python code.

    The code is substituted into a fixed prompt that requests comments
    only, each line starting with '#' and without triple quotes. The
    model's reply is returned as-is.

    Args:
        code_block (str): Source text of the code to be commented on.

    Returns:
        str: The text returned by the chat model.
    """

    template = ChatPromptTemplate.from_template("""
                                              Provide comments on the following block of Python codes.
                                              Provide only comments. Do not include the function in your response.
                                              Start each line of comments with '#'. Do not include triple quotes.
                                              Python code: {raw_text}.
                                              """)

    # prompt -> model -> plain string
    pipeline = template | chat_model | StrOutputParser()
    return pipeline.invoke({"raw_text": code_block})


def import_comment(import_line):
    """
    Ask the chat model for a one-line '#' comment about an import statement.

    The import text is substituted into a fixed prompt that requests a
    concise single-line comment starting with '#'. The model's reply is
    returned as-is.

    Args:
        import_line (str): Source text of the import statement(s).

    Returns:
        str: The text returned by the chat model.
    """

    template = ChatPromptTemplate.from_template("""
                                              Provide a concise, one-line comment on the imports in Python.
                                              Start the line with '#'. 
                                              Python imports: {raw_text}.
                                              """)

    # prompt -> model -> plain string
    pipeline = template | chat_model | StrOutputParser()
    return pipeline.invoke({"raw_text": import_line})


def get_docstring(function_text):
    """
    Ask the chat model to write a docstring for a Python function.

    The function source is substituted into a fixed prompt that asks for
    a docstring with lines of at most 80 characters, four-space
    indentation instead of tabs, and without repeating the function.

    Args:
        function_text (str): Source code of the function.

    Returns:
        str: The model's reply after it has been passed through
            strip_double_quotes.
    """

    template = ChatPromptTemplate.from_template("""
                                              Create a high quality docstring for the given python function.
                                              Break up any line more than 80 characters into multiple
                                              lines separated by '\n'. Do not have any '\t' in the output.
                                              Instead have four spaces '    '.
                                              Do not include the function in your response.
                                              Python function: {raw_text}.
                                              """)

    # prompt -> model -> plain string, then drop quote delimiters
    pipeline = template | chat_model | StrOutputParser()
    return strip_double_quotes(pipeline.invoke({"raw_text": function_text}))


def get_class_docstring(function_text):
    """
    Ask the chat model to write a concise docstring for a Python class.

    The class source is substituted into a fixed prompt that asks for a
    docstring with lines of at most 80 characters, four-space indentation
    instead of tabs, and without repeating the class.

    Args:
        function_text (str): Source code of the class.

    Returns:
        str: The model's reply after it has been passed through
            strip_double_quotes.
    """

    template = ChatPromptTemplate.from_template("""
                                              Create a concise, high quality docstring for the given python class.
                                              Break up any line more than 80 characters into multiple
                                              lines separated by '\n'. Do not have any '\t' in the output.
                                              Instead have four spaces '    '.
                                              Do not include the class in your response.
                                              Python class: {raw_text}.
                                              """)

    # prompt -> model -> plain string, then drop quote delimiters
    pipeline = template | chat_model | StrOutputParser()
    return strip_double_quotes(pipeline.invoke({"raw_text": function_text}))


def _split_definition_header(original_function):
    """
    Split a function or class definition into its header and its body lines.

    The header runs up to and including the colon that closes the `def` or
    `class` signature, so a signature written over several lines stays in one
    piece. A statement written on the same line as that colon (a one-line
    definition) is moved onto its own body line, indented by four spaces.

    Parameters:
        original_function (str): Source text of the definition.

    Returns:
        tuple: `(header, body_lines)` where `header` is a string and
            `body_lines` is the list of source lines that follow it.
    """
    # Imported locally: this helper is the only user of the tokenizer.
    import io
    import tokenize

    lines = original_function.split("\n")
    row, col = 0, None
    depth = 0
    try:
        for token in tokenize.generate_tokens(io.StringIO(original_function).readline):
            if token.type != tokenize.OP:
                continue
            if token.string in ("(", "[", "{"):
                depth += 1
            elif token.string in (")", "]", "}"):
                depth -= 1
            elif token.string == ":" and depth == 0:
                # The first colon outside any bracket ends the signature;
                # colons of annotations and defaults sit inside the brackets.
                row, col = token.end[0] - 1, token.end[1]
                break
    except (tokenize.TokenError, SyntaxError):
        # Text that cannot be tokenized: treat the first line as the header.
        row, col = 0, None

    if col is None or row >= len(lines):
        return lines[0], lines[1:]

    trailing = lines[row][col:].strip()
    if trailing and not trailing.startswith("#"):
        # One-line definition: the statement after the colon becomes the body.
        header_lines = lines[:row] + [lines[row][:col]]
        body_lines = ["    " + trailing] + lines[row + 1:]
    else:
        # Nothing but whitespace or a comment after the colon: keep the line.
        header_lines = lines[:row + 1]
        body_lines = lines[row + 1:]

    return "\n".join(header_lines), body_lines


def merge_docstring_and_function(original_function, docstring):
    """
    Insert a docstring directly below the header of a function or class.

    The definition is split after the colon that closes its signature. The
    docstring is wrapped in triple double quotes, every docstring line after
    the first is prefixed with four spaces, and the result is placed between
    the header and the body. For a definition whose header is a single line
    followed by an indented body, the output is the same as simply splitting
    after the first line.

    Parameters:
        original_function (str): Source text of the definition, starting at
            its `def` or `class` line.
        docstring (str): Docstring text without the surrounding triple
            quotes. Text that starts and ends with a line break puts the
            quotes on their own lines.

    Returns:
        str: The definition with the docstring inserted. Exactly one blank
            line separates the closing quotes from the body.

    Note:
        A docstring already present in `original_function` is not removed.
        Single-line definitions and headers without any body no longer raise
        IndexError.
    """

    first_part, second_part = _split_definition_header(original_function)

    # Only the first body line has a leading tab swapped for four spaces; it
    # also gains a leading line break, which provides the blank separator.
    if second_part and second_part[0].startswith('\t'):
        second_part[0] = '\n    ' + second_part[0][1:]

    # Indent every docstring line after the first to the body level.
    docstring = '    '.join(docstring.splitlines(True))

    body = "\n".join(second_part)
    # Add a blank line after the docstring unless the body already opens with one.
    closing = '    """\n' if body.startswith('\n') else '    """\n\n'

    return first_part + "\n" + '    """' + docstring + closing + body


class ParentNodeVisitor(ast.NodeVisitor):
    """
    AST visitor that remembers, for each node it reaches, the node it was
    reached from.

    Attributes:
        parent_map (dict): Maps a visited node to its parent node. A node
            visited while no parent is active gets no entry.
        current_parent (ast.AST or None): The node whose children are being
            visited right now; None before a traversal starts.

    Methods:
        visit(node): Records the parent of `node`, walks its children with
        `generic_visit`, then restores the previous parent.
    """

    def __init__(self):
        self.current_parent = None
        self.parent_map = {}

    def visit(self, node):
        """Record the parent of `node` and descend into its children."""
        enclosing = self.current_parent
        if enclosing:
            # Whoever was being traversed when we got here is the parent.
            self.parent_map[node] = enclosing

        # While the children are visited, `node` itself is their parent.
        self.current_parent = node
        self.generic_visit(node)

        # Step back up to the parent recorded for this node (None if there
        # is no recorded parent).
        self.current_parent = self.parent_map.get(node)


def _is_main_guard(node):
    """Tell whether an `ast.If` node tests `__name__ == "__main__"`."""
    test = node.test
    # Checking the parsed condition accepts either quote style and a trailing
    # comment, which a comparison against the raw source line would reject.
    return (isinstance(test, ast.Compare)
            and isinstance(test.left, ast.Name)
            and test.left.id == "__name__"
            and len(test.ops) == 1
            and isinstance(test.ops[0], ast.Eq)
            and isinstance(test.comparators[0], ast.Constant)
            and test.comparators[0].value == "__main__")


def _node_source(source_lines, node):
    """
    Return `(decorators, code)` for a top-level node.

    `code` is the source text from the node's own line to its last line.
    `decorators` is the text of the decorator lines above a function or class
    (an empty string when there are none).
    """
    first_line = node.lineno - 1  # Line numbers in AST are 1-based
    end_line = node.end_lineno if hasattr(node, 'end_lineno') else None

    # `lineno` of a function or class is the `def`/`class` line, so decorators
    # have to be picked up from `decorator_list`.
    decorator_starts = [d.lineno - 1 for d in getattr(node, 'decorator_list', [])]
    decorator_first = min(decorator_starts + [first_line])

    decorators = "\n".join(source_lines[decorator_first:first_line])
    code = "\n".join(source_lines[first_line:end_line])
    return decorators, code


def docstring_generator(file_path):
    """
    Write a copy of a Python file with generated docstrings and comments.

    The file is parsed into an AST and its top-level statements are copied in
    source order into a new file:

    - functions and classes get a generated docstring inserted below their
      header; decorators above them are kept;
    - `import` and `from ... import` statements get a generated comment above;
    - an `if __name__ == "__main__":` block gets a generated comment above;
    - other `if` blocks, assignments and expression statements are copied
      unchanged.

    Only the statement kinds listed above are written; any other top-level
    statement, and comments between statements, do not appear in the output.

    Args:
        file_path (str): Path of the Python file to process.

    Returns:
        str: Path of the file that was written. Its name is the input name
        with "_withdocstring" inserted before the extension. Directory parts
        separated by backslashes are dropped from the input path, so such
        inputs produce a file in the current working directory.

    Raises:
        FileNotFoundError: If `file_path` does not exist.
        SyntaxError: If the file content cannot be parsed as Python code.

    Example:
        output_file = docstring_generator("path/to/your_script.py")
        print(f"Docstring added file created at: {output_file}")
    """

    # splitext only looks at the final path component, so dots in directory
    # names, a leading "./" and names without an extension are all handled.
    base_name = file_path.split('\\')[-1]
    filename, extention = os.path.splitext(base_name)

    # Python source is UTF-8 unless declared otherwise; do not depend on the
    # platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        file_content = file.read()
    source_lines = file_content.splitlines()

    # Parse the file content into an Abstract Syntax Tree
    tree = ast.parse(file_content)

    # Map every node to its parent so top-level statements can be selected
    visitor = ParentNodeVisitor()
    visitor.visit(tree)

    node_kinds = (ast.FunctionDef,
                  ast.ClassDef,
                  ast.Import,
                  ast.Assign,
                  ast.Expr,
                  ast.If,
                  ast.ImportFrom)

    # Keep only nodes of interest whose parent is the module itself
    top_level_nodes = [node for node in ast.walk(tree)
                       if isinstance(node, node_kinds)
                       and isinstance(visitor.parent_map.get(node), ast.Module)]

    result = ""
    for node in top_level_nodes:
        decorators, code = _node_source(source_lines, node)

        if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
            if isinstance(node, ast.FunctionDef):
                generated = get_docstring(code)
            else:
                generated = get_class_docstring(code)
            combined_code = merge_docstring_and_function(code, generated)
            if decorators:
                combined_code = decorators + "\n" + combined_code
            # Aim for two blank lines before a definition
            if result.endswith('\n\n'):
                result += "\n" + combined_code + "\n"
            else:
                result += "\n\n" + combined_code + "\n"

        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            result += import_comment(code) + '\n' + code + '\n\n'

        elif isinstance(node, ast.If):
            if _is_main_guard(node):
                result += '\n\n' + code_block_comment(code) + '\n\n' + code + '\n'
            else:
                result += '\n\n' + code + '\n'

        else:
            # Assignments and expression statements are copied as they are
            if result.endswith('\n\n\n'):
                result += code + "\n"
            elif result.endswith('\n\n'):
                result += "\n" + code + "\n"
            else:
                result += "\n\n" + code + "\n"

    output_filename = f"{filename}_withdocstring{extention}"
    with open(output_filename, "w", encoding="utf-8") as file:
        file.write(result)

    return output_filename


# Gradio front end: one uploaded file goes in as a file path, and the path
# returned by docstring_generator is offered back as a downloadable file.
demo = gr.Interface(fn=docstring_generator,
                    inputs=[gr.File(type='filepath')],
                    outputs=[gr.File(type='filepath')],
                    title="Python Docstring Generative AI",
                    description="This app writes docstring on Functions, Classes, and comments on Imports. "
                                "Just drop the python file and download the revised version with docstring. "
                                "Please note prior comments and assignments may be deleted in the revised version.")


# Launch the app only when this code runs as the main program, not when it is
# imported as a module. The guard also holds when the cell is executed in the
# notebook, so running it starts the local server.

if __name__ == '__main__':
    demo.launch()


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


In [15]:
# Shut down the server that demo.launch() started.
demo.close()

Closing server running on port: 7861


In [49]:
def docstring_generator3(file_path):
    """
    Return the first decorator line of the first decorated top-level function.

    The file is parsed into an AST and its top-level statements are scanned in
    source order. For the first function that carries at least one decorator,
    the function is turned back into source with `ast.unparse` and the first
    line of that text, which is its first decorator, is returned.

    Args:
        file_path (str): Path of the Python file to inspect.

    Returns:
        str or None: The decorator line, for example "@spaces.GPU", or None
        when no top-level function in the file is decorated.

    Raises:
        FileNotFoundError: If `file_path` does not exist.
        SyntaxError: If the file content cannot be parsed as Python code.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        file_content = file.read()

    # Parse the file content into an Abstract Syntax Tree
    tree = ast.parse(file_content)

    # tree.body holds exactly the statements whose parent is the module, so
    # functions nested in classes or other functions are not considered.
    for node in tree.body:
        if isinstance(node, ast.FunctionDef) and node.decorator_list:
            # ast.unparse writes each decorator on its own line above `def`
            return ast.unparse(node).split('\n')[0]

    return None

In [50]:
# NOTE(review): hard-coded absolute path on the author's machine; point this
# at a file that exists locally before re-running the cell.
dec_code = docstring_generator3(r'c:/Users/JimYi/Downloads/app.py')

In [51]:
# Display the value returned by docstring_generator3.
dec_code

'@spaces.GPU'