Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

finish function documentation with addtl. TODO, and QUESTION tags #4

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gct/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def render(
1. graphviz_object: graphviz.Digraph = Graphviz object to render.
2. file_path: str = file path to save the output to. If None, the svg output (str) will be returned.
3. output_format: str = Output format. Defaults to svg. Other formats include "png", "pdf".
@Returns: svg text of the graphviz object if file_path is not provided, otherwise an empty string
"""
updated_file_path = (
f"{TEMP_FOLDER}/{GRAPH_FOLDER_DEFAULT_NAME}"
Expand Down
12 changes: 6 additions & 6 deletions gct/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ROOT_NODE = -1
ROOT_NODE_LINENO = -1
NODE_NAMES_TO_IGNORE = {"super"}
SELF_NODE_NAME = "self"
TEMP_FOLDER = "temp"
GRAPH_FOLDER_DEFAULT_NAME = "gct_graph"
ROOT_NODE = -1 # key under which the root node is stored in the line-number -> Node map
ROOT_NODE_LINENO = -1 # line number given to the root node (it is not a real line of the source)
NODE_NAMES_TO_IGNORE = {"super"} # names for which the definition search is skipped and returns no targets
SELF_NODE_NAME = "self" # presumably the name marking a self reference; its usage is not visible here -- confirm
TEMP_FOLDER = "temp" # temporary working directory; also the base of the default render output path
GRAPH_FOLDER_DEFAULT_NAME = "gct_graph" # default name appended to TEMP_FOLDER to build the rendered graph's path
42 changes: 23 additions & 19 deletions gct/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ def get_root_node(self) -> Node:

def get_parent_node(self, node: Node) -> Node:
"""
> Given a node, return the parent node

:param node: The node to get the parent of
:type node: Node
:return: The parent node of the node passed in.
Given a node, return the parent node
@Parameters:
1. node: Node = node to get the parent of.
@Returns: The parent node of the node passed in else None.
"""
try:
return list(self.G.predecessors(node))[0]
Expand All @@ -57,11 +56,16 @@ def get_children_nodes(self, node: Node) -> "list[Node]":
return list(self.G.successors(node))

def group_nodes_by_level(self):
"""
Groups all nodes by level.
@Returns: Dictionary where the key is the parent node and value is all the children nodes.
"""
if self._level_clustering:
return

for node in self.get_all_nodes():
parent_node = self.get_parent_node(node)
parent_node = self.get_parent_node(node)
# Nodes with no children (i.e. function not called) are not included in the graph.
if parent_node is None:
continue
self._level_clustering[parent_node] = self.get_children_nodes(parent_node)
Expand All @@ -85,23 +89,23 @@ def test():
```
Running `print_graph_by_levels()` will print:
```
--- A(1, 8, class) ---
b(2, 5, function)
c(3, 5, function)
d(4, 5, function)
B(6, 8, class)
--- B(6, 8, class) ---
test(7, 8, function)
--- A #1 ---
b #2
c #3
d #4
B #6
--- B #6 ---
test #7
```
Note how the nodes are printed in breadth-first search order & not depth-first.
Ideally, the output should look something like this:
```
--- A(1, 8, class) ---
b(2, 5, function)
c(3, 5, function)
d(4, 5, function)
--- B(6, 8, class) ---
test(7, 8, function)
--- A #1 ---
b #2
c #3
d #4
--- B #6 ---
test #7
```
But you can leverage information stored in `self._level_clustering` to do this.
"""
Expand Down
15 changes: 8 additions & 7 deletions gct/package_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ def _is_dot_installed():
return

system = platform.system()
if system == "Windows":
message = "Graphviz package not install. Try running 'choco install -y graphviz'. \n If the error persists, install graphviz from here: https://graphviz.org/download"
elif system == "Darwin": # macOS
message = "Graphviz package not install. Try running 'brew install graphviz'. \n If the error persists, install graphviz from here: https://graphviz.org/download"
else: # assume Linux or other Unix-like system
message = "Graphviz package not install. Try running 'apt-get install graphviz'. \n If the error persists, install graphviz from here: https://graphviz.org/download"
installation_instructions = {
"Windows": "choco install -y graphviz",
"Darwin": "brew install graphviz",
"Linux": "apt-get install graphviz",
}
if system in installation_instructions:
message = f"Graphviz package not install. Try running '{installation_instructions[system]}'. \n If the error persists, install graphviz from here: https://graphviz.org/download"

message += f"\nFor any other errors, please post an issue (response time <= 10 minutes): {GCT_ISSUE_LINK}"
raise Exception(message)
Expand All @@ -44,4 +45,4 @@ def installer():
for package in PACKAGES:
_install_pip_package(package)

_is_dot_installed()
_is_dot_installed()
8 changes: 7 additions & 1 deletion gct/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@


def extract(tree: ast, raw_code: "list[str]"):
"""2 pass algorithm"""
"""
2 pass algorithm to generate a graph of nodes and edges connecting the parent with child and potential child nodes.
@Parameters:
1. tree: ast = AST to be traversed.
2. raw_code: list[str] = array of lines of relevant code.
@Returns: None.
"""

node_line_map: "dict[int, Node]" = {
constants.ROOT_NODE: Node(constants.ROOT_NODE_LINENO, len(raw_code), "root")
Expand Down
9 changes: 8 additions & 1 deletion gct/type_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Metadata(


def helper_search_definition(tree, variable_name):
# TODO: handle case where variable is redefined
"""
Recursive function to search for the definition of a variable in an AST.
Note: This only searches the latest definition of a variable since Python is dynamically typed.
Expand All @@ -42,6 +43,10 @@ def func(self):
```
In this case, var is defined in the `Assign` node with two targets. This is stored as an ast.Tuple node.
So we must traverse through all the elements in the tuple to find the definition corresponding to `var`.
@Parameters:
1. tree: ast = AST to search through.
2. variable_name: str = name of variable to search for.
@Returns: the node of the definition if the correct Assign node is found, otherwise None.
"""

# Iterate through the nodes in the AST
Expand Down Expand Up @@ -73,7 +78,7 @@ def search_for_definition(tree: ast, name: str) -> list:
@Parameters:
1. tree: ast = AST of the file.
2. name: str = name of variable to search for.
@Returns: list of potential targets for the variable. If empty, no target has been found.
@Returns: list of potential targets node ids for the variable. If empty, no target has been found.
"""
if name in NODE_NAMES_TO_IGNORE:
return []
Expand Down Expand Up @@ -107,6 +112,7 @@ def search_for_definition(tree: ast, name: str) -> list:


def get_prefix_and_suffix(name: str) -> "tuple[str, str]":
# TODO: handle case where prefix is a module name and subclasses
"""
Given a function name, return the prefix and suffix.
E.g.:
Expand Down Expand Up @@ -157,6 +163,7 @@ def infer_complex_mappings(prefix: str, suffix: str, metadata: Metadata):
node_line_map: "dict[int, Node]" = metadata.node_line_map
potential_target_nodes: "list[Node]" = []


# Case I: prefix is a class/method name
prefix_target_nodes = find_nodes_by_name(prefix, node_line_map.values())
# get children nodes for each prefix node
Expand Down
16 changes: 15 additions & 1 deletion gct/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@


def fetch_valid_url(url: str):
"""Make sure valid URL. Parses github URLs to raw githubusercontent URLs."""
"""
Verify if the provided url is valid. Converts github URLs to raw githubusercontent URLs.
@Parameters:
1. url: str = url to be validated.
@Returns: A dictionary with the following keys:
1. valid: bool = True if url is valid, False otherwise.
2. url: str = url if url is valid (and converted to githubusercontent
url if provided github url), None otherwise.
"""
status = {"valid": False, "url": None}
if url is None or len(url.strip()) == 0 or not url.startswith("http"):
return status
Expand Down Expand Up @@ -34,6 +42,12 @@ def fetch_valid_url(url: str):


def try_open_url(url: str):
"""
Try to open the specified url.
@Parameters:
1. url: str = url to be opened.
@Returns: True if url is successfully opened, False otherwise.
"""
try:
urllib.request.urlopen(url)
return True
Expand Down
88 changes: 73 additions & 15 deletions gct/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ def generate_random_color():


def save_code_to_file(code: str, resource: str):
"""
Save code to a file path.
@Parameters:
1. code: str = code to be written to the file.
2. resource: str = file path to write the code.
@Returns: None.
"""
if "/" in resource:
resource = resource.split("/")[-1]
if not resource.endswith(".py"):
Expand All @@ -24,6 +31,14 @@ def save_code_to_file(code: str, resource: str):


def flush(path: str):
"""
If the directory doesn't exist, create it. Otherwise remove
all files and directories in the existing directory. If an
error occurs, print the file path and error message.
@Parameters:
1. path: str = file path for the directory.
@Returns: None.
"""
# if folder doesn't exist, create it and exit. If it does exist, remove all files and create a new temp folder.
if not os.path.exists(path):
os.mkdir(path)
Expand All @@ -41,12 +56,16 @@ def flush(path: str):

def parse_file(resource: str):
"""
A resource can either be:
1. URL - in which case we fetch the code and parse it.
2. Path to a file - in which case we read the file and parse it.
3. Raw code - in which case we parse it directly.
Generates an AST from a file or URL.
@Parameters:
1. resource: str = url, file path, or code string to be parsed.
- URL: in which case we fetch the code and parse it.
- Path to a file: in which case we read the file and parse it.
- Raw code: in which case we parse it directly.
@Returns: the AST and lines of the code.
"""
if resource.startswith("http"):

if resource.startswith("https"):
response = requests.get(resource)
tree = ast.parse(response.text, filename=resource)
return tree, response.text.splitlines()
Expand All @@ -61,21 +80,31 @@ def parse_file(resource: str):


def get_indent_number(line: str):
    """
    Counts the leading whitespace characters of a line.
    Every whitespace character counts as one, so a tab counts the same as a
    single space; callers that want a width in spaces should expand tabs first.
    @Parameters:
    1. line: str = line whose leading whitespace you want to measure.
    @Returns: number of whitespace characters before the first non-whitespace character
    (the full length of the line if it is all whitespace).
    """
    indent_width = 0
    for char in line:
        # Stop at the first character that is not whitespace.
        if not char.isspace():
            break
        indent_width += 1
    return indent_width


def get_end_of_function(lines: "list[str]", lineno: int):
def get_end_of_function(lines: "list[str]", start_lineno: int):
"""
Fetches the end of a function definition by comparing indentation number of the
first line with the indentation of potential end function.
@Parameters:
1. filename: str = file containing function.
2. lineno: int = line number (0-based indexing) where function of interest starts from.
1. lines: list[str] = array of lines of code.
2. start_lineno: int = line number (0-based indexing) where function of interest starts from.
@Returns: line number where the function ends.
"""
start_indent = get_indent_number(lines[lineno])
for i in range(lineno + 1, len(lines)):
start_indent = get_indent_number(lines[start_lineno])
for i in range(start_lineno + 1, len(lines)):
line = lines[i]
"""
End of function occurs when indentation is decreased (i.e. code scope changed),
line is not a newline, and line is not a part of a declaration (i.e. multiline tuple).
"""
if (
get_indent_number(line) <= start_indent
and line.strip()
Expand All @@ -86,13 +115,27 @@ def get_end_of_function(lines: "list[str]", lineno: int):


def fetch_full_function(lines: "list[str]", start_lineno: int) -> "list[str]":
    """
    Gets all the lines of the function that starts at `start_lineno`.
    @Parameters:
    1. lines: list[str] = array of lines of code.
    2. start_lineno: int = line number (0-based indexing) where function of interest starts from.
    @Returns: the lines from `start_lineno` through the end line reported by
    `get_end_of_function`, inclusive.
    """
    # Slicing excludes the stop index, so add 1 to keep the end line in the result.
    stop = get_end_of_function(lines, start_lineno) + 1
    return lines[start_lineno:stop]


def is_call_node_in_function_of_interest(
lines: "list[str]", call_node_name: str
) -> bool:
"""
Check if node is called in a specific line of code.
@Parameters:
1. lines: list[str] = array of lines of code.
2. call_node_name: str = name of node to check for.
@Returns: True if node is found or call_node_name is None (i.e. node connection logic), else False.
"""
if not call_node_name:
# used in node connection logic. In this case, since we only consider
# function calls, we don't need to check if call_node_name is defined
Expand All @@ -110,14 +153,20 @@ def is_call_node_in_function_of_interest(


def is_line_function_or_class(line: str):
    """
    Check if given line is the start of a function or class definition.
    The check looks only at the first space-delimited token of the line after
    leading whitespace is removed; it must be exactly `def` or `class`.
    Note: `async def` lines are not matched, because their first token is `async`.
    @Parameters:
    1. line: str = line to be checked.
    @Returns: True if the line is the start of a function or class definition, else False.
    """
    # Only the first token matters, so split once instead of tokenising the line twice.
    first_token = line.lstrip().split(" ", 1)[0]
    return first_token in ("def", "class")


def get_immediate_parent(lines: "list[str]", lineno: int, call_node_name: str = None):
"""
Given a function, fx, find the most immediate parent node.
Given a function, fx, find the index of the most immediate parent node containing the specified node.
In this case, most immediate parent node is the first instance where
the indentation number is lesser than fx.
E.g.:
Expand Down Expand Up @@ -148,7 +197,7 @@ def Y(): #2
1. lines: list[str] = relevant lines of code.
2. lineno:int = line number (0-based indexing) where function of interests starts from.
3. call_node_name: str = name of function of interest.
@Returns: line number of immediate parent node.
@Returns: line number of immediate parent node with specified node else -1. If no parent found, raise ValueError.
"""
assert lineno < len(lines), "lineno out of range"

Expand Down Expand Up @@ -185,8 +234,8 @@ def find_function_of_interest(name: str, metadata: type_check.Metadata) -> "list
Ignoring class definition calls for now.

@Parameters:
1. node_line_map: dict[int, Node] = maps line number to Node object.
2. name: str = name of function of interest.
1. name: str = name of function of interest.
2. node_line_map: dict[int, Node] = maps line number to Node object.
@Returns: list of potential target `Node` for function of interest.
"""
prefix, suffix = type_check.get_prefix_and_suffix(name)
Expand Down Expand Up @@ -221,7 +270,16 @@ def add_subgraphs(
root: Node,
visited: set = set(),
):
"""Recursively traverse (depth-first) the graph, `g`, and add corresponding subgraph to `root`."""
"""
Recursively traverse (depth-first) the graph, `g`, and add corresponding subgraph to `root`.

@Parameters:
1. node_representation: Graph = graph of all nodes.
2. graphviz_graph: graphviz.Digraph = graph to be modified and written (i.e. adding subgraph and nodes).
3. root: Node = Root node to start.
4. visited: Set = set to store all visited nodes.
@Returns: None.
"""

for node in node_representation.G.successors(root):
node: Node = node
Expand Down