Merge pull request #10 from Myoldmopar/FixDocsAndApostropheHandling

More fixes
Myoldmopar · Jan 19, 2024 · 28c617e · 28c617e
2 parents c7e4b84 + 1a15254
commit 28c617e
Show file tree

Hide file tree

Showing 11 changed files with 232 additions and 141 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -4,3 +4,4 @@ source = energyplus_refactor_helper
 [report]
 omit =
     */energyplus_refactor_helper/main.py
+    */energyplus_refactor_helper/__main__.py
diff --git a/docs/function_call_group.rst b/docs/function_call_group.rst
@@ -0,0 +1,6 @@
+Function Call Group
+===================
+
+.. autoclass:: energyplus_refactor_helper.function_call_group.FunctionCallGroup
+    :members:
+    :class-doc-from: init
diff --git a/docs/index.rst b/docs/index.rst
@@ -14,6 +14,7 @@ This is a small package to help inspect code, and prepare and apply changes to t
 
    action
    function_call
+   function_call_group
    logger
    main
    source_file

diff --git a/energyplus_refactor_helper/function_call.py b/energyplus_refactor_helper/function_call.py
@@ -2,10 +2,10 @@
 
 
 class FunctionCall:
-
     MAX_LINES_FOR_SINGLE_CALL = 13  # to detect/avoid parsing issues
 
-    def __init__(self, call: int, line_start: int, file_start_index: int, line_start_index: int, first_line_text: str):
+    def __init__(self, call: int, func_name: str, line_start: int, file_start_index: int, line_start_index: int,
+                 first_line_text: str):
         """
         This class represents a single function call in the EnergyPlus source code.
         The parsing algorithms here rely on specific EnergyPlus source code style/structure assumptions, and are surely
@@ -16,14 +16,17 @@ def __init__(self, call: int, line_start: int, file_start_index: int, line_start
         function call when the end is reached.  The function call can then be parsed into arguments using methods here.
 
         :param call: This represents an integer call type, which is essentially the index of the function in the
-                     derived :meth:`RefactorBase.function_calls()` method.
+                     derived :meth:`energyplus_refactor_helper.action.RefactorBase.function_calls()` method.
+        :param func_name: This is the function name for this function call as defined in the
+                          :meth:`energyplus_refactor_helper.action.RefactorBase.function_calls()` method.
         :param line_start: This is the 1-based line number where this function call starts in the file.
         :param file_start_index: This is the character index in the raw file text where the function call starts.
         :param line_start_index: This is the character index in the first line of the function call where the
                                  call starts
         :param first_line_text: This is the raw first line text where the function call starts.
         """
         self.call_type = call
+        self.function_name = func_name
         self.multiline_text = [first_line_text]
         self.starting_line_number = line_start
         self.ending_line_number = line_start  # initialize here
@@ -68,13 +71,21 @@ def as_cleaned_multiline(self) -> list[str]:
             skip_first_line_to_call_start.append(this_line_content)
         return [x.strip() for x in skip_first_line_to_call_start]
 
-    def as_single_line(self) -> str:
+    def as_new_version(self) -> str:
         """
-        After a function call has been finalized, this function provides the function call formatted as a single line.
-
-        :return: A single string representation of the function call.
+        This function will provide the modified version of a single function call.  Right now this
+        function simply rewrites the function call as a single line with no functional change.
+        The advantage of this is that we can verify our parsing by simply rewriting all function calls
+        in the source code, reapplying Clang Format, and making sure we didn't break anything.
+        In the future, this function will utilize action-specific functionality to manipulate the
+        function call in more meaningful ways.  There will likely be a callback function on the
+        action/Refactor class that takes a function name and argument list and returns the modified
+        version.
+
+        :return: A string representation of the function call after changes have been applied.
         """
-        return ''.join(self.as_cleaned_multiline()).strip()
+        args = ', '.join(self.parse_arguments())
+        return f"{self.function_name}({args});"
 
     def parse_arguments(self) -> list[str]:
         """
@@ -104,7 +115,7 @@ def parse_arguments(self) -> list[str]:
                     reading_comment_until_new_line = False
             elif inside_raw_literal:
                 current_arg += c
-                if c == '"' and one_string[i-1] == ')':
+                if c == '"' and one_string[i - 1] == ')':
                     grouping_stack.pop()
             elif inside_literal and c == '\\':
                 current_arg += c
@@ -122,6 +133,15 @@ def parse_arguments(self) -> list[str]:
                         about_to_enter_string_literal = False
                     else:
                         grouping_stack.append(c)
+            elif c == '\'':
+                current_arg += c
+                if grouping_stack[-1] == '\'':
+                    grouping_stack.pop()
+                elif grouping_stack[-1] == '\"':
+                    # then we have a raw apostrophe inside a literal, just keep going
+                    pass  # we already added to current_arg a few lines up, just go
+                else:
+                    grouping_stack.append(c)
             elif inside_literal:
                 current_arg += c
             elif c == '(':
@@ -140,18 +160,25 @@ def parse_arguments(self) -> list[str]:
             elif c == ',' and len(grouping_stack) == 1:
                 args.append(current_arg)
                 current_arg = ''
-            elif c == 'R' and one_string[i+1] == '"' and one_string[i+2] == '(':
+            elif c == 'R' and one_string[i + 1] == '"' and one_string[i + 2] == '(':
                 # it appears we are about to enter a raw literal
                 about_to_enter_string_literal = True
                 current_arg += c
-            elif c == '/' and one_string[i+1] == '/':
+            elif c == '/' and one_string[i + 1] == '/':
                 reading_comment_until_new_line = True
             elif c == '\n':
                 continue  # just eat the newline
             else:
                 current_arg += c
         return [a.strip() for a in args]
 
+    def summary(self) -> dict:
+        return {
+            'type': self.call_type, 'line_start': self.starting_line_number,
+            'line_end': self.ending_line_number, 'args': self.parse_arguments()
+        }
+
     def __str__(self):
         """String representation summary of the function call"""
-        return f"{self.starting_line_number} - {self.ending_line_number} : {self.as_single_line()[:35]}"
+        single_line = ''.join(self.as_cleaned_multiline()).strip()
+        return f"{self.starting_line_number} - {self.ending_line_number} : {single_line[:35]}"
diff --git a/energyplus_refactor_helper/function_call_group.py b/energyplus_refactor_helper/function_call_group.py
@@ -0,0 +1,53 @@
+from itertools import groupby
+from typing import Optional
+
+from energyplus_refactor_helper.function_call import FunctionCall
+
+
+class FunctionCallGroup:
+    def __init__(self, initial_call: Optional[FunctionCall] = None):
+        """
+        This class represents a contiguous chunk of function calls within a source file.  This is essentially just
+        a list of function call instances, but some extra intelligence can apply specifically to a group of calls,
+        so a class is here to provide that context.
+        """
+        self.function_calls = []
+        self.started = False
+        if initial_call:
+            self.add_function_call(initial_call)
+
+    def add_function_call(self, function_call_info: FunctionCall) -> None:
+        """
+        Add a function call instance to this chunk.
+        """
+        self.started = True
+        self.function_calls.append(function_call_info)
+
+    def summary_dict(self) -> dict:
+        """
+        This function creates a dict summary of a chunk of contiguous function calls.  It is expected this function will
+        change to returning a nice structure instead of a loosely defined dictionary.
+
+        :return: A single dictionary summary.
+        """
+        num_calls_in_this_chunk = len(self.function_calls)
+        call_types = [e['type'] for e in self.function_calls]
+        cleaned_call_types = [i[0] for i in groupby(call_types)]  # remove duplicates
+        chunk_start_line = self.function_calls[0]['line_start']
+        chunk_end_line = self.function_calls[-1]['line_end']
+        try:
+            concatenated_messages = ' *** '.join([e['args'][1] for e in self.function_calls])
+        except IndexError:  # pragma: no cover
+            # this is almost certainly indicative of a parser problem, so we can't cover it
+            raise Exception(f"Something went wrong with the arg processing for this chunk! {self.function_calls}")
+        return {
+            'num_calls_in_this_chunk': num_calls_in_this_chunk,
+            'call_types': call_types,
+            'cleaned_call_types': cleaned_call_types,
+            'chunk_start_line': chunk_start_line,
+            'chunk_end_line': chunk_end_line,
+            'concatenated_messages': concatenated_messages
+        }
+
+    def to_json(self) -> dict:
+        return {'summary': self.summary_dict(), 'original': self.function_calls}
diff --git a/energyplus_refactor_helper/source_file.py b/energyplus_refactor_helper/source_file.py
@@ -1,7 +1,7 @@
-from itertools import groupby
 from pathlib import Path
 from typing import Optional
 
+from energyplus_refactor_helper.function_call_group import FunctionCallGroup
 from energyplus_refactor_helper.function_call import FunctionCall
 
 
@@ -17,11 +17,11 @@ def __init__(self, path: Path, function_call_list: list[str]):
         self.original_file_text = self.path.read_text()
         self.file_lines = self.original_file_text.split('\n')
         self.found_functions = self.find_functions_in_original_text()
-        self.function_distribution = self.get_function_distribution()
+        self.function_distribution = self.get_binary_function_distribution()
         self.advanced_function_distribution = self.get_advanced_function_distribution()
 
     @staticmethod
-    def type_and_start_index_from_raw_line(functions: list[str], full_raw_line: str) -> tuple[Optional[int], int]:
+    def find_function_in_raw_line(functions: list[str], full_raw_line: str) -> tuple[Optional[int], int]:
         """
         A simple worker function that searches a source line looking for one of the function call strings, and if found,
         returns both the found value along with the index in the line where it was found.
@@ -30,9 +30,10 @@ def type_and_start_index_from_raw_line(functions: list[str], full_raw_line: str)
         :param full_raw_line: A full line from the raw source code, to be searched.
         :return: A tuple, where the first item is an optional found error call type int, and the second is the index.
         """
-        for f_index, f in enumerate(functions):
-            if f in full_raw_line:
-                return f_index, full_raw_line.index(f)
+        for func_index, func in enumerate(functions):
+            func_call = f"{func}("
+            if func_call in full_raw_line:
+                return func_index, full_raw_line.index(func)
         return None, -1
 
     def find_functions_in_original_text(self) -> list[FunctionCall]:
@@ -47,7 +48,7 @@ def find_functions_in_original_text(self) -> list[FunctionCall]:
         parsing_multiline = False
         raw_line_start_char_index = 0
         raw_line_end_char_index = -1
-        found_errors = []
+        found_functions = []
         while line_number <= len(self.file_lines):
             raw_line = self.file_lines[line_number - 1]
             raw_line_end_char_index += len(raw_line) + 1  # includes the \n at the end of the line
@@ -66,28 +67,29 @@ def find_functions_in_original_text(self) -> list[FunctionCall]:
                     call.finalize(character_end_index, True)
                     reset = True
                 if reset:
-                    found_errors.append(call)
+                    found_functions.append(call)
                     call = None
                     parsing_multiline = False
             else:
                 if any([f"{x}(" in cleaned_line for x in self.functions]):
-                    call_type, call_index_in_line = self.type_and_start_index_from_raw_line(self.functions, raw_line)
+                    call_type, call_index_in_line = self.find_function_in_raw_line(self.functions, raw_line)
+                    function_name = self.functions[call_type]
                     character_start_index = raw_line_start_char_index + call_index_in_line
                     call = FunctionCall(
-                        call_type, line_number, character_start_index, call_index_in_line, raw_line
+                        call_type, function_name, line_number, character_start_index, call_index_in_line, raw_line
                     )
                     if cleaned_line.strip().endswith(';'):
                         character_end_index = raw_line_start_char_index + raw_line.rfind(';')
                         call.finalize(character_end_index, True)
-                        found_errors.append(call)
+                        found_functions.append(call)
                         call = None
                     else:
                         parsing_multiline = True
             raw_line_start_char_index = raw_line_end_char_index + 1
             line_number += 1
-        return found_errors
+        return found_functions
 
-    def get_function_distribution(self) -> list[int]:
+    def get_binary_function_distribution(self) -> list[int]:
         """
         Returns a distribution of function calls for the given file.  This simply returns a 0 or 1, where
         1 indicates the line is part of a function call, and 0 means it is not.
@@ -115,81 +117,48 @@ def get_advanced_function_distribution(self) -> list[int]:
                 line_values[line_num] = max(line_values[line_num], fe.call_type)
         return line_values
 
-    def file_text_fixed_up(self) -> str:
+    def get_new_file_text(self) -> str:
         """
         Modifies the original file text, replacing every function call with the modified version.
 
         :return: Returns the modified source code as a Python string.
         """
         new_text = self.original_file_text
         for fe in reversed(self.found_functions):
-            new_text = new_text[:fe.char_start_in_file] + fe.as_single_line() + new_text[fe.char_end_in_file + 1:]
+            new_text = new_text[:fe.char_start_in_file] + fe.as_new_version() + new_text[fe.char_end_in_file + 1:]
         return new_text
 
-    def fixup_file_in_place(self) -> None:
-        # open the file, rewrite with new text
-        self.file_text_fixed_up()
-
-    @staticmethod
-    def create_function_call_chunk_summary(call_group: list[dict]) -> dict:
+    def write_new_text_to_file(self) -> None:
         """
-        This function creates a dict summary of a chunk of contiguous function calls.  It is expected this function will
-        change to returning a nice structure instead of a loosely defined dictionary.
+        Overwrites existing file contents with the modified version, replacing each function call with the new version
+        as defined by the action instance itself.
 
-        :param call_group: A list of dictionaries where each item is a single function call dict.  It is expected that
-                           this argument will change to a list of structs eventually.
-        :return: A single dictionary summary.
+        :return: None
         """
-        # TODO: Change this to return a struct, not a dict
-        num_calls_in_this_chunk = len(call_group)
-        call_types = [e['type'] for e in call_group]
-        cleaned_call_types = [i[0] for i in groupby(call_types)]  # remove duplicates
-        chunk_start_line = call_group[0]['line_start']
-        chunk_end_line = call_group[-1]['line_end']
-        try:
-            concatenated_messages = ' *** '.join([e['args'][1] for e in call_group])
-        except IndexError:  # pragma: no cover
-            # this is almost certainly indicative of a parser problem, so we can't cover it
-            raise Exception(f"Something went wrong with the arg processing for this chunk! {call_group}")
-        return {
-            'num_calls_in_this_chunk': num_calls_in_this_chunk,
-            'call_types': call_types,
-            'cleaned_call_types': cleaned_call_types,
-            'chunk_start_line': chunk_start_line,
-            'chunk_end_line': chunk_end_line,
-            'concatenated_messages': concatenated_messages
-        }
-
-    def group_and_summarize_function_calls(self) -> list[dict]:
+        self.path.write_text(self.get_new_file_text())
+
+    def get_function_call_groups(self) -> list[FunctionCallGroup]:
         """
-        This function loops over all found function calls in this file, groups them together into contiguous chunks,
-        and results in a list summary of all the function calls.
+        This function loops over all found function calls in this file and groups contiguous calls together into
+        FunctionCallGroup instances.
 
-        :return: A list of dicts containing full function call info for this file.  It is expected that this will
-                 eventually be converted over to return a list of structs instead of a list of dicts.
+        :return: A list of FunctionCallGroup instances containing full function call info for this file.
         """
-        # TODO: Change this to return a struct, not a dict
         all_args_for_file = []
         last_call_ended_on_line_number = -1
-        latest_chunk = []
+        group = FunctionCallGroup()
         last_call_index = len(self.found_functions) - 1
-        for i, fe in enumerate(self.found_functions):
-            this_single_call = {
-                'type': fe.call_type, 'line_start': fe.starting_line_number,
-                'line_end': fe.ending_line_number, 'args': fe.parse_arguments()
-            }
-            if fe.starting_line_number == last_call_ended_on_line_number + 1:
-                latest_chunk.append(this_single_call)
+        for i, f in enumerate(self.found_functions):
+            this_single_call = f.summary()
+            if f.starting_line_number == last_call_ended_on_line_number + 1:
+                group.add_function_call(this_single_call)
                 if i == last_call_index:
-                    summary = self.create_function_call_chunk_summary(latest_chunk)
-                    all_args_for_file.append({'summary': summary, 'original': latest_chunk})
+                    all_args_for_file.append(group)
             else:
-                if latest_chunk:
-                    summary = self.create_function_call_chunk_summary(latest_chunk)
-                    all_args_for_file.append({'summary': summary, 'original': latest_chunk})
-                latest_chunk = [this_single_call]  # reset the list starting with the current one
+                if group.started:
+                    all_args_for_file.append(group)
+                group = FunctionCallGroup(this_single_call)  # reset the list starting with the current one
                 if i == last_call_index:  # this is the last error, add it to the list before leaving
-                    summary = self.create_function_call_chunk_summary(latest_chunk)
-                    all_args_for_file.append({'summary': summary, 'original': latest_chunk})
-            last_call_ended_on_line_number = fe.ending_line_number
+                    all_args_for_file.append(group)
+            last_call_ended_on_line_number = f.ending_line_number
         return all_args_for_file