Skip to content

Commit

Permalink
Merge pull request #10 from Myoldmopar/FixDocsAndApostropheHandling
Browse files Browse the repository at this point in the history
More fixes
  • Loading branch information
Myoldmopar committed Jan 19, 2024
2 parents c7e4b84 + 1a15254 commit 28c617e
Show file tree
Hide file tree
Showing 11 changed files with 232 additions and 141 deletions.
1 change: 1 addition & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ source = energyplus_refactor_helper
[report]
omit =
*/energyplus_refactor_helper/main.py
*/energyplus_refactor_helper/__main__.py
6 changes: 6 additions & 0 deletions docs/function_call_group.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Function Call Group
===================

.. autoclass:: energyplus_refactor_helper.function_call_group.FunctionCallGroup
:members:
:class-doc-from: init
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This is a small package to help inspect code, and prepare and apply changes to t

action
function_call
function_call_group
logger
main
source_file
Expand Down
51 changes: 39 additions & 12 deletions energyplus_refactor_helper/function_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@


class FunctionCall:

MAX_LINES_FOR_SINGLE_CALL = 13 # to detect/avoid parsing issues

def __init__(self, call: int, line_start: int, file_start_index: int, line_start_index: int, first_line_text: str):
def __init__(self, call: int, func_name: str, line_start: int, file_start_index: int, line_start_index: int,
first_line_text: str):
"""
This class represents a single function call in the EnergyPlus source code.
The parsing algorithms here rely on specific EnergyPlus source code style/structure assumptions, and are surely
Expand All @@ -16,14 +16,17 @@ def __init__(self, call: int, line_start: int, file_start_index: int, line_start
function call when the end is reached. The function call can then be parsed into arguments using methods here.
:param call: This represents an integer call type, which is essentially the index of the function in the
derived :meth:`RefactorBase.function_calls()` method.
derived :meth:`energyplus_refactor_helper.action.RefactorBase.function_calls()` method.
:param func_name: This is the function name for this function call as defined in the
:meth:`energyplus_refactor_helper.action.RefactorBase.function_calls()` method.
:param line_start: This is the 1-based line number where this function call starts in the file.
:param file_start_index: This is the character index in the raw file text where the function call starts.
:param line_start_index: This is the character index in the first line of the function call where the
call starts
:param first_line_text: This is the raw first line text where the function call starts.
"""
self.call_type = call
self.function_name = func_name
self.multiline_text = [first_line_text]
self.starting_line_number = line_start
self.ending_line_number = line_start # initialize here
Expand Down Expand Up @@ -68,13 +71,21 @@ def as_cleaned_multiline(self) -> list[str]:
skip_first_line_to_call_start.append(this_line_content)
return [x.strip() for x in skip_first_line_to_call_start]

def as_single_line(self) -> str:
def as_new_version(self) -> str:
"""
After a function call has been finalized, this function provides the function call formatted as a single line.
:return: A single string representation of the function call.
This function will provide the modified version of a single function call. Right now this
function simply rewrites the function call as a single line with no functional change.
The advantage of this is that we can verify our parsing by simply rewriting all function calls
in the source code, reapplying Clang Format, and making sure we didn't break anything.
In the future, this function will utilize action-specific functionality to manipulate the
function call in more meaningful ways. There will likely be a callback function on the
action/Refactor class that takes a function name and argument list and returns the modified
version.
:return: A string representation of the function call after changes have been applied.
"""
return ''.join(self.as_cleaned_multiline()).strip()
args = ', '.join(self.parse_arguments())
return f"{self.function_name}({args});"

def parse_arguments(self) -> list[str]:
"""
Expand Down Expand Up @@ -104,7 +115,7 @@ def parse_arguments(self) -> list[str]:
reading_comment_until_new_line = False
elif inside_raw_literal:
current_arg += c
if c == '"' and one_string[i-1] == ')':
if c == '"' and one_string[i - 1] == ')':
grouping_stack.pop()
elif inside_literal and c == '\\':
current_arg += c
Expand All @@ -122,6 +133,15 @@ def parse_arguments(self) -> list[str]:
about_to_enter_string_literal = False
else:
grouping_stack.append(c)
elif c == '\'':
current_arg += c
if grouping_stack[-1] == '\'':
grouping_stack.pop()
elif grouping_stack[-1] == '\"':
# then we have a raw apostrophe inside a literal, just keep going
pass # we already added to current_arg a few lines up, just go
else:
grouping_stack.append(c)
elif inside_literal:
current_arg += c
elif c == '(':
Expand All @@ -140,18 +160,25 @@ def parse_arguments(self) -> list[str]:
elif c == ',' and len(grouping_stack) == 1:
args.append(current_arg)
current_arg = ''
elif c == 'R' and one_string[i+1] == '"' and one_string[i+2] == '(':
elif c == 'R' and one_string[i + 1] == '"' and one_string[i + 2] == '(':
# it appears we are about to enter a raw literal
about_to_enter_string_literal = True
current_arg += c
elif c == '/' and one_string[i+1] == '/':
elif c == '/' and one_string[i + 1] == '/':
reading_comment_until_new_line = True
elif c == '\n':
continue # just eat the newline
else:
current_arg += c
return [a.strip() for a in args]

def summary(self) -> dict:
    """
    Build a plain dictionary describing this function call.

    :return: A dict holding the call type under ``'type'``, the starting and ending line numbers under
             ``'line_start'`` and ``'line_end'``, and the parsed argument list under ``'args'``.
    """
    info = {'type': self.call_type}
    info['line_start'] = self.starting_line_number
    info['line_end'] = self.ending_line_number
    # arguments are parsed on demand each time a summary is requested
    info['args'] = self.parse_arguments()
    return info

def __str__(self):
"""String representation summary of the function call"""
return f"{self.starting_line_number} - {self.ending_line_number} : {self.as_single_line()[:35]}"
single_line = ''.join(self.as_cleaned_multiline()).strip()
return f"{self.starting_line_number} - {self.ending_line_number} : {single_line[:35]}"
53 changes: 53 additions & 0 deletions energyplus_refactor_helper/function_call_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from itertools import groupby
from typing import Optional

from energyplus_refactor_helper.function_call import FunctionCall


class FunctionCallGroup:
    def __init__(self, initial_call: Optional[dict] = None):
        """
        This class represents a contiguous chunk of function calls within a source file.  This is essentially just
        a list of function call summaries, but some extra intelligence can apply specifically to a group of calls,
        so a class is here to provide that context.

        Each stored entry is a dictionary with the keys ``'type'``, ``'line_start'``, ``'line_end'`` and ``'args'``
        (the shape returned by ``FunctionCall.summary()``), because those are the keys :meth:`summary_dict` reads.

        :param initial_call: Optional first function call summary to place in the group.
        """
        self.function_calls: list[dict] = []
        self.started = False
        # compare against None explicitly so an explicitly passed (but falsy) entry is not silently dropped
        if initial_call is not None:
            self.add_function_call(initial_call)

    def add_function_call(self, function_call_info: dict) -> None:
        """
        Add a function call summary to this chunk and mark the group as started.

        :param function_call_info: A single function call summary dictionary.
        :return: None
        """
        self.started = True
        self.function_calls.append(function_call_info)

    def summary_dict(self) -> dict:
        """
        This function creates a dict summary of a chunk of contiguous function calls.  It is expected this function
        will change to returning a nice structure instead of a loosely defined dictionary.

        :return: A single dictionary summary.
        :raises IndexError: If the group does not contain any function calls yet.
        :raises Exception: If any call in the group has fewer than two parsed arguments, which almost certainly
                           indicates a parser problem.  The original IndexError is chained as the cause.
        """
        if not self.function_calls:
            # same exception type the bare [0] lookup would raise, but with an actionable message
            raise IndexError("Cannot summarize an empty FunctionCallGroup")
        call_types = [call['type'] for call in self.function_calls]
        # groupby only collapses *consecutive* repeats, so [0, 0, 2, 0] becomes [0, 2, 0]
        cleaned_call_types = [call_type for call_type, _ in groupby(call_types)]
        try:
            # the second argument of each call is treated as its message text
            concatenated_messages = ' *** '.join([call['args'][1] for call in self.function_calls])
        except IndexError as err:  # pragma: no cover
            # this is almost certainly indicative of a parser problem, so we can't cover it
            raise Exception(
                f"Something went wrong with the arg processing for this chunk! {self.function_calls}"
            ) from err
        return {
            'num_calls_in_this_chunk': len(self.function_calls),
            'call_types': call_types,
            'cleaned_call_types': cleaned_call_types,
            'chunk_start_line': self.function_calls[0]['line_start'],
            'chunk_end_line': self.function_calls[-1]['line_end'],
            'concatenated_messages': concatenated_messages
        }

    def to_json(self) -> dict:
        """
        Provide a JSON-friendly representation of this group.

        :return: A dict with the group summary under ``'summary'`` and the stored call entries under ``'original'``.
        """
        return {'summary': self.summary_dict(), 'original': self.function_calls}
107 changes: 38 additions & 69 deletions energyplus_refactor_helper/source_file.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from itertools import groupby
from pathlib import Path
from typing import Optional

from energyplus_refactor_helper.function_call_group import FunctionCallGroup
from energyplus_refactor_helper.function_call import FunctionCall


Expand All @@ -17,11 +17,11 @@ def __init__(self, path: Path, function_call_list: list[str]):
self.original_file_text = self.path.read_text()
self.file_lines = self.original_file_text.split('\n')
self.found_functions = self.find_functions_in_original_text()
self.function_distribution = self.get_function_distribution()
self.function_distribution = self.get_binary_function_distribution()
self.advanced_function_distribution = self.get_advanced_function_distribution()

@staticmethod
def type_and_start_index_from_raw_line(functions: list[str], full_raw_line: str) -> tuple[Optional[int], int]:
def find_function_in_raw_line(functions: list[str], full_raw_line: str) -> tuple[Optional[int], int]:
"""
A simple worker function that searches a source line looking for one of the function call strings, and if found,
returns both the found value along with the index in the line where it was found.
Expand All @@ -30,9 +30,10 @@ def type_and_start_index_from_raw_line(functions: list[str], full_raw_line: str)
:param full_raw_line: A full line from the raw source code, to be searched.
:return: A tuple, where the first item is an optional found error call type int, and the second is the index.
"""
for f_index, f in enumerate(functions):
if f in full_raw_line:
return f_index, full_raw_line.index(f)
for func_index, func in enumerate(functions):
func_call = f"{func}("
if func_call in full_raw_line:
return func_index, full_raw_line.index(func)
return None, -1

def find_functions_in_original_text(self) -> list[FunctionCall]:
Expand All @@ -47,7 +48,7 @@ def find_functions_in_original_text(self) -> list[FunctionCall]:
parsing_multiline = False
raw_line_start_char_index = 0
raw_line_end_char_index = -1
found_errors = []
found_functions = []
while line_number <= len(self.file_lines):
raw_line = self.file_lines[line_number - 1]
raw_line_end_char_index += len(raw_line) + 1 # includes the \n at the end of the line
Expand All @@ -66,28 +67,29 @@ def find_functions_in_original_text(self) -> list[FunctionCall]:
call.finalize(character_end_index, True)
reset = True
if reset:
found_errors.append(call)
found_functions.append(call)
call = None
parsing_multiline = False
else:
if any([f"{x}(" in cleaned_line for x in self.functions]):
call_type, call_index_in_line = self.type_and_start_index_from_raw_line(self.functions, raw_line)
call_type, call_index_in_line = self.find_function_in_raw_line(self.functions, raw_line)
function_name = self.functions[call_type]
character_start_index = raw_line_start_char_index + call_index_in_line
call = FunctionCall(
call_type, line_number, character_start_index, call_index_in_line, raw_line
call_type, function_name, line_number, character_start_index, call_index_in_line, raw_line
)
if cleaned_line.strip().endswith(';'):
character_end_index = raw_line_start_char_index + raw_line.rfind(';')
call.finalize(character_end_index, True)
found_errors.append(call)
found_functions.append(call)
call = None
else:
parsing_multiline = True
raw_line_start_char_index = raw_line_end_char_index + 1
line_number += 1
return found_errors
return found_functions

def get_function_distribution(self) -> list[int]:
def get_binary_function_distribution(self) -> list[int]:
"""
Returns a distribution of function calls for the given file. This simply returns a 0 or 1, where
1 indicates the line is part of a function call, and 0 means it is not.
Expand Down Expand Up @@ -115,81 +117,48 @@ def get_advanced_function_distribution(self) -> list[int]:
line_values[line_num] = max(line_values[line_num], fe.call_type)
return line_values

def file_text_fixed_up(self) -> str:
def get_new_file_text(self) -> str:
"""
Modifies the original file text, replacing every function call with the modified version.
:return: Returns the modified source code as a Python string.
"""
new_text = self.original_file_text
for fe in reversed(self.found_functions):
new_text = new_text[:fe.char_start_in_file] + fe.as_single_line() + new_text[fe.char_end_in_file + 1:]
new_text = new_text[:fe.char_start_in_file] + fe.as_new_version() + new_text[fe.char_end_in_file + 1:]
return new_text

def fixup_file_in_place(self) -> None:
# open the file, rewrite with new text
self.file_text_fixed_up()

@staticmethod
def create_function_call_chunk_summary(call_group: list[dict]) -> dict:
def write_new_text_to_file(self) -> None:
"""
This function creates a dict summary of a chunk of contiguous function calls. It is expected this function will
change to returning a nice structure instead of a loosely defined dictionary.
Overwrites existing file contents with the modified version, replacing each function call with the new version
as defined by the action instance itself.
:param call_group: A list of dictionaries where each item is a single function call dict. It is expected that
this argument will change to a list of structs eventually.
:return: A single dictionary summary.
:return: None
"""
# TODO: Change this to return a struct, not a dict
num_calls_in_this_chunk = len(call_group)
call_types = [e['type'] for e in call_group]
cleaned_call_types = [i[0] for i in groupby(call_types)] # remove duplicates
chunk_start_line = call_group[0]['line_start']
chunk_end_line = call_group[-1]['line_end']
try:
concatenated_messages = ' *** '.join([e['args'][1] for e in call_group])
except IndexError: # pragma: no cover
# this is almost certainly indicative of a parser problem, so we can't cover it
raise Exception(f"Something went wrong with the arg processing for this chunk! {call_group}")
return {
'num_calls_in_this_chunk': num_calls_in_this_chunk,
'call_types': call_types,
'cleaned_call_types': cleaned_call_types,
'chunk_start_line': chunk_start_line,
'chunk_end_line': chunk_end_line,
'concatenated_messages': concatenated_messages
}

def group_and_summarize_function_calls(self) -> list[dict]:
self.path.write_text(self.get_new_file_text())

def get_function_call_groups(self) -> list[FunctionCallGroup]:
"""
This function loops over all found function calls in this file, groups them together into contiguous chunks,
and results in a list summary of all the function calls.
This function loops over all found function calls in this file and groups contiguous calls together into
FunctionCallGroup instances.
:return: A list of dicts containing full function call info for this file. It is expected that this will
eventually be converted over to return a list of structs instead of a list of dicts.
:return: A list of FunctionCallGroup instances containing full function call info for this file.
"""
# TODO: Change this to return a struct, not a dict
all_args_for_file = []
last_call_ended_on_line_number = -1
latest_chunk = []
group = FunctionCallGroup()
last_call_index = len(self.found_functions) - 1
for i, fe in enumerate(self.found_functions):
this_single_call = {
'type': fe.call_type, 'line_start': fe.starting_line_number,
'line_end': fe.ending_line_number, 'args': fe.parse_arguments()
}
if fe.starting_line_number == last_call_ended_on_line_number + 1:
latest_chunk.append(this_single_call)
for i, f in enumerate(self.found_functions):
this_single_call = f.summary()
if f.starting_line_number == last_call_ended_on_line_number + 1:
group.add_function_call(this_single_call)
if i == last_call_index:
summary = self.create_function_call_chunk_summary(latest_chunk)
all_args_for_file.append({'summary': summary, 'original': latest_chunk})
all_args_for_file.append(group)
else:
if latest_chunk:
summary = self.create_function_call_chunk_summary(latest_chunk)
all_args_for_file.append({'summary': summary, 'original': latest_chunk})
latest_chunk = [this_single_call] # reset the list starting with the current one
if group.started:
all_args_for_file.append(group)
group = FunctionCallGroup(this_single_call) # reset the list starting with the current one
if i == last_call_index: # this is the last error, add it to the list before leaving
summary = self.create_function_call_chunk_summary(latest_chunk)
all_args_for_file.append({'summary': summary, 'original': latest_chunk})
last_call_ended_on_line_number = fe.ending_line_number
all_args_for_file.append(group)
last_call_ended_on_line_number = f.ending_line_number
return all_args_for_file

0 comments on commit 28c617e

Please sign in to comment.