In [6]:
from pathlib import Path
import re

# Sanitize code

In [48]:
def remove_block_comment(code: str) -> str:
    """Remove ``/* ... */`` block comments from SQL / PL/SQL source text.

    The text is scanned left to right as a stream of tokens so that comment
    markers appearing inside other constructs are not mistaken for real
    block comments:

    * single-quoted string literals (with ``''`` as the escaped quote) are
      copied through unchanged, so ``'/* text */'`` survives;
    * ``--`` single-line comments are copied through unchanged, so a ``/*``
      written inside one does not open a block comment.

    Block comments do not nest: a comment ends at the first ``*/`` after its
    opening ``/*``. An opening ``/*`` with no closing ``*/`` is left as is.

    Args:
        code: Source text to clean.

    Returns:
        The text with every block comment replaced by the empty string.
    """
    token = re.compile(
        r"""
        (?P<keep>
            '(?:[^']|'')*'    # string literal, kept verbatim
          | --[^\n]*          # single-line comment, kept verbatim
        )
        | /\*.*?\*/           # block comment, dropped (may span several lines)
        """,
        re.VERBOSE | re.DOTALL,
    )
    # Protected tokens are re-emitted; a block comment leaves `keep` unset.
    return token.sub(lambda found: found.group("keep") or "", code)

def remove_single_line_comment(code: str) -> str:
    """Remove ``--`` single-line comments from SQL / PL/SQL source text.

    Everything from ``--`` up to (but not including) the end of the line is
    dropped; the newline itself and any text before the marker are kept, so
    line numbering is preserved.

    The text is scanned left to right as a stream of tokens so that a ``--``
    appearing inside another construct is not treated as a comment:

    * single-quoted string literals (with ``''`` as the escaped quote) are
      copied through unchanged, so ``'a--b'`` survives;
    * ``/* ... */`` block comments are copied through unchanged.

    Args:
        code: Source text to clean.

    Returns:
        The text with every single-line comment removed.
    """
    token = re.compile(
        r"""
        (?P<keep>
            '(?:[^']|'')*'    # string literal, kept verbatim
          | /\*.*?\*/         # block comment, kept verbatim
        )
        | --[^\n]*            # line comment, dropped up to the newline
        """,
        re.VERBOSE | re.DOTALL,
    )
    # Protected tokens are re-emitted; a line comment leaves `keep` unset.
    return token.sub(lambda found: found.group("keep") or "", code)

In [52]:
# Sanity checks for the comment-stripping helpers.
# These run on every Restart & Run All, so a regression in either helper
# stops the notebook here instead of silently corrupting the parsed output.

# A block comment ends at the first "*/"; inner "/*" markers do not nest.
block_comment_sample = """
test
/* this
is the
/* beginning
of
/* a comment */
foo
"""
assert remove_block_comment(block_comment_sample) == "\ntest\n\nfoo\n"

# A "--" comment is cut up to the end of its line; the text before it stays.
line_comment_sample = """
test
-- This is wrong
line of code -- foo

OK
"""
assert (
    remove_single_line_comment(line_comment_sample)
    == "\ntest\n\nline of code \n\nOK\n"
)

In [49]:
# Load the first PL/SQL package body (*.pkb) found in the codebase directory.
# Fail loudly when nothing matches: otherwise `code` would stay undefined (or
# keep a stale value from an earlier run) and later cells would break in
# confusing ways.
file = next(Path("../codebase").glob("*.pkb"), None)
if file is None:
    raise FileNotFoundError("No *.pkb file found in ../codebase")

# The whole source is lower-cased, which also lower-cases any string
# literals it contains.
code = file.read_text().lower()

In [50]:
# Strip comments from the loaded source: block comments first, then the
# remaining `--` single-line comments.
code = remove_single_line_comment(remove_block_comment(code))

In [51]:
# Persist the comment-free source to disk so the parsing section can reload it.
with Path("test.pks").open("w") as f:
    f.write(code)

# Parse

In [33]:
def extract_function(pl_sql_code: str) -> list:
    """Extract named functions and procedures from PL/SQL source text.

    A routine is recognised by a header of the form
    ``[CREATE [OR REPLACE]] FUNCTION|PROCEDURE <name>`` and is considered to
    end at the first ``END <name>;`` found after that header. Matching is
    case-insensitive.

    Limitations that follow from this purely textual approach:

    * a routine closed with a bare ``END;`` (no name) is skipped;
    * a header without its own body (for example a forward declaration) is
      paired with the first later ``END <name>;``, so its extracted text
      runs from the declaration up to that terminator.

    Args:
        pl_sql_code: PL/SQL source text, ideally with comments removed.

    Returns:
        A list of dicts, all functions first and then all procedures, each
        group in source order. Every dict has the keys:

        * ``'name'``: routine name as written in the source;
        * ``'type'``: ``'FUNCTION'`` or ``'PROCEDURE'``, taken from the
          keyword that introduced the routine;
        * ``'has_begin_end'``: ``True`` when the extracted text contains the
          keyword ``BEGIN`` as a whole word;
        * ``'body'``: extracted text from header to terminator, stripped of
          surrounding whitespace.
    """
    begin_keyword = re.compile(r'\bBEGIN\b', re.IGNORECASE)
    results = []

    for kind in ('FUNCTION', 'PROCEDURE'):
        # The leading \b stops identifiers that merely end in the keyword
        # (e.g. "my_function x") from being read as a routine header.
        header_pattern = re.compile(
            r'\b(?:CREATE\s+(?:OR\s+REPLACE\s+)?)?' + kind + r'\s+(\w+)',
            re.IGNORECASE,
        )

        for header in header_pattern.finditer(pl_sql_code):
            name = header.group(1)
            start = header.start()

            # Search from the header onwards without copying the tail of
            # the source for every routine.
            terminator_pattern = re.compile(
                r'\bEND\s+' + re.escape(name) + r'\s*;', re.IGNORECASE
            )
            terminator = terminator_pattern.search(pl_sql_code, start)
            if terminator is None:
                continue

            body = pl_sql_code[start:terminator.end()]

            results.append({
                'name': name,
                # Use the keyword that matched rather than searching the
                # body, which may mention the other keyword.
                'type': kind,
                # END is guaranteed by the terminator match, so only BEGIN
                # needs checking; whole-word so "v_beginning" does not count.
                'has_begin_end': begin_keyword.search(body) is not None,
                'body': body.strip(),
            })

    return results

In [36]:
# Reload the sanitized source from disk so parsing starts from the saved copy.
code = Path("test.pks").read_text()

In [37]:
# Parse the sanitized source and print a short report for each routine found.
analysis_results = extract_function(code)

separator = "-" * 50
for item in analysis_results:
    print(
        f"Name: {item['name']}",
        f"Type: {item['type']}",
        f"Has BEGIN/END: {item['has_begin_end']}",
        "Body:",
        # item['body'],  # body text is left out of the report
        separator,
        sep="\n",
    )

Name: get_auth_string
Type: FUNCTION
Has BEGIN/END: True
Body:
--------------------------------------------------
Name: get_signature
Type: FUNCTION
Has BEGIN/END: True
Body:
--------------------------------------------------
Name: get_aws_id
Type: FUNCTION
Has BEGIN/END: True
Body:
--------------------------------------------------
Name: get_date_string
Type: FUNCTION
Has BEGIN/END: True
Body:
--------------------------------------------------
Name: get_epoch
Type: FUNCTION
Has BEGIN/END: True
Body:
--------------------------------------------------
Name: set_aws_id
Type: PROCEDURE
Has BEGIN/END: True
Body:
--------------------------------------------------
Name: set_aws_key
Type: PROCEDURE
Has BEGIN/END: True
Body:
--------------------------------------------------
Name: set_gmt_offset
Type: PROCEDURE
Has BEGIN/END: True
Body:
--------------------------------------------------
Name: init
Type: PROCEDURE
Has BEGIN/END: True
Body:
--------------------------------------------------
