In [1]:
import os
import re
import sys
import json
import codecs
import chardet
import warnings

from antlr4 import CommonTokenStream, InputStream
from antlr4.error.ErrorListener import ErrorListener

from grammar.wfl.WFLLexer import WFLLexer

from grammar.wfl.WFLParser import WFLParser
from grammar.wfl.WFLVisitorImp import WFLVisitorImp

In [2]:
# These function inherits from the repository parser
def beautify_lisp_string(in_string):
    indent_size = 3
    add_indent = ' '*indent_size
    out_string = in_string[0]  # no indent for 1st (
    indent = ''
    for i in range(1, len(in_string)):
        if in_string[i] == '(' and in_string[i+1] != ' ':
            indent += add_indent
            out_string += "\n" + indent + '('
        elif in_string[i] == ')':
            out_string += ')'
            if len(indent) > 0:
                indent = indent.replace(add_indent, '', 1)
        else:
            out_string += in_string[i]
    return out_string


def preprocess_wfl(code: str) -> str:
    # Process each line to remove 8 digits at the end of lines
    filtered_lines = []
    for line in code.splitlines():
        # Skip lines that start with %***%
        if line.strip().startswith('%***%'):
            continue
        
        # Remove 8 digits at the end of the line if they exist
        # This pattern matches exactly 8 digits at the end of a line, 
        # but not if they're inside quotes or part of a larger string
        line = re.sub(r'(\s+)\d{8}\s*$', r'\1', line)
        
        # Only add non-empty lines
        if line.strip():
            filtered_lines.append(line.rstrip())
    
    # Join the remaining lines
    code = "\n".join(filtered_lines)

    code = code.replace("/FROM/", "/FROMCONSTANTVALUE/").replace('""""','""').replace('"""','"').replace("?????", "").replace('""09""', "09").replace('""00""',"00")
    
    return code


In [3]:
# Init
encoding = "shift_jis"
file_path = "tmp/(DAIKYO)DENJOB_JOB_BTORM_RCV ON AUDPK.txt"
file_name = os.path.basename(file_path).split(".")[0]

with open(file_path, "r", encoding=encoding) as f:
        code = f.read()

# Preprocess
code = preprocess_wfl(code)

with open("Preprocessed_" + file_name + ".txt", "w", encoding=encoding) as f:
    f.write(code)

# Run lexer
stream = InputStream(code)
lexer = WFLLexer(stream)

token_stream = CommonTokenStream(lexer)
token_stream.fill()

# Get comments
comments = []
# Get symbolic names list from the lexer
symbolic_names = lexer.symbolicNames

# Print token details
for token in token_stream.tokens:
    token_name = symbolic_names[token.type] if token.type < len(symbolic_names) else "UNKNOWN"
    if token_name == "COMMENT":
        cmt = {
            "text": token.text,
            "index": token.tokenIndex,
            "line": token.line,
            "column": token.column,
            "start": token.start,
            "stop": token.stop
        }
        comments.append(cmt)


# Run parser
parser = WFLParser(token_stream)
parser.buildParseTrees = True

# Build tree
tree = parser.startRule()

In [4]:
lisp_tree = beautify_lisp_string(tree.toStringTree(recog=parser))

print(lisp_tree)

with open("Tree_" + file_name +  ".txt" ,"w", encoding="utf-8") as f:
    f.write(lisp_tree)

(startRule 
   (job 
      (beginJob ? BEGIN JOB 
         (filePath 
            (filePathName 
               (filePathNameChar SETUBI)) / 
            (filePathName 
               (filePathNameChar BTORM)) / 
            (filePathName 
               (filePathNameChar RCV))) ;) 
      (attributes 
         (attribute 
            (userAttribute USER = 
               (filePath 
                  (filePathName 
                     (filePathNameChar DAIKYO)) / 
                  (filePathName 
                     (filePathNameChar A16))) ;)) 
         (attribute 
            (accessCodeAttribute ACCESSCODE = 
               (filePath 
                  (filePathName 
                     (filePathNameChar ADAIKYO)) / 
                  (filePathName 
                     (filePathNameChar ADAIKYO01))) ;)) 
         (attribute 
            (familyAttribute FAMILY 
               (reservedKeyword DISK) = 
               (storageUnit U1) OTHERWISE 
               (reservedKeyword DISK

In [5]:
# Visit tree and Collect Information
visitor = WFLVisitorImp()
visitor.visit(tree)

# Get Parsed Information
job_name = visitor.job_name
subroutines = [subroutine.dict() for subroutine in visitor.subroutines]
statements = [ statement.dict() for statement in visitor.statements]
attributes =[attr.dict() for attr in visitor.attributes]
parameters = [param.dict() for param in visitor.parameters]
declarations = [dec.dict() for dec in visitor.declarations]

parsed_program = {"encoding": encoding,
                      "job_name": job_name,
                      "parameters": parameters,
                      "attributes": attributes,
                      "declarations": declarations,
                      "subroutines": subroutines,
                      "statements": statements,
                      "comments": comments
    }

# Save json file
with open(file_name + ".json", "w", encoding=encoding) as f:
    json.dump(parsed_program, f, ensure_ascii=False, indent=4)

/var/folders/r6/7qscv8x947vbb_7d9s7m02c80000gp/T/ipykernel_4964/1624526274.py:8: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  statements = [ statement.dict() for statement in visitor.statements]
/var/folders/r6/7qscv8x947vbb_7d9s7m02c80000gp/T/ipykernel_4964/1624526274.py:9: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  attributes =[attr.dict() for attr in visitor.attributes]
