In [6]:
from tree_sitter import Language, Parser
Language.build_library(
    'build/my-languages.so', 
    ['tree-sitter-java']      
)

JAVA_LANGUAGE = Language('build/my-languages.so', 'java')
parser = Parser()
parser.set_language(JAVA_LANGUAGE)

with open('../Data/Solution.java', 'r') as file:
    source_code = file.read()

tree = parser.parse(bytes(source_code, 'utf8'))

def traverse(node, stats=None):
    if stats is None:
        stats = {}
    node_type = node.type
    stats[node_type] = stats.get(node_type, 0) + 1
    for child in node.children:
        traverse(child, stats)
    return stats

root_node = tree.root_node
stats = traverse(root_node)
print("Syntax Node Statistics:", stats)


Syntax Node Statistics: {'program': 1, 'package_declaration': 1, 'package': 1, 'scoped_identifier': 5, 'identifier': 34, '.': 9, ';': 10, 'line_comment': 3, 'import_declaration': 2, 'import': 2, 'class_declaration': 1, 'modifiers': 2, 'public': 2, 'class': 1, 'class_body': 1, '{': 6, 'method_declaration': 1, 'array_type': 2, 'integral_type': 6, 'int': 6, 'dimensions': 4, '[': 6, ']': 6, 'formal_parameters': 1, '(': 7, 'formal_parameter': 2, ',': 5, ')': 7, 'block': 3, 'local_variable_declaration': 3, 'generic_type': 2, 'type_identifier': 5, 'type_arguments': 2, '<': 3, '>': 2, 'variable_declarator': 3, '=': 3, 'object_creation_expression': 1, 'new': 3, 'argument_list': 4, 'for_statement': 1, 'for': 1, 'decimal_integer_literal': 3, 'binary_expression': 2, 'field_access': 1, 'update_expression': 1, '++': 1, '-': 3, 'array_access': 2, 'if_statement': 1, 'if': 1, 'parenthesized_expression': 1, 'method_invocation': 3, 'return_statement': 2, 'return': 2, 'array_creation_expression': 2, 'arra



In [7]:
import re

def parse_ebnf(file_path):
    rules = {}
    with open(file_path, 'r') as file:
        for line in file:
            match = re.match(r'^(\w+)\s*::=\s*(.+)$', line)
            if match:
                rule_name, definition = match.groups()
                rules[rule_name] = definition.strip()
    return rules
ebnf_rules = parse_ebnf('java/java.ebnf')
print("Extracted EBNF Rules:", ebnf_rules)


Extracted EBNF Rules: {'extras': '{ line_comment block_comment /\\s/ }', 'supertypes': '{ expression declaration statement primary_expression _literal _type _simple_type _unannotated_type module_directive }', 'inline': '{ _name _simple_type _class_body_declaration _variable_initializer }', 'conflicts': '{ { modifiers annotated_type receiver_parameter } { modifiers annotated_type module_declaration package_declaration } { _unannotated_type primary_expression inferred_parameters } { _unannotated_type primary_expression } { _unannotated_type primary_expression scoped_type_identifier } { _unannotated_type scoped_type_identifier } { _unannotated_type generic_type } { generic_type primary_expression } { expression statement } { lambda_expression primary_expression } { inferred_parameters primary_expression } { argument_list record_pattern_body } { yield_statement _reserved_identifier } }', 'word': 'identifier'}


In [8]:
def match_rules_with_ast(stats, ebnf_rules):
    rule_stats = {}
    for rule_name in ebnf_rules:
        if rule_name in stats:
            rule_stats[rule_name] = stats[rule_name]
    return rule_stats

rule_stats = match_rules_with_ast(stats, ebnf_rules)
print("Rule-Based Statistics:", rule_stats)


Rule-Based Statistics: {}


In [13]:
import os
import re
from tree_sitter import Language, Parser

if not os.path.exists('build'):
    os.mkdir('build')

Language.build_library(
    'build/my-languages.so',
    [
        'tree-sitter-java' 
    ]
)

JAVA_LANGUAGE = Language('build/my-languages.so', 'java')

parser = Parser()
parser.set_language(JAVA_LANGUAGE)

with open('../Data/Solution.java', 'r', encoding='utf-8') as file:
    source_code = file.read()

tree = parser.parse(bytes(source_code, 'utf8'))

def traverse(node, stats=None):
    if stats is None:
        stats = {}
    node_type = node.type
    stats[node_type] = stats.get(node_type, 0) + 1
    for child in node.children:
        traverse(child, stats)
    return stats

root_node = tree.root_node
stats = traverse(root_node)
print("Syntax Node Statistics:", stats)


Syntax Node Statistics: {'program': 1, 'package_declaration': 1, 'package': 1, 'scoped_identifier': 5, 'identifier': 34, '.': 9, ';': 10, 'line_comment': 3, 'import_declaration': 2, 'import': 2, 'class_declaration': 1, 'modifiers': 2, 'public': 2, 'class': 1, 'class_body': 1, '{': 6, 'method_declaration': 1, 'array_type': 2, 'integral_type': 6, 'int': 6, 'dimensions': 4, '[': 6, ']': 6, 'formal_parameters': 1, '(': 7, 'formal_parameter': 2, ',': 5, ')': 7, 'block': 3, 'local_variable_declaration': 3, 'generic_type': 2, 'type_identifier': 5, 'type_arguments': 2, '<': 3, '>': 2, 'variable_declarator': 3, '=': 3, 'object_creation_expression': 1, 'new': 3, 'argument_list': 4, 'for_statement': 1, 'for': 1, 'decimal_integer_literal': 3, 'binary_expression': 2, 'field_access': 1, 'update_expression': 1, '++': 1, '-': 3, 'array_access': 2, 'if_statement': 1, 'if': 1, 'parenthesized_expression': 1, 'method_invocation': 3, 'return_statement': 2, 'return': 2, 'array_creation_expression': 2, 'arra

In [14]:
def parse_ebnf(file_path):
    rules = {}
    pattern = re.compile(r'(\w+)\s*::=\s*([^=]+)')

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()

            if not line or line.startswith(';'):
                continue
            
            matches = pattern.findall(line)
            for match in matches:
                rule_name, definition = match
                rule_name = rule_name.strip()
                definition = definition.strip()
                rules[rule_name] = definition

    return rules

ebnf_rules = parse_ebnf('java/java.ebnf')
print("Extracted EBNF Rules:", ebnf_rules.keys())


Extracted EBNF Rules: dict_keys(['extras', 'supertypes', 'inline', 'conflicts', 'word', 'program', '_toplevel_statement', '_literal', 'decimal_integer_literal', 'hex_integer_literal', 'octal_integer_literal', 'binary_integer_literal', 'decimal_floating_point_literal', 'hex_floating_point_literal', 'true', 'false', 'character_literal', 'string_literal', '_string_literal', '_multiline_string_literal', 'string_fragment', '_multiline_string_fragment', 'string_interpolation', '_escape_sequence', 'escape_sequence', 'null_literal', 'expression', 'cast_expression', 'assignment_expression', 'binary_expression', 'instanceof_expression', 'lambda_expression', 'inferred_parameters', 'ternary_expression', 'unary_expression', 'update_expression', 'primary_expression', 'array_creation_expression', 'dimensions_expr', 'parenthesized_expression', 'class_literal', 'object_creation_expression', '_unqualified_object_creation_expression', 'field_access', 'template_expression', 'array_access', 'method_invocat

  """


In [15]:
def match_rules_with_ast(stats, ebnf_rules):
    rule_stats = {}
    for rule_name in ebnf_rules:
        if rule_name in stats:
            rule_stats[rule_name] = stats[rule_name]
    return rule_stats

rule_stats = match_rules_with_ast(stats, ebnf_rules)
print("Rule-Based Statistics:", rule_stats)


Rule-Based Statistics: {'program': 1, 'decimal_integer_literal': 3, 'binary_expression': 2, 'unary_expression': 2, 'update_expression': 1, 'array_creation_expression': 2, 'parenthesized_expression': 1, 'object_creation_expression': 1, 'field_access': 1, 'array_access': 2, 'method_invocation': 3, 'argument_list': 4, 'type_arguments': 2, 'dimensions': 4, 'block': 3, 'expression_statement': 1, 'return_statement': 2, 'if_statement': 1, 'for_statement': 1, 'package_declaration': 1, 'import_declaration': 2, 'class_declaration': 1, 'modifiers': 2, 'class_body': 1, 'scoped_identifier': 5, 'variable_declarator': 3, 'array_initializer': 2, 'generic_type': 2, 'array_type': 2, 'integral_type': 6, 'formal_parameters': 1, 'formal_parameter': 2, 'local_variable_declaration': 3, 'method_declaration': 1, 'identifier': 34, 'line_comment': 3}


In [22]:
import re
import os
from RRD.railroad import (
    Diagram,
    Sequence,
    Terminal,
    NonTerminal,
    Skip,
    Style,
)

def parse_ebnf_line(line: str):
    line = line.strip()
    if not line or line.startswith(";"):
        return None

    match = re.match(r'^(\w+)\s*::=\s*(.+)$', line)
    if match:
        rule_name, definition = match.groups()
        return rule_name.strip(), definition.strip()
    return None

def definition_to_diagram_items(definition: str):

    tokens = definition.split()

    diagram_items = []
    for token in tokens:
        token = token.strip()
        if not token:
            continue

        if (token.startswith("'") and token.endswith("'")) or (token.startswith('"') and token.endswith('"')):
            literal = token[1:-1]
            diagram_items.append(Terminal(literal))
        else:
            diagram_items.append(NonTerminal(token))

    return [Sequence(*diagram_items)]

def generate_railroad(rule_name: str, definition: str) -> Diagram:

    items = definition_to_diagram_items(definition)
    if len(items) == 1:
        main_seq = items[0]
    else:
        main_seq = Sequence(*items)

    diagram = Diagram(
        main_seq

    )
    return diagram


def save_diagram_as_svg(diagram: Diagram, out_file: str):

    diagram.format()
    with open(out_file, "w", encoding="utf-8") as f:
        diagram.writeStandalone(f.write)


def main():
    ebnf_file = "java/java.ebnf"
    out_dir = "railroads"
    os.makedirs(out_dir, exist_ok=True)

    rules = {}
    with open(ebnf_file, "r", encoding="utf-8") as f:
        for line in f:
            parsed = parse_ebnf_line(line)
            if parsed is None:
                continue
            rule_name, definition = parsed
            rules[rule_name] = definition

    for rule_name, definition in rules.items():
        svg_path = os.path.join(out_dir, f"{rule_name}.svg")
        diagram = generate_railroad(rule_name, definition)
        save_diagram_as_svg(diagram, svg_path)
        print(f"Generated {svg_path}")

if __name__ == "__main__":
    main()


[OK] Generated railroads/extras.svg
[OK] Generated railroads/supertypes.svg
[OK] Generated railroads/inline.svg
[OK] Generated railroads/conflicts.svg
[OK] Generated railroads/word.svg
[OK] Generated railroads/program.svg
[OK] Generated railroads/_toplevel_statement.svg
[OK] Generated railroads/_literal.svg
[OK] Generated railroads/decimal_integer_literal.svg
[OK] Generated railroads/hex_integer_literal.svg
[OK] Generated railroads/octal_integer_literal.svg
[OK] Generated railroads/binary_integer_literal.svg
[OK] Generated railroads/decimal_floating_point_literal.svg
[OK] Generated railroads/hex_floating_point_literal.svg
[OK] Generated railroads/true.svg
[OK] Generated railroads/false.svg
[OK] Generated railroads/character_literal.svg
[OK] Generated railroads/string_literal.svg
[OK] Generated railroads/_string_literal.svg
[OK] Generated railroads/_multiline_string_literal.svg
[OK] Generated railroads/string_fragment.svg
[OK] Generated railroads/_multiline_string_fragment.svg
[OK] Gene