In [19]:
import javalang
import hashlib

# Parse Java source text into a javalang syntax tree
def generate_ast(java_code):
    """Parse *java_code* and return the tree produced by javalang's parser.

    The source text is tokenized with ``javalang.tokenizer.tokenize`` and the
    resulting token stream is handed to ``javalang.parser.Parser``; whatever
    its ``parse()`` method returns is passed back to the caller unchanged.
    Any exception raised by the tokenizer or the parser propagates as-is.
    """
    token_stream = javalang.tokenizer.tokenize(java_code)
    return javalang.parser.Parser(token_stream).parse()

# Function to normalize AST by converting identifiers to a standard form
def normalize_ast(node):
    """Convert a javalang AST node into a plain, comparable dictionary tree.

    Each ``javalang.ast.Node`` becomes a dict with three keys:

    * ``'type'``       -- the node's class name,
    * ``'attributes'`` -- its non-node attribute values,
    * ``'children'``   -- the normalized dicts of its child nodes, in the
      order the attributes are stored on the node.

    Attributes called ``name`` or ``value`` are replaced by the constant
    string ``'normalized'`` so that two programs differing only in those
    identifiers/literals produce equal trees.

    Private attributes (names starting with an underscore, e.g. position
    metadata that javalang's parser attaches as ``_position``) are skipped:
    they describe where the code sits in the file, not its structure, and
    would otherwise make the result depend on formatting.

    List attributes may mix child nodes with plain entries (operator strings,
    ``None`` placeholders, names). Child nodes go to ``'children'``; the
    remaining entries are kept under ``'attributes'`` instead of being
    silently discarded, so constructs that differ only in such entries do not
    collapse into the same normalized tree.

    Args:
        node: A ``javalang.ast.Node`` or any other object.

    Returns:
        The normalized dict for a node, or ``str(node)`` for anything that is
        not a ``javalang.ast.Node``.
    """
    if not isinstance(node, javalang.ast.Node):
        return str(node)

    normalized_node = {
        'type': type(node).__name__,
        'attributes': {},
        'children': [],
    }
    attributes = normalized_node['attributes']
    children = normalized_node['children']

    for attr, value in node.__dict__.items():
        if attr.startswith('_'):
            # Parser bookkeeping, not part of the program's structure.
            continue
        if isinstance(value, javalang.ast.Node):
            children.append(normalize_ast(value))
        elif isinstance(value, list):
            child_nodes, plain_items = _split_list_items(value)
            children.extend(normalize_ast(child) for child in child_nodes)
            if plain_items:
                attributes[attr] = plain_items
        elif attr in ('name', 'value'):
            # Normalize identifiers and literals
            attributes[attr] = 'normalized'
        else:
            attributes[attr] = value
    return normalized_node


def _split_list_items(items):
    """Split a (possibly nested) list into AST nodes and plain entries, in order."""
    nodes = []
    plain = []
    for item in items:
        if isinstance(item, javalang.ast.Node):
            nodes.append(item)
        elif isinstance(item, (list, tuple)):
            nested_nodes, nested_plain = _split_list_items(item)
            nodes.extend(nested_nodes)
            plain.extend(nested_plain)
        else:
            plain.append(item)
    return nodes, plain

# Function to calculate tree edit distance
def tree_edit_distance(tree1, tree2):
    """Return a top-down ordered edit distance between two normalized trees.

    The trees are expected in the dictionary form used in this file: a dict
    with ``'type'``, ``'attributes'`` and ``'children'`` keys. Any value that
    is not a dict is treated as a leaf whose label is the value itself.

    The distance counts unit-cost operations:

    * relabeling a node (its type/attributes differ from its counterpart): 1,
    * inserting or deleting a whole subtree: the number of nodes in it.

    Children are matched in order, so the result is the cheapest way to turn
    the child sequence of one node into the child sequence of the other,
    applied recursively from the roots down. Nodes are only ever compared
    with nodes at the same depth, which keeps the computation simple at the
    price of sometimes reporting a larger distance than an unrestricted tree
    edit distance would.

    Args:
        tree1: First normalized tree.
        tree2: Second normalized tree.

    Returns:
        A non-negative integer; ``0`` when the two trees are equal.
    """
    # Fast path: identical trees need no dynamic programming at all.
    if tree1 == tree2:
        return 0
    return _subtree_distance(tree1, tree2)


def _tree_label(tree):
    """Return what is compared when deciding whether a node must be relabeled."""
    if isinstance(tree, dict):
        return (tree.get('type'), tree.get('attributes'))
    return tree


def _tree_children(tree):
    """Return the ordered child list of a node (empty for leaves and non-dicts)."""
    if isinstance(tree, dict):
        return tree.get('children') or []
    return []


def _tree_size(tree):
    """Return the number of nodes in the subtree rooted at *tree*."""
    return 1 + sum(_tree_size(child) for child in _tree_children(tree))


def _subtree_distance(tree1, tree2):
    """Return the edit cost of turning subtree *tree1* into subtree *tree2*."""
    relabel_cost = 0 if _tree_label(tree1) == _tree_label(tree2) else 1

    kids1 = _tree_children(tree1)
    kids2 = _tree_children(tree2)
    sizes2 = [_tree_size(kid) for kid in kids2]

    # previous[j]: cost of turning the children of tree1 consumed so far into
    # the first j children of tree2. With nothing consumed, that is the cost
    # of inserting those j subtrees.
    previous = [0]
    for size in sizes2:
        previous.append(previous[-1] + size)

    for kid1 in kids1:
        size1 = _tree_size(kid1)
        current = [previous[0] + size1]  # delete everything seen so far
        for j, kid2 in enumerate(kids2, start=1):
            current.append(min(
                previous[j] + size1,                             # delete kid1
                current[j - 1] + sizes2[j - 1],                  # insert kid2
                previous[j - 1] + _subtree_distance(kid1, kid2), # match them
            ))
        previous = current

    return relabel_cost + previous[-1]

# Example usage: compare two small Java programs that differ only in the
# class name and in the string literal they print.
java_code_example1 = """
public class HelloWorld {
    public static void main(String[] args) {
        System.out.println("Hello, World!");
    }
}
"""

java_code_example2 = """
public class GreetingApp {
    public static void main(String[] args) {
        System.out.println("¡Hola, mundo!");
    }
}
"""

# Largest distance at which the two programs still count as similar.
threshold = 15  # Example threshold for similarity

# Parse both samples first, then reduce each tree to its normalized form.
ast1, ast2 = (
    generate_ast(source) for source in (java_code_example1, java_code_example2)
)
ast1_normalized, ast2_normalized = (normalize_ast(tree) for tree in (ast1, ast2))

# Measure how far apart the normalized trees are and classify the pair.
distance = tree_edit_distance(ast1_normalized, ast2_normalized)
similar = threshold >= distance

print(f"Los códigos son {'similares' if similar else 'diferentes'} con una distancia de edición de {distance}.")


Los códigos son similares con una distancia de edición de 0.
