In [28]:
import ast
import re

class Node:
    def __init__(self, node_type, value=None):
        self.node_type = node_type
        self.value = value
        self.children = []

    def add_child(self, child_node):
        self.children.append(child_node)

    def __repr__(self):
        return f'Node(type={self.node_type}, value={self.value}, children={len(self.children)})'

class LibraryUsageNode(Node):
    def __init__(self, value):
        super().__init__(node_type="LibraryUsage", value=value)

class CommentNode(Node):
    def __init__(self, value):
        super().__init__(node_type="Comment", value=value)

class UAST:
    def __init__(self):
        self.root = None
        self.nodes = []

    def set_root(self, node):
        self.root = node

    def add_node(self, parent, node):
        parent.add_child(node)
        self.nodes.append(node)

    def traverse(self, node=None, depth=0):
        if node is None:
            node = self.root
        print('  ' * depth + repr(node))
        for child in node.children:
            self.traverse(child, depth + 1)

def extract_comments_and_library_usages(code, language):
    comments = []
    library_usages = []

    if language == "python":
        # Extract library usages
        ast_tree = ast.parse(code)
        for node in ast_tree.body:
            if isinstance(node, ast.Import):
                for alias in node.names:
                    library_usages.append(alias.name)
        
        # Extract comments directly from the source code
        lines = code.split("\n")
        for line in lines:
            line = line.strip()
            if line.startswith("#"):
                comments.append(line)
                
        return comments, library_usages

    elif language == "cpp":
        # Extract include directives
        includes = re.findall(r'#include\s*<([^>]*)>', code)
        for include in includes:
            library_usages.append(include)
        
        # Extract comments
        comments = re.findall(r'(?://[^\n]*|/\*.*?\*/)', code, re.DOTALL)

        return comments, library_usages



# Sample code for both Python and C++
python_code = """
import os
# This is a comment
import sys
#Another comment
import numpy as np
"""

cpp_code = """
// This is a single-line comment
#include<math.h>
// Another single-line comment
"""

# Create the UAST for Python
print("Generating UAST for Python code:")
uast_python = UAST()
root_node = Node("Program")
uast_python.set_root(root_node)

comments, library_usages = extract_comments_and_library_usages(python_code, "python")

# Add library usage nodes
for lib_usage in library_usages:
    lib_usage_node = LibraryUsageNode(lib_usage)
    uast_python.add_node(root_node, lib_usage_node)

# Add comment nodes
for comment in comments:
    comment_node = CommentNode(comment.strip())
    uast_python.add_node(root_node, comment_node)

# Test the UAST for Python
print("\nTraversing the UAST for Python:")
uast_python.traverse()

# Create the UAST for C++
print("\nGenerating UAST for C++ code:")
uast_cpp = UAST()
root_node = Node("Program")
uast_cpp.set_root(root_node)

comments, library_usages = extract_comments_and_library_usages(cpp_code, "cpp")

# Add library usage nodes
for lib_usage in library_usages:
    lib_usage_node = LibraryUsageNode(lib_usage)
    uast_cpp.add_node(root_node, lib_usage_node)

# Add comment nodes
for comment in comments:
    comment_node = CommentNode(comment.strip())
    uast_cpp.add_node(root_node, comment_node)

# Test the UAST for C++
print("\nTraversing the UAST for C++:")
uast_cpp.traverse()


Generating UAST for Python code:

Traversing the UAST for Python:
Node(type=Program, value=None, children=5)
  Node(type=LibraryUsage, value=os, children=0)
  Node(type=LibraryUsage, value=sys, children=0)
  Node(type=LibraryUsage, value=numpy, children=0)
  Node(type=Comment, value=# This is a comment, children=0)
  Node(type=Comment, value=#Another comment, children=0)

Generating UAST for C++ code:

Traversing the UAST for C++:
Node(type=Program, value=None, children=3)
  Node(type=LibraryUsage, value=math.h, children=0)
  Node(type=Comment, value=// This is a single-line comment, children=0)
  Node(type=Comment, value=// Another single-line comment, children=0)


In [6]:
import ast
import re
import json

class Node:
    def __init__(self, node_type, value=None):
        self.node_type = node_type
        self.value = value
        self.children = []

    def add_child(self, child_node):
        self.children.append(child_node)

    def __repr__(self):
        return f'Node(type={self.node_type}, value={self.value}, children={len(self.children)})'

class LibraryUsageNode(Node):
    def __init__(self, value):
        super().__init__(node_type="LibraryUsage", value=value)

class CommentNode(Node):
    def __init__(self, value):
        super().__init__(node_type="Comment", value=value)

class UAST:
    def __init__(self):
        self.root = None
        self.nodes = []

    def set_root(self, node):
        self.root = node

    def add_node(self, parent, node):
        parent.add_child(node)
        self.nodes.append(node)

    def traverse(self, node=None, depth=0):
        if node is None:
            node = self.root
        print('  ' * depth + repr(node))
        for child in node.children:
            self.traverse(child, depth + 1)

def extract_comments_and_library_usages(code, language):
    comments = []
    library_usages = []

    if language == "python":
        # Extract library usages
        ast_tree = ast.parse(code)
        for node in ast_tree.body:
            if isinstance(node, ast.Import):
                for alias in node.names:
                    library_usages.append(alias.name)
        
        # Extract comments directly from the source code
        lines = code.split("\n")
        for line in lines:
            line = line.strip()
            if line.startswith("#"):
                comments.append(line)
                
        return comments, library_usages

    elif language == "cpp":
        # Extract include directives
        includes = re.findall(r'#include\s*<([^>]*)>', code)
        for include in includes:
            library_usages.append(include)
        
        # Extract comments
        comments = re.findall(r'(?://[^\n]*|/\*.*?\*/)', code, re.DOTALL)

        return comments, library_usages

def convert_to_json(node):
    json_data = {}

    json_data['type'] = node.node_type
    json_data['value'] = node.value
    json_data['children'] = []

    for child in node.children:
        json_data['children'].append(convert_to_json(child))

    return json_data

# Sample code for both Python and C++
python_code = """
import os
# This is a comment
import sys
# Another comment
import numpy as np
"""

cpp_code = """
// This is a single-line comment
#include<math.h>
// Another single-line comment
"""

# Create the UAST for Python
print("Generating UAST for Python code:")
uast_python = UAST()
root_node = Node("Program")
uast_python.set_root(root_node)

comments, library_usages = extract_comments_and_library_usages(python_code, "python")

# Add library usage nodes
for lib_usage in library_usages:
    lib_usage_node = LibraryUsageNode(lib_usage)
    uast_python.add_node(root_node, lib_usage_node)

# Add comment nodes
for comment in comments:
    comment_node = CommentNode(comment.strip())
    uast_python.add_node(root_node, comment_node)

# Create the UAST for C++
print("\nGenerating UAST for C++ code:")
uast_cpp = UAST()
root_node = Node("Program")
uast_cpp.set_root(root_node)

comments, library_usages = extract_comments_and_library_usages(cpp_code, "cpp")

# Add library usage nodes
for lib_usage in library_usages:
    lib_usage_node = LibraryUsageNode(lib_usage)
    uast_cpp.add_node(root_node, lib_usage_node)

# Add comment nodes
for comment in comments:
    comment_node = CommentNode(comment.strip())
    uast_cpp.add_node(root_node, comment_node)

# Convert Python UAST to JSON
python_json = convert_to_json(uast_python.root)

# Convert C++ UAST to JSON
cpp_json = convert_to_json(uast_cpp.root)

# Convert JSON to string
python_json_str = json.dumps(python_json, indent=2)
cpp_json_str = json.dumps(cpp_json, indent=2)

# Print JSON strings
print("\nJSON for Python UAST:")
print(python_json_str)

print("\nJSON for C++ UAST:")
print(cpp_json_str)


Generating UAST for Python code:

Generating UAST for C++ code:

JSON for Python UAST:
{
  "type": "Program",
  "value": null,
  "children": [
    {
      "type": "LibraryUsage",
      "value": "os",
      "children": []
    },
    {
      "type": "LibraryUsage",
      "value": "sys",
      "children": []
    },
    {
      "type": "LibraryUsage",
      "value": "numpy",
      "children": []
    },
    {
      "type": "Comment",
      "value": "# This is a comment",
      "children": []
    },
    {
      "type": "Comment",
      "value": "# Another comment",
      "children": []
    }
  ]
}

JSON for C++ UAST:
{
  "type": "Program",
  "value": null,
  "children": [
    {
      "type": "LibraryUsage",
      "value": "math.h",
      "children": []
    },
    {
      "type": "Comment",
      "value": "// This is a single-line comment",
      "children": []
    },
    {
      "type": "Comment",
      "value": "// Another single-line comment",
      "children": []
    }
  ]
}


In [2]:
import ast
import re
import json

class Node:
    def __init__(self, node_type, value=None):
        self.node_type = node_type
        self.value = value
        self.children = []

    def add_child(self, child_node):
        self.children.append(child_node)

    def __repr__(self):
        return f'Node(type={self.node_type}, value={self.value}, children={len(self.children)})'

class LibraryUsageNode(Node):
    def __init__(self, value):
        super().__init__(node_type="LibraryUsage", value=value)

class CommentNode(Node):
    def __init__(self, value):
        super().__init__(node_type="Comment", value=value)

class UAST:
    def __init__(self):
        self.root = None
        self.nodes = []

    def set_root(self, node):
        self.root = node

    def add_node(self, parent, node):
        parent.add_child(node)
        self.nodes.append(node)

    def traverse(self, node=None, depth=0):
        if node is None:
            node = self.root
        print('  ' * depth + repr(node))
        for child in node.children:
            self.traverse(child, depth + 1)

def extract_comments_and_library_usages(code, language):
    comments = []
    library_usages = []

    if language == "python":
        # Extract library usages
        ast_tree = ast.parse(code)
        for node in ast_tree.body:
            if isinstance(node, ast.Import):
                for alias in node.names:
                    library_usages.append(alias.name)
        
        # Extract comments directly from the source code
        lines = code.split("\n")
        for line in lines:
            line = line.strip()
            if line.startswith("#"):
                comments.append(line)

    elif language == "cpp":
        # Extract include directives
        includes = re.findall(r'#include\s*<([^>]*)>', code)
        for include in includes:
            library_usages.append(include)
        
        # Extract comments
        comments = re.findall(r'(?://[^\n]*|/\*.*?\*/)', code, re.DOTALL)

    elif language == "java":
        # Extract import statements
        imports = re.findall(r'import\s+([^;]+);', code)
        for imp in imports:
            library_usages.append(imp.strip())
        
        # Extract comments
        comments = re.findall(r'(?://[^\n]*|/\*.*?\*/)', code, re.DOTALL)

    return comments, library_usages

def uast_to_json(node):
    node_dict = {
        "type": node.node_type,
        "value": node.value,
        "children": [uast_to_json(child) for child in node.children]
    }
    return node_dict

# Sample code for Python, C++, and Java
python_code = """
import os
# This is a comment
import sys
#Another comment
import numpy as np
"""

cpp_code = """
// This is a single-line comment
#include <math.h>
// Another single-line comment
"""

java_code = """
import java.lang.*;
import java.util.Scanner;

/* This is a multi-line comment */
public class testParsing {
    public static void main(String[] args) {
        // Single-line comment
        Scanner reader = new Scanner(System.in);
        System.out.print("Enter a number: ");
    }
}
"""

# Create the UAST for Python
print("Generating UAST for Python code:")
uast_python = UAST()
root_node = Node("Program")
uast_python.set_root(root_node)

comments, library_usages = extract_comments_and_library_usages(python_code, "python")

# Add library usage nodes
for lib_usage in library_usages:
    lib_usage_node = LibraryUsageNode(lib_usage)
    uast_python.add_node(root_node, lib_usage_node)

# Add comment nodes
for comment in comments:
    comment_node = CommentNode(comment.strip())
    uast_python.add_node(root_node, comment_node)

# Test the UAST for Python
print("\nTraversing the UAST for Python:")
uast_python.traverse()

# Create the UAST for C++
print("\nGenerating UAST for C++ code:")
uast_cpp = UAST()
root_node = Node("Program")
uast_cpp.set_root(root_node)

comments, library_usages = extract_comments_and_library_usages(cpp_code, "cpp")

# Add library usage nodes
for lib_usage in library_usages:
    lib_usage_node = LibraryUsageNode(lib_usage)
    uast_cpp.add_node(root_node, lib_usage_node)

# Add comment nodes
for comment in comments:
    comment_node = CommentNode(comment.strip())
    uast_cpp.add_node(root_node, comment_node)

# Test the UAST for C++
print("\nTraversing the UAST for C++:")
uast_cpp.traverse()

# Create the UAST for Java
print("\nGenerating UAST for Java code:")
uast_java = UAST()
root_node = Node("Program")
uast_java.set_root(root_node)

comments, library_usages = extract_comments_and_library_usages(java_code, "java")

# Add library usage nodes
for lib_usage in library_usages:
    lib_usage_node = LibraryUsageNode(lib_usage)
    uast_java.add_node(root_node, lib_usage_node)

# Add comment nodes
for comment in comments:
    comment_node = CommentNode(comment.strip())
    uast_java.add_node(root_node, comment_node)

# Test the UAST for Java
print("\nTraversing the UAST for Java:")
uast_java.traverse()

# Convert UAST to JSON
uast_python_json = uast_to_json(uast_python.root)
uast_cpp_json = uast_to_json(uast_cpp.root)
uast_java_json = uast_to_json(uast_java.root)

# Print JSON representation of UASTs
print("\nJSON for Python UAST:")
print(json.dumps(uast_python_json, indent=2))

print("\nJSON for C++ UAST:")
print(json.dumps(uast_cpp_json, indent=2))

print("\nJSON for Java UAST:")
print(json.dumps(uast_java_json, indent=2))


Generating UAST for Python code:

Traversing the UAST for Python:
Node(type=Program, value=None, children=5)
  Node(type=LibraryUsage, value=os, children=0)
  Node(type=LibraryUsage, value=sys, children=0)
  Node(type=LibraryUsage, value=numpy, children=0)
  Node(type=Comment, value=# This is a comment, children=0)
  Node(type=Comment, value=#Another comment, children=0)

Generating UAST for C++ code:

Traversing the UAST for C++:
Node(type=Program, value=None, children=3)
  Node(type=LibraryUsage, value=math.h, children=0)
  Node(type=Comment, value=// This is a single-line comment, children=0)
  Node(type=Comment, value=// Another single-line comment, children=0)

Generating UAST for Java code:

Traversing the UAST for Java:
Node(type=Program, value=None, children=4)
  Node(type=LibraryUsage, value=java.lang.*, children=0)
  Node(type=LibraryUsage, value=java.util.Scanner, children=0)
  Node(type=Comment, value=/* This is a multi-line comment */, children=0)
  Node(type=Comment, valu

In [2]:
import ast
import re
import json

class Node:
    def __init__(self, node_type, value=None, loc=None):
        self.node_type = node_type
        self.value = value
        self.children = []
        self.loc = loc

    def add_child(self, child_node):
        self.children.append(child_node)

    def __repr__(self):
        loc_repr = f', loc={self.loc}' if self.loc else ''
        return f'Node(type={self.node_type}, value={self.value}, children={len(self.children)}{loc_repr})'

class LibraryUsageNode(Node):
    def __init__(self, value, loc=None):
        super().__init__(node_type="LibraryUsage", value=value, loc=loc)

class CommentNode(Node):
    def __init__(self, value, loc=None):
        super().__init__(node_type="Comment", value=value, loc=loc)

class UAST:
    def __init__(self):
        self.root = None
        self.nodes = []

    def set_root(self, node):
        self.root = node

    def add_node(self, parent, node):
        parent.add_child(node)
        self.nodes.append(node)

    def traverse(self, node=None, depth=0):
        if node is None:
            node = self.root
        print('  ' * depth + repr(node))
        for child in node.children:
            self.traverse(child, depth + 1)

def extract_comments_and_library_usages(code, language):
    comments = []
    library_usages = []

    if language == "python":
        # Extract library usages and comments with location
        ast_tree = ast.parse(code)
        for node in ast_tree.body:
            if isinstance(node, ast.Import):
                for alias in node.names:
                    library_usages.append((alias.name, node.lineno, node.col_offset, node.end_lineno, node.end_col_offset))
        
        # Extract comments directly from the source code
        for match in re.finditer(r'#.*', code):
            comment = match.group(0)
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = start_line
            end_col = start_col + len(comment)
            comments.append((comment, start_line, start_col, end_line, end_col))

    elif language == "cpp":
        # Extract include directives with location
        for match in re.finditer(r'#include\s*<([^>]*)>', code):
            include = match.group(1)
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = start_line
            end_col = start_col + len(match.group(0))
            library_usages.append((include, start_line, start_col, end_line, end_col))
        
        # Extract comments with location
        for match in re.finditer(r'(?://[^\n]*|/\*.*?\*/)', code, re.DOTALL):
            comment = match.group(0)
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = code[:match.end()].count('\n')
            if end_line > 0:
                end_line += start_line - 1
                end_col = len(code[:match.end()].split('\n')[-1])
            else:
                end_line = start_line
                end_col = start_col + len(comment)
            comments.append((comment, start_line, start_col, end_line, end_col))

    elif language == "java":
        # Extract import statements with location
        for match in re.finditer(r'import\s+([^;]+);', code):
            imp = match.group(1).strip()
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = start_line
            end_col = start_col + len(match.group(0))
            library_usages.append((imp, start_line, start_col, end_line, end_col))
        
        # Extract comments with location
        for match in re.finditer(r'(?://[^\n]*|/\*.*?\*/)', code, re.DOTALL):
            comment = match.group(0)
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = code[:match.end()].count('\n')
            if end_line > 0:
                end_line += start_line - 1
                end_col = len(code[:match.end()].split('\n')[-1])
            else:
                end_line = start_line
                end_col = start_col + len(comment)
            comments.append((comment, start_line, start_col, end_line, end_col))

    elif language == "go":
        # Extract import statements with location
        for match in re.finditer(r'import\s+\(([\w\s."\/\\-]+)\)', code):
            imp = match.group(1).strip()
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = match.end() - match.group(1).rfind('\n') - 1
            end_col = len(match.group(1))
            library_usages.append((imp, start_line, start_col, end_line, end_col))
        
        # Extract comments with location
        for match in re.finditer(r'(?://[^\n]*|/\*.*?\*/)', code, re.DOTALL):
            comment = match.group(0)
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = code[:match.end()].count('\n')
            if end_line > 0:
                end_line += start_line - 1
                end_col = len(code[:match.end()].split('\n')[-1])
            else:
                end_line = start_line
                end_col = start_col + len(comment)
            comments.append((comment, start_line, start_col, end_line, end_col))

    elif language == "c":
        # Extract include directives with location
        for match in re.finditer(r'#include\s*<([^>]*)>', code):
            include = match.group(1)
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = start_line
            end_col = start_col + len(match.group(0))
            library_usages.append((include, start_line, start_col, end_line, end_col))
        
        # Extract comments with location
        for match in re.finditer(r'(?://[^\n]*|/\*.*?\*/)', code, re.DOTALL):
            comment = match.group(0)
            start_line = code[:match.start()].count('\n') + 1
            start_col = match.start() - code[:match.start()].rfind('\n') - 1
            end_line = code[:match.end()].count('\n')
            if end_line > 0:
                end_line += start_line - 1
                end_col = len(code[:match.end()].split('\n')[-1])
        else:
            end_line = start_line
            end_col = start_col + len(comment)
        comments.append((comment, start_line, start_col, end_line, end_col))
    return comments, library_usages

def detect_language(code):
    if re.search(r'import\s+\w+(\.\w+)*;', code):
        return "java"
    elif re.search(r'#include\s*<[^>]*>', code):
        return "cpp"
    elif re.search(r'import\s+\w+', code):
        return "python"
    elif re.search(r'import\s+(?:\([\w\s."\/\\-]+\)|[\w\s."\/\\-]+);', code):
        return "go"
    elif re.search(r'#include\s*<[^>]*>', code):
        return "c"
    else:
        return None


def uast_to_json(node):
    node_dict = {
        "type": node.node_type,
        "value": node.value,
        "loc": node.loc,
        "children": [uast_to_json(child) for child in node.children]
    }
    return node_dict

def main():
    file_path = input("Enter the path to the code file: ")
    try:
        with open(file_path, 'r') as file:
            code = file.read()
    except Exception as e:
        print(f"Error reading file: {e}")
        return

    language = detect_language(code)
    if not language:
        print("Language could not be detected.")
        return

    uast = UAST()
    root_node = Node("Program", loc={
        "start": {"line": 1, "column": 0},
        "end": {"line": code.count('\n') + 1, "column": 0}
    })
    uast.set_root(root_node)

    comments, library_usages = extract_comments_and_library_usages(code, language)

    # Add library usage nodes
    for lib_usage, start_line, start_col, end_line, end_col in library_usages:
        lib_usage_node = LibraryUsageNode(lib_usage, loc={
            "start": {"line": start_line, "column": start_col},
            "end": {"line": end_line, "column": end_col}
        })
        uast.add_node(root_node, lib_usage_node)

    # Add comment nodes
    for comment, start_line, start_col, end_line, end_col in comments:
        comment_node = CommentNode(comment.strip(), loc={
            "start": {"line": start_line, "column": start_col},
            "end": {"line": end_line, "column": end_col}
        })
        uast.add_node(root_node, comment_node)

    print(f"\nTraversing the UAST for {language.capitalize()}:")
    uast.traverse()

    uast_json = uast_to_json(uast.root)
    print(f"\nJSON for {language.capitalize()} UAST:")
    print(json.dumps(uast_json, indent=2))

if __name__ == "__main__":
    main()


SyntaxError: invalid syntax (<unknown>, line 1)