In [2]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.text_splitter import CodeSplitter
from llama_index.packs.code_hierarchy import CodeHierarchyNodeParser

import os
from IPython.display import Markdown, display
from dotenv import load_dotenv
load_dotenv()


def print_python(python_text):
    """Render Python source as a syntax-highlighted Markdown code block in the notebook.

    Args:
        python_text: The Python source to show, as a string.

    The text is wrapped in a fenced ``python`` block and passed to
    ``display(Markdown(...))``.
    """
    # A closing fence is only recognised at the start of its own line, so make
    # sure the code ends with a newline before appending it. Empty input is left
    # untouched so it still renders as an empty block.
    if python_text and not python_text.endswith("\n"):
        python_text = python_text + "\n"
    display(Markdown("```python\n" + python_text + "```"))

### Utility functions

In [4]:
from pathlib import Path
import re

def _skip_file(path: Path) -> bool:
    # skip lock files
    path = path.name
    if path.endswith("lock") or path == "package-lock.json" or path == "yarn.lock":
        return True
    # skip tests and legacy directories
    if path in ["legacy", "test"] and self.skip_tests:
        return True
    # skip hidden files
    if path.startswith("."):
        return True
    # skip images
    if path.endswith(".png") or path.endswith(".jpg"):
        return True
    return False

def _remove_non_ascii(text):
    # Define the regular expression pattern to match ascii characters
    pattern = re.compile(r"[^\x00-\x7F]+")
    # Replace ascii characters with an empty string
    cleaned_text = pattern.sub("", text)
    return cleaned_text

def _skip_directory(directory: Path) -> bool:
    # skip hidden directories
    if directory.name.startswith("."):
        return True
    return directory == "__pycache__" or directory == "node_modules"

In [None]:
from llama_index.core import SimpleDirectoryReader



class GraphConstructor:
    global_graph_info: GlobalGraphInfo
    root: str
    skip_tests: bool
    parsers: Parsers
    max_workers: int = 50


    def __init__(self, entity_id: str, root: str, max_workers: Optional[int] = None):
        self.global_graph_info = GlobalGraphInfo(entity_id=entity_id)
        self.parsers = Parsers(self.global_graph_info, root)
        self.root = root
        self.skip_tests = True
        if max_workers is not None:
            self.max_workers = max_workers
            

    def build_graph(self):
        # process every node to create the graph structure
        print("Building graph...")
        start_time = time.time()

        nodes, relationships, imports = self._scan_directory(self.root)

        # relate imports between file nodes
        relationships.extend(self._relate_imports(imports))
        # relate functions calls
        relationships.extend(self._relate_constructor_calls(nodes, imports))
        end_time = time.time()
        execution_time = end_time - start_time
        print(f"Execution time: {execution_time} seconds")

        return nodes, relationships