In [11]:
pip install markdown pyyaml

Note: you may need to restart the kernel to use updated packages.


In [16]:
import os
import glob
import re
import json

class Node:
    def __init__(self, id, name, position=(0, 0, 0), weight=1.0, velocity=(0, 0, 0), pinned=False, block_content='', link_types=[]):
        self.id = id
        self.name = name
        self.position = position
        self.weight = weight
        self.velocity = velocity
        self.pinned = pinned
        self.block_content = block_content
        self.link_types = link_types

    def to_dict(self):
        return {
            'id': self.id,
            'name': self.name,
            'position': self.position,
            'weight': self.weight,
            'velocity': self.velocity,
            'pinned': self.pinned,
            'block_content': self.block_content,
            'link_types': self.link_types
        }

class Edge:
    def __init__(self, start_node, end_node, weight=1.0, is_active=True, link_type=''):
        self.start_node = start_node
        self.end_node = end_node
        self.weight = weight
        self.is_active = is_active
        self.link_type = link_type

    def to_dict(self):
        return {
            'start_node': self.start_node.id,
            'end_node': self.end_node.id,
            'weight': self.weight,
            'is_active': self.is_active,
            'link_type': self.link_type
        }

def parse_markdown_files(folder_path):
    nodes = []
    edges = []
    node_dict = {}
    
    for file_path in glob.glob(os.path.join(folder_path, '*.md')):
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
            if not content.strip():
                continue  # Skip empty files
            file_name = os.path.basename(file_path)
            node_id = file_name
            node_name = file_name.split('.')[0]
            block_content = content
            link_types = re.findall(r'\[\[([^\]]+)\]\]', content)

            node = Node(
                id=node_id,
                name=node_name,
                block_content=block_content,
                link_types=link_types
            )
            nodes.append(node)
            node_dict[node_name] = node

            # Extract edges
            for link in link_types:
                if link in node_dict:
                    edges.append(Edge(start_node=node_dict[node_name], end_node=node_dict[link], link_type='link'))
                else:
                    print(f"Link target '{link}' not found for node '{node_name}'")

    return nodes, edges

def save_to_json(data, file_path):
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Path to the folder containing markdown files
folder_path = r'C:\Users\lolic\githubs\JingSpringThing\spatialKnowledgeGraph\pythonCode\demoData'

# Parse the markdown files
nodes, edges = parse_markdown_files(folder_path)

# Save nodes and edges to JSON files
nodes_data = [node.to_dict() for node in nodes]
edges_data = [edge.to_dict() for edge in edges]

save_to_json(nodes_data, os.path.join(folder_path, 'nodes.json'))
save_to_json(edges_data, os.path.join(folder_path, 'edges.json'))

print(f"Extracted {len(nodes)} nodes and {len(edges)} edges. Data saved to 'nodes.json' and 'edges.json'.")


Link target 'Delivery Planning' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'Metaverse and Telecollaboration' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'Metaverse Ontology' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'Trust and Safety' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'open source' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'Bitcoin' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'Lightning and Similar L2' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'nostr' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'Unreal' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'NVIDIA Omniverse' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'Agents' not found for node 'Agentic Metaverse for Global Creatives'
Link target 'Googl