# VisualMeta

In [11]:
import pandas as pd
import networkx as nx
import json
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
#import uuid  # shall be used for pretty unique naming
from datetime import datetime
from pyvis.network import Network
import warnings
import pathlib

# suppressing warnings :: every warning of category UserWarning is ignored from here on
warnings.filterwarnings("ignore", category=UserWarning)

def visual_meta(file_path=None):
    """
    creating interactive hierarchical network with dynamic counts recalculation on node click...
    """
    
    if file_path is None:
        file_path = input("enter the path to your file: ")
        file_path = pathlib.Path(file_path)
    
    # loading the data based on the extension :: pass the file_path as a Path() object
    if file_path.suffix in ['.csv', '.txt']:
        
        # csv with auto delimiter detection
        try:
            df = pd.read_csv(file_path)
            print(f"successfully read CSV...")
        except:
            # should fail; try common delimiters
            for delimiter in [',', ';', '\t', '|']:
                try:
                    df = pd.read_csv(file_path, delimiter=delimiter)
                    print(f"successfully read CSV with delimiter: '{delimiter}'")
                    break
                except:
                    continue
            else:
                raise ValueError("could not parse the CSV using these delimiters [',', ';', '\t', '|']...")
                
    elif file_path.suffix in ['.xlsx', '.xls']:
        
        # read excel file
        df = pd.read_excel(file_path)
        
    else:
        
        raise ValueError(f"unsupported file format. please provide a '.csv', '.txt', '.xlsx' or '.xls'... got this instead: {file_path.suffix}")
        
    print(f"successfully loaded the data with {len(df)} rows and {len(df.columns)} columns...")

    # to ensure that no columns (at least no less than the number set) would be truncated
    pd.set_option('display.max_columns', 100)    # show up to 100 columns
    
    # display the first 5 rows
    display(df)
    
    # potential hierarchy columns
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    
    # if not enough categorical columns, try to identify other potential ones :: the ones where the unique values are less than the unq_ident_pct the len(df)
    unq_ident_pct = 0.5  # for now set it to 50%
    if len(categorical_cols) < 2:
        for col in df.columns:
            if col not in categorical_cols and df[col].nunique() < len(df) * unq_ident_pct:  # if less than unq_ident_pct len(df) are unique values
                categorical_cols.append(col)
                print('a new category is appended: {col}')
    
    print(f"found {len(categorical_cols)} potential columns for hierarchy levels:\n{categorical_cols}\n")
    
    # create widgets
    level_slider = widgets.IntSlider(
        value=min(3, len(categorical_cols)),  # first default value
        min=1,
        max=min(8, len(categorical_cols)),
        step=1,
        description='Number of Levels:',
        continuous_update=False,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='60%')
    )
    
    # level selector
    hierarchy_container = widgets.VBox([])
    level_selectors = {}
    
    # color scheme 
    colormap_selector = widgets.Dropdown(
        options=['viridis', 'plasma', 'inferno', 'magma', 'cividis', 'rainbow', 'tab10', 'Set1', 'Set2', 'Set3'],
        value='Set1',
        description='Color Scheme:',
        style={'description_width': 'initial'}
    )
    
    # layout selector
    layout_selector = widgets.Dropdown(
        options=['hierarchical', 'force-directed', 'radial', 'circular'],
        value='hierarchical',
        description='Layout:',
        style={'description_width': 'initial'}
    )
    
    # physics toggle
    physics_toggle = widgets.Checkbox(
        value=True,
        description='Enable Physics (Interactive)',
        style={'description_width': 'initial'}
    )
    
    # direction selector
    direction_selector = widgets.Dropdown(
        options=['UD', 'DU', 'LR', 'RL'],
        value='UD',
        description='Direction:',
        style={'description_width': 'initial'},
        tooltip='UD=Top-Down, DU=Bottom-Up, LR=Left-Right, RL=Right-Left'
    )
    
    # generate button
    button = widgets.Button(
        description='Generate the Network',
        button_style='primary',
        tooltip='Click to create visualization',
        layout=widgets.Layout(width='50%')
    )
    
    # output area
    output = widgets.Output()
    
    # function to update level selectors :: observing the level selector 
    def update_level_selectors(change):
        """
        rebuild the per-level column dropdowns so they match the requested number of levels...

        registered as the observer of the level slider's 'value', but can also be called
        directly with a plain integer.

        Args:
            change: either a change object exposing the new slider value as ``change.new``,
                or the number of levels itself.

        Side effects:
            replaces the content of ``level_selectors`` (keys are 'level_<i>') and the
            children of ``hierarchy_container`` with one label + dropdown pair per level.
        """

        # handle both direct value and change object
        num_levels = getattr(change, 'new', change)

        # build everything locally first :: a failure while creating a widget then leaves
        # the previous selectors untouched, and the container is re-assigned once instead
        # of once per level (each assignment to .children is a separate trait update)
        new_selectors = {}
        rows = []

        # create selectors for each level
        for i in range(1, num_levels + 1):
            level_label = widgets.HTML(f"<b>Level {i} (Hierarchy):</b>")

            # default to the i-th candidate column; fall back to the first one when
            # there are more levels than candidate columns
            if i <= len(categorical_cols):
                default_col = categorical_cols[i - 1]
            else:
                default_col = categorical_cols[0]

            level_selector = widgets.Dropdown(
                options=categorical_cols,
                value=default_col,
                description='Column:',
                style={'description_width': 'initial'},
                layout=widgets.Layout(width='70%')
            )

            new_selectors[f'level_{i}'] = level_selector
            rows.append(widgets.VBox([level_label, level_selector]))

        # commit :: the dict is mutated in place because it is shared with the other
        # closures of the enclosing function (rebinding the name would not be seen there)
        level_selectors.clear()
        level_selectors.update(new_selectors)
        hierarchy_container.children = tuple(rows)
    
    # connect slider to update function
    level_slider.observe(update_level_selectors, names='value')  # observe the value of the level slider to initiate the functoin
    
    # function to generate visualization
    def on_button_clicked(b):
        with output:
            clear_output()
            
            try:
                # get selected columns
                hierarchy_cols = [selector.value for _, selector in sorted(level_selectors.items())]  # alphabetically
                
                # check for duplicates :: wouldn't make sense if duplicated!
                if len(hierarchy_cols) != len(set(hierarchy_cols)):
                    print("Error! selected the same column multiple times. Please select different columns for each level.")
                    return
                
                print(f"creating the network {len(hierarchy_cols)} levels")
                print(f"hierarchy columns (in order): {', '.join(hierarchy_cols)}")
                
                # create clean df
                df_clean = df.copy()

                # check for duplication in rows
                if df_clean.duplicated().any():
                    print('found duplicated rows:\n')
                    print(df_clean[df_clean.duplicated()])  # show them
                    df_clean = df_clean.drop_duplicates()
                    print(f'duplicates are eliminated (only first occurence kept) and the new size is: {df_clean.shape}')
                
                # NAs :: convert to string
                for col in hierarchy_cols:
                    df_clean[col] = df_clean[col].fillna(f"{col}:None")
                    df_clean[col] = df_clean[col].astype(str)
                
                # create NetworkX graph
                G = nx.DiGraph()
                
                # generate colormap
                try:
                    # try using the newer API first :: due to a complication at some point
                    import matplotlib.pyplot as plt
                    cmap = plt.colormaps[colormap_selector.value]
                except (AttributeError, KeyError):
                    print('falling back to the older API...\n')
                    # fall back to older API if needed
                    cmap = cm.get_cmap(colormap_selector.value, len(hierarchy_cols))
                
                colors = {col: mcolors.rgb2hex(cmap(i)) for i, col in enumerate(hierarchy_cols)}
                
                # track data structures
                node_info = {}  # stores node metadata
                parent_child_map = {}  # maps parent nodes to their children
                level_nodes = {i: [] for i in range(len(hierarchy_cols))}  # maps level index to list of node IDs
                level_totals = {i: 0 for i in range(len(hierarchy_cols))}  # total count at each level
                edge_flows = {}  # maps edge identifiers to flow values
                
                # hierarchical flow data structure for dynamic recalculation
                # this will store the complete flow breakdown from each node to its descendants
                flow_data = {}  # will store detailed flow information
                
                # first pass: build the complete flow data structure
                for idx, row in df_clean.iterrows():
                    # build path from root to leaf
                    path = []
                    for col in hierarchy_cols:
                        node_id = f"{col}:{row[col]}"
                        path.append(node_id)
                    
                    # store this path in flow_data
                    if 'paths' not in flow_data:
                        flow_data['paths'] = []
                    flow_data['paths'].append(path)
                
                # process each hierarchy level
                for level_idx, col in enumerate(hierarchy_cols):
                    # get previous hierarchy columns
                    prev_cols = hierarchy_cols[:level_idx]
                    
                    if prev_cols:
                        # for deeper levels, handle relationships
                        prev_col = prev_cols[-1]
                        
                        # group by previous and current level
                        grouped = df_clean.groupby([prev_col, col]).size().reset_index(name='count')
                        
                        for _, row in grouped.iterrows():
                            parent_id = f"{prev_col}:{row[prev_col]}"
                            child_id = f"{col}:{row[col]}"
                            flow = row['count']
                            
                            # add/update child node
                            if child_id not in node_info:
                                node_info[child_id] = {
                                    'level': level_idx,
                                    'count': flow,
                                    'name': row[col],
                                    'column': col
                                }
                                level_nodes[level_idx].append(child_id)
                                level_totals[level_idx] += flow
                            else:
                                node_info[child_id]['count'] += flow
                                level_totals[level_idx] += flow
                            
                            # track parent-child relationship
                            if parent_id not in parent_child_map:
                                parent_child_map[parent_id] = []
                            parent_child_map[parent_id].append(child_id)
                            
                            # store edge flow data
                            edge_key = f"{parent_id}->{child_id}"
                            edge_flows[edge_key] = flow
                            
                            # add edge
                            G.add_edge(parent_id, child_id, weight=flow, flow=flow)
                    else:
                        # first level - just count occurrences
                        for val, count in df_clean[col].value_counts().items():
                            node_id = f"{col}:{val}"
                            
                            # add node info
                            node_info[node_id] = {
                                'level': level_idx,
                                'count': count,
                                'name': val,
                                'column': col
                            }
                            level_nodes[level_idx].append(node_id)
                            level_totals[level_idx] += count
                            
                            # add to graph
                            G.add_node(node_id)
                
                # convert data structures to JSON
                parent_child_json = json.dumps(parent_child_map)
                level_nodes_json = json.dumps(level_nodes)
                node_info_json = json.dumps({k: {
                    'level': v['level'],
                    'count': v['count'],
                    'name': v['name'],
                    'column': v['column']
                } for k, v in node_info.items()})
                edge_flows_json = json.dumps(edge_flows)
                flow_data_json = json.dumps(flow_data)
                
                # calcluate node scaling
                counts = [info['count'] for info in node_info.values()]
                min_count = min(counts) if counts else 1
                max_count = max(counts) if counts else 1
                
                # node attributes
                for node_id, info in node_info.items():
                    level = info['level']
                    count = info['count']
                    
                    # scale node size
                    if min_count == max_count:
                        size = 30
                    else:
                        size = 10 + 40 * (np.log1p(count) - np.log1p(min_count)) / (np.log1p(max_count) - np.log1p(min_count))
                    
                    # calculate percentage at this level
                    level_pct = (count / level_totals[level]) * 100 if level_totals[level] > 0 else 0
                    
                    # add attributes to node
                    G.nodes[node_id]['size'] = size
                    G.nodes[node_id]['label'] = str(info['name'])[:20] + ('...' if len(str(info['name'])) > 20 else '')
                    
                    # create a text tooltip instead of HTML
                    tooltip_text = f"{info['name']}\nCount: {count}\nLevel %: {level_pct:.1f}%\nLevel: {level+1}"
                    G.nodes[node_id]['title'] = tooltip_text
                    
                    G.nodes[node_id]['group'] = level
                    G.nodes[node_id]['value'] = count
                    G.nodes[node_id]['level'] = level
                    G.nodes[node_id]['color'] = colors[info['column']]
                    G.nodes[node_id]['count'] = count
                    G.nodes[node_id]['levelPct'] = level_pct
                    G.nodes[node_id]['name'] = info['name']
                    G.nodes[node_id]['column'] = info['column']
                
                # # set edge attributes
                # for u, v, data in G.edges(data=True):
                #     edge_key = f"{u}->{v}"
                #     flow = edge_flows.get(edge_key, data.get('weight', 1))
                #     width = 1 + 5 * flow / max_count
                    
                #     G.edges[u, v]['width'] = width
                #     G.edges[u, v]['value'] = flow
                #     G.edges[u, v]['flow'] = flow
                #     # Initialize with empty labels - no labels will be shown on edges
                #     G.edges[u, v]['label'] = ""
                #     # Basic tooltip that will be enhanced when nodes are clicked
                #     G.edges[u, v]['title'] = f"Flow: {flow} records"
                #     # Store the source node color for edges
                #     #G.edges[u, v]['color'] = colors[node_info[u]['column']]
                #     # Set edge color as an object with explicit color value
                #     G.edges[u, v]['color'] = {
                #         'color': colors[node_info[u]['column']],
                #         'highlight': colors[node_info[u]['column']],
                #         'hover': colors[node_info[u]['column']]
                #     }

                # set edge attributes :: overrides the first step of the coloring
                for u, v, data in G.edges(data=True):
                    edge_key = f"{u}->{v}"
                    flow = edge_flows.get(edge_key, data.get('weight', 1))
                    width = 1 + 5 * flow / max_count
                    
                    G.edges[u, v]['width'] = width
                    G.edges[u, v]['value'] = flow
                    G.edges[u, v]['flow'] = flow
                    G.edges[u, v]['label'] = ""
                    G.edges[u, v]['title'] = f"Flow: {flow} records"
                    
                    # set constant color for all edges
                    G.edges[u, v]['color'] = '#4A90E2'  # gray color for all edges
                    
                # configure PyVis network
                layout_type = layout_selector.value
                physics_enabled = physics_toggle.value if layout_type != 'circular' else False
                direction = direction_selector.value
                
                net = Network(
                    height="800px", 
                    width="100%", 
                    notebook=True,
                    #heading="VisualMeta",  # the issue is that when it is added to pyvis the name becomes duplicated! :: take care of it!
                    #cdn_resources='in_line',  # in_line would not nrequire net connection necessarily
                    cdn_resources='remote',
                    directed=False  # set to false to avoid arrow issues
                )
                
                # add the NetworkX graph to PyVis
                net.from_nx(G)
                
                # configure vis.js options
                options = {
                    "nodes": {
                        "shape": "dot",
                        "scaling": {
                            "min": 10,
                            "max": 50
                        },
                        "font": {
                            "size": 14
                        }
                    },
                    "edges": {
                        "color": {
                            "color": "#888888",  # all edges will be gray
                            "highlight": "#DC143C",  # darker when highlighted
                            "hover": "#666666",  # darker when hovered
                            "inherit": False
                        },
                        "smooth": {
                            "type": "continuous"
                        },
                        "arrows": {
                            "to": {
                                "enabled": False,
                                "scaleFactor": 1
                            }
                        }
                    },
                    "physics": {
                        "enabled": physics_enabled,
                        "barnesHut": {
                            "gravitationalConstant": -80000,
                            "centralGravity": 0.3,
                            "springLength": 200,
                            "springConstant": 0.05,
                            "damping": 0.09
                        },
                        "stabilization": {
                            "enabled": True,
                            "iterations": 1000,
                            "updateInterval": 100
                        }
                    },
                    "layout": {
                        "hierarchical": {
                            "enabled": (layout_type == "hierarchical"),
                            "direction": direction,
                            "sortMethod": "directed",
                            "levelSeparation": 150,
                            "nodeSpacing": 120
                        }
                    },
                    "interaction": {
                        "hover": True,
                        "tooltipDelay": 300,
                        "navigationButtons": True,
                        "keyboard": True,
                        "dragNodes": True,
                        "dragView": True,
                        "zoomView": True,
                        "selectable": True
                    },
                    "tooltip": {
                        "delay": 300,
                        "fontColor": "black",
                        "fontSize": 14,
                        "fontFace": "verdana",
                        "color": {
                            "border": "#666",
                            "background": "#fff"
                        }
                    }
                }
                
                # set options
                net.set_options(json.dumps(options))
                
                # generate legend HTML
                legend_items = ""
                for i, col in enumerate(hierarchy_cols):
                    hex_color = colors[col]
                    legend_items += f"""
                    <li style="margin:5px 0">
                        <span style="display:inline-block;width:15px;height:15px;background-color:{hex_color};border-radius:50%;margin-right:5px;"></span>
                        <b>Level {i+1}:</b> {col}
                    </li>
                    """
                
                # create custom legend HTML
                legend_html = f"""
                <div style="padding: 10px; background-color: #f8f9fa; border-radius: 5px; margin: 10px 0;">
                    <h3>Hierarchy Levels</h3>
                    <ul style="list-style-type: none; padding-left: 10px;">
                        {legend_items}
                    </ul>
                    <!--
                    <p><b>Node size</b> represents record count (dynamically recalculated on click)</p>
                    <p><b>Edge width</b> represents connection strength</p>
                    <p><b>Hover</b> over elements for details</p>
                    <p><b>Click on a node</b> to recalculate and show only flows through that node</p>
                    <button id="reset-view" style="margin-top:10px;padding:5px 10px;background-color:#007bff;color:white;border:none;border-radius:4px;cursor:pointer;">Reset View</button>
                    -->
                </div>
                """
                
                # create JavaScript with dynamic recalculation functionality
                data_js = f"""
                <script type="text/javascript">
                // Data structures from Python
                var parentChildMap = {parent_child_json};
                var levelNodes = {level_nodes_json};
                var nodeInfo = {node_info_json};
                var edgeFlows = {edge_flows_json};
                var flowData = {flow_data_json};
                var originalPhysicsEnabled = {'true' if physics_enabled else 'false'};
                
                // Store original data
                var originalNodeColors = {{}};
                var originalNodeLabels = {{}};
                var originalNodeSizes = {{}};
                var originalNodeCounts = {{}};
                var originalNodeTitles = {{}};  // workaround
                var originalEdgeWidths = {{}};
                var originalEdgeTitles = {{}};
                var originalEdgeFlows = {{}};
                var originalEdgeColors = {{}};  // Store original edge colors
                
                // Track focus state
                var focusMode = false;
                var focusedNode = null;
                
                // Track if handlers are already registered
                var handlersRegistered = false;
                
                // Calculate filtered flows through a specific node
                function calculateFilteredFlows(focusNodeId) {{
                    var filteredPaths = [];
                    var focusLevel = nodeInfo[focusNodeId].level;
                    
                    // Filter paths that go through the focused node
                    flowData.paths.forEach(function(path) {{
                        if (path[focusLevel] === focusNodeId) {{
                            filteredPaths.push(path);
                        }}
                    }});
                    
                    // Calculate new counts for all descendants
                    var newNodeCounts = {{}};
                    var newEdgeFlows = {{}};
                    
                    // Initialize counts
                    Object.keys(nodeInfo).forEach(function(nodeId) {{
                        newNodeCounts[nodeId] = 0;
                    }});
                    
                    // Count occurrences in filtered paths
                    filteredPaths.forEach(function(path) {{
                        // Count nodes
                        path.forEach(function(nodeId, idx) {{
                            if (idx >= focusLevel) {{  // Only count from focus level onwards
                                newNodeCounts[nodeId] = (newNodeCounts[nodeId] || 0) + 1;
                            }}
                        }});
                        
                        // Count edges
                        for (var i = focusLevel; i < path.length - 1; i++) {{
                            var edgeKey = path[i] + "->" + path[i + 1];
                            newEdgeFlows[edgeKey] = (newEdgeFlows[edgeKey] || 0) + 1;
                        }}
                    }});
                    
                    return {{
                        nodeCounts: newNodeCounts,
                        edgeFlows: newEdgeFlows,
                        totalFilteredPaths: filteredPaths.length
                    }};
                }}
                
                // Find all descendants of a node (recursive)
                function getAllDescendants(nodeId) {{
                    var descendants = [];
                    
                    function traverse(id) {{
                        if (parentChildMap[id]) {{
                            parentChildMap[id].forEach(function(childId) {{
                                descendants.push(childId);
                                traverse(childId);
                            }});
                        }}
                    }}
                    
                    traverse(nodeId);
                    return descendants;
                }}
                
                // Initialize function to store original data
                function initializeOriginalData() {{
                    console.log("Storing original values");
                    
                    // Store original node data
                    var allNodes = nodes.get();
                    allNodes.forEach(function(node) {{
                        originalNodeColors[node.id] = node.color;
                        originalNodeLabels[node.id] = node.label;
                        originalNodeSizes[node.id] = node.size;
                        originalNodeCounts[node.id] = node.count;
                        originalNodeTitles[node.id] = node.title || "";  // workaround :: also consider adding || ""
                    }});
                    
                    // Store original edge data
                    var allEdges = edges.get();
                    allEdges.forEach(function(edge) {{
                        originalEdgeWidths[edge.id] = edge.width;
                        originalEdgeTitles[edge.id] = edge.title || "";
                        originalEdgeFlows[edge.id] = edge.flow || edge.value || 0;
                        originalEdgeColors[edge.id] = edge.color || "#848484";  // Store original edge color
                    }});
                }}
                
                // Wait a bit for network to be fully loaded, then initialize
                setTimeout(function() {{
                    if (!handlersRegistered) {{
                        initializeOriginalData();
                        
                        // Add click handler directly (not waiting for stabilization)
                        network.on("click", function(params) {{
                            if (params.nodes.length > 0) {{
                                var clickedNodeId = params.nodes[0];
                                console.log("Node clicked:", clickedNodeId);
                                // Small delay to ensure proper event handling
                                setTimeout(function() {{
                                    handleNodeClick(clickedNodeId);
                                }}, 50);
                            }}
                        }});
                        
                        // Reset view button handler
                        var resetButton = document.getElementById('reset-view');
                        if (resetButton) {{
                            resetButton.addEventListener('click', function() {{
                                console.log("Reset button clicked");
                                resetView();
                            }});
                        }}
                        
                        handlersRegistered = true;
                        console.log("Event handlers registered successfully");
                    }}
                }}, 1000);  // Give network 1 second to fully initialize
                
                // Reset view function
                function resetView() {{
                    if (!focusMode) return;
                    console.log("Resetting view");
                    
                    // Reset all nodes
                    var allNodes = nodes.get();
                    var nodeUpdates = [];
                    
                    allNodes.forEach(function(node) {{
                        var originalCount = originalNodeCounts[node.id];
                        var levelTotal = 0;
                        
                        // Recalculate level total for percentage
                        var nodeLevel = nodeInfo[node.id].level;
                        Object.keys(nodeInfo).forEach(function(id) {{
                            if (nodeInfo[id].level === nodeLevel) {{
                                levelTotal += originalNodeCounts[id];
                            }}
                        }});
                        
                        var levelPct = (originalCount / levelTotal) * 100;
                        
                        nodeUpdates.push({{
                            id: node.id,
                            color: originalNodeColors[node.id],
                            label: originalNodeLabels[node.id],
                            size: originalNodeSizes[node.id],
                            opacity: 1.0,
                            font: {{ color: '#000000', size: 14 }},
                            count: originalCount,
                            title: originalNodeTitles[node.id]
                            // title: nodeInfo[node.id].name + "\\nCount: " + originalCount + "\\nLevel %: " + levelPct.toFixed(1) + "%\\nLevel: " + (nodeLevel + 1)
                        }});
                    }});
                    
                    // Apply updates in batch
                    nodes.update(nodeUpdates);
                    
                    // Reset all edges
                    var allEdges = edges.get();
                    var edgeUpdates = [];
                    
                    allEdges.forEach(function(edge) {{
                        edgeUpdates.push({{
                            id: edge.id,
                            width: originalEdgeWidths[edge.id],
                            opacity: 1.0,
                            color: originalEdgeColors[edge.id],  // Reset to original color
                            label: "",
                            title: originalEdgeTitles[edge.id],
                            font: {{ color: '#000000', size: 10 }},
                            flow: originalEdgeFlows[edge.id],
                            arrows: {{ to: {{ enabled: false, scaleFactor: 1.0 }} }}
                        }});
                    }});
                    
                    // Apply updates in batch
                    edges.update(edgeUpdates);
                    
                    // Force visual update regardless of physics mode
                    if (originalPhysicsEnabled) {{
                        // With physics: briefly toggle to force update
                        network.setOptions({{ physics: {{ enabled: false }} }});
                        setTimeout(function() {{
                            network.setOptions({{ physics: {{ enabled: true }} }});
                        }}, 50);
                    }} else {{
                        // Without physics: just force redraw
                        network.redraw();
                    }}
                    
                    focusMode = false;
                    focusedNode = null;
                }}
                
                // Map a value from the source range [min, max] onto the target range
                // [targetMin, targetMax], interpolating on a log1p scale.
                // NOTE: braces are doubled because this script lives inside a Python format string.
                function scaleValue(value, min, max, targetMin, targetMax) {{
                    // Degenerate source range: use the midpoint of the target range
                    if (min === max) {{
                        return (targetMin + targetMax) / 2;
                    }}
                    var logValue = Math.log1p(value);
                    var logMin = Math.log1p(min);
                    var logMax = Math.log1p(max);
                    var targetSpan = targetMax - targetMin;
                    return targetMin + targetSpan * (logValue - logMin) / (logMax - logMin);
                }}
                
                // Handle node click event
                function handleNodeClick(nodeId) {{
                    // If clicking the same node that's already focused, reset view
                    if (focusMode && focusedNode === nodeId) {{
                        resetView();
                        return;
                    }}
                    
                    // Set focus mode
                    focusMode = true;
                    focusedNode = nodeId;
                    
                    // Get the clicked node data
                    var clickedNode = nodes.get(nodeId);
                    var clickedNodeLevel = clickedNode.level;
                    
                    // Calculate filtered flows
                    var filteredData = calculateFilteredFlows(nodeId);
                    var newNodeCounts = filteredData.nodeCounts;
                    var newEdgeFlows = filteredData.edgeFlows;
                    
                    // Get all descendants of the clicked node
                    var descendants = getAllDescendants(nodeId);
                    descendants.push(nodeId); // Include the clicked node itself
                    
                    // Create set of nodes with actual filtered flow for quick lookup
                    var nodesWithFlow = new Set();
                    descendants.forEach(function(id) {{
                        if (newNodeCounts[id] > 0) {{
                            nodesWithFlow.add(id);
                        }}
                    }});
                    
                    // Calculate min/max for scaling
                    var descendantCounts = [];
                    descendants.forEach(function(id) {{
                        if (newNodeCounts[id] > 0) {{
                            descendantCounts.push(newNodeCounts[id]);
                        }}
                    }});
                    
                    var minDescCount = Math.min.apply(null, descendantCounts) || 1;
                    var maxDescCount = Math.max.apply(null, descendantCounts) || 1;
                    
                    // Calculate level totals for the filtered data
                    var filteredLevelTotals = {{}};
                    descendants.forEach(function(id) {{
                        var level = nodeInfo[id].level;
                        filteredLevelTotals[level] = (filteredLevelTotals[level] || 0) + newNodeCounts[id];
                    }});

                    // Get all nodes at the clicked level
                    var sameLevel = levelNodes[clickedNodeLevel];
                    
                    // Get ancestors (all nodes at levels < clickedNodeLevel)
                    var ancestors = [];
                    for (var i = 0; i < clickedNodeLevel; i++) {{
                        if (levelNodes[i]) {{
                            ancestors = ancestors.concat(levelNodes[i]);
                        }}
                    }}
                    
                    // Get non-descendant nodes at levels > clickedNodeLevel
                    var nonDescendantLowerLevels = [];
                    for (var i = clickedNodeLevel + 1; i < Object.keys(levelNodes).length; i++) {{
                        if (levelNodes[i]) {{
                            levelNodes[i].forEach(function(id) {{
                                if (!descendants.includes(id)) {{
                                    nonDescendantLowerLevels.push(id);
                                }}
                            }});
                        }}
                    }}
                    
                    // Prepare batch updates for nodes
                    var nodeUpdates = [];
                    
                    // Process nodes based on their relationship to the clicked node
                    nodes.forEach(function(node) {{
                        var nodeId = node.id;
                        var nodeLevel = node.level;
                        var nodeName = nodeInfo[nodeId].name;
                        var originalCount = originalNodeCounts[nodeId];
                        var newCount = newNodeCounts[nodeId] || 0;
                        var rootNodeCount = originalNodeCounts[focusedNode];
                        
                        // 1. CLICKED NODE: Show original count (all flows through this node)
                        if (nodeId === focusedNode) {{
                            // var levelPct = (originalCount / filteredLevelTotals[nodeLevel]) * 100;  // check that this should always show 100%
                            nodeUpdates.push({{
                                id: nodeId,
                                color: {{ background: originalNodeColors[nodeId], border: '#000000' }},
                                label: nodeName + "\\n" + originalCount,
                                font: {{ color: '#000000', size: 16, bold: true }},
                                opacity: 1.0,
                                size: originalNodeSizes[nodeId],
                                title: nodeName + "\\nTotal Count: " + originalCount + "\\nFiltered view shows 100% of this node"
                            }});
                        }}
                        // 2. OTHER NODES AT SAME LEVEL: Dimmed with original counts
                        else if (sameLevel.includes(nodeId) && nodeLevel === clickedNodeLevel) {{
                            nodeUpdates.push({{
                                id: nodeId,
                                color: originalNodeColors[nodeId],
                                label: nodeName + "\\n" + originalCount,
                                font: {{ color: '#666666', size: 12 }},
                                opacity: 0.2,
                                size: originalNodeSizes[nodeId]
                            }});
                        }}
                        // 3. ANCESTORS: Only show category names, dimmed
                        else if (ancestors.includes(nodeId)) {{
                            nodeUpdates.push({{
                                id: nodeId,
                                color: originalNodeColors[nodeId],
                                label: nodeName,
                                font: {{ color: '#999999', size: 12 }},
                                opacity: 0.2,
                                size: originalNodeSizes[nodeId]
                            }});
                        }}
                        // 4. DESCENDANTS: Show filtered counts and resize
                        else if (descendants.includes(nodeId) && nodeLevel > clickedNodeLevel) {{
                            if (newCount > 0) {{
                                // Calculate percentage relative to filtered level total
                                // var filteredLevelPct = (newCount / filteredLevelTotals[nodeLevel]) * 100;
                                var filteredPct = (newCount / rootNodeCount) * 100;
                                // Calculate percentage relative to original count
                                var retentionPct = (newCount / originalCount) * 100;
                                
                                // Resize node based on new count
                                var newSize = scaleValue(newCount, minDescCount, maxDescCount, 10, 50);
                                
                                nodeUpdates.push({{
                                    id: nodeId,
                                    color: originalNodeColors[nodeId],
                                    label: nodeName + "\\n" + newCount + " (" + filteredPct.toFixed(1) + "%)",
                                    font: {{ color: '#000000', size: 14 }},
                                    opacity: 1.0,
                                    size: newSize,
                                    title: nodeName + "\\nFiltered Count: " + newCount + " (from originally " + originalCount + ")\\n" +
                                           "Filtered %: " + filteredPct.toFixed(1) + "%\\n" +
                                           "Retention %: " + retentionPct.toFixed(1) + "% of original"
                                }});
                            }} else {{
                                // No flow through this descendant from the clicked node
                                nodeUpdates.push({{
                                    id: nodeId,
                                    color: originalNodeColors[nodeId],
                                    label: "",
                                    font: {{ color: '#aaaaaa', size: 1 }},
                                    opacity: 0.2,
                                    title: originalNodeTitles[nodeId],  // workaround
                                    size: 5
                                }});
                            }}
                        }}
                        // 5. NON-DESCENDANTS at lower levels
                        else if (nonDescendantLowerLevels.includes(nodeId)) {{
                            nodeUpdates.push({{
                                id: nodeId,
                                color: originalNodeColors[nodeId],
                                label: "",
                                font: {{ color: '#aaaaaa', size: 1 }},
                                opacity: 0.2,
                                title: originalNodeTitles[nodeId],  // workaround
                                size: 5
                            }});
                        }}
                    }});
                    
                    // Apply all node updates in a batch
                    nodes.update(nodeUpdates);
                    
                    // Calculate edge flow ranges for scaling
                    var activeEdgeFlows = [];
                    Object.keys(newEdgeFlows).forEach(function(key) {{
                        if (newEdgeFlows[key] > 0) {{
                            activeEdgeFlows.push(newEdgeFlows[key]);
                        }}
                    }});
                    
                    var minEdgeFlow = Math.min.apply(null, activeEdgeFlows) || 1;
                    var maxEdgeFlow = Math.max.apply(null, activeEdgeFlows) || 1;
                    
                    // Prepare batch updates for edges
                    var edgeUpdates = [];
                    
                    // Process edges based on their relationship to the clicked node
                    edges.forEach(function(edge) {{
                        var fromId = edge.from;
                        var toId = edge.to;
                        var edgeKey = fromId + "->" + toId;
                        var newFlow = newEdgeFlows[edgeKey] || 0;
                        var originalFlow = originalEdgeFlows[edge.id];
                        
                        // Check if this edge is in the descendant path AND both nodes have flow
                        var isDescendantEdge = descendants.includes(fromId) && 
                                            descendants.includes(toId) && 
                                            nodes.get(toId).level > nodes.get(fromId).level &&
                                            nodesWithFlow.has(fromId) &&
                                            nodesWithFlow.has(toId);
                        
                        if (isDescendantEdge && newFlow > 0) {{
                            // Calculate new width based on filtered flow
                            var newWidth = scaleValue(newFlow, minEdgeFlow, maxEdgeFlow, 1, 6);
                            
                            // Calculate percentage of flow
                            var fromFilteredCount = newNodeCounts[fromId];
                            var flowPct = (newFlow / fromFilteredCount) * 100;
                            var retentionPct = (newFlow / originalFlow) * 100;
                            
                            edgeUpdates.push({{
                                id: edge.id,
                                width: newWidth,
                                opacity: 1.0,
                                color: '#DC143C', // override it with a constant color (red) :: originalEdgeColors[edge.id],  // Use original color, not dimmed
                                label: "",
                                title: "Filtered Flow: " + newFlow + " (from originally " + originalFlow + ")\\n" +
                                        flowPct.toFixed(1) + "% of immediate source node\\n" +
                                        "Retention %: " + retentionPct.toFixed(1) + "% of original",
                                font: {{ color: '#000000', size: 12, bold: true }},
                                flow: newFlow,
                                arrows: {{ to: {{ enabled: false, scaleFactor: 1.5 }} }}
                            }});
                        }}
                        // All other edges - dimmed or hidden
                        else {{
                            edgeUpdates.push({{
                                id: edge.id,
                                width: originalEdgeWidths[edge.id] * 0.2,
                                opacity: 0.1,
                                color: "#cccccc",  // Dimmed color for non-active edges
                                label: "",
                                title: originalEdgeTitles[edge.id],
                                arrows: {{ to: {{ enabled: false, scaleFactor: 0.5 }} }}  
                                // once set the arrows to true; it was working but ONLY the colors are not changing!
                                // gotcha! the problem is that there is NO separate control for the edge color... it is overridden by the source nodes border color code!
                            }});
                        }}
                    }});
                    
                    // Apply all edge updates in a batch
                    edges.update(edgeUpdates);
                    
                    // Force visual update based on physics mode
                    if (!originalPhysicsEnabled) {{
                        // Without physics: force immediate redraw
                        network.redraw();
                    }}
                    
                    console.log("Focus view updated. Showing", filteredData.totalFilteredPaths, "paths through", focusedNode);
                }}
                </script>
                """
                
                # replace with direct JavaScript (no string formatting issues)
                custom_js = data_js
                
                # # generate unique filename
                # filename = f"VisualMeta_{uuid.uuid4().hex[:8]}.html"

                now_ = datetime.now().strftime('%Y%m%d-%H%M%S')
                filename = f"VisualMeta_{now_}.html"
                
                # save the network without custom JavaScript
                net.save_graph(filename)
                
                # read the generated file and insert our custom code
                with open(filename, 'r', encoding='utf-8') as f:
                    html_content = f.read()
                
                # insert legend before the network div
                html_content = html_content.replace('<div id="mynetwork">', legend_html + '<div id="mynetwork">')
                
                # insert custom JS before the closing body tag
                html_content = html_content.replace('</body>', custom_js + '</body>')
                
                # write the modified file
                with open(filename, 'w', encoding='utf-8') as f:
                    f.write(html_content)
                
                # display in notebook
                display(HTML(legend_html))
                display(HTML(f'<iframe src="{filename}" width="100%" height="800px" frameBorder="0"></iframe>'))
                
                # show stats
                print(f"\n### network statistics ###")
                print(f"Total nodes: {len(G.nodes())}")
                print(f"Total connections: {len(G.edges())}")
                print(f"Total data paths: {len(flow_data['paths'])}")
                
                # count nodes per level
                level_counts = {}
                for level, nodes_list in level_nodes.items():
                    level_counts[level] = len(nodes_list)
                
                print("\n### categories per Level ###")
                for level, count in sorted(level_counts.items()):
                    print(f"Level {level+1} ({hierarchy_cols[level]}): {count} categories")
            
            except Exception as e:
                print(f"Error creating visualization: {e}")
                import traceback
                traceback.print_exc()
    
    # connect button to handler
    button.on_click(on_button_clicked)
    
    # initial setup of hierarchy selectors
    update_level_selectors(level_slider.value)
    
    # display widgets
    display(widgets.HTML("<h2>VisualMeta</h2>"))
    display(widgets.VBox([
        widgets.HTML("<h3>1. Select Number of Hierarchy Levels</h3>"),
        level_slider,
        widgets.HTML("<h3>2. Select Columns for Each Level</h3>"),
        hierarchy_container,
        widgets.HTML("<h3>3. Customize Visualization</h3>"),
        widgets.HBox([colormap_selector, layout_selector]),
        widgets.HBox([direction_selector, physics_toggle]),
        widgets.HTML("<h3>4. Generate Visualization</h3>"),
        button
    ]))
    display(output)
    
    #return df  # return the dataframe for potential further use :: it can impose redundancy at the end
    return None  # avoid redundancy

# exec it
# input tables are looked up in ./data, relative to the directory this notebook is executed from
data_path = pathlib.Path.cwd().joinpath('data')
# alternative input files (swap into the line below to use them):
#   data_path / "meta-table.xls"
#   data_path / "MetaTable-inclusion-batch_20250606_220814.csv"
file_path = data_path.joinpath("MetaTable-exclusion-batch_20250606_220814.csv")

visual_meta(file_path)

successfully read CSV...
successfully loaded the data with 152 rows and 36 columns...


Unnamed: 0,AnimalID,Crush,Crush-CrushCondition,CrushCondition,DBS-DBSCondition,FileName,Flox,Genotype,MainGroup,MolecularSubtype,MolecularSubtype1,MolecularSubtype2,Opsin,Path,Pharma,RNAIntervention,RNAType,RNAType-RNAIntervention,RealvsSham,Sex,Stim,Stim-StimCondition,StimCondition,StressCondition,Subtype,Subtype-MolecularSubtype,Subtype1,Subtype1-MolecularSubtype1-Subtype2-MolecularSubtype2,Subtype2,Surgery,Surgery-RealvsSham,TimeOfTesting,Timepoint,Timepoint-TimeOfTesting,Timepoint.1,Timepoint-TimeOfTesting.1
0,10,Crush,Crush-True,True,,Dystonia_DYT1-GAG3_10_GAG3_M_PreOP_Crush-True_...,,GAG3,Dystonia,GAG3,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,MicrogliaDepletionMonth3,,,,,M,,,,,DYT1,DYT1-GAG3,,,,,,,PreOP,,PreOP,
1,4,Crush,Crush-True,True,,Dystonia_DYT1-GAG3_4_GAG3_M_PreOP_Crush-True_M...,,GAG3,Dystonia,GAG3,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,MicrogliaDepletionMonth3,,,,,M,,,,,DYT1,DYT1-GAG3,,,,,,,PreOP,,PreOP,
2,18,Crush,Crush-True,True,,Dystonia_DYT1-GAG3_18_GAG3_M_PreOP_Crush-True_...,,GAG3,Dystonia,GAG3,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,,,,,,M,,,,,DYT1,DYT1-GAG3,,,,,,,PreOP,,PreOP,
3,10,Crush,Crush-True,True,,Dystonia_DYT1-GAG3_10_GAG3_M_Week1_Crush-True_...,,GAG3,Dystonia,GAG3,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,MicrogliaDepletionMonth3,,,,,M,,,,,DYT1,DYT1-GAG3,,,,,,,Week1,,Week1,
4,4,Crush,Crush-True,True,,Dystonia_DYT1-GAG3_4_GAG3_M_Week1_Crush-True_M...,,GAG3,Dystonia,GAG3,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,MicrogliaDepletionMonth3,,,,,M,,,,,DYT1,DYT1-GAG3,,,,,,,Week1,,Week1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,18,Crush,Crush-True,True,,Wildtype_None_18_Wt_M_Week1_Crush-True_NoneDLC...,,Wt,Wildtype,,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,,,,,,M,,,,,,,,,,,,,Week1,,Week1,
148,25,Crush,Crush-True,True,,Wildtype_None_25_Wt_M_Week1_Crush-True_NoneDLC...,,Wt,Wildtype,,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,,,,,,M,,,,,,,,,,,,,Week1,,Week1,
149,34,Crush,Crush-True,True,,Wildtype_None_34_Wt_M_Week1_Crush-True_NoneDLC...,,Wt,Wildtype,,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,,,,,,M,,,,,,,,,,,,,Week1,,Week1,
150,267-1,,,,,Wildtype_None_267-1_Wt_M_Week7_None_LevodopaDL...,,Wt,Wildtype,,,,,Z:\Johannes1\Forschung\Projects\Project_OPF_Ki...,Levodopa,,,,,M,,,,,,,,,,,,,Week7,,Week7,


found 29 potential columns for hierarchy levels:
['AnimalID', 'Crush', 'Crush-CrushCondition', 'CrushCondition', 'DBS-DBSCondition', 'FileName', 'Flox', 'Genotype', 'MainGroup', 'MolecularSubtype', 'MolecularSubtype1', 'Opsin', 'Path', 'Pharma', 'Sex', 'Stim', 'Stim-StimCondition', 'StimCondition', 'Subtype', 'Subtype-MolecularSubtype', 'Subtype1', 'Subtype1-MolecularSubtype1-Subtype2-MolecularSubtype2', 'Surgery', 'Surgery-RealvsSham', 'TimeOfTesting', 'Timepoint', 'Timepoint-TimeOfTesting', 'Timepoint.1', 'Timepoint-TimeOfTesting.1']



HTML(value='<h2>VisualMeta</h2>')

VBox(children=(HTML(value='<h3>1. Select Number of Hierarchy Levels</h3>'), IntSlider(value=3, continuous_upda…

Output()