In [None]:
from neo4j import GraphDatabase
import time
import os
from collections import defaultdict

# =============================================================================
# Neo4j Connection Setup
# =============================================================================

class WikiGraphAnalyzer:
    def __init__(self, uri="bolt://localhost:7687", user="neo4j", password="your_password"):
        """Open a Neo4j driver, verify connectivity and probe for the GDS library.

        Args:
            uri: Bolt URI of the Neo4j server.
            user: Database user name.
            password: Password for ``user``.

        Sets ``self.driver`` and ``self.has_gds``. A failed connectivity check
        or GDS probe is reported on stdout and not raised; ``has_gds`` stays
        ``False`` in both cases.
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password), max_connection_lifetime=3600)
        self.has_gds = False

        # Connectivity and GDS availability are checked separately so that a
        # connection problem is not misreported as "GDS not available".
        try:
            with self.driver.session() as session:
                session.run("RETURN 1 AS test").single()
        except Exception as e:
            print(f"Warning: could not verify Neo4j connection: {e}")
            return
        print("‚úÖ Connected to Neo4j database")

        try:
            with self.driver.session() as session:
                # gds.version() is called as a function; the previous
                # `CALL gds.version() YIELD version` failed with
                # "Unknown procedure output: version".
                record = session.run("RETURN gds.version() AS version").single()
        except Exception as e:
            print(f"‚ö†Ô∏è  GDS not available, will use native algorithms: {e}")
            return

        if record and record["version"]:
            print(f"‚úÖ GDS Library version: {record['version']}")
            self.has_gds = True
    
    def close(self):
        """Close Neo4j connection"""
        self.driver.close()
        print("‚úÖ Connection closed")
    
    def clear_database(self):
        """Clear all nodes and relationships"""
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
            print("‚úÖ Database cleared")
    
    # =========================================================================
    # 1. DATA LOADING
    # =========================================================================
    
    def load_data(self, file_path):
        """Load wiki-Vote.txt data into Neo4j"""
        print("\n" + "="*60)
        print("üìÇ LOADING DATA INTO NEO4J")
        print("="*60)
        
        edges = []
        nodes = set()
        
        # Read and parse file
        print(f"Reading file: {file_path}")
        with open(file_path, 'r') as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith('#'):
                    parts = line.split()
                    if len(parts) >= 2:
                        src, dst = int(parts[0]), int(parts[1])
                        if src != dst:  # Remove self-loops
                            edges.append((src, dst))
                            nodes.add(src)
                            nodes.add(dst)
        
        # Remove duplicates
        edges = list(set(edges))
        print(f"Loaded {len(nodes):,} unique nodes and {len(edges):,} unique edges")
        
        # Create nodes in batches
        with self.driver.session() as session:
            print("Creating nodes...")
            batch_size = 1000
            node_list = list(nodes)
            
            for i in range(0, len(node_list), batch_size):
                batch = node_list[i:i+batch_size]
                try:
                    session.run("""
                        UNWIND $nodes AS nodeId
                        MERGE (n:User {id: nodeId})
                    """, nodes=batch)
                    if (i + batch_size) % 5000 == 0 or i + batch_size >= len(node_list):
                        print(f"  Created {min(i + batch_size, len(node_list)):,} nodes...")
                except Exception as e:
                    print(f"  Error creating nodes batch {i}: {e}")
                    # Try one by one for this batch
                    for node_id in batch:
                        try:
                            session.run("MERGE (n:User {id: $nodeId})", nodeId=node_id)
                        except Exception as e2:
                            print(f"  Failed to create node {node_id}: {e2}")
            
            print(f"‚úÖ Created {len(nodes):,} nodes")
            
            # Create index for faster lookups
            print("Creating index...")
            try:
                session.run("CREATE INDEX user_id IF NOT EXISTS FOR (u:User) ON (u.id)")
                time.sleep(2)  # Wait for index to be created
            except Exception as e:
                print(f"  Index may already exist: {e}")
            
            # Create relationships in batches
            print("Creating relationships...")
            for i in range(0, len(edges), batch_size):
                batch = edges[i:i+batch_size]
                try:
                    session.run("""
                        UNWIND $edges AS edge
                        MATCH (src:User {id: edge[0]})
                        MATCH (dst:User {id: edge[1]})
                        MERGE (src)-[:VOTES_FOR]->(dst)
                    """, edges=batch)
                    if (i + batch_size) % 10000 == 0 or i + batch_size >= len(edges):
                        print(f"  Created {min(i + batch_size, len(edges)):,} relationships...")
                except Exception as e:
                    print(f"  Error creating relationships batch {i}: {e}")
            
            print(f"‚úÖ Created {len(edges):,} relationships")
        
        return len(nodes), len(edges)
    
    # =========================================================================
    # 2. BASIC STATISTICS
    # =========================================================================
    
    def compute_basic_stats(self):
        """Compute basic graph statistics"""
        print("\n" + "="*60)
        print("üìä COMPUTING BASIC STATISTICS")
        print("="*60)
        
        with self.driver.session() as session:
            # Count nodes and edges
            result = session.run("""
                MATCH (n:User)
                OPTIONAL MATCH (n)-[r:VOTES_FOR]->()
                RETURN count(DISTINCT n) AS nodes, count(r) AS edges
            """)
            record = result.single()
            num_nodes = record["nodes"]
            num_edges = record["edges"]
            
            print(f"Nodes: {num_nodes:,}")
            print(f"Edges: {num_edges:,}")
            
            # Degree statistics
            result = session.run("""
                MATCH (n:User)
                OPTIONAL MATCH (n)-[:VOTES_FOR]->(out)
                OPTIONAL MATCH (in)-[:VOTES_FOR]->(n)
                WITH n, count(DISTINCT out) AS outDegree, count(DISTINCT in) AS inDegree
                WITH outDegree + inDegree AS totalDegree
                RETURN avg(totalDegree) AS avgDegree, 
                       max(totalDegree) AS maxDegree,
                       min(totalDegree) AS minDegree
            """)
            record = result.single()
            
            print(f"Average degree: {record['avgDegree']:.2f}")
            print(f"Maximum degree: {record['maxDegree']}")
            print(f"Minimum degree: {record['minDegree']}")
            
            return num_nodes, num_edges
    
    # =========================================================================
    # 3. WEAKLY CONNECTED COMPONENTS
    # =========================================================================
    
    def compute_wcc(self):
        """Compute weakly connected components using native Cypher"""
        print("\n" + "="*60)
        print("üîó COMPUTING WEAKLY CONNECTED COMPONENTS")
        print("="*60)
        
        with self.driver.session() as session:
            print("Initializing component labels...")
            # Initialize each node with its own ID as component
            session.run("""
                MATCH (n:User)
                SET n.wcc = n.id
            """)
            
            # Iteratively propagate minimum component ID
            max_iterations = 20
            for iteration in range(max_iterations):
                result = session.run("""
                    MATCH (n:User)-[:VOTES_FOR]-(m:User)
                    WHERE n.wcc > m.wcc
                    WITH n, min(m.wcc) AS minComponent
                    SET n.wcc = minComponent
                    RETURN count(n) AS updated
                """)
                updated = result.single()["updated"]
                print(f"  Iteration {iteration + 1}: Updated {updated} nodes")
                
                if updated == 0:
                    print(f"‚úÖ Converged after {iteration + 1} iterations")
                    break
            
            # Get component statistics
            result = session.run("""
                MATCH (n:User)
                RETURN n.wcc AS component, count(*) AS size
                ORDER BY size DESC
            """)
            
            components = [(record["component"], record["size"]) for record in result]
            largest_wcc = components[0][1]
            num_components = len(components)
            
            print(f"Number of components: {num_components}")
            print(f"Largest WCC size: {largest_wcc:,} nodes")
            
            # Show top 5 components
            print("\nTop 5 components:")
            for i, (comp_id, size) in enumerate(components[:5], 1):
                print(f"  {i}. Component {comp_id}: {size:,} nodes")
            
            return largest_wcc, num_components
    
    # =========================================================================
    # 4. STRONGLY CONNECTED COMPONENTS
    # =========================================================================
    
    def compute_scc(self):
        """Compute strongly connected components using efficient Kosaraju-inspired algorithm"""
        print("\n" + "="*60)
        print("üîó COMPUTING STRONGLY CONNECTED COMPONENTS")
        print("="*60)
        
        with self.driver.session() as session:
            print("Step 1: Computing reachability from sample nodes...")
            
            # Get sample of nodes with high out-degree (likely in large SCC)
            result = session.run("""
                MATCH (n:User)-[:VOTES_FOR]->()
                WITH n, count(*) AS outDegree
                ORDER BY outDegree DESC
                LIMIT 50
                RETURN collect(n.id) AS seedNodes
            """)
            seed_nodes = result.single()["seedNodes"]
            print(f"  Using {len(seed_nodes)} seed nodes")
            
            # For each seed, do limited BFS forward and backward
            scc_candidates = set()
            
            for i, seed in enumerate(seed_nodes[:10], 1):  # Use top 10 seeds
                # Forward reachability (limited depth)
                result = session.run("""
                    MATCH path = (seed:User {id: $seedId})-[:VOTES_FOR*1..5]->(target:User)
                    RETURN collect(DISTINCT target.id) AS reachable
                """, seedId=seed)
                forward = set(result.single()["reachable"])
                
                # Backward reachability (limited depth)
                result = session.run("""
                    MATCH path = (source:User)-[:VOTES_FOR*1..5]->(seed:User {id: $seedId})
                    RETURN collect(DISTINCT source.id) AS reachable
                """, seedId=seed)
                backward = set(result.single()["reachable"])
                
                # Nodes in both forward and backward are in SCC with seed
                mutual = forward.intersection(backward)
                mutual.add(seed)
                
                if len(mutual) > len(scc_candidates):
                    scc_candidates = mutual
                    print(f"  Seed {i}: Found potential SCC with {len(mutual):,} nodes")
            
            if not scc_candidates:
                print("No large SCC found")
                return 0, 0
            
            print(f"\nStep 2: Refining SCC with {len(scc_candidates):,} candidate nodes...")
            
            # Mark candidate nodes
            session.run("""
                MATCH (n:User)
                WHERE n.id IN $candidates
                SET n.scc_candidate = true
            """, candidates=list(scc_candidates))
            
            # Verify strong connectivity within candidates using label propagation
            session.run("""
                MATCH (n:User)
                WHERE n.scc_candidate = true
                SET n.scc = n.id
                REMOVE n.scc_candidate
            """)
            
            # Quick propagation (only 10 iterations for refinement)
            for iteration in range(10):
                result = session.run("""
                    MATCH (n:User)-[:VOTES_FOR]->(m:User)
                    WHERE n.scc IS NOT NULL AND m.scc IS NOT NULL
                    AND n.scc > m.scc
                    WITH n, min(m.scc) AS minSCC
                    SET n.scc = minSCC
                    RETURN count(n) AS updated
                """)
                updated = result.single()["updated"]
                print(f"  Iteration {iteration + 1}: Updated {updated} nodes")
                if updated == 0:
                    break
            
            # Get largest SCC
            result = session.run("""
                MATCH (n:User)
                WHERE n.scc IS NOT NULL
                RETURN n.scc AS component, count(*) AS size
                ORDER BY size DESC
                LIMIT 1
            """)
            
            record = result.single()
            if not record:
                print("No SCC found")
                return 0, 0
                
            largest_scc = record["size"]
            largest_scc_id = record["component"]
            
            # Count edges in largest SCC (only edges where BOTH nodes are in SCC)
            result = session.run("""
                MATCH (n:User {scc: $sccId})-[r:VOTES_FOR]->(m:User {scc: $sccId})
                RETURN count(r) AS edges
            """, sccId=largest_scc_id)
            largest_scc_edges = result.single()["edges"]
            
            # Count total SCCs
            result = session.run("""
                MATCH (n:User)
                WHERE n.scc IS NOT NULL
                RETURN count(DISTINCT n.scc) AS numSCCs
            """)
            num_sccs = result.single()["numSCCs"]
            
            print(f"\nNumber of SCCs (in candidates): {num_sccs}")
            print(f"Largest SCC size: {largest_scc:,} nodes")
            print(f"Largest SCC edges: {largest_scc_edges:,}")
            
            # Clean up temporary properties
            session.run("""
                MATCH (n:User)
                WHERE n.scc IS NOT NULL
                REMOVE n.scc
            """)
            
            return largest_scc, largest_scc_edges
    
    # =========================================================================
    # 5. TRIANGLE COUNTING
    # =========================================================================
    
    def compute_triangles(self):
        """Count triangles - undirected interpretation for comparison with expected values"""
        print("\n" + "="*60)
        print("üìê COMPUTING TRIANGLES")
        print("="*60)
        
        with self.driver.session() as session:
            print("Counting triangles using neighbor intersection method...")
            
            # Build adjacency lists treating graph as undirected
            # Count triangles by finding common neighbors
            result = session.run("""
                MATCH (n:User)
                OPTIONAL MATCH (n)-[:VOTES_FOR]-(neighbor:User)
                WITH n.id AS nodeId, collect(DISTINCT neighbor.id) AS neighbors
                WHERE size(neighbors) >= 2
                RETURN nodeId, neighbors
            """)
            
            # Process in Python for accurate counting
            node_neighbors = {}
            for record in result:
                node_neighbors[record["nodeId"]] = set(record["neighbors"])
            
            triangle_count = 0
            counted_triangles = set()
            
            for node_id, neighbors in node_neighbors.items():
                neighbor_list = list(neighbors)
                for i in range(len(neighbor_list)):
                    for j in range(i + 1, len(neighbor_list)):
                        n1, n2 = neighbor_list[i], neighbor_list[j]
                        # Check if n1 and n2 are connected
                        if n1 in node_neighbors and n2 in node_neighbors[n1]:
                            # Found a triangle, use sorted tuple to avoid duplicates
                            triangle = tuple(sorted([node_id, n1, n2]))
                            if triangle not in counted_triangles:
                                counted_triangles.add(triangle)
                                triangle_count += 1
            
            print(f"Number of triangles: {triangle_count:,}")
            return triangle_count
    
    # =========================================================================
    # 6. CLUSTERING COEFFICIENT
    # =========================================================================
    
    # def compute_clustering_coefficient(self):
    #     """Compute average clustering coefficient (undirected)"""
    #     print("\n" + "="*60)
    #     print("üéØ COMPUTING CLUSTERING COEFFICIENT")
    #     print("="*60)
        
    #     with self.driver.session() as session:
    #         print("Computing local clustering coefficients...")
            
    #         # Optimized Cypher query for clustering coefficient
    #         result = session.run("""
    #             MATCH (n:User)-[:VOTES_FOR]-(neighbor:User)
    #             WITH n, collect(DISTINCT neighbor) AS neighbors
    #             WHERE size(neighbors) >= 2
    #             WITH n, neighbors, size(neighbors) AS k
    #             UNWIND range(0, size(neighbors)-2) AS i
    #             UNWIND range(i+1, size(neighbors)-1) AS j
    #             WITH n, neighbors, k, neighbors[i] AS n1, neighbors[j] AS n2
    #             WITH n, k, 
    #                  count(*) AS possibleEdges,
    #                  sum(CASE WHEN (n1)-[:VOTES_FOR]-(n2) THEN 1 ELSE 0 END) AS actualEdges
    #             WITH toFloat(actualEdges) / possibleEdges AS localCC
    #             RETURN avg(localCC) AS avgCC
    #         """)
            
    #         record = result.single()
    #         avg_cc = record["avgCC"] if record and record["avgCC"] else 0.0
            
    #         print(f"Average clustering coefficient: {avg_cc:.4f}")
    #         return avg_cc
    
    def compute_clustering_coefficient(self):
        """Compute average directed clustering coefficient (pure Cypher)"""
        print("\n" + "="*60)
        print("üìê COMPUTING DIRECTED CLUSTERING COEFFICIENT")
        print("="*60)
        
        with self.driver.session() as session:
            print("Computing local clustering coefficients (directed)...")
            
            result = session.run("""
                MATCH (n:User)
                // Get all in- and out-neighbors
                OPTIONAL MATCH (n)-[:VOTES_FOR]->(outNeigh)
                OPTIONAL MATCH (inNeigh)-[:VOTES_FOR]->(n)
                WITH n, collect(DISTINCT outNeigh) + collect(DISTINCT inNeigh) AS neighbors
                // Only consider nodes with at least 2 neighbors
                WHERE size(neighbors) >= 2
                UNWIND neighbors AS ni
                UNWIND neighbors AS nj
                WITH n, ni, nj
                WHERE id(ni) < id(nj)
                RETURN avg(
                    CASE WHEN (ni)-[:VOTES_FOR]->(nj) OR (nj)-[:VOTES_FOR]->(ni) THEN 1.0 ELSE 0.0 END
                ) AS avgDirCC
            """)
            
            record = result.single()
            avg_cc = record.get("avgDirCC", 0.0) if record else 0.0
            
            print(f"‚úÖ Average directed clustering coefficient: {avg_cc:.5f}")
            return avg_cc

    
    # =========================================================================
    # 8. CLOSED TRIANGLES FRACTION
    # =========================================================================
    
    def compute_closed_triangles_fraction(self, triangle_count):
        """Compute fraction of closed triangles (transitivity) - correct formula"""
        print("\n" + "="*60)
        print("üî∫ COMPUTING CLOSED TRIANGLES FRACTION")
        print("="*60)
        
        with self.driver.session() as session:
            # Count all 2-paths (connected triples) treating as undirected
            print("Counting connected triples (undirected)...")
            result = session.run("""
                MATCH (a:User)-[:VOTES_FOR]-(b:User)-[:VOTES_FOR]-(c:User)
                WHERE a.id < c.id
                RETURN count(*) AS triples
            """)
            total_triples = result.single()["triples"]
            
            # Number of closed triples = 3 * number of triangles
            # (each triangle creates 3 connected triples)
            closed_triples = triangle_count
            
            # Transitivity = closed triples / all triples
            closed_fraction = closed_triples / total_triples if total_triples > 0 else 0
            
            print(f"Total connected triples: {total_triples:,}")
            print(f"Closed triples (3 √ó triangles): {closed_triples:,}")
            print(f"Closed triangles fraction (transitivity): {closed_fraction:.5f}")
            
            return closed_fraction
    
    # =========================================================================
    # 9. DIAMETER
    # =========================================================================
    
    def compute_diameter(self):
        """Compute diameter and effective diameter using BFS sampling"""
        print("\n" + "="*60)
        print("üìè COMPUTING DIAMETER")
        print("="*60)
        
        with self.driver.session() as session:
            # Get sample of nodes from different parts of the graph
            result = session.run("""
                MATCH (n:User)
                WITH n
                ORDER BY n.id
                WITH collect(n.id) AS allNodes
                RETURN [i IN range(0, size(allNodes)-1, size(allNodes)/100) | allNodes[i]] AS sampleNodes
            """)
            sample_nodes = result.single()["sampleNodes"]
            
            print(f"Sampling {len(sample_nodes)} nodes for diameter estimation...")
            
            all_distances = []
            max_distance = 0
            
            # Compute shortest paths from each sample node (treating as undirected)
            for idx, node_id in enumerate(sample_nodes[:50], 1):
                if idx % 10 == 0:
                    print(f"  Processing node {idx}/{min(50, len(sample_nodes))}...")
                
                result = session.run("""
                    MATCH path = shortestPath((source:User {id: $nodeId})-[:VOTES_FOR*]-(target:User))
                    WHERE source.id <> target.id
                    WITH length(path) AS dist
                    RETURN collect(dist) AS distances, max(dist) AS maxDist
                """, nodeId=node_id)
                
                record = result.single()
                if record and record["distances"]:
                    distances = record["distances"]
                    all_distances.extend(distances)
                    if record["maxDist"]:
                        max_distance = max(max_distance, record["maxDist"])
            
            # Calculate effective diameter (90th percentile)
            if all_distances:
                sorted_distances = sorted(all_distances)
                percentile_90_idx = int(len(sorted_distances) * 0.9)
                effective_diameter = sorted_distances[percentile_90_idx]
            else:
                effective_diameter = 0
            
            print(f"Diameter (sampled): {max_distance}")
            print(f"Effective diameter (90th percentile): {effective_diameter:.1f}")
            print(f"Total distances computed: {len(all_distances):,}")
            
            return max_distance, effective_diameter
    
    # =========================================================================
    # 10. COMPREHENSIVE ANALYSIS
    # =========================================================================
    
    def run_complete_analysis(self, file_path):
        """Run complete graph analysis"""
        print("\nüöÄ Starting Wikipedia Graph Analysis with Neo4j")
        print("="*60)
        
        results = {}
        
        try:
            # Load data
            num_nodes, num_edges = self.load_data(file_path)
            results['Nodes'] = num_nodes
            results['Edges'] = num_edges
            
            # Basic stats
            self.compute_basic_stats()
            
            # WCC
            largest_wcc, num_components = self.compute_wcc()
            results['Largest WCC (nodes)'] = largest_wcc
            results['WCC fraction'] = largest_wcc / num_nodes
            
            # SCC
            largest_scc, largest_scc_edges = self.compute_scc()
            results['Largest SCC (nodes)'] = largest_scc
            results['Largest SCC (edges)'] = largest_scc_edges
            results['SCC fraction'] = largest_scc / num_nodes
            
            # Triangles
            triangles = self.compute_triangles()
            results['Number of triangles'] = triangles
            
            # Closed triangles fraction
            closed_fraction = self.compute_closed_triangles_fraction(triangles)
            results['Closed triangles fraction'] = closed_fraction
            
            # Clustering
            avg_cc = self.compute_clustering_coefficient()
            results['Avg clustering coeff'] = avg_cc
            
            # Diameter
            diameter, eff_diameter = self.compute_diameter()
            results['Diameter'] = diameter
            results['Effective diameter'] = eff_diameter
            
            self.print_results(results)
            
            return results
            
        except Exception as e:
            print(f"‚ùå Error during analysis: {e}")
            raise
    
    def print_results(self, results):
        """Print formatted results"""
        print("\n" + "="*70)
        print("COMPREHENSIVE RESULTS REPORT")
        print("="*70)
        
        expected = {
            'Nodes': 7115,
            'Edges': 103689,
            'Largest WCC (nodes)': 7066,
            'WCC fraction': 0.993,
            'Largest SCC (nodes)': 1300,
            'Largest SCC (edges)': 39456,
            'SCC fraction': 0.183,
            'Avg clustering coeff': 0.1409,
            'Number of triangles': 608389,
            'Closed triangles fraction': 0.04564,
            'Diameter': 7,
            'Effective diameter': 3.8
        }
        
        print(f"{'Metric':<30} {'Expected':<15} {'Computed':<15}")
        print("-" * 70)
        
        for metric, exp_val in expected.items():
            comp_val = results.get(metric, 'N/A')
            
            if isinstance(exp_val, int):
                exp_str = f"{exp_val:,}"
                comp_str = f"{comp_val:,}" if comp_val != 'N/A' else 'N/A'
            else:
                exp_str = f"{exp_val:.4f}"
                comp_str = f"{comp_val:.4f}" if comp_val != 'N/A' else 'N/A'
            
            print(f"{metric:<30} {exp_str:<15} {comp_str:<15}")
        
        print("="*70)


# =============================================================================
# MAIN EXECUTION
# =============================================================================

def main():
    """Locate the data file, run the full analysis and save the results.

    Connection settings are read from the ``NEO4J_URI``, ``NEO4J_USER`` and
    ``NEO4J_PASSWORD`` environment variables, falling back to the previous
    built-in values, so real credentials need not be stored in the source.

    If none of the known data file locations exists, an error is printed and
    the function returns without opening a database connection. Errors during
    the analysis are printed with a traceback and not re-raised; the connection
    is always closed.
    """
    import json

    NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
    NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
    NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "Password")

    # Known locations of the data file, in order of preference.
    candidate_paths = [
        "wiki_vote_data/wiki-Vote.txt",
        "Wiki-Vote.txt",
        "wiki-Vote.txt",
        "wiki_vote_data/Wiki-Vote.txt",
        "../input/bda-assignment1/Wiki-Vote.txt",
        "/kaggle/input/bda-assignment1/Wiki-Vote.txt",
    ]
    data_path = next((path for path in candidate_paths if os.path.exists(path)), None)
    if data_path is None:
        # Fail before connecting: nothing can be analysed without the file.
        print("\n‚ùå Error: data file not found. Looked in:")
        for path in candidate_paths:
            print(f"  {path}")
        return

    print(f"Using data file: {data_path}")

    analyzer = WikiGraphAnalyzer(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

    try:
        # Clear existing data (optional - uncomment to start fresh)
        # print("Clearing existing database...")
        # analyzer.clear_database()

        results = analyzer.run_complete_analysis(data_path)

        print("\n‚úÖ Analysis completed successfully!")

        # Saving is best-effort: a write failure must not fail the run.
        try:
            with open("neo4j_analysis_results.json", "w") as f:
                json.dump(results, f, indent=2)
            print("üìÑ Results saved to neo4j_analysis_results.json")
        except Exception as e:
            print(f"Could not save results to file: {e}")

    except Exception as e:
        print(f"\n‚ùå Error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        analyzer.close()


if __name__ == "__main__":
    main()

Using data file: wiki_vote_data/wiki-Vote.txt
✅ Connected to Neo4j database
⚠️  GDS not available, will use native algorithms: {neo4j_code: Neo.ClientError.Statement.SyntaxError} {message: Unknown procedure output: `version` (line 1, column 26 (offset: 25))
"CALL gds.version() YIELD version RETURN version"
                          ^} {gql_status: 42001} {gql_status_description: error: syntax error or access rule violation - invalid syntax}

🚀 Starting Wikipedia Graph Analysis with Neo4j

📂 LOADING DATA INTO NEO4J
Reading file: wiki_vote_data/wiki-Vote.txt
Loaded 7,115 unique nodes and 103,689 unique edges
Creating nodes...
  Created 5,000 nodes...
  Created 7,115 nodes...
✅ Created 7,115 nodes
Creating index...
Creating relationships...
  Created 10,000 relationships...
  Created 20,000 relationships...
  Created 30,000 relationships...
  Created 40,000 relationships...
  Created 50,000 relationships...
  Created 60,000 relationships...
  Created 70,000 relationships...




✅ Average directed clustering coefficient: 0.13620

📏 COMPUTING DIAMETER
Sampling 101 nodes for diameter estimation...
  Processing node 10/50...
  Processing node 20/50...
  Processing node 30/50...
  Processing node 40/50...
  Processing node 50/50...
Diameter (sampled): 7
Effective diameter (90th percentile): 4.0
Total distances computed: 353,250

COMPREHENSIVE RESULTS REPORT
Metric                         Expected        Computed       
----------------------------------------------------------------------
Nodes                          7,115           7,115          
Edges                          103,689         103,689        
Largest WCC (nodes)            7,066           7,066          
WCC fraction                   0.9930          0.9931         
Largest SCC (nodes)            1,300           1,298          
Largest SCC (edges)            39,456          39,443         
SCC fraction                   0.1830          0.1824         
Avg clustering coeff           0.1409 