### Import to Neo4j

In [1]:
from neo4j import GraphDatabase
import csv
import os

# Neo4j connection parameters
URI = "bolt://localhost:7687"  
USERNAME = "neo4j"  
PASSWORD = "Password1234"  

class Neo4jImporter:
    def __init__(self, uri, username, password):
        self.driver = GraphDatabase.driver(uri, auth=(username, password))
        
    def close(self):
        self.driver.close()
        
    def clear_database(self):
        with self.driver.session() as session:
            
            session.run("MATCH (n) DETACH DELETE n")
            print("Database cleared.")
    
    def create_constraints(self):
        with self.driver.session() as session:
            
            try:
                session.run("CREATE CONSTRAINT station_id_unique IF NOT EXISTS FOR (s:Station) REQUIRE s.id IS UNIQUE")
                print("Constraint created.")
            except Exception as e:
                
                print(f"Note: {e}")
                
    def import_stations(self, csv_file):
        station_count = 0
        with open(csv_file, 'r', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            with self.driver.session() as session:
                for row in reader:
                    
                    query = """
                    CREATE (s:Station {
                        id: $id,
                        name: $name,
                        x_coord: toFloat($x_coord),
                        y_coord: toFloat($y_coord),
                        type: $type
                    })
                    """
                    session.run(query, 
                               id=row['StationID'], 
                               name=row['Name'], 
                               x_coord=row['X_Coordinate'], 
                               y_coord=row['Y_Coordinate'], 
                               type=row['Type'])
                    station_count += 1
        print(f"Imported {station_count} stations.")
                
    def import_connections(self, csv_file):
        connection_count = 0
        with open(csv_file, 'r', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            with self.driver.session() as session:
                for row in reader:
                    
                    query = """
                    MATCH (a:Station {id: $source}), (b:Station {id: $target})
                    CREATE (a)-[r:CONNECTS_TO {
                        distance: toFloat($distance),
                        passengers: toInteger($passengers)
                    }]->(b)
                    """
                    session.run(query, 
                               source=row['SourceStationID'], 
                               target=row['TargetStationID'], 
                               distance=row['Distance_km'], 
                               passengers=row['DailyPassengers'])
                    connection_count += 1
        print(f"Imported {connection_count} connections.")


stations_file = os.path.join('data', 'stations.csv')
connections_file = os.path.join('data', 'connections.csv')


if __name__ == "__main__":
    try:
        
        importer = Neo4jImporter(URI, USERNAME, PASSWORD)
        print("Connected to Neo4j database.")
        
        
        importer.clear_database()
        importer.create_constraints()
        
        # Import data
        print("Importing station data...")
        importer.import_stations(stations_file)
        print("Importing connection data...")
        importer.import_connections(connections_file)
        
        print("Import complete!")
        importer.close()
    except Exception as e:
        print(f"Error: {e}")

Connected to Neo4j database.
Database cleared.
Constraint created.
Importing station data...
Imported 50 stations.
Importing connection data...
Imported 167 connections.
Import complete!


In [4]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from neo4j import GraphDatabase
import numpy as np
from community.community_louvain import best_partition
import seaborn as sns

class GraphModeler:
    def __init__(self, uri, username, password):
        self.driver = GraphDatabase.driver(uri, auth=(username, password))
        
    def close(self):
        self.driver.close()
        
    def extract_stations(self):
        """Extract station data from Neo4j"""
        with self.driver.session() as session:
            result = session.run("""
                MATCH (s:Station) 
                RETURN s.id AS id, s.name AS name, s.x_coord AS x, s.y_coord AS y, s.type AS type
            """)
            
            stations = [(record["id"], {
                "name": record["name"],
                "x": record["x"],
                "y": record["y"],
                "type": record["type"]
            }) for record in result]
            
            return stations
    
    def extract_connections(self):
        """Extract connection data from Neo4j"""
        with self.driver.session() as session:
            result = session.run("""
                MATCH (a:Station)-[r:CONNECTS_TO]->(b:Station)
                RETURN a.id AS source, b.id AS target, r.distance AS distance, r.passengers AS passengers
            """)
            
            connections = [(record["source"], record["target"], {
                "distance": record["distance"],
                "passengers": record["passengers"]
            }) for record in result]
            
            return connections
    
    def create_network_graph(self):
        """Create a NetworkX graph from Neo4j data"""
        G = nx.DiGraph()
        
        # Add stations as nodes
        stations = self.extract_stations()
        G.add_nodes_from(stations)
        
        # Add connections as edges
        connections = self.extract_connections()
        G.add_edges_from(connections)
        
        return G
    
    def visualize_graph(self, G, filename="transportation_network.png"):
        """Visualize the graph using NetworkX"""
        plt.figure(figsize=(12, 10))
        
        # Create position layout based on coordinates
        pos = {node: (G.nodes[node]['x'], G.nodes[node]['y']) for node in G.nodes()}
        
        # Node colors based on type
        node_types = [G.nodes[node]['type'] for node in G.nodes()]
        unique_types = list(set(node_types))
        color_map = {t: i for i, t in enumerate(unique_types)}
        node_colors = [color_map[G.nodes[node]['type']] for node in G.nodes()]
        
        # Edge weights based on passengers
        edge_weights = [G[u][v]['passengers']/5000 for u, v in G.edges()]
        
        # Draw the network
        nx.draw_networkx_nodes(G, pos, node_size=100, node_color=node_colors, cmap=plt.cm.tab10)
        nx.draw_networkx_edges(G, pos, width=edge_weights, alpha=0.6, edge_color='gray', arrows=False)
        
        # Add node labels for main stations
        main_stations = {node: G.nodes[node]['name'] for node in G.nodes() 
                         if G.degree(node) > 3}  # Only label major stations
        nx.draw_networkx_labels(G, pos, labels=main_stations, font_size=8)
        
        plt.title("Urban Transportation Network")
        plt.axis('off')
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()
        
        return filename
    
    def detect_communities(self, G):
        """Apply Louvain community detection algorithm"""
        # Convert to undirected for community detection
        G_undirected = G.to_undirected()
        
        # Apply Louvain algorithm
        partition = best_partition(G_undirected)
        
        # Add community information to the original graph
        nx.set_node_attributes(G, partition, 'community')
        
        return G, partition
    
    def visualize_communities(self, G, partition, filename="transportation_communities.png"):
        """Visualize the communities"""
        plt.figure(figsize=(12, 10))
        
        # Create position layout based on coordinates
        pos = {node: (G.nodes[node]['x'], G.nodes[node]['y']) for node in G.nodes()}
        
        # Set color for each community
        communities = set(partition.values())
        colors = sns.color_palette("hls", len(communities))
        
        # Draw nodes colored by community
        for i, community in enumerate(communities):
            nodes = [node for node in G.nodes() if partition[node] == community]
            nx.draw_networkx_nodes(G, pos, nodelist=nodes, node_color=[colors[i]]*len(nodes), 
                                  node_size=100, label=f"Community {community}")
        
        # Draw edges with transparency
        nx.draw_networkx_edges(G, pos, alpha=0.3, arrows=False)
        
        # Add main stations labels
        main_stations = {node: G.nodes[node]['name'] for node in G.nodes() 
                        if G.degree(node) > 3}
        nx.draw_networkx_labels(G, pos, labels=main_stations, font_size=8)
        
        plt.title("Urban Transportation Network Communities")
        plt.axis('off')
        plt.legend(loc='upper right')
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()
        
        return filename
    
    def analyze_graph(self, G):
        """Analyze the graph metrics"""
        analysis = {
            'nodes': G.number_of_nodes(),
            'edges': G.number_of_edges(),
            'average_degree': sum(dict(G.degree()).values()) / G.number_of_nodes(),
            'density': nx.density(G),
            'is_connected': nx.is_strongly_connected(G),
            'average_shortest_path': nx.average_shortest_path_length(G) if nx.is_strongly_connected(G) else "N/A"
        }
        
        # Identify key stations (hubs)
        betweenness = nx.betweenness_centrality(G)
        top_hubs = sorted(betweenness.items(), key=lambda x: x[1], reverse=True)[:5]
        analysis['top_hubs'] = [(G.nodes[node]['name'], score) for node, score in top_hubs]
        
        return analysis

# Neo4j connection parameters
URI = "bolt://localhost:7687"  
USERNAME = "neo4j"  
PASSWORD = "Password1234"

if __name__ == "__main__":
    try:
        # Create graph modeler
        modeler = GraphModeler(URI, USERNAME, PASSWORD)
        print("Connected to Neo4j database.")
        
        # Create NetworkX graph from Neo4j data
        print("Creating graph from Neo4j data...")
        G = modeler.create_network_graph()
        print(f"Graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")
        
        # Visualize the graph
        print("Visualizing the transportation network...")
        graph_image = modeler.visualize_graph(G)
        print(f"Graph visualization saved as {graph_image}")
        
        # Apply community detection
        print("Detecting communities using Louvain algorithm...")
        G, partition = modeler.detect_communities(G)
        communities_count = len(set(partition.values()))
        print(f"Detected {communities_count} communities.")
        
        # Visualize communities
        print("Visualizing communities...")
        communities_image = modeler.visualize_communities(G, partition)
        print(f"Communities visualization saved as {communities_image}")
        
        # Analyze the graph
        print("Analyzing graph metrics...")
        analysis = modeler.analyze_graph(G)
        print("\nGraph Analysis Results:")
        for key, value in analysis.items():
            print(f"- {key}: {value}")
            
        modeler.close()
        print("\nGraph modeling complete!")
        
    except Exception as e:
        print(f"Error: {e}")

Connected to Neo4j database.
Creating graph from Neo4j data...
Graph created with 50 nodes and 167 edges.
Visualizing the transportation network...
Graph visualization saved as transportation_network.png
Detecting communities using Louvain algorithm...
Detected 5 communities.
Visualizing communities...
Communities visualization saved as transportation_communities.png
Analyzing graph metrics...

Graph Analysis Results:
- nodes: 50
- edges: 167
- average_degree: 6.68
- density: 0.06816326530612245
- is_connected: False
- average_shortest_path: N/A
- top_hubs: [('Station 10', 0.13557239363999565), ('Station 35', 0.11947127065111758), ('Station 12', 0.10035042786956053), ('Station 4', 0.09536497138537954), ('Station 34', 0.08872894395853578)]

Graph modeling complete!


In [6]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from neo4j import GraphDatabase
import numpy as np
from community.community_louvain import best_partition # Assurez-vous d'avoir 'python-louvain' installé (pip install python-louvain)
import seaborn as sns # Assurez-vous d'avoir 'seaborn' installé (pip install seaborn)
import csv
import os

# --- Paramètres de connexion ---
URI = "bolt://localhost:7687"
USERNAME = "neo4j"
PASSWORD = "Password1234"

# --- Importation dans Neo4j ---
class Neo4jImporter:
    def __init__(self, uri, username, password):
        self.driver = GraphDatabase.driver(uri, auth=(username, password))
    def close(self):
        self.driver.close()
    def clear_database(self):
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
            print("Database cleared.")
    def create_constraints(self):
        with self.driver.session() as session:
            try:
                session.run("CREATE CONSTRAINT station_id_unique IF NOT EXISTS FOR (s:Station) REQUIRE s.id IS UNIQUE")
                print("Constraint created.")
            except Exception as e:
                print(f"Note: {e}")
    def import_stations(self, csv_file):
        station_count = 0
        with open(csv_file, 'r', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            with self.driver.session() as session:
                for row in reader:
                    query = """
                    CREATE (s:Station {
                        id: $id,
                        name: $name,
                        x_coord: toFloat($x_coord),
                        y_coord: toFloat($y_coord),
                        type: $type
                    })
                    """
                    session.run(query,
                        id=row['StationID'],
                        name=row['Name'],
                        x_coord=row['X_Coordinate'],
                        y_coord=row['Y_Coordinate'],
                        type=row['Type'])
                    station_count += 1
        print(f"Imported {station_count} stations.")
    def import_connections(self, csv_file):
        connection_count = 0
        with open(csv_file, 'r', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            with self.driver.session() as session:
                for row in reader:
                    query = """
                    MATCH (a:Station {id: $source}), (b:Station {id: $target})
                    CREATE (a)-[r:CONNECTS_TO {
                        distance: toFloat($distance),
                        passengers: toInteger($passengers)
                    }]->(b)
                    """
                    session.run(query,
                        source=row['SourceStationID'],
                        target=row['TargetStationID'],
                        distance=row['Distance_km'],
                        passengers=row['DailyPassengers'])
                    connection_count += 1
        print(f"Imported {connection_count} connections.")

# --- Modélisation et analyse du graphe ---
class GraphModeler:
    def __init__(self, uri, username, password):
        self.driver = GraphDatabase.driver(uri, auth=(username, password))
    def close(self):
        self.driver.close()
    def extract_stations(self):
        with self.driver.session() as session:
            result = session.run("""
                MATCH (s:Station)
                RETURN s.id AS id, s.name AS name, s.x_coord AS x, s.y_coord AS y, s.type AS type
            """)
            stations = [(record["id"], {
                "name": record["name"],
                "x": record["x"],
                "y": record["y"],
                "type": record["type"]
            }) for record in result]
            return stations
    def extract_connections(self):
        with self.driver.session() as session:
            result = session.run("""
                MATCH (a:Station)-[r:CONNECTS_TO]->(b:Station)
                RETURN a.id AS source, b.id AS target, r.distance AS distance, r.passengers AS passengers
            """)
            connections = [(record["source"], record["target"], {
                "distance": record["distance"],
                "passengers": record["passengers"]
            }) for record in result]
            return connections
    def create_network_graph(self):
        G = nx.DiGraph()
        stations = self.extract_stations()
        G.add_nodes_from(stations)
        connections = self.extract_connections()
        weighted_connections = [(u, v, {'weight': data['passengers']}) for u, v, data in connections]
        G.add_edges_from(weighted_connections)
        return G
    def visualize_graph(self, G, filename="transportation_network.png"):
        plt.figure(figsize=(12, 10))
        pos = {node: (G.nodes[node]['x'], G.nodes[node]['y']) for node in G.nodes()}
        node_types = [G.nodes[node]['type'] for node in G.nodes()]
        unique_types = list(set(node_types))
        color_map = {t: i for i, t in enumerate(unique_types)}
        node_colors = [color_map[G.nodes[node]['type']] for node in G.nodes()]
        edge_weights = [G[u][v].get('weight', 1)/5000 for u, v in G.edges()]
        nx.draw_networkx_nodes(G, pos, node_size=100, node_color=node_colors, cmap=plt.cm.tab10)
        nx.draw_networkx_edges(G, pos, width=edge_weights, alpha=0.6, edge_color='gray', arrows=False)
        main_stations = {node: G.nodes[node]['name'] for node in G.nodes() if G.degree(node) > 3}
        nx.draw_networkx_labels(G, pos, labels=main_stations, font_size=8)
        plt.title("Urban Transportation Network")
        plt.axis('off')
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()
        return filename
    def detect_communities(self, G):
        G_undirected = G.to_undirected()
        partition = best_partition(G_undirected, weight='weight')
        nx.set_node_attributes(G, partition, 'community')
        return G, partition
    def update_communities_in_neo4j(self, partition):
        """Met à jour les nœuds Neo4j avec l'information de communauté (en tant que propriété)."""
        with self.driver.session() as session:
            print("Updating community information in Neo4j (as property)...")
            tx = session.begin_transaction()
            for node_id, community_id in partition.items():
                query = """
                MATCH (s:Station {id: $id})
                SET s.community = $community
                """
                tx.run(query, id=node_id, community=str(community_id))
            tx.commit()
            print("Community information updated in Neo4j (property).")
    def visualize_communities(self, G, partition, filename="transportation_communities.png"):
        plt.figure(figsize=(12, 10))
        pos = {node: (G.nodes[node]['x'], G.nodes[node]['y']) for node in G.nodes()}
        communities = set(partition.values())
        colors = sns.color_palette("hls", len(communities))
        for i, community in enumerate(communities):
            nodes = [node for node in G.nodes() if partition[node] == community]
            nx.draw_networkx_nodes(G, pos, nodelist=nodes, node_color=[colors[i]]*len(nodes),
                                   node_size=100, label=f"Community {community}")
        nx.draw_networkx_edges(G, pos, alpha=0.3, arrows=False)
        main_stations = {node: G.nodes[node]['name'] for node in G.nodes() if G.degree(node) > 3}
        nx.draw_networkx_labels(G, pos, labels=main_stations, font_size=8)
        plt.title("Urban Transportation Network Communities")
        plt.axis('off')
        plt.legend(loc='upper right')
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()
        return filename

    def add_community_labels_to_neo4j(self, partition):
        """
        Ajoute un label de communauté dynamique (ex: :Community0) à chaque nœud
        basé sur la partition Louvain. Nécessite APOC.
        """
        with self.driver.session() as session:
            print("Adding dynamic community labels to Neo4j (requires APOC)...")
            tx = session.begin_transaction()
            for node_id, community_id in partition.items():
                label_name = f"Community{community_id}"
                query = """
                MATCH (s:Station {id: $id})
                CALL apoc.create.addLabels(s, [$label_name]) YIELD node
                RETURN node
                """
                tx.run(query, id=node_id, label_name=label_name)
            tx.commit()
            print("Dynamic community labels added.")

    def remove_community_labels_from_neo4j(self):
        """
        Supprime tous les labels de communauté dynamiques (ex: :Community0) des nœuds.
        Nécessite APOC.
        """
        with self.driver.session() as session:
            print("Removing dynamic community labels from Neo4j...")
            tx = session.begin_transaction()
            query = """
            MATCH (n)
            UNWIND labels(n) AS label
            WITH n, label
            WHERE label STARTS WITH 'Community'
            CALL apoc.create.removeLabels(n, [label]) YIELD node
            RETURN count(node) AS nodesUpdated
            """
            result = tx.run(query).single()[0]
            tx.commit()
            print(f"{result} dynamic community labels removed.")

# --- Exécution principale ---
stations_file = os.path.join('data', 'stations.csv')
connections_file = os.path.join('data', 'connections.csv')

if __name__ == "__main__":
    try:
        # 1. Importation dans Neo4j
        importer = Neo4jImporter(URI, USERNAME, PASSWORD)
        print("Connected to Neo4j database for import.")
        
        # Optionnel: Vider et réimporter la base de données à chaque exécution
        importer.clear_database()
        importer.create_constraints()
        print("Importing station data...")
        importer.import_stations(stations_file)
        print("Importing connection data...")
        importer.import_connections(connections_file)
        importer.close()
        print("Import complete!")

        # 2. Création et analyse du graphe
        modeler = GraphModeler(URI, USERNAME, PASSWORD)
        print("\nConnected to Neo4j database for modeling.")
        
        print("Creating graph from Neo4j data...")
        G = modeler.create_network_graph()
        print(f"Graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")

        print("Visualizing the transportation network...")
        graph_image = modeler.visualize_graph(G)
        print(f"Graph visualization saved as {graph_image}")

        print("Detecting communities using Louvain algorithm...")
        G, partition = modeler.detect_communities(G)
        communities_count = len(set(partition.values()))
        print(f"Detected {communities_count} communities.")

        # --- APOC: Remove existing dynamic community labels before adding new ones ---
        modeler.remove_community_labels_from_neo4j()
        # --- APOC: Add dynamic community labels to Neo4j ---
        modeler.add_community_labels_to_neo4j(partition) 
        
        print("Updating Neo4j with community information (as property)...")
        modeler.update_communities_in_neo4j(partition)

        print("Visualizing communities...")
        communities_image = modeler.visualize_communities(G, partition)
        print(f"Communities visualization saved as {communities_image}")
        
        # Analyse the graph
        print("Analyzing graph metrics...")
        analysis = modeler.analyze_graph(G)
        print("\nGraph Analysis Results:")
        for key, value in analysis.items():
            print(f"- {key}: {value}")

        modeler.close()
        print("\nGraph modeling complete!")
        
    except FileNotFoundError as fnf_error:
        print(f"Error: {fnf_error}. Please ensure '{stations_file}' and '{connections_file}' are in the correct 'data' subdirectory relative to the script.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

Connected to Neo4j database for import.
Database cleared.
Constraint created.
Importing station data...
Imported 50 stations.
Importing connection data...
Imported 167 connections.
Import complete!

Connected to Neo4j database for modeling.
Creating graph from Neo4j data...
Graph created with 50 nodes and 167 edges.
Visualizing the transportation network...
Graph visualization saved as transportation_network.png
Detecting communities using Louvain algorithm...
Detected 6 communities.
Removing dynamic community labels from Neo4j...
An unexpected error occurred: {code: Neo.ClientError.Procedure.ProcedureNotFound} {message: There is no procedure with the name `apoc.create.removeLabels` registered for this database instance. Please ensure you've spelled the procedure name correctly and that the procedure is properly deployed.}
