In [None]:
    import osmnx as ox

    # Download Bangalore road network
    G = ox.graph_from_place('Bangalore, India', network_type='drive')

    # Convert to nodes and edges DataFrames
    nodes, edges = ox.graph_to_gdfs(G)

    # Save to CSV
    nodes.to_csv('bangalore_nodes.csv')
    edges.to_csv('bangalore_edges.csv')

In [3]:
import osmnx as ox
import pandas as pd
from shapely.geometry import LineString, Polygon

# Configure OSMnx
ox.settings.log_console = True
ox.settings.timeout = 600

# 1. Get Bangalore road network (graph structure)
print("Downloading road network...")
G = ox.graph_from_place("Bangalore, India", network_type="drive")
nodes, edges = ox.graph_to_gdfs(G)

# 2. Get area boundaries (neighborhoods, suburbs)
print("Downloading area boundaries...")
area_tags = {'place': ['suburb', 'neighborhood', 'village']}
areas = ox.features_from_place("Bangalore, India", tags=area_tags)

# Prepare data for Neo4j
print("Preparing data for Neo4j...")

# A. Process Nodes (road intersections)
nodes_df = nodes[['osmid', 'x', 'y']].reset_index()
nodes_df.columns = ['node_id', 'osmid', 'lon', 'lat']

# B. Process Edges (roads)
edges_df = edges[['u', 'v', 'name', 'highway', 'length']].reset_index(drop=True)
edges_df.columns = ['from_node', 'to_node', 'road_name', 'road_type', 'length_meters']

# C. Process Areas
areas_df = areas[['name', 'geometry']].copy()
areas_df['area_wkt'] = areas_df['geometry'].apply(lambda g: g.wkt)
areas_df = areas_df[['name', 'area_wkt']].dropna()

# Save to CSV
nodes_df.to_csv('bangalore_nodes.csv', index=False)
edges_df.to_csv('bangalore_roads.csv', index=False)
areas_df.to_csv('bangalore_areas.csv', index=False)

print("Data extraction complete!")
print(f"- Road nodes: {len(nodes_df)}")
print(f"- Road segments: {len(edges_df)}")
print(f"- Named areas: {len(areas_df)}")

Downloading road network...
Downloading area boundaries...
Preparing data for Neo4j...


KeyError: "['osmid'] not in index"

In [11]:
nodes_reset = nodes.reset_index()

In [13]:
nodes_df = nodes_reset[['osmid', 'x', 'y']].copy()
nodes_df.columns = ['node_id', 'lon', 'lat']

In [15]:
edges_reset = edges.reset_index()

In [17]:
edges_reset

Unnamed: 0,u,v,key,osmid,highway,lanes,name,oneway,reversed,length,...,maxspeed,ref,bridge,width,access,junction,tunnel,est_width,area,service
0,17327095,248007842,0,32261256,residential,2,2nd Main Road,True,False,244.091315,...,,,,,,,,,,
1,17327095,305154531,0,1367650597,secondary,2,9th Cross Road,True,False,29.723619,...,,,,,,,,,,
2,17327095,5354313543,0,1367650758,residential,,2nd Main Road,False,True,5.961520,...,,,,,,,,,,
3,17327139,443224605,0,111814615,primary,2,Mahayogi Vemana Road,True,False,44.630750,...,,,,,,,,,,
4,17327139,309593057,0,28186701,residential,,7th Cross Road,False,False,50.303992,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
388657,13009062796,12139153728,0,1415640024,tertiary,,,False,True,12.693999,...,,,,,,,,,,
388658,13009062797,12139153725,0,408519781,secondary,2,Millers Road,True,False,7.880269,...,,,,,,,,,,
388659,13009062797,13009062796,0,1415640024,tertiary,,,False,True,10.245460,...,,,,,,,,,,
388660,13009125925,521144182,0,1415647262,tertiary,,,False,True,26.287830,...,,,,,,,,,,


In [19]:
edges_df = edges_reset[['u', 'v', 'name', 'highway', 'length']].copy()
edges_df.columns = ['from_node', 'to_node', 'road_name', 'road_type', 'length_meters']

In [20]:
areas = ox.features_from_place("Bangalore, India", tags={'place': ['suburb', 'neighborhood']})
areas_df = areas[['name', 'geometry']].copy()
areas_df['area_wkt'] = areas_df['geometry'].apply(lambda g: g.wkt)

In [21]:
nodes_df.to_csv('bangalore_nodes.csv', index=False)
edges_df.to_csv('bangalore_roads.csv', index=False)
areas_df[['name', 'area_wkt']].to_csv('bangalore_areas.csv', index=False)

In [22]:
from neo4j import GraphDatabase
import pandas as pd

# Neo4j Connection Settings (update these!)
URI = "neo4j://localhost:7687"  # Default Neo4j Bolt port
USER = "neo4j"
PASSWORD = "citygraph"  # Change to your Neo4j password
DATABASE = "graph"  # Default DB (or your DB name)

# Initialize Driver
driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD))

# --------------------------------------------
# 1. Load Data from CSVs
# --------------------------------------------
print("Loading CSV files...")
nodes_df = pd.read_csv("bangalore_nodes.csv")
roads_df = pd.read_csv("bangalore_roads.csv")
areas_df = pd.read_csv("bangalore_areas.csv")

# --------------------------------------------
# 2. Define Cypher Queries
# --------------------------------------------
CREATE_NODES_QUERY = """
    CREATE (n:RoadNode {
        nodeId: $node_id,
        longitude: toFloat($lon),
        latitude: toFloat($lat)
    })
"""

CREATE_ROADS_QUERY = """
    MATCH (u:RoadNode {nodeId: $from_node})
    MATCH (v:RoadNode {nodeId: $to_node})
    MERGE (u)-[r:ROAD {
        name: $road_name,
        type: $road_type,
        length_meters: toFloat($length_meters)
    }]->(v)
"""

CREATE_AREAS_QUERY = """
    CREATE (a:Area {
        name: $name,
        polygon: $area_wkt
    })
"""

# --------------------------------------------
# 3. Upload Data to Neo4j
# --------------------------------------------
def run_query(query, params):
    with driver.session(database=DATABASE) as session:
        session.run(query, **params)

print("Uploading data to Neo4j...")

# Upload Road Nodes
for _, row in nodes_df.iterrows():
    run_query(CREATE_NODES_QUERY, {
        "node_id": row["node_id"],
        "lon": row["lon"],
        "lat": row["lat"]
    })

# Upload Roads (Relationships)
for _, row in roads_df.iterrows():
    run_query(CREATE_ROADS_QUERY, {
        "from_node": row["from_node"],
        "to_node": row["to_node"],
        "road_name": row["road_name"],
        "road_type": row["road_type"],
        "length_meters": row["length_meters"]
    })

# Upload Areas
for _, row in areas_df.iterrows():
    run_query(CREATE_AREAS_QUERY, {
        "name": row["name"],
        "area_wkt": row["area_wkt"]
    })

# --------------------------------------------
# 4. Create Indexes
# --------------------------------------------
INDEX_QUERIES = [
    "CREATE INDEX FOR (n:RoadNode) ON (n.nodeId)",
    "CREATE INDEX FOR (a:Area) ON (a.name)",
    "CREATE POINT INDEX FOR (n:RoadNode) ON (n.location)"
]

with driver.session(database=DATABASE) as session:
    for query in INDEX_QUERIES:
        session.run(query)

print("Data upload complete!")
driver.close()

Loading CSV files...
Uploading data to Neo4j...


ClientError: {code: Neo.ClientError.Statement.SemanticError} {message: Cannot merge the following relationship because of NaN property value for 'name': (u)-[:ROAD {name: NaN}]->(v)}

In [23]:
roads_df = roads_df.dropna(subset=["road_name"])  # Remove rows where road_name is NaN

In [None]:
for _, row in roads_df.iterrows():
    run_query(CREATE_ROADS_QUERY, {
        "from_node": row["from_node"],
        "to_node": row["to_node"],
        "road_name": row["road_name"],
        "road_type": row["road_type"],
        "length_meters": row["length_meters"]
    })