# Create complete network
- get all layers created so far (grid, roads for cars and bikes, public transport layer(s))
- Make sure they are connected to each other (aligning nodes or creating new edges when needed)
- Add everything to a NetworkX graph

In [1]:
import networkx as nx
import os
os.environ['USE_PYGEOS'] = '1'
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import shapely
import snkit
import snkit.network
crs_fr = 2154 #4326 ?
plt.rcParams['figure.figsize'] = (10,10)



In [2]:
# --- From Anastassia
# import packages
%run -i packages.py

# CUSTOM FUNCTION

# define function that creates attribute dictionary for nodes and edges
# (for input to nx.add_edges_from/add_nodes_from)
def make_attr_dict(*args, **kwargs):
    """Bundle keyword arguments into an attribute dictionary.

    Meant as input for ``nx.add_nodes_from`` / ``nx.add_edges_from``.
    Positional arguments are accepted but ignored.

    Returns:
        A new dict mapping every keyword name to its value, or ``None``
        when no keyword argument is given.
    """
    if not kwargs:
        return None  # no attributes were supplied
    return {name: value for name, value in kwargs.items()}

## Get layers

### grid, boundary

In [3]:
# Boundary of the GPM area (only used as a backdrop in plots):
# read the GeoJSON file, then reproject it to the working CRS.
GPM = gpd.read_file('data/raw/GPM_geometry/GPM.geojson')
GPM = GPM.to_crs(crs_fr)

In [4]:
# Load the grid table; its 'geometry' column is renamed to 'cells'
grid = pd.read_csv('data/processed/IRIS_GPM.csv')
grid = grid.rename(columns={'geometry': 'cells'})

# Both geometry columns are stored as WKT text: parse them into shapely objects
grid['cells'] = grid['cells'].apply(shapely.wkt.loads)
grid['centroid'] = grid['centroid'].apply(shapely.wkt.loads)

# `grid` uses the cells as active geometry; `centroids` is the same table
# with the centroid points as active geometry (kept separate for plots)
grid = gpd.GeoDataFrame(grid, geometry='cells', crs=crs_fr)
centroids = grid.set_geometry('centroid').set_crs(crs_fr)


In [5]:
# Give every centroid a unique number (its former row index, stored in 'id')
# and tag all of them with nodetype 'centroid'
centroids = (
    centroids
    .reset_index(drop=False)
    .rename(columns={'index': 'id'})
    .assign(nodetype='centroid')
)

### roads 

In [6]:
# Get roads (car/bike)
# Reads the combined car/bike edge table from CSV; the 'key' column is dropped on load.
# The 'geometry' column is still WKT text here (it is parsed in the next cell).
edges_all = pd.read_csv("./data/processed/carbike_edges.csv").drop(columns = 'key')

In [None]:
### Processing: adapted from Anastassia's code
# Turns the raw edge table `edges_all` into `edges_all_unique`: one undirected
# edge per row, typed 'car', 'bike' or 'both', with a NetworkX attribute dict
# and ordered endpoints x < y.
# NOTE: this cell modifies `edges_all` in place (geometry parsing, edge_id),
# so run it exactly once after the cell that loads the CSV.

## Step 1: turn the dataframe into a geodataframe
edges_all["geometry"] = edges_all["geometry"].apply(shapely.wkt.loads)
edges_all = gpd.GeoDataFrame(edges_all, geometry = "geometry")

# add edge ids: the string form of the sorted endpoint pair, "[id1, id2]" with id1 < id2
# (edge ids are strings; convert back with ast.literal_eval)
edges_all["edge_id"] = edges_all.apply(lambda row: str(sorted([row["u"], row["v"]])), axis = 1)

## Step 2: simplify the network into an undirected one
# Keep one row per (osmid, edge_id): this removes the reverse-direction copy of an edge.
# MOD: removed oneway and length from the subset
# A second drop_duplicates on ["u", "v", "osmid", "edge_id", "edgetype"] used to
# follow; it was a no-op (rows unique on (osmid, edge_id) are unique on any
# superset of those columns) and has been removed.
# NOTE(review): "edgetype" is not part of the subset, so a car row and a bike row
# sharing both osmid and edge_id are merged here, before they can be tagged
# "both" below - confirm this is intended.
edges_all_unique = edges_all.drop_duplicates(subset = ["osmid", "edge_id"],
                                             keep = "first",
                                             ignore_index = True).copy()

## Step 3: merge node pairs that exist both as a car edge and as a bike edge
# Only node pairs present with BOTH edge types are merged. Previously every
# duplicated edge_id was treated this way, so two parallel car edges deleted
# each other (losing the connection) and two parallel bike edges were
# mislabelled "both".
is_bike = edges_all_unique["edgetype"] == "bike"
is_car = edges_all_unique["edgetype"] == "car"
pair_has_bike = is_bike.groupby(edges_all_unique["edge_id"]).transform("any")
pair_has_car = is_car.groupby(edges_all_unique["edge_id"]).transform("any")
shared_pair = pair_has_bike & pair_has_car

# the bike row survives and is re-typed; the car twin is dropped
# MOD changed "multi" to "both"
edges_all_unique.loc[shared_pair & is_bike, "edgetype"] = "both"
edges_all_unique = edges_all_unique.drop(edges_all_unique[shared_pair & is_car].index)

## Step 4: add attribute dictionary (for nx)
edges_all_unique["attr_dict"] = edges_all_unique.apply(
    lambda row: make_attr_dict(edgetype = row.edgetype,
                               edge_id = row.edge_id,
                               # MOD-Stephan: used to be coord
                               geometry = row.geometry,
                               # intnodes: for storing simplification info on interstitial nodes
                               intnodes = []),
    axis = 1)

## Step 5: order the endpoints (x < y) and sort by the "left" node
# (controls the order of the tuple keys in nx)
# MOD: used "x" and "y" instead of "orig" and "dest"
edges_all_unique["x"] = edges_all_unique[["u", "v"]].min(axis = 1)
edges_all_unique["y"] = edges_all_unique[["u", "v"]].max(axis = 1)
# "u" and "v" are replaced by "x" and "y", where osmid(x) < osmid(y)
edges_all_unique = (edges_all_unique
                    .sort_values(by = "x")
                    .reset_index(drop = True)
                    .drop(columns = ["u", "v"]))

# remove the temporary masks from the notebook namespace
del(is_bike, is_car, pair_has_bike, pair_has_car, shared_pair)

In [8]:
# # Plot
# ax = plt.axes()
# edges_all.plot(ax=ax, linewidth = 0.3, alpha = 0.5)
# nodes_all.plot(ax=ax, markersize = 0.05, facecolor = 'red', alpha = 1)
# GPM.plot(ax=ax, facecolor = 'none', linewidth = 2)

# Connect layers
A network is created with
- nodes = centroids
- edges = roads

These edges and nodes are connected by creating an edge between each centroid and its nearest edge.

In [77]:
# Build the base snkit network: nodes are the centroids, edges are the car/bike roads

# Centroids: the 'centroid' column becomes the 'geometry' column, is declared
# in crs_fr and then reprojected to EPSG:4326
nodes = (
    centroids
    .rename(columns={'centroid': 'geometry'})
    .set_geometry('geometry')
    .set_crs(crs_fr)
    .to_crs(4326)
)

# Road edges are declared as EPSG:4326 (nothing is reprojected here)
edges = gpd.GeoDataFrame(edges_all_unique, geometry='geometry', crs=4326)

base_network = snkit.Network(nodes, edges)

# # plot
# ax = plt.axes()
# base_network.edges.plot(ax=ax, linewidth = 1, alpha = 0.6)
# base_network.nodes.plot(ax=ax, facecolor = 'red', markersize = 3)

In [80]:
%%time 
# link centroids to their nearest edge (takes a while)
# The result `linked` is a network whose nodes/edges tables contain the original
# rows plus the rows created by the linking step.
# NOTE(review): base_network is in EPSG:4326 (degrees); if the warning below is
# about distances in a geographic CRS, linking in crs_fr may be more accurate - confirm.

linked = snkit.network.link_nodes_to_nearest_edge(base_network) # TODO: find out what the warning raised by this call means

# # plot
# ax = plt.axes()
# linked.edges.plot(ax=ax, linewidth = 1, alpha = 0.5)
# linked.nodes.plot(ax=ax, facecolor = 'red', markersize = 3, alpha = 1)



CPU times: user 12min 8s, sys: 5.14 s, total: 12min 13s
Wall time: 12min 17s


In [109]:
# Separate old nodes (centroids) from new ones (intersection between centroid and nearest edge)
# The number of new rows is taken from the NaN count of the column at position 2
# (assumes the new rows sit at the end of the table and are the only ones with
# NaN in that column - TODO confirm).
# `.iloc` makes the positional lookup explicit: the counts are labelled by column
# name, and plain `[2]` relied on the deprecated positional fallback of Series indexing.
new_nodes = linked.nodes.tail(linked.nodes.isna().sum().iloc[2])

# Separate old edges (carbike network) from new ones (modified carbike edges and new edges)
# Same approach, using the NaN count of the column at position 0.
new_edges = linked.edges.tail(linked.edges.isna().sum().iloc[0])

# Post-linking processing
- The edges have been modified and new ones were created -> check it all makes sense! 
- The new nodes (intersection between new edge and old edge) need to be added to the car/bike node network

In [97]:
# Attribute dictionary for every centroid (adapted from Anastassia):
# its point geometry, its IRIS code and its node type
centroids["attr_dict"] = centroids.apply(
    lambda row: make_attr_dict(
        geometry=row["centroid"],
        code_iris=row["CODE_IRIS"],
        nodetype=row["nodetype"],
    ),
    axis=1,
)

In [98]:
# Get carbike/nodes
# Reads the car/bike node table from CSV; its 'geometry' column is still WKT text
# at this point (it is parsed further down, before the link nodes are appended).
nodes_all = pd.read_csv("./data/processed/carbike_nodes.csv")

In [99]:
# Make the new_nodes table compatible with nodes_all:
# drop the columns inherited from the centroid table and expose the former
# row index as 'osmid'
new_nodes = (
    new_nodes
    .drop(columns=['NOM_COM', 'CODE_IRIS', 'NOM_IRIS', 'osm_id', 'cells', 'nodetype', 'id'])
    .reset_index(drop=False)
    .rename(columns={'index': 'osmid'})
)

# every new node is typed 'bike'
new_nodes['nodetype'] = 'bike'

# coordinates of each point as separate columns
new_nodes['x'] = new_nodes.geometry.apply(lambda point: point.x)
new_nodes['y'] = new_nodes.geometry.apply(lambda point: point.y)

new_nodes.head(1)

Unnamed: 0,osmid,geometry,nodetype,x,y
0,2844,POINT (2.37468 48.86970),bike,2.374681,48.869698


In [100]:
# Build the final node table (car/bike nodes + link nodes), following the
# original node processing (Anastassia)

# Column "nodetype": 'bike' when the node is the 'u' endpoint of a bike edge, 'car' otherwise.
# NOTE(review): only 'u' endpoints are looked at; a node that appears solely as the
# 'v' end of a bike edge is typed 'car' - confirm whether 'v' should be included.
bike_nodes = set(edges_all.loc[edges_all['edgetype'] == 'bike', 'u'])

# Vectorised labelling: replaces the row-by-row loop and creates the column
# directly as strings (it used to be initialised with the integer 0).
nodes_all['nodetype'] = nodes_all['osmid'].isin(bike_nodes).map({True: 'bike', False: 'car'})

# parse the WKT text into shapely geometries
nodes_all["geometry"] = nodes_all["geometry"].apply(shapely.wkt.loads)

# MOD: Add the new_nodes to the old nodes
nodes_all = pd.concat([nodes_all, new_nodes])

nodes_all = gpd.GeoDataFrame(nodes_all, geometry = "geometry")

## Sort by osmid (no duplicates are removed here)
nodes_all = nodes_all.sort_values(by = "osmid").reset_index(drop = True)

# make attribute dictionary with type and geocoordinates for each node
# MOD: removed category_node = x.type
# MOD: changed coord to geometry
# MOD-Stephan : split geometry into lat and lon
# FIX: 'x' holds the longitude and 'y' the latitude (the new_nodes preview shows
# x = 2.37, y = 48.87), so lat is read from y and lon from x; they were swapped.
nodes_all["attr_dict"] = nodes_all.apply(lambda row: make_attr_dict(lat = row.y,
                                                                    lon = row.x,
                                                                    nodetype = row.nodetype),
                                         axis = 1)


In [101]:
# Save latest nodes
# nodes_final is a plain pandas DataFrame built from nodes_all (car/bike nodes
# plus the link nodes); it is written to CSV for later use.
nodes_final = pd.DataFrame(nodes_all)
nodes_final.to_csv("./data/processed/carbike_centroids_link_nodes.csv")

In [49]:
# what about the edges?
# Preview the first row of the processed road edges to see which columns
# the linked edges will have to provide.
edges_all_unique.head(1)

Unnamed: 0,osmid,edgetype,geometry,edge_id,attr_dict,x,y
0,"[332409156, 39750470, 39750471, 1105411815, 42...",car,"LINESTRING (2.46355 48.88408, 2.46236 48.88415...","[122926, 318399738]","{'edgetype': 'car', 'edge_id': '[122926, 31839...",122926,318399738


In [111]:
# GOAL: modify linked.edges so it can become the new edges_all_unique
# NOTE(review): this binds a second name to linked.edges rather than copying it,
# so the column assignment below presumably also changes linked.edges - confirm
# that this is wanted (use .copy() otherwise).
edges_new = linked.edges

# add edgetype
# rows without an edgetype get 'both'
# (presumably these are the edges created by the linking step - verify)
edges_new['edgetype'] = edges_new['edgetype'].fillna('both')

# add edge_ID (also gives x and y)
# TODO: not done yet - the rows created by the linking step have no edge_id (see the preview below)
# edges_new['x'] = edges_new.edge_id.apply(lambda p: p[0])
# edges_new['y'] = edges_new.edge_id.apply(lambda p: p[1])


# note: this displays new_edges (defined in an earlier cell), not edges_new
new_edges.tail(1)

Unnamed: 0,osmid,edgetype,geometry,edge_id,attr_dict,x,y
167858,,both,"LINESTRING (2.27666 48.76719, 2.27667 48.76703)",,,,


# Creating the NetworkX Graph

In [24]:
# NetworkX graph of the road network (adapted from Anastassia's code)
# nx.Graph() is a simple undirected graph, so it keeps at most one edge
# per node pair.
car_bike_graph = nx.Graph()

# nodes are fed as (osmid, attr_dict) tuples
car_bike_graph.add_nodes_from(
    nodes_all[["osmid", "attr_dict"]].itertuples(index=False)
)

# edges are fed as (x, y, attr_dict) tuples
# MOD: x and y instead of orig and dest
car_bike_graph.add_edges_from(
    edges_all_unique[["x", "y", "attr_dict"]].itertuples(index=False)
)

# MOD: removed the LCC part

# MOD: removed the LCC part

NameError: name 'nodes_all' is not defined

In [None]:
# Sanity check: shortest path between two nodes of the car/bike graph.
# networkx is imported as `nx` and the graph built above is `car_bike_graph`;
# the former `networkx.shortest_path(G, ...)` used two names that this notebook
# never defines.
# NOTE(review): 'node_0' / 'node_10' are placeholder ids - the graph nodes are
# keyed by osmid, so replace them with real node ids before running.
nx.shortest_path(car_bike_graph, 'node_0', 'node_10')

# What's next?

THIS WEEK
- try re-running the whole thing without an id column for the nodes -> does snkit give them an id and is it added to the edges info?
- adapt code accordingly if so
- if not, figure out how to find the edges that were cut into two, so they are not duplicate in the dataframe (are they even still there in their uncut form?)
- move the cell that changes edges_all into edges_all_unique into the post-linking part as much as possible (at least the attr_dict part)
YOU GOT THIS

BUILDING THE SKELETAL NETWORK --April--
- finish cleaning up edges/nodes for carbike after linking them with the centroids
- import everything to networkX
- make network travelable -> can I go from one centroid to another? 
- separate cars and bikes (add travel time as an attribute)
- add restriction about how to travel (either only bike or only car, not both)
- add RER, but this time move the stations to nodes instead of changing stuff
- send visuals and stuff to Anastassia and Trivik
GOAL: I can pick two centroids and find the shortest path, which is all walk, all bike, or walk/bike to station then walk 


SOCIAL STUFF -- second midterm (May 22nd)--
- define needed data for POIs and people 
- find data for POIs and people
- add all of it to centroids
- find/define OD matrix
GOAL: I can pick one pop and one POI and find the shortest path

OPTIMISATION STUFF (June)
- find algorithm
- modify network to allow for optimisation
- do whatever it takes to get stuff to run
- first results!! 
