## This notebook is used to __store variations of functions__, so that they remain available for reference in case alternative solutions are required

### 2025/03/04 - Function identify_uncovered() from 01_PL_04_Combine_networks [vref repository], as it was before the update that considers overlapping_test. This stored version shortens edges by 0.5 × size, whereas the latest version shortens them by 0.9 × size

In [None]:
def identify_uncovered(base_nodes, base_edges, complementary_nodes, complementary_edges, contact_analysis_dist, projected_crs="EPSG:6372"):
    """ This function identifies zones within a complementary network (nodes and edges) where currently there's no coverture in a base network.
	Args:
		base_nodes (geopandas.GeoDataFrame): GeoDataFrame containing nodes of the base network. 
        base_edges (geopandas.GeoDataFrame): GeoDataFrame containing edges of the base network. 
		complementary_nodes  (geopandas.GeoDataFrame): GeoDataFrame containing nodes of the complementary network.
		complementary_edges  (geopandas.GeoDataFrame): GeoDataFrame containing edges of the complementary network.
		contact_analysis_dist (float): Distance (meters) used when deciding which nodes from the complementary network should be added to the base network.
                                A buffer of {contact_analysis_dist} is created around all center points of each complementary_edge.
                                If the buffer touches any base_edges, the complementary_edge is considered as already covered by the base network. 
                                If the buffer does not touches any base_edge, the complementary_edge is considered uncovered.
		projected_crs (str, optional): string containing projected crs to be used depending on area of interest. Defaults to "EPSG:6372".

	Returns:
        complementary_uncovered_nodes (geopandas.GeoDataFrame): GeoDataFrame with nodes from the complementary network that are located 
                                                                in a zone not covered by the base network.
        complementary_uncovered_edges (geopandas.GeoDataFrame): GeoDataFrame with edges from the complementary network that are located
                                                                in a zone not covered by the base network.
		contact_nodes (geopandas.GeoDataFrame): GeoDataFrame with nodes from the complementary network that could be used to 
                                                connect an uncovered zone to a covered zone.
	"""

    # Turn on or off function logs (General Logs)
    function_logs = True
    # Turn on or off debbugging logs (Lots of logs used for debugging)
    debugging_logs = False
    
    # 1.0 --------------- Extract mid_point of each complementary edge
    if function_logs:
        print("1.0 - Extracting mid_point point of each complementary edge.")
    # ------------------- INPUT USED - READ COMPLEMENTARY EDGES
    complementary_edges = complementary_edges.copy()
    complementary_edges = complementary_edges.to_crs(projected_crs)
    # ------------------- INPUT USED - READ COMPLEMENTARY EDGES

    # Create unique ID for each edge using u+v+key
    complementary_edges = src.create_unique_edge_id(complementary_edges)
    # Find mid_point of each edge
    complementary_edges['mid_point'] = complementary_edges.interpolate(complementary_edges.length / 2)
    # Assign mid_point to its own gdf and drop column 'mid_point' from complementary_edges
    mid_points = complementary_edges[['edge_id','mid_point']].copy()
    mid_points.rename(columns={'mid_point':'geometry'},inplace=True)
    complementary_edges.drop(columns=['mid_point'],inplace=True)
    
    # 2.0 --------------- Create contact-analysis buffer around mid_points using contact_analysis_dist 
    # ------------------- (keep edge-of-origin data)
    if function_logs:
        print("2.0 - Creating contact-analysis buffer around each mid_point.")
    
    # Reset mid_points's index (Keeps data ordered starting from 0)
    mid_points.reset_index(inplace=True,drop=True) #--> Resets index without saving col 'index'
    # Save each mid_point's reseted index in a column named 'index'
    points_to_buffer = mid_points.copy()
    points_to_buffer.reset_index(inplace=True) #--> Also creates a col 'index', same order since it is reseted
    # Create a gdf containing the contact-analysis buffer around mid_points
    mid_points_buffer = points_to_buffer.buffer(contact_analysis_dist)
    mid_points_buffer = gpd.GeoDataFrame(geometry=mid_points_buffer)
    mid_points_buffer.reset_index(inplace=True) #--> Also creates a col 'index', same order since it is reseted
    # Transfer data from mid_points to it's buffer using the previously reseted index as merge col
    points_to_buffer.drop(columns=['geometry'],inplace=True)
    mid_points_buffer = pd.merge(mid_points_buffer,points_to_buffer,on='index') #--> Merges using common reseted col 'index'
    mid_points_buffer.drop(columns=['index'],inplace=True)

    # Save disk space
    del points_to_buffer
    
    # 3.0 --------------- Find mid_points whose buffer does not intersect with any part of the base network (Considering base_edges).
    # ------------------- [This step creates function output COMPLEMENTARY_UNCOVERED_EDGES]
    if function_logs:
        print("3.0 - Extracting complementary_uncovered_edges.")
    
    # ------------------- INPUT USED - READ BASE EDGES
    base_edges = base_edges.copy()
    base_edges = base_edges.to_crs(projected_crs)
    # ------------------- INPUT USED - READ BASE EDGES
    
    # Buffers that touch any base edge
    buffer_touch = mid_points_buffer.sjoin(base_edges)
    # All unique complementary edge_ids whose mid_point's buffer touched any base_edge
    edge_id_touch_lst = list(buffer_touch.edge_id.unique())
    # Complementary edges that are NOT(~) near any base edge
    complementary_uncovered_edges = complementary_edges.loc[~complementary_edges.edge_id.isin(edge_id_touch_lst)].copy()
    complementary_uncovered_edges.reset_index(inplace=True,drop=True) #--> Resets index without saving col 'index'

    # 4.0 --------------- Select the complementary_nodes that connect to the complementary_uncovered_edges
    # ------------------- [This step creates function output COMPLEMENTARY_UNCOVERED_NODES]
    if function_logs:
        print("4.0 - Extracting complementary_uncovered_nodes.")
    
    # ------------------- INPUT USED - READ COMPLEMENTARY NODES
    complementary_nodes = complementary_nodes.copy()
    complementary_nodes = complementary_nodes.to_crs(projected_crs)
    # ------------------- INPUT USED - READ COMPLEMENTARY NODES 
    
    # List of unique 'u's and 'v's that are connected to the complementary_uncovered_edges 
    complementary_uncovered_osmid_lst = set(list(complementary_uncovered_edges.u.unique()) + list(complementary_uncovered_edges.v.unique()))
    # Select any node where its 'osmid' IS in complementary_uncovered_osmid_lst
    complementary_uncovered_nodes = complementary_nodes.loc[complementary_nodes.osmid.isin(complementary_uncovered_osmid_lst)].copy()
    # [Note: This nodes won't necessarily be in the uncovered zone since they could belong to 
    # an edge whose mid_point is far from the base network, but whose path extends into the base network.]

    # 5.0 --------------- Find the nodes that would be used to connect the uncovered part of the complementary network to the base network.
    # ------------------- [This step creates function output CONTACT_NODES]
    if function_logs:
        print("5.0 - Extracting contact_nodes.")

    # 5.1 --- Create a buffer around all complementary_uncovered_nodes 
    # Reset complementary_uncovered_nodes's index
    complementary_uncovered_nodes.reset_index(inplace=True,drop=True) #--> Resets index without saving col 'index'
    # Save each complementary_uncovered_nodes's reseted index in a column named 'index'
    nodes_to_buffer = complementary_uncovered_nodes.copy()
    nodes_to_buffer.reset_index(inplace=True) #--> Also creates a col 'index', same order since it is reseted
    # Create a gdf containing the buffer around complementary_uncovered_nodes
    complementary_uncovered_nodes_buffer = nodes_to_buffer.buffer(contact_analysis_dist)
    complementary_uncovered_nodes_buffer = gpd.GeoDataFrame(geometry=complementary_uncovered_nodes_buffer)
    complementary_uncovered_nodes_buffer.reset_index(inplace=True) #--> Also creates a col 'index', same order since it is reseted
    # Transfer data from complementary_uncovered_nodes to it's buffer using the index as merge col
    nodes_to_buffer.drop(columns=['geometry'],inplace=True)
    complementary_uncovered_nodes_buffer = pd.merge(complementary_uncovered_nodes_buffer,nodes_to_buffer,on='index') #--> Merges using common reseted col 'index'
    complementary_uncovered_nodes_buffer.drop(columns=['index'],inplace=True)

    # Save disk space
    del nodes_to_buffer
    
    # 5.2 --- Find complementary_uncovered_nodes whose buffer DOES intersect with any part of the base network (Considering base_edges).
    # Buffers that touch any base edge
    buffer_touch = complementary_uncovered_nodes_buffer.sjoin(base_edges)
    # All unique osmids that touched any base_edge
    contact_osmids = list(buffer_touch.osmid.unique())
    # Complementary nodes that ARE near any base_edge
    contact_nodes = complementary_uncovered_nodes.loc[complementary_uncovered_nodes.osmid.isin(contact_osmids)].copy()
    contact_nodes.reset_index(inplace=True,drop=True) #--> Resets index without saving col 'index'

    # 6.0 --------------- Identify and shorten edges that extend into the base network and would be usefull to create connections.
    # ------------------- (Up to this step, edges whose mid_points_buffer is in contact with the base network are not included in the
    # ------------------- function's output since they are considered to be in an already-covered zone. However, some edges (if shortened)
    # ------------------- could be usefull to connect the uncovered zone to the covered zone. This step takes the edge and shortens (Clips)
    # ------------------- the edge until it's mid_point_buffer is no longer in contact with the base network)
    # ------------------- [This step updates the uncovered network (nodes and edges) and the contact nodes]
    if function_logs:
        print("6.0 - Creating missing connections through complementary_edges that travel from the uncovered zone to the base network.")

    # PREPARATION FOR ANALYSIS:

    # Keep track of the amount of edges that underwent a shortening process
    fabricated_count = 0

    # LOG CODE - Progress logs
    # Will create progress logs when progress reaches these percentages:
    progress_logs = [0,10,20,30,40,50,60,70,80,90,100] # for log statistics
    osmid_count = 0
    # LOG CODE - Progress logs

    # Create empty GeoDataFrame to store all original_diverging_nodes and original_diverging_edges (Used for GIS visualization purposes)
    original_diverging_nodes = gpd.GeoDataFrame()
    original_diverging_edges = gpd.GeoDataFrame()
    
    # Find all complementary_uncovered_osmids (from previously created complementary_uncovered_osmid_lst)
    # that are NOT a contact osmid (contact osmids are those that already serve as a connection to the base network)
    non_contact_osmids = [osmid for osmid in complementary_uncovered_osmid_lst if osmid not in contact_osmids]
    
    # Previously produced osmid. Since will be creating non-existing nodes, function produce_osmid() will use a starting number for
    # trying to produce unique osmids. That function will check if that osmid already exists in either the base or complementary network.
    # Start with number 0.
    previously_produced = 0

    # Read base_nodes once (will be used to assess if the osmid being produced is unique to both input networks)
    # ------------------- INPUT USED - READ BASE NODES
    base_nodes = base_nodes.copy()
    base_nodes = base_nodes.to_crs(projected_crs)
    # ------------------- INPUT USED - READ BASE NODES

    # Keep track of which parts of the network where fabricated with the following code.
    # All edges that keep its original geometry will be assigned 'clipping_i' = 0.
    # All edges whose geometry was clipped will be assigned the amount of shortening (clipping) iterations used in them.
    complementary_uncovered_nodes['clipping_i'] = 0
    complementary_uncovered_edges['clipping_i'] = 0
    complementary_uncovered_edges['original_edge_id'] = np.nan
    contact_nodes['clipping_i'] = 0

    # Shortening dict
    # Sometimes an edge could get shortened from both sides. 
    # (Clipped with starting point 'u' and then, on another case, clipped with starting point 'v')
    # If an edge will be shortened from both sides exactly once (Shortened from 'u' to midpoint and from 'v' to midpoint),
    # there would be two different new nodes in the same place. The dict helps make sure that only one contact_node is created.
    # If not considered, this particular situation can create two different edges that coincide in two different contact_nodes exactly in the midpoint.
    # (This is the main reason why it is necessary to keep track of which edges where shortened and up to which point)
    shortening_dict = {}
    
    # Review each node in the uncovered zone of the complementary network that is not already a contact_node
    # (Previously created non_contact_osmids)
    for osmid in non_contact_osmids:

        # Development check
        #if osmid != 436813694931:
        #    continue
        #else:
        #    print(osmid)
        
        # LOG CODE - Progress logs
        # Measures current progress, prints if passed a checkpoint of progress_logs list.
        current_progress = (osmid_count / len(non_contact_osmids))*100
        for checkpoint in progress_logs:
            if (current_progress >= checkpoint) and function_logs:
                print(f"6.0 - Exploring osmids. {checkpoint}% done.")
                progress_logs.remove(checkpoint)
                break
        # LOG CODE - Progress logs

        # Retrieve it's edges (Will be refered as diverging_edges). 
        # Must consider all edges (complementary_edges) and not only complementary_uncovered_edges since
        # since it's looking to identify if an edge that comes out of that osmid goes towards base_network.
        diverging_edges = complementary_edges.loc[(complementary_edges.u==osmid) | (complementary_edges.v==osmid)].copy()
        diverging_edges_ids = list(diverging_edges.edge_id.unique())
        
        # For each edge diverging from current node:
        for diverging_edge_id in diverging_edges_ids:
            # If the edge DOES touch the base network:
            if diverging_edge_id in edge_id_touch_lst: # (Previously created edge_id_touch_lst)
                # If an edge reaches this part of the code, it means that it is a complementary_edge that:
                # a) Comes out from an node that's located in the uncovered zone (complementary_uncovered_node).
                # b) The node it came out from is NOT a contact_node, it is far from the base network (According to contact_analysis_dist)
                # c) The edge's current mid_point is located in proximity to the base network (According to contact_analysis_dist)
                # --> Between the base_network and this edge itself, a connection point should be identified.
                # --> Objective: Identify that connection (new contact_node) between the complementary and base network.

                # 6.1 --- Retrieve current diverging_osmid and diverging_node
                # Save the osmid from the node which the current edge uses to come out from the uncovered zone into the base network.
                diverging_osmid = osmid
                if debugging_logs:
                    print(f"Diverging osmid: {diverging_osmid}.")
                # Extract its node
                diverging_node = complementary_uncovered_nodes.loc[complementary_uncovered_nodes.osmid == diverging_osmid].copy()
                # Reset index (so that accessing its geometry is always .loc[0,'geometry'])
                diverging_node.reset_index(inplace=True, drop=True)
                if debugging_logs:
                    print(f"Diverging node's geometry: {diverging_node.loc[0,'geometry']}.")
                # Add current diverging_node to original_diverging_nodes gdf (For GIS visualization purposes)
                original_diverging_nodes = pd.concat([original_diverging_nodes, diverging_node])

                # 6.2 --- Retrieve current diverging_edge as the connection_edge
                # Select the edge that diverts from the diverging_node towards the base_network
                connection_edge = complementary_edges.loc[complementary_edges.edge_id == diverging_edge_id].copy()
                # Reset index (so that accessing its data is always .loc[0,'data'])
                connection_edge.reset_index(inplace=True,drop=True)
                # The connection_edge geometry will suffer modifications, save original
                original_connection_edge = connection_edge.copy() 
                # Add current connection_edge to original_diverging_edges gdf (For GIS visualization purposes)
                original_diverging_edges = pd.concat([original_diverging_edges, connection_edge])

                # 6.3 --- Clip connection_edge until it's mid_point is no longer in proximity to the base network.
                # ------- When this point is reached, assign its ending_point (previous mid_point) as a new contact_node.
            
                # Kickstart while loop for current connection_edge
                stop = False
                shorten_i = 0
                
                while (stop == False):

                    # Limit of attempts
                    limit_of_attempts = 20
                    if shorten_i >= limit_of_attempts:
                        print(f"Tried shortening {limit_of_attempts} times edge u {original_connection_edge.u.unique()[0]} and v {original_connection_edge.v.unique()[0]}. Stopped.")
                        break
                        
                    # 6.3.1 --- Calculate the edge's mid_point.
                    # --------- [Will become the clipping_point in function edge_clipping()].
                
                    # Calculate the connection_edge's mid_point
                    connection_edge['mid_point'] = connection_edge.interpolate(connection_edge.length / 2)
                    # Assign mid_point to its own gdf and drop column 'mid_point' from connection_edge gdf
                    edge_mid_point = connection_edge[['edge_id','mid_point']].copy()
                    edge_mid_point.rename(columns={'mid_point':'geometry'},inplace=True)
                    connection_edge.drop(columns=['mid_point'],inplace=True)
                
                    # 6.3.2 --- Evaluate if the current mid_point is still in proximity to the base network
                    # 6.3.2.a - Create contact-analysis buffer around the edge_mid_point
                    # Reset edge_mid_point's index
                    edge_mid_point.reset_index(inplace=True,drop=True) #--> Resets index without saving col 'index'
                    # Save edge_mid_point's reseted index in a column named 'index'
                    point_to_buffer = edge_mid_point.copy()
                    point_to_buffer.reset_index(inplace=True) #--> Also creates a col 'index', same order since it is reseted
                    # Create a gdf containing the contact-analysis buffer around the edge_mid_point
                    mid_point_buffer = point_to_buffer.buffer(contact_analysis_dist)
                    mid_point_buffer = gpd.GeoDataFrame(geometry=mid_point_buffer)
                    mid_point_buffer.reset_index(inplace=True) #--> Also creates a col 'index', same order since it is reseted
                    # Transfer data from edge_mid_point to it's buffer using the previously reseted index as merge col
                    point_to_buffer.drop(columns=['geometry'],inplace=True)
                    mid_point_buffer = pd.merge(mid_point_buffer,point_to_buffer,on='index') #--> Merges using common reseted col 'index'
                    mid_point_buffer.drop(columns=['index'],inplace=True)
                    
                    # 6.3.2.b - Find if current mid_point it's still in proximity to base network
                    # Buffers that touch any base edge
                    buffer_touch = mid_point_buffer.sjoin(base_edges)
                    
                    if len(buffer_touch) > 0:
                        
                        # 6.3.3 --- If it still touches, reduce in size.
                        # If the mid_point_buffer still touches the base edges, the line is most likely still overlaping with the base network.
                        # Apply edge_clipping function to shorten the edge up until the current mid_point
                        connection_edge = edge_clipping(starting_point_gdf = diverging_node,
                                                        edge_gdf = connection_edge,
                                                        clipping_point_gdf = edge_mid_point,
                                                        projected_crs = projected_crs)
                        # Keep connection_edge format
                        connection_edge['edge_id'] = diverging_edge_id
                        # Count shortening iteration
                        shorten_i+=1
                    
                    else:
                        # 6.3.3 --- If the buffer no longer touches any part of the base network, shortening was a success.
                        # --------- However, before registering the shortened edge it it necessary to run a test:
                        
                        # Double-once-shortening test STARTS ### ### ### ### ### ### ### ### ### ### ### ### 
                        # EXPLANATION:
                        # A complementary_edge could be shortened twice, once from each starting_point. This is a "double shortening".
                        # It occurs when an edge was shortened from node 1 (e.g. from 'u') and now is being shortened from node 2 (e.g. from 'v').
                        # This process produces no problems, **unless both shortening processess undergo just 1 iteration**.
                        # In that specific case, both lines start in their nodes ('u' or 'v') and end at the original line's midpoint.
                        # This produces TWO different contact_nodes in a very similar location, that could result in TWO similar but different treatments and strange geometries.
                        # In order to avoid this, this double_shortening check is conduced in order to make sure that 
                        # at the end of both processes, just ONE contact node is produced and used by both edges ending on it.

                        # If this is NOT the first fabrication case and edge AND the current edge was shortened ONLY once:
                        if (fabricated_count > 0) and (shorten_i==1):
    
                            # Load all original edge_ids that have been shortened ONLY once
                            already_shortened_once = complementary_uncovered_edges.loc[complementary_uncovered_edges.clipping_i==1].copy()
                            already_shortened_once_lst = list(already_shortened_once.original_edge_id.unique())

                            # Check if current diverging_edge_id has already been shortened ONLY once
                            if diverging_edge_id in already_shortened_once_lst:
                                # If an edge reaches this part of the code, the edge was already shortened ONCE from one end, and was being shortened again ONCE from the other end.
                                # Next step --> Do NOT produce a new contact osmid and new point for current edge. Use the existing one. 
                                # ------------> Register the edge under the existing contact_node instead of creating a new one.

                                if debugging_logs:
                                    print(f"Edge {diverging_edge_id} was already shortened [once]. Shortening [once] again from osmid {diverging_osmid}.")

                                # 6.3.3.a1 - Identify the previously produced middle osmid.
                                # Retrieve the ORIGINAL (current original_connection_edge's) 'u' and 'v'
                                original_u = original_connection_edge.u.unique()[0]
                                original_v = original_connection_edge.v.unique()[0]
                                original_osmids = [original_u, original_v]
                                if debugging_logs:
                                    print(f"Edge's original osmids (u,v) were {original_osmids}.")
                                
                                # Retrieve the PREVIOUSLY PRODUCED (previously shortened edge) 'u' and 'v'
                                prev_produced_edge = complementary_uncovered_edges.loc[complementary_uncovered_edges.original_edge_id == diverging_edge_id].copy()
                                new_u = prev_produced_edge.u.unique()[0]
                                new_v = prev_produced_edge.v.unique()[0]
                                new_osmids = [new_u, new_v]
                                if debugging_logs:
                                    print(f"The first shortening is using osmids (u,v) {new_osmids}.")
                                
                                # Identify which osmid is in the new_osmids list but not in the original_osmids list.
                                # (Which osmid was produced here, in step 6.3.3)
                                for osmid_check in new_osmids:
                                    if osmid_check not in original_osmids:
                                        produced_osmid = osmid_check

                                # 6.3.3.a2 - Add the connection_edge as a new edge to complementary_uncovered_edges gdf (But not the node, the node is already there).
                                # Retrieve the diverging_osmid's position in the ORIGINAL connection_edge. (Whether 'u' or 'v')
                                # In order to keep that original diverging_node in its position and add the produced_osmid in the other position.
                                if diverging_osmid == original_u:
                                    connection_edge_u = diverging_osmid
                                    connection_edge_v = produced_osmid
                                    if debugging_logs:
                                        print(f"The new shortening will use osmids (u,v) = {[connection_edge_u,connection_edge_v]}.")
                                elif diverging_osmid == original_v:
                                    connection_edge_u = produced_osmid
                                    connection_edge_v = diverging_osmid
                                    if debugging_logs:
                                        print(f"The new shortening will use osmids (u,v) = {[connection_edge_u,connection_edge_v]}.")
                                else:
                                    print(f"ERROR while trying to set 'u' and 'v' for shortened connection edge {diverging_edge_id}.")
                                    print(f"The new shortening was trying to use osmids (u,v) = {[connection_edge_u,connection_edge_v]}.")
                                    intended_crash
                                # Use the same key that was used in the original_connection_edge
                                connection_edge_key = original_connection_edge.key.unique()[0]
                                # Retrieve the modified edge's (connection_edge's) geometry
                                connection_edge_geom = connection_edge['geometry'].unique()[0]
                                # Find current last position
                                iloc_edge = len(complementary_uncovered_edges)
                                # Register new edge
                                complementary_uncovered_edges.loc[iloc_edge+1,'u'] = connection_edge_u
                                complementary_uncovered_edges.loc[iloc_edge+1,'v'] = connection_edge_v
                                complementary_uncovered_edges.loc[iloc_edge+1,'key'] = connection_edge_key
                                complementary_uncovered_edges.loc[iloc_edge+1,'geometry'] = connection_edge_geom
                                complementary_uncovered_edges.loc[iloc_edge+1,'edge_id'] = str(connection_edge_u)+str(connection_edge_v)+str(connection_edge_key)
                                complementary_uncovered_edges.loc[iloc_edge+1,'original_edge_id'] = diverging_edge_id
                                complementary_uncovered_edges.loc[iloc_edge+1,'clipping_i'] = shorten_i

                                # 6.3.3.a3 - Finished registering. Stop while loop. Continue with the next diverging_edge_id of current osmid.
                                if function_logs:
                                    print(f"6.0 - Reused contact node {produced_osmid} for original edge {diverging_edge_id}.")
                                
                                fabricated_count += 1
                                stop = True
                                continue

                            # Else, this is the first time that the edge is shortened once. 
                            # Not relevant, continue as usual.
                            else:
                                pass
                                
                        # Else, either this is the first edge to be shortened or shorten_i is not equall to 1. 
                        # Not relevant, continue as usual.
                        else:
                            pass
                        # Double-once-shortening test ENDS ### ### ### ### ### ### ### ### ### ### ### ### 

                        # CONTINUATION OF NORMAL PROCESS:
                        # The current mid_point is not in proximity to the base network.
                        # But the previous mid_point (The current connection_edge's endpoint) WAS KNOWN TO BE in proximity to the base network.
                        # Next step --> Transform the last mid_point (The currend endpoint) into a contact_node and update the edge.

                        # 6.3.3.b1 - Produce a unique osmid (That doesn't exist in either network) in order to add the edge and point
                        # Produce a unique osmid
                        produced_osmid = produce_osmid(base_nodes, complementary_nodes, previously_produced+1)
                        # Save produced osmid to avoid trying numbers unnecessarily
                        previously_produced = produced_osmid
                        if debugging_logs:
                            print(f"Produced new osmid: {produced_osmid} for current diverging edge {diverging_edge_id}.")
                
                        # 6.3.3.b2 - Find the ending_point's coordinates.
                        # --------- It is unsure whether the diverging_node is located at the coordinate 0 or at the last coordinate of the connection_edge.
                        # --------- So it is necessary to identify which one is which.
                        # Extract the edge's coordinates list
                        connection_edge_coords = list(connection_edge['geometry'][0].coords)
                        # Extract the starting_point's coordinates (Known to be the diverging_node)
                        starting_point_coords = diverging_node.loc[0,'geometry'].coords[0]
                        # Obtain the ending_point's coordinates (It is the previous iteration edge's mid_point)
                        # (It is either the first or last coordinate of the connection_edge)
                        if starting_point_coords == connection_edge_coords[0]:
                            ending_point_coords = connection_edge_coords[-1]
                        elif starting_point_coords == connection_edge_coords[-1]:
                            ending_point_coords = connection_edge_coords[0]
                        else:
                            print(f"ERROR while trying to find the starting and ending point of shortened connection edge {diverging_edge_id}.")
                            print(f"Diverging node osmid: {diverging_osmid}.")
                            print(f"Diverging node (starting point) coords: {starting_point_coords}.")
                            print(f"Connection edge's coords [0]: {connection_edge_coords[0]}.")
                            print(f"Connection edge's coords [-1]: {connection_edge_coords[-1]}.")
                            intended_crash
                
                        # 6.3.3.b3 - Add the ending_point as a node
                        # Register to complementary_uncovered_nodes gdf
                        iloc_node = len(complementary_uncovered_nodes)+1
                        complementary_uncovered_nodes.loc[iloc_node,'osmid'] = produced_osmid
                        complementary_uncovered_nodes.loc[iloc_node,'geometry'] = Point(ending_point_coords)
                        complementary_uncovered_nodes.loc[iloc_node,'clipping_i'] = shorten_i
                        # Register to contact_nodes gdf
                        iloc_node = len(contact_nodes)+1
                        contact_nodes.loc[iloc_node,'osmid'] = produced_osmid
                        contact_nodes.loc[iloc_node,'geometry'] = Point(ending_point_coords)
                        contact_nodes.loc[iloc_node,'clipping_i'] = shorten_i
                
                        # 6.3.3.b4 - Add the connection_edge as a new edge to complementary_uncovered_edges gdf
                        # Retrieve the diverging_osmid its position in the ORIGINAL connection_edge. (Whether 'u' or 'v')
                        # In order to keep that original diverging_node in its position and add the produced_osmid in the other position.
                        original_u = original_connection_edge.u.unique()[0]
                        original_v = original_connection_edge.v.unique()[0]
                        if diverging_osmid == original_u:
                            connection_edge_u = diverging_osmid
                            connection_edge_v = produced_osmid
                        elif diverging_osmid == original_v:
                            connection_edge_u = produced_osmid
                            connection_edge_v = diverging_osmid
                        else:
                            print(f"ERROR while trying to set 'u' and 'v' for shortened connection edge {diverging_edge_id}.")
                            intended_crash
                        # Use the same key that was used in the original connection_edge
                        connection_edge_key = original_connection_edge.key.unique()[0]
                        # Retrieve the modified edge's (connection_edge's) geometry
                        connection_edge_geom = connection_edge['geometry'].unique()[0]
                        # Find last position
                        iloc_edge = len(complementary_uncovered_edges)+1
                        # Register new edge
                        complementary_uncovered_edges.loc[iloc_edge,'u'] = connection_edge_u
                        complementary_uncovered_edges.loc[iloc_edge,'v'] = connection_edge_v
                        complementary_uncovered_edges.loc[iloc_edge,'key'] = connection_edge_key
                        complementary_uncovered_edges.loc[iloc_edge,'geometry'] = connection_edge_geom
                        complementary_uncovered_edges.loc[iloc_edge,'edge_id'] = str(connection_edge_u)+str(connection_edge_v)+str(connection_edge_key)
                        complementary_uncovered_edges.loc[iloc_edge,'original_edge_id'] = diverging_edge_id
                        complementary_uncovered_edges.loc[iloc_edge,'clipping_i'] = shorten_i
                        
                        # 6.3.3.b5 - Finished registering. Stop while loop. Continue with the next diverging_edge_id of current osmid.
                        fabricated_count += 1
                        stop = True
        
        # LOG CODE - Progress logs
        # Finished reviewing current osmid. Continue with next osmid in non_contact_osmids.
        osmid_count+=1
        # LOG CODE - Progress logs

    if function_logs:
        print(f"Finished. Fabricated {fabricated_count}.")
                
    return complementary_uncovered_nodes, complementary_uncovered_edges, contact_nodes

### 2025/03/04 - Part 02 - Step 04 - Draw new edges and identify consequential intersections from 01_PL_04_Combine_networks [vref repository] before the update that considers all edges (not only ntw_01 edges) for CASE A and all edges (not only ntw_02 edges) for CASE B

In [None]:
# Set to True to print a detailed trace of every drawn edge and every
# consequential intersection case (A, B or C) found in the loop below.
casetype_logs = False

##### Time
time_1 = time.time()
##### Time

# Starting point handed to produce_osmid() when looking for unused osmids
previously_produced = 0

# intersection_nodes_3 collects the clipping instructions that function
# network_intersections_update() will use to perform a third round of
# intersections (the one that fixes consequential intersections).
# ----- OUTPUT DTYPES MANAGEMENT FOR intersection_nodes_3 GEODATAFRAME -----
intersection_nodes_3_cols = {
    "osmid": "int64",         # Node that is the intersection (clipping point)
    "u": "int64",             # Edge that is intersected
    "v": "int64",             # Edge that is intersected
    "key": "int64",           # Edge that is intersected
    "retain_how": "string",   # Establishes which part of the split edge to keep
    "edge_origin": "string",  # Helps divide network_intersections_update() process
}
# Start from an empty, typed GeoDataFrame so that rows concatenated later
# do not change the column dtypes.
intersection_nodes_3 = gpd.GeoDataFrame(
    columns=[*intersection_nodes_3_cols, "geometry"],
    crs=projected_crs,
).astype({**intersection_nodes_3_cols, "geometry": "geometry"})
# Column order: the node to be created, then the edge to be split by that
# node, then the data describing how the split must be handled.
intersection_nodes_3 = intersection_nodes_3[
    ["osmid", "geometry",
     "u", "v", "key",
     "retain_how", "edge_origin"]
]
# ----- OUTPUT DTYPES MANAGEMENT FOR intersection_nodes_3 GEODATAFRAME -----
fix_idx = 0  # Number of rows registered so far in intersection_nodes_3

# osmids and edge_ids created to join both networks (kept so they can be
# identified after the process)
all_join_osmids = []
all_join_edgeids = []

# Consequential intersections and their three cases (A, B and C):
# The loop that follows walks over connected_nodes (the dataframe with the
# relations defined between both networks) and draws the new edges that are
# needed. Because each new edge is a straight line between two nodes, it may
# cross other edges; those crossings are called consequential_intersections.
# The first connected_node is checked against the existing concatenated_edges
# (renamed joined_edges_concat). Every drawn edge is then appended to
# joined_edges_concat, so the following connected_nodes are also checked
# against the recently drawn edges. This is what produces cases A, B and C,
# handled at the end of the for loop.

# joined_edges_concat: concatenated_edges plus an edge_id column
joined_edges_concat = src.create_unique_edge_id(concatenated_edges)

# LOG CODE - Progress logs
# A progress log is printed each time one of these percentages is reached
progress_logs = list(range(0, 101, 10))  # 0, 10, ..., 100
progress_count = 0
# LOG CODE - Progress logs

# Helpers used by the loop below. They are defined once, outside the loop,
# instead of being re-created on every iteration.

def geometries_are_equal_with_tolerance(geom1, geom2, tolerance=0.001):
    """Return True when two geometries are equal or closer than `tolerance`.

    Args:
        geom1: shapely geometry.
        geom2: shapely geometry.
        tolerance (float, optional): maximum distance, in CRS units, at which
            both geometries are still treated as the same. Defaults to 0.001
            (1 mm if the projected CRS is in meters).

    Returns:
        bool: True if geom1.equals(geom2) or their distance is below tolerance.
    """
    return geom1.equals(geom2) or geom1.distance(geom2) < tolerance


def _register_intersection(target_gdf, crs, osmid, geometry, u, v, key, retain_how, edge_origin):
    """Return target_gdf with one extra row describing how an edge must be clipped.

    Args:
        target_gdf (geopandas.GeoDataFrame): gdf that receives the row (intersection_nodes_3).
        crs: CRS assigned to the one-row GeoDataFrame before concatenating.
        osmid: osmid of the node created at the intersection (clipping point).
        geometry: geometry of the intersection.
        u, v, key: identifiers of the edge that has to be split at the intersection.
        retain_how (str): 'both', 'u' or 'v'. Read by network_intersections_update()
            (not shown in this cell) to decide which part of the split edge is kept.
        edge_origin (str): origin label of the split edge ('ntw_01', 'ntw_02',
            'ntw_01_or_02' or 'ntw_join').

    Returns:
        geopandas.GeoDataFrame: target_gdf plus the new row, with a fresh index.
    """
    row_df = pd.DataFrame({'osmid': [int(osmid)],
                           'geometry': [geometry],
                           'u': [int(u)],
                           'v': [int(v)],
                           'key': [int(key)],
                           'retain_how': [retain_how],
                           'edge_origin': [edge_origin]})
    row_gdf = gpd.GeoDataFrame(row_df, geometry='geometry', crs=crs)
    # Force the row's dtypes to match the target's dtypes, so the concat
    # does not alter the dtypes of the target.
    target_dtypes = target_gdf.dtypes.to_dict()
    shared_dtypes = {col: target_dtypes[col] for col in row_gdf.columns if col in target_dtypes}
    row_gdf = row_gdf.astype(shared_dtypes)
    return pd.concat([target_gdf, row_gdf], ignore_index=True)


# Iterate over each relation established between both networks.
# For each relation that is not 'existing':
#   1. A straight edge is drawn between the ntw_01 node and the ntw_02 node.
#   2. The drawn edge is overlaid on joined_edges_concat to find every place
#      where it crosses another edge (consequential intersections).
#   3. Each crossing is classified as CASE A, B or C and two rows are added to
#      intersection_nodes_3: one for the intersected edge, one for the drawn edge.
for idx, connected_node in connected_nodes.iterrows():

    # LOG CODE - Progress logs
    # Measures current progress, prints if passed a checkpoint of progress_logs list.
    current_progress = (progress_count / len(connected_nodes))*100
    for checkpoint in progress_logs:
        if (current_progress >= checkpoint):
            print(f"Categorizing node relation types. {checkpoint}% done.")
            progress_logs.remove(checkpoint)
            break
    # The relation is counted here, once, so that every exit path of this
    # iteration (including the 'continue' statements below) is counted.
    progress_count+=1
    # LOG CODE - Progress logs

    # Development checks -----------------------------------
    #current_osmid = connected_node.connecting_ntw02_osmid
    #osmid_checks = [1837]
    #if current_osmid not in osmid_checks:
    #    continue
    #else:
    #    print(current_osmid)
    # Development checks -----------------------------------

    # Extract relation's data
    # The following words are used to refer to the origin of data:
    # 'connection' refers to the data from network 01,
    # 'connecting' refers to data from network 02.
    # The reasoning used is that network 02 (complementary) is ---connecting--> to network 01 (base)
    connection_ntw01_osmid = connected_node.connection_ntw01_osmid
    connection_type = connected_node.connection_type
    connecting_ntw02_osmid = connected_node.connecting_ntw02_osmid

    # If the connection for the current connecting_node was identified to already exist nearby (Part 02 - Step 02), --> skip
    if connection_type == 'existing':
        continue # Next nodes_relation

    # Identify the node's coordinates on ntw_01 that's going to get connected to the node on ntw_02
    connection_node_gdf = concatenated_nodes.loc[concatenated_nodes.osmid==connection_ntw01_osmid].copy()
    # Safety check (in case a node failed in re-update of the network)
    if len(connection_node_gdf)<1:
        print(f"WARNING: Skipping node not found. Connection (ntw01): {connection_ntw01_osmid}. Connecting (ntw02): {connecting_ntw02_osmid}.")
        continue
    connection_node_geom = connection_node_gdf.geometry.unique()[0]
    connection_node_coords = connection_node_geom.coords[0]
    if casetype_logs:
        print(f"ntw_01 - connection_node_geom: {connection_node_geom}.")

    # Identify the node's coordinates on ntw_02 that's going to get connected to the node on ntw_01
    connecting_node_gdf = concatenated_nodes.loc[concatenated_nodes.osmid==connecting_ntw02_osmid].copy()
    # Safety check (in case a node failed in re-update of the network)
    if len(connecting_node_gdf)<1:
        print(f"WARNING: Skipping node not found. Connection (ntw01): {connection_ntw01_osmid}. Connecting (ntw02): {connecting_ntw02_osmid}.")
        continue
    connecting_node_geom = connecting_node_gdf.geometry.unique()[0]
    connecting_node_coords = connecting_node_geom.coords[0]
    if casetype_logs:
        print(f"ntw_02 - connecting_node_geom: {connecting_node_geom}.")

    # Create LineString between connection_node and connecting_node
    line_geom = LineString([[connection_node_coords[0],connection_node_coords[1]],[connecting_node_coords[0],connecting_node_coords[1]]])

    # Store new LineString to concat to joined_edges_concat and to analyse consequential_intersections
    # ----- OUTPUT REGISTRATION FOR new_edge IN joined_edges_concat GEODATAFRAME ----- [Concatenates after overlay]
    # Create temporary df with the drawn edge's data (u is the ntw_02 node, v is the ntw_01 node)
    df_temporal = pd.DataFrame({'u': [int(connecting_ntw02_osmid)], # Edge that is being created
                                'v': [int(connection_ntw01_osmid)], # Edge that is being created
                                'key': [int(0)], # Edge that is being created
                                'geometry':[line_geom], # Edge that is being created
                                'ntw_join':[int(1)], # Helps identify which edges were created to join both networks
                                'ntw_origin':['ntw_join'] # Has data of all origins, created edges get assigned "ntw_join"
                               }
                              )
    new_edge = gpd.GeoDataFrame(df_temporal, geometry='geometry', crs=projected_crs)
    # Force all datatypes to match the datatypes of the gdf to where the data will be merged
    dtypes_dict = joined_edges_concat.dtypes.to_dict() #Dict with TARGET dtypes
    filtered_dtypes = {col: dtypes_dict[col] for col in new_edge.columns if col in dtypes_dict} # Filters for cols in case of mismatch
    new_edge = new_edge.astype(filtered_dtypes) # Assigns those types
    if casetype_logs:
        print(f"Created edge between connection and connecting nodes.")
    # ----- OUTPUT REGISTRATION FOR new_edge IN joined_edges_concat GEODATAFRAME ----- [Concatenates after overlay]

    # Create unique edge_id for new_edge
    new_edge = src.create_unique_edge_id(new_edge)

    # Store the edge_ids created to join both networks (To identify them after the process)
    join_edge_ids = list(new_edge.edge_id.unique())
    all_join_edgeids = all_join_edgeids + join_edge_ids

    # Find all intersections that the new edge creates on the current network edges
    # (Used to search for consequential_intersections)
    produced_intersections = joined_edges_concat.overlay(new_edge,how='intersection',keep_geom_type=False)

    # Concatenate new_edge to joined_edges_concat gdf
    # (It is after .overlay(), else the LineString intersects with itself)
    joined_edges_concat = pd.concat([joined_edges_concat,new_edge])
    joined_edges_concat.reset_index(inplace=True,drop=True)

    # CONSEQUENTIAL INTERSECTIONS ANALYSIS
    # Explode the produced_intersections (If the new edge intersected the same edge twice or more, it produces MultiPoints)
    produced_intersections = produced_intersections.explode(index_parts=False)
    produced_intersections.reset_index(inplace=True,drop=True)

    # Remove from the produced_intersections the nodes that are currently being joined
    # (Obviously, the line that connects them intersects with them).
    # The list is built BEFORE the log below, which prints it.
    joined_nodes_lst = [connection_node_geom,connecting_node_geom]
    if casetype_logs:
        print(f"Looking for produced intersections. Dropping intersections with {joined_nodes_lst}.")

    # Compare with a tolerance and filter. (An exact comparison with .isin(joined_nodes_lst)
    # sometimes failed to identify when the line intersected its own node.)
    # The mask is built explicitly as bool so that it can be negated even when
    # there are no produced intersections at all.
    touches_joined_node = pd.Series(
        [any(geometries_are_equal_with_tolerance(geom, node_geom) for node_geom in joined_nodes_lst)
         for geom in produced_intersections['geometry']],
        index=produced_intersections.index,
        dtype=bool)
    produced_intersections = produced_intersections.loc[~touches_joined_node].copy()

    if casetype_logs:
        print(f"Found intersections at the following points: {produced_intersections['geometry'].unique()}.")

    # If there are no intersections remaining, the drawn edge needs no fixing.
    if len(produced_intersections) == 0:
        continue

    # Any intersection remaining is a consequential_intersection.
    # (Meaning, the drawn line is intersecting with other lines in an undesired way)
    consequential_intersections = produced_intersections.reset_index(drop=True)

    # Rename columns to distinguish network origin
    # (Overlay produced _1 for joined_edges_concat data and _2 for new_edge data)
    consequential_intersections = consequential_intersections.rename(columns={'edge_id_1':'intersected_edge_id',
                                                                              'edge_id_2':'drawn_edge_id'})

    # Load network 01 edges related to the connection
    # (Network 01 edges that have the current connection_node as 'u' or 'v')
    connection_idx = ((joined_edges_concat.u==connection_ntw01_osmid)|(joined_edges_concat.v==connection_ntw01_osmid)) & (joined_edges_concat.ntw_origin=='ntw_01')
    related_ntw01_edges = joined_edges_concat.loc[connection_idx].copy()
    related_ntw01_edges_ids = list(related_ntw01_edges['edge_id'].unique())

    # Load network 02 edges related to the connection
    # (Network 02 edges that have the current connecting_node as 'u' or 'v')
    connecting_idx = ((joined_edges_concat.u==connecting_ntw02_osmid)|(joined_edges_concat.v==connecting_ntw02_osmid)) & (joined_edges_concat.ntw_origin=='ntw_02')
    related_ntw02_edges = joined_edges_concat.loc[connecting_idx].copy()
    related_ntw02_edges_ids = list(related_ntw02_edges['edge_id'].unique())

    # Iterate over consequential_intersections
    # (The row label is not needed, and is not named 'idx' to avoid shadowing the outer loop's idx)
    for _, intersection in consequential_intersections.iterrows():

        # Extract intersection's data
        # Edge that was intersected
        intersected_u = int(intersection.u_1)
        intersected_v = int(intersection.v_1)
        intersected_key = intersection.key_1
        intersected_edge_id = intersection.intersected_edge_id
        # Drawn edge that is intersecting
        drawn_edge_u = int(intersection.u_2)
        drawn_edge_v = int(intersection.v_2)
        drawn_edge_key = intersection.key_2
        drawn_edge_id = intersection.drawn_edge_id

        # The intersection (In any studied case) will become a new node.
        # Extract its geometry and produce a unique osmid
        intersectionpoint_geom = intersection.geometry
        produced_osmid = produce_osmid(concatenated_nodes, concatenated_nodes, previously_produced)
        # Since concatenated_nodes are not being updated, next time try with next possible osmid
        previously_produced = produced_osmid+1
        # Store the osmids created to join both networks (To identify them after the process)
        all_join_osmids.append(produced_osmid)

        # Cases analysis. Each case sets:
        #   intersected_origin: edge_origin registered for the intersected edge.
        #   drawn_retain_how: retain_how registered for the drawn edge.
        # The intersected edge is always registered with retain_how='both'.
        if intersected_edge_id in related_ntw01_edges_ids:
            # CASE A: The new edge intersects network 01 at an edge coming out of the node
            #         TO where the connection was being performed.
            # --> The connection is drawn from the connecting_node on ntw02 to the intersection point ONLY,
            # -->   because that's where there's already a known connection with ntw01.
            # --> ntw01 edge gets split to add a new node, keeping both sides of the edge.
            # --> drawn edge gets split at the intersection, keeping the side of 'u' (the ntw02 node).
            case = 'CASE A'
            intersected_origin = 'ntw_01'
            drawn_retain_how = 'u'
            if casetype_logs:
                print(f"CASE A: New edge from connecting node {connecting_ntw02_osmid} intersects network 01 at an edge comming out of the node TO where the connection was being performed.")

        elif intersected_edge_id in related_ntw02_edges_ids:
            # CASE B: The new edge intersects network 02 at an edge coming out of the node
            #         FROM where the connection was being performed.
            # --> The connection is drawn from the intersection point to the connection_node on ntw_01 ONLY,
            # -->   because the rest (from intersection point to ntw02) is redundant.
            # --> ntw02 edge gets split to add a new node, keeping both sides of the edge.
            # --> drawn edge gets split at the intersection, keeping the side of the ntw_01 node.
            case = 'CASE B'
            intersected_origin = 'ntw_02'
            if casetype_logs:
                print(f"CASE B: New edge from connecting node {connecting_ntw02_osmid} intersects network 02 at an edge comming out of the node FROM where the connection was being performed.")
            # Find on which end of the drawn edge the connection_ntw01_osmid is located
            if drawn_edge_u == connection_ntw01_osmid:
                drawn_retain_how = 'u'
            elif drawn_edge_v == connection_ntw01_osmid:
                drawn_retain_how = 'v'
            else:
                # Should be unreachable: the drawn edge is always created with the ntw_01 node as 'v'.
                raise ValueError(f"CASE B: ntw_01 node {connection_ntw01_osmid} is neither 'u' ({drawn_edge_u}) "
                                 f"nor 'v' ({drawn_edge_v}) of drawn edge {drawn_edge_id}.")

        else:
            # CASE C: The new edge intersects with either network 01 or network 02 at an UNRELATED edge
            #         (or at an edge drawn previously in this loop).
            # --> Only a new node is created on the intersection and both edges are split,
            # -->   keeping both sides of the intersected edge and both sides of the drawn edge.
            # --> The origin of the intersected edge is registered as 'ntw_01_or_02'.
            case = 'CASE C'
            intersected_origin = 'ntw_01_or_02'
            drawn_retain_how = 'both'
            if casetype_logs:
                print(f"CASE C: New edge from connecting node {connecting_ntw02_osmid} intersects with either network 01 or network 02 at an UNRELATED edge.")

        # ----- OUTPUT REGISTRATION FOR intersection_nodes_3 GEODATAFRAME -----
        # Register how the __INTERSECTED EDGE__ will be clipped
        # (In the intersection, keeping both ends)
        intersection_nodes_3 = _register_intersection(intersection_nodes_3, projected_crs,
                                                      osmid=produced_osmid,
                                                      geometry=intersectionpoint_geom,
                                                      u=intersected_u,
                                                      v=intersected_v,
                                                      key=intersected_key,
                                                      retain_how='both',
                                                      edge_origin=intersected_origin)
        fix_idx+=1

        # Register how the __DRAWN EDGE__ will be clipped
        # (In the same intersection, keeping the part(s) chosen by the case)
        intersection_nodes_3 = _register_intersection(intersection_nodes_3, projected_crs,
                                                      osmid=produced_osmid,
                                                      geometry=intersectionpoint_geom,
                                                      u=drawn_edge_u,
                                                      v=drawn_edge_v,
                                                      key=drawn_edge_key,
                                                      retain_how=drawn_retain_how,
                                                      edge_origin='ntw_join')
        fix_idx+=1
        # ----- OUTPUT REGISTRATION FOR intersection_nodes_3 GEODATAFRAME -----

    # Finished reviewing the current relation. Continue with the next row of connected_nodes.


# Set unique identifiers to int for both outputs
#joined_edges_concat['u'] = joined_edges_concat['u'].astype('int')
#joined_edges_concat['v'] = joined_edges_concat['v'].astype('int')
#joined_edges_concat['key'] = joined_edges_concat['key'].astype('int')

#intersection_nodes_3['osmid'] = intersection_nodes_3['osmid'].astype('int')
#intersection_nodes_3['u'] = intersection_nodes_3['u'].astype('int')
#intersection_nodes_3['v'] = intersection_nodes_3['v'].astype('int')
#intersection_nodes_3['key'] = intersection_nodes_3['key'].astype('int')

# Resolve the placeholder edge_origin 'ntw_01_or_02' in intersection_nodes_3:
# every row still carrying the placeholder is relabelled 'ntw_01' or 'ntw_02'
# depending on which network its intersected edge (edge_id) is registered to in joined_edges_concat.
print("Finding origin for edges assigned 'ntw_01_or_02'.")
# Unique edge_ids registered in joined_edges_concat for each network origin
ntw01_edges_ids = list(joined_edges_concat.loc[joined_edges_concat.ntw_origin=='ntw_01'].edge_id.unique())
ntw02_edges_ids = list(joined_edges_concat.loc[joined_edges_concat.ntw_origin=='ntw_02'].edge_id.unique())
# Build the 'edge_id' column for the intersected edge of each intersection
# (helper lives in the project's src module; presumably derives edge_id from 'u','v','key' - confirm against its definition)
intersection_nodes_3 = src.create_unique_edge_id(intersection_nodes_3)
# Relabel placeholder rows. ntw_01 is resolved first: a row relabelled here no longer matches
# the placeholder, so the ntw_02 mask computed afterwards cannot overwrite it.
idx_01 = (intersection_nodes_3.edge_origin=='ntw_01_or_02') & (intersection_nodes_3.edge_id.isin(ntw01_edges_ids))
intersection_nodes_3.loc[idx_01,'edge_origin'] = 'ntw_01'
idx_02 = (intersection_nodes_3.edge_origin=='ntw_01_or_02') & (intersection_nodes_3.edge_id.isin(ntw02_edges_ids))
intersection_nodes_3.loc[idx_02,'edge_origin'] = 'ntw_02'

##### Time - elapsed seconds since time_1 (time_1 is set outside the visible part of this chunk)
time_2 = time.time()
print(f"TIME: {time_2-time_1} seconds on drawing edges and identifying consequential intersections.")
##### Time

# Show: shape of the result, number of rows flagged by duplicated('edge_id')
# (i.e. repeated occurrences beyond the first), and the last two rows.
print(joined_edges_concat.shape)
print(f"Duplicates: {len(joined_edges_concat.loc[joined_edges_concat.duplicated('edge_id')])}.")
joined_edges_concat.tail(2)

### Previous network's update and removal of duplicates

Previously, the network was updated first and the resulting duplicates were removed afterwards. Function `network_intersections_update()` was later updated so that duplicates are no longer created.

In [None]:
def network_intersections_update(current_ntw_nodes, current_ntw_edges, intersection_nodes, projected_crs="EPSG:6372", function_logs=False):

    """ This function takes points with osmid located over existing edges (intersection_nodes) and updates
        a network. The intersection_nodes become new nodes and each intersected edge gets split
        into separate edges with new 'u', 'v' and 'key' data.

    Args:
        current_ntw_nodes (geopandas.GeoDataFrame): GeoDataFrame containing the nodes from the network to update.
                                                    Requires a unique identifier 'osmid'.
        current_ntw_edges (geopandas.GeoDataFrame): GeoDataFrame containing the edges from the network to update.
                                                    Requires the unique identifiers 'u', 'v' and 'key'.
        intersection_nodes (geopandas.GeoDataFrame): GeoDataFrame containing the points where each split is performed.
                                                    Requires points with 'osmid', the edge to split ('u','v' and 'key')
                                                    and 'retain_how' ('u', 'v' or 'both') stating which part(s) of the
                                                    split edge are kept.
        projected_crs (str, optional): String containing projected crs to be used depending on area of interest.
                                        Defaults to "EPSG:6372".
        function_logs (bool, optional): Boolean that (if True) prints logs during the function's execution. Defaults to False.

    Returns:
        updated_ntw_nodes (geopandas.GeoDataFrame): GeoDataFrame containing the updated nodes for the network
                                                    (columns 'osmid', 'intersecting', 'geometry').
        updated_ntw_edges (geopandas.GeoDataFrame): GeoDataFrame containing the updated edges for the network
                                                    (columns 'u', 'v', 'key', 'intersecting', 'geometry').

    Raises:
        ValueError: If an intersection node has a 'retain_how' value other than 'u', 'v' or 'both'.

    """
    print("Updating network...")

    # ------------------- INPUT USED - READ AND FILTER EDGES
    # Work on reprojected copies so the caller's GeoDataFrames are never modified
    current_ntw_nodes = current_ntw_nodes.copy()
    current_ntw_nodes = current_ntw_nodes.to_crs(projected_crs)
    # Set an identifier to make it easier to locate nodes that resulted from an intersection between networks
    current_ntw_nodes['intersecting'] = 0

    current_ntw_edges = current_ntw_edges.copy()
    current_ntw_edges = current_ntw_edges.to_crs(projected_crs)
    # Set an identifier to make it easier to locate edges that were split
    current_ntw_edges['intersecting'] = 0
    # ------------------- INPUT USED - READ AND FILTER EDGES

    # Iterate over each intersection between both networks (intersection_nodes)
    for _, node in intersection_nodes.iterrows():

        # 3.1 --------------- Split the current_ntw intersected edge using the intersection_node as clipping_point.
        # ------------------- This split (Using function edge_clipping()) creates two separate edges:
        # ------------------- The first edge will be related to the starting_point_gdf (We'll set intersected edge 'u')
        # ------------------- The second edge will be related to the opposite side (Will be intersected edge 'v')

        # Current intersection_node's data
        intersection_node_osmid = node['osmid']
        intersected_u = node['u']
        intersected_v = node['v']
        intersected_key = node['key']
        intersected_retain_how = node['retain_how']

        if function_logs:
            print(f"network_intersections_update(): Iterating over intersection node osmid {intersection_node_osmid}.") #Debugging check
            print(f"network_intersections_update(): Intersected edge with u {intersected_u} type {type(intersected_u)}.") #Debugging check
            print(f"network_intersections_update(): Intersected edge with v {intersected_v} type {type(intersected_v)}.") #Debugging check
            print(f"network_intersections_update(): Intersected edge with key {intersected_key} type {type(intersected_key)}.") #Debugging check

        # Extract current intersection node as a gdf (Becomes clipping_point_gdf in function edge_clipping)
        # (In most cases, osmid is the only value needed to identify the intersection node. In a very specific case
        #  where two edges from current_ntw_edges cross at the very same point where an edge from the other network
        #  created an intersection, 'u', 'v' and 'key' would also be needed)
        intersection_node_idx = (_edge_mask(intersection_nodes, intersected_u, intersected_v, intersected_key)
                                 & (intersection_nodes.osmid == intersection_node_osmid))
        intersection_node = intersection_nodes.loc[intersection_node_idx].copy()
        intersection_node.reset_index(inplace=True, drop=True)

        # Extract current_ntw intersected edge (Becomes edge_gdf in function edge_clipping)
        # First try the edge registered as intersected in the intersection_nodes gdf.
        intersected_edge = current_ntw_edges.loc[_edge_mask(current_ntw_edges, intersected_u, intersected_v, intersected_key)].copy()
        intersected_edge.reset_index(inplace=True, drop=True)

        if len(intersected_edge) == 0:
            # The edge no longer exists (deleted in following steps in this function).
            # This happens because that edge had another intersection along its length and
            # that original unsplit edge was split and deleted.
            # Now a new already split edge lies underneath the current intersection_node.
            # --> Find that split edge's data
            if function_logs:
                print(f"network_intersections_update(): Searching for already split edge originating from edge with u {intersected_u}, v {intersected_v} and key {intersected_key}.")
            # Create a VERY SMALL buffer around the intersection_node
            intersection_node_buffer = intersection_node.buffer(1e-9)
            intersection_node_buffer = gpd.GeoDataFrame(geometry=intersection_node_buffer)
            # Find the split edge underneath the intersection_node
            edge_data = intersection_node_buffer.sjoin(current_ntw_edges)
            if len(edge_data) != 1:
                # If this happens, it means that there are two or more intersection_nodes located exactly
                # in this point over the intersected_edge. The first has already split the edge, the next is attempting.
                # Since the edge doesn't need to be split again, skip this intersection_node.
                continue
            # Rewrite the data of the edge to split with the one found underneath the node
            intersected_u = edge_data.u.unique()[0]
            intersected_v = edge_data.v.unique()[0]
            intersected_key = edge_data.key.unique()[0]

            # Retrieve intersected edge
            intersected_edge = current_ntw_edges.loc[_edge_mask(current_ntw_edges, intersected_u, intersected_v, intersected_key)].copy()
            intersected_edge.reset_index(inplace=True, drop=True)

        # Extract current_ntw intersected edge's u node
        # (Always becomes starting_point_gdf in function edge_clipping when using 'return_all')
        u_node = current_ntw_nodes.loc[(current_ntw_nodes['osmid'] == intersected_u)].copy()
        u_node.reset_index(inplace=True, drop=True)

        # Extract current_ntw intersected edge's v node
        v_node = current_ntw_nodes.loc[(current_ntw_nodes['osmid'] == intersected_v)].copy()
        v_node.reset_index(inplace=True, drop=True)

        # Apply edge_clipping function and assign the corresponding 'u', 'v' or 'key' data.
        if intersected_retain_how == 'both':
            # Clip edge
            split_edge_gdf = edge_clipping(starting_point_gdf = u_node,
                                           edge_gdf = intersected_edge,
                                           clipping_point_gdf = intersection_node,
                                           projected_crs = projected_crs,
                                           return_all = True,
                                           function_logs = function_logs)
            # Assign data
            # When return_all=True in function edge_clipping,
            # assigns 'starting' to the edge related to the starting_point_gdf
            # and 'ending' to edge on the opposite side.
            u_idx = split_edge_gdf.relation=='starting'
            split_edge_gdf.loc[u_idx,'u'] = intersected_u # We assigned 'u' as starting_point_gdf
            split_edge_gdf.loc[u_idx,'v'] = intersection_node_osmid # Intersection
            split_edge_gdf.loc[u_idx,'key'] = 0 #Since this 'u' and 'v' relation is new, key=0

            v_idx = split_edge_gdf.relation=='ending'
            split_edge_gdf.loc[v_idx,'u'] = intersection_node_osmid # Intersection
            split_edge_gdf.loc[v_idx,'v'] = intersected_v # Opposite side
            split_edge_gdf.loc[v_idx,'key'] = 0 #Since this 'u' and 'v' relation is new, key=0

        elif intersected_retain_how == 'u':
            # Clip edge (keep only the part related to node 'u')
            # NOTE(review): function_logs is not forwarded to edge_clipping() here nor in the 'v' case - confirm whether intended.
            split_edge_gdf = edge_clipping(starting_point_gdf = u_node,
                                           edge_gdf = intersected_edge,
                                           clipping_point_gdf = intersection_node,
                                           projected_crs = projected_crs,
                                           return_all = False)
            # Assign data
            split_edge_gdf.loc[0,'u'] = intersected_u
            split_edge_gdf.loc[0,'v'] = intersection_node_osmid # Intersection
            split_edge_gdf.loc[0,'key'] = 0 #Since this 'u' and 'v' relation is new, key=0

        elif intersected_retain_how == 'v':
            # Clip edge (keep only the part related to node 'v')
            split_edge_gdf = edge_clipping(starting_point_gdf = v_node,
                                           edge_gdf = intersected_edge,
                                           clipping_point_gdf = intersection_node,
                                           projected_crs = projected_crs,
                                           return_all = False)
            # Assign data
            split_edge_gdf.loc[0,'u'] = intersection_node_osmid # Intersection
            split_edge_gdf.loc[0,'v'] = intersected_v
            split_edge_gdf.loc[0,'key'] = 0 #Since this 'u' and 'v' relation is new, key=0

        else:
            # Fail loudly with an explicit, catchable error instead of relying on an undefined name
            raise ValueError(f"Error splitting edge with u {intersected_u}, v {intersected_v} and key {intersected_key}. "
                             "Make sure to include in gdf intersection_nodes column 'retain_how' with either 'u','v' or 'both'.")

        # 3.2 --------------- Register changes on current_ntw
        # Set an identifier to make it easier to locate nodes that resulted from an intersection between networks
        intersection_node['intersecting'] = 1
        # Prepare node for concatenation
        intersection_node = intersection_node[['osmid','intersecting','geometry']]
        # Add new node
        current_ntw_nodes = pd.concat([current_ntw_nodes,intersection_node])
        # Reset index
        current_ntw_nodes.reset_index(inplace=True, drop=True)

        # Keep all edges except the edge that was split
        # (Must remove to avoid duplicating edge's geometries)
        current_ntw_edges = current_ntw_edges.loc[~_edge_mask(current_ntw_edges, intersected_u, intersected_v, intersected_key)].copy()
        # Prepare edges for concatenation (explicit copy so the column assignment below does not act on a slice)
        split_edge_gdf = split_edge_gdf[['u','v','key','geometry']].copy()
        # Set an identifier to make it easier to locate edges that were split
        split_edge_gdf['intersecting'] = 1
        # Add new edge
        current_ntw_edges = pd.concat([current_ntw_edges,split_edge_gdf])
        # Reset index
        current_ntw_edges.reset_index(inplace=True, drop=True)

    print("Updated network. Formating output.")
    updated_ntw_nodes = current_ntw_nodes[['osmid','intersecting','geometry']].copy()
    # Set unique identifiers to int
    updated_ntw_nodes['osmid'] = updated_ntw_nodes['osmid'].astype('int')

    updated_ntw_edges = current_ntw_edges[['u','v','key','intersecting','geometry']].copy()
    # Set unique identifiers to int
    updated_ntw_edges['u'] = updated_ntw_edges['u'].astype('int')
    updated_ntw_edges['v'] = updated_ntw_edges['v'].astype('int')
    updated_ntw_edges['key'] = updated_ntw_edges['key'].astype('int')

    # After iterating over all intersection nodes, return result
    return updated_ntw_nodes, updated_ntw_edges


def _edge_mask(gdf, u, v, key):
    """ Returns a boolean mask selecting the rows of gdf whose 'u', 'v' and 'key' match the given values. """
    return (gdf['u'] == u) & (gdf['v'] == v) & (gdf['key'] == key)

In [None]:
def drop_intersection_network_duplicates(nodes_gdf,edges_gdf):

    """ This function was created as a complement to function network_intersections_update().
        Whenever three or more lines intersect exactly at the same point, function network_intersections_update() creates duplicated nodes and edges.
        Those duplicates cannot be easily dropped since lines can be mirrored. Example:
        Line 1: u=1, v=2, key=0, geom=((1,1),(2,1))
        Line 2: u=2, v=1, key=0, geom=((2,1),(1,1))

        This function solves for those duplicated network geometries.

    Args:
        nodes_gdf (geopandas.GeoDataFrame): GeoDataFrame containing the nodes from the recently updated network.
                                            Requires a unique identifier 'osmid'.
        edges_gdf (geopandas.GeoDataFrame): GeoDataFrame containing the edges from the recently updated network.
                                            Requires the unique identifiers 'u', 'v' and 'key'.

    Returns:
        nodes_gdf (geopandas.GeoDataFrame): GeoDataFrame containing the updated nodes without duplicates.
        edges_gdf (geopandas.GeoDataFrame): GeoDataFrame containing the updated edges without duplicates.

    """

    # Copy input to avoid rewriting the caller's data
    nodes_gdf = nodes_gdf.copy()
    edges_gdf = edges_gdf.copy()

    # 1.0 --------------- Dropping duplicates on nodes
    print("Dropping duplicates on updated nodes and edges.")
    # Dropping duplicates on nodes by using osmid and geometry.
    current_len = len(nodes_gdf)
    nodes_gdf.drop_duplicates(subset=['osmid','geometry'],inplace=True)
    updated_len = len(nodes_gdf)
    print(f"Dropped {current_len-updated_len} nodes that had the same osmid and geometry.")

    # 2.0 --------------- Dropping duplicates on edges

    # 2.1 --------------- Identify potential duplicates by comparing edge_id in a regular and inverted order
    # Unique edge id with regular order ('u','v','key')
    edges_gdf = create_unique_edge_id(edges_gdf)
    edges_gdf.rename(columns={'edge_id':'edge_id_1'},inplace=True)
    current_len = len(edges_gdf)
    edges_gdf.drop_duplicates(subset=['edge_id_1'],inplace=True)
    updated_len = len(edges_gdf)
    print(f"Dropped {current_len-updated_len} edges that had the same edge_id.")

    # Unique edge id with inverted order ('v','u','key')
    edges_gdf = create_unique_edge_id(edges_gdf,order='vukey')
    edges_gdf.rename(columns={'edge_id':'edge_id_2'},inplace=True)

    # Identify edges where edge_id is in both regular and inverted order
    # (The inverted ids are collected once in a set so each membership test is O(1))
    inverted_ids = set(edges_gdf.edge_id_2.unique())
    dup_inverted_lst = [edge_id for edge_id in edges_gdf.edge_id_1.unique() if edge_id in inverted_ids]

    # 2.2 --------------- Verify potential duplicates and register one of them to be dropped
    # Verify those edges are duplicated
    confirmed_dup_edge_lst = []
    # Registered mirror relations. Format: {node: [nodes it was already related to]}
    already_dropped_dict = {}
    for edge_id in dup_inverted_lst:
        regular_edge = edges_gdf.loc[edges_gdf.edge_id_1==edge_id]
        inverted_edge = edges_gdf.loc[edges_gdf.edge_id_2==edge_id]

        # Discard different roads by length
        # (Two roads may share start and end point if they each form half a circle)
        regular_edge_length = regular_edge.length.unique()[0]
        inverted_edge_length = inverted_edge.length.unique()[0]
        if regular_edge_length != inverted_edge_length:
            continue

        # Discard loop roads
        # (One road may have the same start-end and end-start if it is a loop. Also, it would have the same length)
        regular_edge_index = regular_edge.index[0]
        inverted_edge_index = inverted_edge.index[0]
        if regular_edge_index == inverted_edge_index:
            continue

        # If reached here, it is duplicated. Check if the mirror relation was already registered (in either direction)
        value_1 = regular_edge.u.unique()[0]
        value_2 = regular_edge.v.unique()[0]
        if (value_2 in already_dropped_dict.get(value_1, ())) or (value_1 in already_dropped_dict.get(value_2, ())):
            print(f"Relation {value_1}<-->{value_2} already registered.")
            continue

        # Else, it has not been registered: confirm that this edge_id will be dropped
        confirmed_dup_edge_lst.append(edge_id)
        # Save the relation that's being dropped.
        # (list.append() returns None, so the list must be mutated in place and never re-assigned from append's result)
        already_dropped_dict.setdefault(value_1, []).append(value_2)
        print(f"Saved relationship {value_1}<-->{value_2} to be dropped.")

    # 2.3 --------------- Drop confirmed relations
    # (Drops the edge whose INVERTED id matches a confirmed edge_id, keeping the regular one)
    current_len = len(edges_gdf)
    edges_gdf = edges_gdf.loc[~edges_gdf.edge_id_2.isin(confirmed_dup_edge_lst)]
    updated_len = len(edges_gdf)
    print(f"Dropped {current_len-updated_len} edges that had the same edge_id but one was inverted.")
    # Drop columns used for dropping duplicates inside drop_intersection_network_duplicates()
    # (Not in place: edges_gdf is a .loc selection at this point)
    edges_gdf = edges_gdf.drop(columns=['edge_id_1','edge_id_2'])

    return nodes_gdf,edges_gdf

### First iteration of the network update that avoids the creation of duplicates (Dictionary was node{original_edge_id{[new_edges]}} instead of edge_id{node{[new_edges]}})

In [1]:
def network_intersections_update(current_ntw_nodes, current_ntw_edges, intersection_nodes, projected_crs,
                                 intersection_logs=False, clipping_logs=False):

    """ This function takes points with osmid located over existing edges (intersection_nodes) and updates
        a network. The intersection_nodes become new nodes and each intersected edge get split 
        into two separate edges with new 'u', 'v' and 'key' data.
    
	Args:
		current_ntw_nodes (geopandas.GeoDataFrame): GeoDataFrame containing the nodes from the network to update.
                                                    Requires a unique identifier 'osmid'.
        current_ntw_edges (geopandas.GeoDataFrame): GeoDataFrame containing the edges from the network to update.
                                                    Requires the unique identifiers 'u ,'v' and 'key'.
        intersection_nodes (geopandas.GeoDataFrame): GeoDataFrame containing the points in where each split is performed.
                                                    Requires points with 'osmid', and the edge to split ('u','v' and 'key').
        projected_crs (str, optional): String containing projected crs to be used depending on area of interest. 
                                        Defaults to "EPSG:6372".
        intersection_logs (bool,optional): Boolean that (if True) prints logs during the current function's execution. Defaults to False.
        clipping_logs (bool,optional): Boolean that (if True) prints logs during the edge_clipping() function's execution. Defaults to False.

                                                 
	Returns:
        updated_ntw_nodes (geopandas.GeoDataFrame): GeoDataFrame containing the updated nodes for the network.
        updated_ntw_edges (geopandas.GeoDataFrame): GeoDataFrame containing the updated edges for the network. 
        
	""" 
    
    print(f"Updating network...")
    
    # ------------------- INPUT USED - READ AND FILTER EDGES
    current_ntw_nodes = current_ntw_nodes.copy()
    current_ntw_nodes = current_ntw_nodes.to_crs(projected_crs)
    # Set an identifier to make it easier to locate nodes that resulted from an intersection between networks
    current_ntw_nodes['intersecting'] = 0

    current_ntw_edges = current_ntw_edges.copy()
    current_ntw_edges = current_ntw_edges.to_crs(projected_crs)
    # Set an identifier to make it easier to locate edges that were split
    current_ntw_edges['intersecting'] = 0
    # ------------------- INPUT USED - READ AND FILTER EDGES

    # Iterate over each intersection between both networks (intersection_nodes)

    # MULTIPLE INTERSECTION ADAPTATION
    # EXPLANATION
    # Function find_intersection_nodes() was originaly designed to create a dataframe that stablishes intersection points between
    # 1 (one) edge of a network and 1 (one) edge of another network. The dataframe stores the IDs of each intersected edge and the intersecting node.
    # However, cases were found where three or more edges intersect exactly at the same point (particularly due to the use of tessellations-generated networks).
    # The result is having two (or more) intersection_nodes located exactly at the same point.
    # e.g. one intersecting edge_1 from network "x" and edge_1 from network "y", 
    # and the other intersecting edge_2 from network "x" and edge_1 from network "y".

    # Following that example, in order to avoid intersecting edge_1 from network "y" twice with the same node,
    # this MULTIPLE INTERSECTION ADAPTATION saves already performed intersections
    performed_intersections = {}
    
    for idx, node in intersection_nodes.iterrows():
        
        # Current intersection_node's data
        intersection_node_osmid = node['osmid']
        intersected_u = node['u']
        intersected_v = node['v']
        intersected_key = node['key']
        intersected_retain_how = node['retain_how']

        # Helps debug:
        #check_lst = [3991,3992]
        #if intersection_node_osmid not in check_lst:
        #    continue

        # 1.1 --------------- MULTIPLE INTERSECTION ADAPTATION CHECK
        # ------------------- This step reviews the performed_intersections dictionary to verify if the current intersection_node has
        # ------------------- already split the current intersected edge

        # Dictionary format:
        # Each intersection_node stores the original_edge_id it intersected and the resulting edge_ids
        # {intersection_node_osmid:{original_edge_id:[new_edge_ids]}}

        # Find original unique edge_id of current edge being intersected
        original_edge_id = str(intersected_u)+str(intersected_v)+str(intersected_key)
        # Check dictionary
        if intersection_node_osmid in performed_intersections.keys():
            nodes_dictionary = performed_intersections[intersection_node_osmid]
            if original_edge_id in nodes_dictionary.keys():
                # Case A: The current intersection_node already intersected the current edge_id.
                # Approach: Do not intersect again (continue)
                if intersection_logs:
                    print(f"network_intersections_update(): Skipping intersection between osmid {intersection_node_osmid} and edge {original_edge_id}. It already happened.")
                continue
            else:
                # Case B: The current edge_id has not been intersected, but the intersection_node has already been used.
                # Approach: Append the current edge_id to the node's intersected list.
                nodes_dictionary[original_edge_id] = [] #Inserting new key for the new edge_id with an empty list of resulting edge_ids.
                performed_intersections[intersection_node_osmid] = nodes_dictionary # Updating nodes_dictionary inside the general dictionary
                if intersection_logs:
                    print(f"network_intersections_update(): Registering intersection between osmid {intersection_node_osmid} and edge {original_edge_id}.")
        else:
            # Case C: The current intersection_node has not intersected any edge_id (First time for this node)
            # Approach: Add to dictionary and create list
            performed_intersections[intersection_node_osmid] = {original_edge_id:[]} # Inserting the node key, nodes_dictionary and its list
            if intersection_logs:
                print(f"network_intersections_update(): First time registering intersection between osmid {intersection_node_osmid} and edge {original_edge_id}.")
        
        # 1.2 --------------- Split the current_ntw's intersected edge using the intersection_node as clipping_point. 
        # ------------------- This split (Using function edge_clipping()) creates two separate edges:
        # ------------------- The first edge will be related to the starting_point_gdf (We'll set intersected edge 'u')
        # ------------------- The second edge will be related to the opposite side (Will be intersected edge 'v')
    
            
        # 1.2.1 - Extract current intersection node as a gdf (Becomes clipping_point_gdf in function edge_clipping)
        # ------- MULTIPLE INTERSECTION ADAPTATION:
        # ------- In cases where the intersection_nodes always intersect two edges only, osmid is only needed to identify the current node.
        # ------- Else, more data and dropping duplicates is required.
        intersection_node_idx = (intersection_nodes.u==intersected_u)&(intersection_nodes.v==intersected_v)&(intersection_nodes.key==intersected_key)&(intersection_nodes.osmid==intersection_node_osmid)
        intersection_node = intersection_nodes.loc[intersection_node_idx].copy()
        intersection_node.drop_duplicates(inplace=True)
        intersection_node.reset_index(inplace=True,drop=True)
        if intersection_logs:
            print(f"network_intersections_update(): Printing intersection node for osmid {intersection_node_osmid}.")
            print(intersection_node)
        
        # 1.2.2 - Extract current_ntw's intersected edge (Becomes edge_gdf in function edge_clipping)
        try:
            # 1.2.2a TRY: Load the edge registered as intersected in the intersection_nodes gdf.
            # (Using the edge's original u('intersected_u'), v('intersected_v') and key('intersected_key'))
            intersected_edge = current_ntw_edges.loc[(current_ntw_edges['u'] == intersected_u) & 
                                                     (current_ntw_edges['v'] == intersected_v) &
                                                     (current_ntw_edges['key'] == intersected_key)].copy()
            intersected_edge.reset_index(inplace=True,drop=True)          
            if len(intersected_edge) == 0:
                # If it has len=0, it means that the edge no longer exists (deleted in following steps in this function).
                # This happens because that edge had another intersection along its lenght and that original unsplit edge was split and deleted.
                fail_try
            if intersection_logs:
                print(f"network_intersections_update(): Intersection_node {intersection_node_osmid} found {len(intersected_edge)} edges.")  
                
        except:
            # 1.2.2b EXCEPT: Find the new (already split) edge by buffering the current intersection_node.
            if intersection_logs:
                print(f"network_intersections_update(): Searching for already split edge originating from edge with u {intersected_u}, v {intersected_v} and key {intersected_key}.")
            # Create a VERY SMALL buffer around the intersection_node
            intersection_node_buffer = intersection_node.buffer(1e-9)
            intersection_node_buffer = gpd.GeoDataFrame(geometry=intersection_node_buffer)
            # Find and rewrite the data of the split edge underneath the intersection_node
            edge_data = intersection_node_buffer.sjoin(current_ntw_edges)
            if len(edge_data) == 1:
                # CASE A: Found one edge touching the intersection_node_buffer.
                intersected_u = edge_data.u.unique()[0]
                intersected_v = edge_data.v.unique()[0]
                intersected_key = edge_data.key.unique()[0]
            elif len(edge_data) == 0:
                # CASE B: Found no edges touching the intersection_node_buffer. (Problem)
                print(f"ERROR: Problem on intersection_node {intersection_node_osmid}. Found {len(edge_data)} edges.")
                intended_crash
            else:
                # CASE C: Found multiple edges touching the intersection_node_buffer.
                # ------- MULTIPLE INTERSECTION ADAPTATION
                # ------- It is possible that the node is intersecting two (or more) edges and that some have already been clipped.
                # ------- Remove from the found edge_data the already intersected edges in order to keep the ones that could be intersected.
                # Create a unique edge_id for all edges found
                edge_data['u'] = edge_data['u'].apply(lambda x: int(round(float(x),0)))
                edge_data['v'] = edge_data['v'].apply(lambda x: int(round(float(x),0)))
                edge_data['key'] = edge_data['key'].apply(lambda x: int(round(float(x),0)))
                edge_data = create_unique_edge_id(edge_data)
                # Discard already intersected edges
                previously_created_edge_ids_lst = [] # Set an empty list of previously created edge_ids by current intersection_node
                nodes_dictionary = performed_intersections[intersection_node_osmid] # Read the node's history
                for original_edge_id in nodes_dictionary.keys(): # For each original_edge_id that the node has split
                    for new_edge_id in nodes_dictionary[original_edge_id]: # Read the resulting new_edge_ids
                        previously_created_edge_ids_lst.append(new_edge_id) # Register them to the list
                edge_data = edge_data.loc[~edge_data.edge_id.isin(previously_created_edge_ids_lst)].copy() # Discard those edges
                # Find the edge to be split
                if len(edge_data) == 1:
                    intersected_u = edge_data.u.unique()[0]
                    intersected_v = edge_data.v.unique()[0]
                    intersected_key = edge_data.key.unique()[0]
                else:    
                    # With the new MULTIPLE INTERSECTION ADAPTATION, if any intersection_node reaches this part of the code, it may be a problem.
                    # It means that either zero or +2 edges with the same u, v and key are located where the intersection_node is.
                    print(f"ERROR: Problem on intersection_node {intersection_node_osmid}.")
                    print(f"Found {len(edge_data)} edges while searching for substitute for edge {original_edge_id}.")
                    print("Printing edge_data")
                    print(edge_data)
                    print(f"Previously created edge_ids_lst: {previously_created_edge_ids_lst}.")
                    intended_crash
            # Retrieve the found edge
            intersected_edge = current_ntw_edges.loc[(current_ntw_edges['u'] == intersected_u) & 
                                                     (current_ntw_edges['v'] == intersected_v) &
                                                     (current_ntw_edges['key'] == intersected_key)].copy()
            intersected_edge.reset_index(inplace=True,drop=True)
            if intersection_logs:
                print(f"Intersection_node {intersection_node_osmid} found already intersected edge u {intersected_u}, v {intersected_v}, key {intersected_key}.")

        # 1.2.3 - Clip the intersected_edge with the intersection_node
        # Extract current_ntw's intersected edge's u node as a point
        # (Always becomes starting_point_gdf in function edge_clipping when using 'return_all')
        u_node = current_ntw_nodes.loc[(current_ntw_nodes['osmid'] == intersected_u)].copy()
        u_node.reset_index(inplace=True,drop=True)
        # Extract current_ntw's intersected edge's v node as a point
        v_node = current_ntw_nodes.loc[(current_ntw_nodes['osmid'] == intersected_v)].copy()
        v_node.reset_index(inplace=True,drop=True)
        # Apply edge_clipping function and assign the corresponding 'u', 'v' or 'key' data.
        if intersected_retain_how == 'both':
            # Clip edge
            split_edge_gdf = edge_clipping(starting_point_gdf = u_node,
                                           edge_gdf = intersected_edge,
                                           clipping_point_gdf = intersection_node,
                                           projected_crs = projected_crs,
                                           return_all = True,
                                           function_logs = clipping_logs)
            if intersection_logs:
                print(f"network_intersections_update(): Intersection_node {intersection_node_osmid} split the edge and created {len(split_edge_gdf)} new edges.")
            # Assign data
            # When return_all=True in function edge_clipping, 
            # assigns 'starting' to the edge related to the starting_point_gdf
            # and 'ending' to edge on the opposite side.
            # Identify split edge 1
            u_idx = split_edge_gdf.relation=='starting'
            split_edge_gdf.loc[u_idx,'u'] = intersected_u # We assigned 'u' as starting_point_gdf
            split_edge_gdf.loc[u_idx,'v'] = intersection_node_osmid # Intersection
            split_edge_gdf.loc[u_idx,'key'] = 0 #Since this 'u' and 'v' relation is new, key=0
            # Identify split edge 2
            v_idx = split_edge_gdf.relation=='ending'
            split_edge_gdf.loc[v_idx,'u'] = intersection_node_osmid # Intersection
            split_edge_gdf.loc[v_idx,'v'] = intersected_v # Opposite side
            split_edge_gdf.loc[v_idx,'key'] = 0 #Since this 'u' and 'v' relation is new, key=0

            # ------- MULTIPLE INTERSECTION ADAPTATION
            # Prepare new_edge_ids to register in dictionary
            new_edge_id_1 = str(intersected_u)+str(intersection_node_osmid)+str(0)
            new_edge_id_2 = str(intersection_node_osmid)+str(intersected_v)+str(0)
            # Read from general dictionary the edge_ids previously created by this intersection_node in this original_edge
            already_created_edge_ids = performed_intersections[intersection_node_osmid][original_edge_id] 
            # Add to the list the new generated edge_ids
            already_created_edge_ids.append(new_edge_id_1)
            already_created_edge_ids.append(new_edge_id_2)
            # Insert data to general dictionary
            performed_intersections[intersection_node_osmid][original_edge_id] = already_created_edge_ids
        
        elif intersected_retain_how == 'u':
            # Clip edge
            split_edge_gdf = edge_clipping(starting_point_gdf = u_node,
                                           edge_gdf = intersected_edge,
                                           clipping_point_gdf = intersection_node,
                                           projected_crs = projected_crs,
                                           return_all = False,
                                           function_logs = clipping_logs)
            if intersection_logs:
                print(f"network_intersections_update(): Intersection_node {intersection_node_osmid} split the edge and created {len(split_edge_gdf)} new edges.")
            # Assign data
            split_edge_gdf.loc[0,'u'] = intersected_u
            split_edge_gdf.loc[0,'v'] = intersection_node_osmid # Intersection
            split_edge_gdf.loc[0,'key'] = 0 #Since this 'u' and 'v' relation is new, key=0
        elif intersected_retain_how == 'v':
            # Clip edge
            split_edge_gdf = edge_clipping(starting_point_gdf = v_node,
                                           edge_gdf = intersected_edge,
                                           clipping_point_gdf = intersection_node,
                                           projected_crs = projected_crs,
                                           return_all = False,
                                           function_logs = clipping_logs)
            if intersection_logs:
                print(f"network_intersections_update(): Intersection_node {intersection_node_osmid} split the edge and created {len(split_edge_gdf)} new edges.")
            # Assign data
            split_edge_gdf.loc[0,'u'] = intersection_node_osmid # Intersection
            split_edge_gdf.loc[0,'v'] = intersected_v
            split_edge_gdf.loc[0,'key'] = 0 #Since this 'u' and 'v' relation is new, key=0
        else:
            print(f"ERROR splitting edge with u {intersected_u}, v {intersected_v} and key {intersected_key}.")
            print("Make sure to include in gdf intersection_nodes column 'retain_how' with either 'u','v' or 'both'.")
            intended_crash    
    
        # 1.3 --------------- Register changes on current_ntw
        # ------------------- The intersection_node is concatenated into current_ntw_nodes.
        # ------------------- The split edge(s) is(are) concatenated into current_ntw_edges.

        # 1.3.1 - Register node
        # Set an identifier to make it easier to locate nodes that resulted from an intersection between networks
        intersection_node['intersecting'] = 1
        # Prepare node for concatenation
        intersection_node = intersection_node[['osmid','intersecting','geometry']]
        # Add new node
        current_ntw_nodes = pd.concat([current_ntw_nodes,intersection_node])
        # Reset index
        current_ntw_nodes.reset_index(inplace=True,drop=True)
        if intersection_logs:
                print(f"network_intersections_update(): Concatenated {len(intersection_node)} nodes.")

        # 1.3.1 - Register edge(s)
        # Keep all edges except the edge that was split
        # (Must remove to avoid duplicating edge's geometries)
        current_ntw_edges = current_ntw_edges.loc[~((current_ntw_edges['u'] == int(intersected_u)) &
                                                    (current_ntw_edges['v'] == int(intersected_v)) &
                                                    (current_ntw_edges['key'] == int(intersected_key)))].copy()
        # Prepare edges for concatenation
        split_edge_gdf = split_edge_gdf[['u','v','key','geometry']]
        # Set an identifier to make it easier to locate edges that were split
        split_edge_gdf['intersecting'] = 1
        # Add new edge
        current_ntw_edges = pd.concat([current_ntw_edges,split_edge_gdf])
        # Reset index
        current_ntw_edges.reset_index(inplace=True,drop=True)
        if intersection_logs:
            print(f"network_intersections_update(): Concatenated {len(split_edge_gdf)} edges.")

    # 1.4 --------------- Format final output
    # ------------------- Filters for columns of interest and sets column types
    updated_ntw_nodes = current_ntw_nodes[['osmid','intersecting','geometry']].copy()
    # Set unique identifiers to int
    updated_ntw_nodes['osmid'] = updated_ntw_nodes['osmid'].astype('int')
    del current_ntw_nodes
    updated_ntw_edges = current_ntw_edges[['u','v','key','intersecting','geometry']].copy()
    # Set unique identifiers to int
    updated_ntw_edges['u'] = updated_ntw_edges['u'].astype('int')
    updated_ntw_edges['v'] = updated_ntw_edges['v'].astype('int')
    updated_ntw_edges['key'] = updated_ntw_edges['key'].astype('int')
    del current_ntw_edges

    print(f"Finished updating network.")
    
    # After iterating over both networks, return result
    return updated_ntw_nodes, updated_ntw_edges

### Part 02, Step 04: update cell as it was before the function that removes duplicates from the network was created

In [None]:
# Run network_intersections_update() once per edge origin.
# The work is split by column 'edge_origin' because network_intersections_update
# iterates osmid by osmid, and intersection_nodes_3 may hold 2 rows for a single osmid
# (e.g. a consequential_intersection cuts 1 ntw_01 edge and 1 ntw_join edge at a point
#  that is better suited as the connection point, so both edges are cut at that point).

# Nodes accumulator: replaced by the output of every network_intersections_update call.
# (This produces duplicated nodes, which are dropped after the loop.)
joined_nodes_fix = concatenated_nodes.copy()
# Edges accumulator: grows with each iteration's joined_edges_current.
# Every iteration only updates the edges that belong to one origin.
# (Avoids a crash from splitting several close edges with the same node at once.)
joined_edges_fix = gpd.GeoDataFrame()

edge_origins = ['ntw_01','ntw_02','ntw_join']

for current_edge_origin in edge_origins:
    print(f"Fixing edges from edge_origin {current_edge_origin}.")

    # Edges whose network of origin is the one being processed
    edges_in_origin = joined_edges_concat['ntw_origin'] == current_edge_origin
    current_edges = joined_edges_concat[edges_in_origin]

    # Intersection nodes whose intersected edge was originally located in this origin
    nodes_in_origin = intersection_nodes_3['edge_origin'] == current_edge_origin
    intersection_nodes_3_current = intersection_nodes_3[nodes_in_origin]

    # (Second round of intersections, derived from new nodes created from network_02 to edges on network_01)
    joined_nodes_fix, joined_edges_current = network_intersections_update(
        current_ntw_nodes=joined_nodes_fix,
        current_ntw_edges=current_edges,
        intersection_nodes=intersection_nodes_3_current,
        projected_crs=projected_crs,
    )

    # Keep track of the edge_ids that were created to join both networks
    # (edges flagged with intersecting == 1 in the function's output)
    joined_edges_current = create_unique_edge_id(joined_edges_current)
    join_idx = joined_edges_current['intersecting'] == 1
    edge_ids = list(joined_edges_current.loc[join_idx, 'edge_id'].unique())
    all_join_edgeids = all_join_edgeids + edge_ids

    # Append this origin's edges to the accumulated result
    joined_edges_fix = pd.concat([joined_edges_fix, joined_edges_current])

# Nodes: remove helper column 'intersecting', drop duplicated rows and renumber the index
joined_nodes_fix = (
    joined_nodes_fix
    .drop(columns=['intersecting'])
    .drop_duplicates()
    .reset_index(drop=True)
)
# Edges: remove helper column 'intersecting' and renumber the index
joined_edges_fix = (
    joined_edges_fix
    .drop(columns=['intersecting'])
    .reset_index(drop=True)
)

# Show
print(joined_nodes_fix.dtypes)
print(joined_nodes_fix.shape)
joined_nodes_fix.head(2)