### Title - Graph-based road segmentation method for urban traffic accident risk analysis

### Step.0 | Packages to use |

In [1]:
###0. Packages to use
import osmnx as ox
from shapely.geometry import Point, LineString, MultiLineString
from shapely import ops

import geopandas as gpd
import pandas as pd
import numpy as np
import folium
import itertools 
from tqdm import tqdm
import warnings 

warnings.filterwarnings("ignore")

---
**OSMnx ox.graph_from_place의 network_type**

- `drive` : 운전 가능한 공공 도로 (but not service roads)
- `drive_service` : 운전가능한 도로 (including service roads)
- `walk` : 보행자가 사용할 수 있는 모든 거리와 경로 확보 (도보는 방향성을 무시합니다)
- `bike` : 자전거가 이용할 수 있는 거리와 경로
- `all` : 비공개(private) 도로를 제외한 모든 OSM 거리와 경로를 다운로드
- `all_private` : 비공개 액세스를 포함하여 모든 OSM 거리와 경로를 다운로드
---
cf. edges data 
- keys : u,v 노드가 같은 것이 있는 edges는 둘 중 하나는 0, 나머지 하나는 1 값을 갖는다. 
- ref : 범위가 큰 도로의 중앙 분리대가 있는 곳의 짝을 표시하는 변수이다(모든 분리 도로가 완벽하게 표시되어 있지 않다).
---

### Step.01 |Data Loader|

1. **원하는 지역 nodes, edges 데이터 로드** 
    - 도로는 osmnx 패키지를 이용하여 두 개의 그래프를 다운로드 한다. 
    - 사용 함수 `osmnx.graph_from_place`
    - 파라미터 옵션 `network_type="drive_service", simplify=True`인 그래프를 메인으로 사용하며, 터널의 자세한 위치 정보를 얻기 위하여
    `network_type="drive_service", simplify=False`인 그래프를 추가로 불러옵니다.
2. **방향성 제거**
    - osmnx 그래프는 통행 분석에 특화되어 있어, 양방향 통행이 가능한 도로는 정방향 linestring과 역방향 linestring을 가진 두 개의 edge로 분리되어 있습니다. 이 그래프를 그대로 교통사고 위험도 분석에 사용하면 데이터 매핑에서 오류가 생길 수 있으므로, linestring이 겹치는 edge는 하나만 남기고 제거합니다.
3. **고속도로 제거**
    - 해당 분할법은 도시지역 교통사고 위험도 분석 하는 것이기 때문에 고속도로는 제거합니다.


cf. Graph Load 시 nodes_data의 state(모든 교차점, 끝 점) 1로 부여, 터널 2로 부여 하여 반환한다.

In [3]:
def Graph_Loader(place, names=None, motorway=None):
    """Download the drivable network of ``place`` and prepare road and tunnel layers.

    Two OSMnx graphs are fetched with ``network_type="drive_service"``: a
    simplified one that becomes the base road layer, and an unsimplified one
    from which the edges carrying a ``tunnel`` value are taken.  Each layer
    then (1) loses its reverse-direction duplicate edges, (2) is restricted
    to either city roads or motorway-class roads, and (3) gets a ``state``
    column on its nodes (1 for the road layer, 2 for the tunnel layer).

    Args:
        place: Place query forwarded to ``ox.graph_from_place``.
        names: Unused; kept so existing callers keep working.
        motorway: If truthy, keep only the motorway-class edges (highway
            values matching motorway, motorway_link, rest_area, services,
            trunk, trunk_link).  Otherwise (default) keep only the
            remaining city-road edges.

    Returns:
        A 3-tuple ``(road, tunnel, tunnel_mask)`` where ``road`` and
        ``tunnel`` are ``[nodes, edges, connect_node_inf]`` lists
        (``connect_node_inf`` is the set of node ids shared by city-road and
        motorway-class edges of that layer) and ``tunnel_mask`` is the set of
        node ids shared by tunnel and non-tunnel edges of the unsimplified
        graph.
    """

    def edge_endpoint_ids(edges):
        """Return the set of node ids used as ``u`` or ``v`` by ``edges``."""
        if len(edges) == 0:
            # np.array([]) is 1-D, so the column slice below would raise
            # IndexError for an empty edge frame.
            return set()
        return set(np.array(edges.index.tolist())[:, :2].reshape(-1))

    ###########################################################################
    ### Data load
    def graph_from_place(place):
        # Base layer: simplified graph.
        G1 = ox.graph_from_place(place, network_type="drive_service", simplify=True)
        # Tunnel layer: unsimplified graph, which keeps every intermediate node.
        G2 = ox.graph_from_place(place, network_type="drive_service", simplify=False)
        base_nodes, base_edges = ox.graph_to_gdfs(G1)
        G2_nodes, G2_edges = ox.graph_to_gdfs(G2)

        # The "tunnel" column is absent when no edge carries the tag.
        if "tunnel" in G2_edges.columns:
            is_tunnel = G2_edges["tunnel"].notnull().values
        else:
            is_tunnel = np.zeros(len(G2_edges), dtype=bool)
        tunnel_edges = G2_edges.loc[is_tunnel]
        non_tunnel_edges = G2_edges.loc[~is_tunnel]

        tunnel_node_ids = edge_endpoint_ids(tunnel_edges)
        # list(...) because pandas no longer accepts a set as an indexer.
        tunnel_nodes = G2_nodes.loc[list(tunnel_node_ids)]
        # Nodes where a tunnel edge meets a non-tunnel edge.
        tunnel_mask = tunnel_node_ids & edge_endpoint_ids(non_tunnel_edges)

        return [base_nodes, base_edges], [tunnel_nodes, tunnel_edges], tunnel_mask

    ###########################################################################
    ### Remove road directionality
    def create_undirected_graph(data_edges, names):
        # Work on a copy that remembers each row's position, so the surviving
        # rows can be taken from the untouched original afterwards.
        edge = data_edges.copy()
        edge["idx"] = range(len(edge))

        def geometry_convert_list(data):
            # Flatten the line's coordinates into one list of numbers.
            return list(itertools.chain(*data.coords[:]))

        def convert_list_to_tuple(data):
            # Lists are unhashable; a sorted tuple is hashable and, for the
            # flattened geometry, identical for a line and its reverse.
            return tuple(sorted(data)) if isinstance(data, list) else data

        edge["geometry"] = [geometry_convert_list(geom) for geom in edge["geometry"].values.tolist()]

        # Convert every column that holds at least one list (this includes
        # the flattened geometry column created just above).
        for column in edge.columns:
            values = edge[column].values.tolist()
            if any(isinstance(value, list) for value in values):
                edge[column] = [convert_list_to_tuple(value) for value in values]

        edge = edge.drop_duplicates(subset=['geometry'])

        print(f"\t{names} | create_undirected : rink {len(data_edges)-len(edge)}개 삭제")
        return data_edges.iloc[edge.idx.values.tolist()]

    ###########################################################################
    ### Split motorway-class roads from city roads
    def select_motorway_or_city_road(data_nodes, data_edges, names, motorway=None):
        # motorway / its links / rest areas / service areas / trunk / its links
        excluded_tags = ("motorway", "motorway_link", "rest_area", "services", "trunk", "trunk_link")
        # ``tag in highway`` is a substring test when highway is a string and
        # a membership test when it is a list, exactly as in the chained
        # per-tag filters this replaces.
        is_city_road = np.array(
            [not any(tag in highway for tag in excluded_tags) for highway in data_edges.highway],
            dtype=bool,
        )
        city_road_edges = data_edges.loc[is_city_road]
        motorway_road_edges = data_edges.loc[~is_city_road]

        # Nodes where a city road meets a motorway-class road.
        connect_node_inf = edge_endpoint_ids(city_road_edges) & edge_endpoint_ids(motorway_road_edges)

        selected_edges = motorway_road_edges if motorway else city_road_edges
        # Keep only the nodes still referenced by the selected edges.
        selected_nodes = data_nodes.loc[list(edge_endpoint_ids(selected_edges))]

        print(f"\t{names} | select motorway or city road : rink {len(data_edges)-len(selected_edges)}개, node : {len(data_nodes)-len(selected_nodes)} 삭제")
        return selected_nodes, selected_edges, connect_node_inf

    ###########################################################################
    ### Per-layer preparation
    def base_prepared_data(data, names, motorway):
        nodes, edges = data
        # Remove road directionality.
        edges = create_undirected_graph(edges, names)
        # Remove motorway-class roads (or keep only them when ``motorway``).
        nodes, edges, connect_node_inf = select_motorway_or_city_road(nodes, edges, names, motorway)
        # state column: 1 for road-layer nodes, 2 for tunnel-layer nodes.
        if names == "tunnel":
            nodes["state"] = 2
        else:
            nodes["state"] = 1
        return nodes, edges, connect_node_inf

    ### Load the data
    print("1. Graph_Load -START-")
    base, tunnel, tunnel_mask = graph_from_place(place)
    ### Remove directionality and motorways / add the state column
    base_nodes, base_edges, base_connect_node_inf = base_prepared_data(base, "road", motorway)
    tunnel_nodes, tunnel_edges, tunnel_connect_node_inf = base_prepared_data(tunnel, "tunnel", motorway)
    return [base_nodes, base_edges, base_connect_node_inf], [tunnel_nodes, tunnel_edges, tunnel_connect_node_inf], tunnel_mask

---

### Step.02 |Data Preparation|

1. **고속도로 연결에 사용되었던 노드 제거 및 노드 제거 후 노드에 의해 연결되어 있던 링크 연장 (모든 도로, 터널 따로 진행)**
2. **간선도로, 집산도로 분리**
3. **간선도로 데이터에서 간선도로와 집산도로가 연결되어 있던 노드 제거 및 노드 제거 후 노드에 의해 연결되어 있던 링크 연장** 
4. **노드 데이터에 터널 노드 정보 추가 / 새로운 노드 및 링크 추가를 위한 노드 ID 초기화 (ex. 23~25 -> 0~2)**
5. **끝 점 노드 state(상태정보) -1로 변경**
6. **추가된 터널 노드를 통한 링크 데이터 분할**


In [4]:
def Basic_Preparation(road_gdfs, tunnel_gdfs, tunnel_mask):
    """Merge, classify and re-index the road and tunnel layers.

    Steps, in order:
      1. Merge road edges that were split at nodes connecting to removed
         motorway-class roads; do the same for the tunnel layer, then merge
         tunnel edges again around ``tunnel_mask``.
      2. Split both layers into main roads (highway value matching primary,
         secondary or tertiary) and the remaining roads.
      3. Merge main-road edges that were split at nodes shared with
         non-main roads.
      4. Add the tunnel nodes to each node table and renumber node ids from 0.
      5. Set ``state`` to -1 for nodes used by exactly one edge endpoint.
      6. Select a fixed set and order of columns.

    Args:
        road_gdfs: ``[nodes, edges, connect_node_inf]`` for the road layer,
            in the layout ``Graph_Loader`` returns.
        tunnel_gdfs: ``[nodes, edges, connect_node_inf]`` for the tunnel layer.
        tunnel_mask: Set of node ids shared by tunnel and non-tunnel edges.

    Returns:
        ``[main_road_nodes, main_road_edges, main_road_mask]`` and
        ``[non_main_road_nodes, non_main_road_edges, non_main_road_mask]``;
        each mask is the set of (renumbered) node ids present in both the
        road node table and the tunnel node table before they were combined.
    """

    def edge_endpoint_ids(edges):
        """Return the set of node ids used as ``u`` or ``v`` by ``edges``."""
        if len(edges) == 0:
            # np.array([]) is 1-D, so the column slice below would raise.
            return set()
        return set(np.array(edges.index.tolist())[:, :2].reshape(-1))

    # Linestring extension (edge merging)
    def linestring_extension(data, simple_extension=False):
        """Merge chains of edges and return the resulting ``(nodes, edges)``.

        ``data`` is ``[nodes, edges, connect_node_inf]``.  With
        ``simple_extension=False`` only edges touching ``connect_node_inf``
        are merge candidates, and merging happens through those nodes when
        exactly two edge endpoints use them.  With ``simple_extension=True``
        every edge is a candidate and merging stops at
        ``connect_node_inf``, at end points and at crossings.
        """
        #######################################################################
        # Information needed before merging:
        # 1. split candidate / untouched edges, 2. find chain-terminating
        # nodes, 3. build the adjacency information.

        # Split edges into those touching a connection node (merge candidates)
        # and the rest.  Not used for simple extension.
        def split_use_edge_inf(data_edges, connect_node_inf):
            touches_connect = np.array(
                [u in connect_node_inf or v in connect_node_inf
                 for u, v in np.array(data_edges.index.tolist())[:, :2]],
                dtype=bool,
            )
            return [data_edges.loc[touches_connect], data_edges.loc[~touches_connect]]

        # Return the ids of the nodes at which a merged chain must stop.
        def check_node_state(data_edges, connect_node_inf, simple_extension):
            endpoints = np.array(data_edges.index.tolist())[:, :2].reshape(-1)
            # How many edge endpoints use each node.
            node_ids, degrees = np.unique(endpoints, return_counts=True)
            all_nodes = set(node_ids)
            # End points (used once) and crossings (used more than twice).
            terminals = set(node_ids[(degrees == 1) | (degrees > 2)])
            if simple_extension:
                # Simple merge: join everything except end points, crossings
                # and the explicitly protected nodes.
                return terminals | connect_node_inf
            # Otherwise every node that is NOT a connection node also stops
            # a chain, so merging only happens through connection nodes.
            return terminals | (all_nodes - connect_node_inf)

        # Build the node adjacency of the edge data.
        def graph_inf(data_edges, connect_node_inf, simple_extension):
            edges_index = np.array(data_edges.index.tolist())[:, :2].tolist()
            # Keys are inserted in sorted node-id order.
            graph = {node: [] for node in sorted(edge_endpoint_ids(data_edges))}
            for src, dst in edges_index:
                graph[src].append(dst)
                graph[dst].append(src)

            list_to_exclude = check_node_state(data_edges, connect_node_inf, simple_extension)
            return graph, list_to_exclude

        #######################################################################
        # Linestring connection and data integration

        # Collect the nodes reachable from ``start_node`` without passing
        # through a node in ``list_to_exclude``, then remove every visited
        # node from ``graph``.  ``data_nodes`` is unused.
        # NOTE(review): the traversal order below is kept exactly as
        # originally written; ``need_visited`` grows while it is iterated.
        def dfs(graph, start_node, list_to_exclude, data_nodes):
            visited, need_visited = list(), list()

            need_visited.append(start_node)
            # While there are still nodes that need a visit
            while need_visited:
                for i in need_visited:
                    check_point = []
                    if i not in visited:
                        visited.extend([i])
                        if i in list_to_exclude:
                            # Excluded nodes are recorded but not expanded.
                            node = []
                        else:
                            node = graph.pop(i)
                        for j in node:
                            if j not in visited:
                                check_point.extend([j])
                                visited.extend([j])
                                if j in list_to_exclude:
                                    need_visited.extend([])
                                else:
                                    need_visited.extend(graph[j])
                # Stop once the last processed node added nothing new.
                if len(check_point) == 0:
                    break

            for i in visited:
                # Some visited nodes were already popped above.
                graph.pop(i, None)
            return graph, visited

        def connect_point_list(data_nodes, data_edges, connect_node_inf, simple_extension):
            graph, list_to_exclude = graph_inf(data_edges, connect_node_inf, simple_extension)
            # ``list_to_exclude`` stays a set: it is only used for membership
            # tests, which are O(1) on a set.
            start_and_end = list(set(data_nodes.index) - list_to_exclude)
            end_point_list = []
            n = len(graph)
            print(f"\t | Linestring_extraction")

            # First start a traversal from every non-excluded node ...
            while start_and_end:
                graph, visited = dfs(graph, start_and_end[0], list_to_exclude, data_nodes)
                end_point_list.append(visited)
                for j in visited:
                    try:
                        start_and_end.remove(j)
                    except ValueError:
                        # Visited node that was never a pending start node.
                        pass
                print(f"\t {n-len(graph)}/{n}",end="\r")
            # ... then consume whatever is left in the graph.
            while graph:
                graph, visited = dfs(graph, next(iter(graph)), list_to_exclude, data_nodes)
                end_point_list.append(visited)
                print(f"\t {n-len(graph)}/{n}",end="\r")
            return end_point_list, list_to_exclude

        # Group id of an edge: 9999 when both ends are chain-terminating
        # nodes (edge is left as is), otherwise the group of its
        # non-terminating end (or of its first end when neither terminates).
        def connect_check(data, mask, list_to_exclude):
            if data[0] in list_to_exclude and data[1] in list_to_exclude:
                return 9999
            if data[0] in list_to_exclude:
                return mask[data[1]]
            return mask[data[0]]

        def connect_inf(data_nodes, data_edges, connect_node_inf, simple_extension):
            end_connect_inf, list_to_exclude = connect_point_list(data_nodes, data_edges, connect_node_inf, simple_extension)
            # node id -> index of the visited group containing it (a node
            # found in several groups keeps the last one).
            group_of = dict()
            for group_id, members in enumerate(end_connect_inf):
                for node in members:
                    group_of[node] = group_id

            # Copy so the caller's edge frame does not gain a helper column.
            data_edges = data_edges.copy()
            data_edges["cn_inf"] = [
                connect_check(pair, group_of, list_to_exclude)
                for pair in np.array(data_edges.index.tolist())[:, :2].tolist()
            ]
            return data_edges

        ####### Integrate the edge data
        def merge_inf(data_nodes, need_connect_edge, need_non_connect_edge):
            def merge_information(data_edge_subset, data_nodes):
                # Flatten one level: list values are expanded, others kept.
                def check_in_list(data):
                    list_data = []
                    for i in data:
                        if isinstance(i, list):
                            list_data.extend(i)
                        else:
                            list_data.append(i)
                    return list_data

                # Empty frame with the same columns, to receive the result.
                data_edge_target = data_edge_subset.iloc[:0]

                if len(data_edge_subset) > 1:
                    feature = []
                    for i in data_edge_subset.columns:
                        if i == "geometry":
                            multi_line = MultiLineString(data_edge_subset[i].values)
                            # NOTE(review): assumes the lines merge into one
                            # LineString; ``.coords`` below needs that.
                            geometry = ops.linemerge(multi_line)
                            feature.append(geometry)
                        elif i == "length":
                            feature.append(sum(data_edge_subset[i]))
                        else:
                            # Distinct values: a list if several, else the
                            # single value.  (Kept local; the original wrote
                            # these into module globals.)
                            distinct = list(set(check_in_list(data_edge_subset[i])))
                            feature.append(distinct if len(distinct) > 1 else distinct[0])
                    node_info = edge_endpoint_ids(data_edge_subset)
                    nodes_subset = data_nodes.loc[list(node_info)]

                    # The merged edge runs between the nodes located at the
                    # first and last coordinate of the merged line.
                    u = nodes_subset.loc[[i == Point(geometry.coords[0]) for i in nodes_subset.geometry.values]].index.values[0]
                    v = nodes_subset.loc[[i == Point(geometry.coords[-1]) for i in nodes_subset.geometry.values]].index.values[0]
                    key = 0

                    idx = [u,v,key]

                    connect_edge_data = gpd.GeoDataFrame(np.array([feature]),
                                                            columns= data_edge_subset.columns,
                                                            index=np.array([idx]).T.tolist())
                    connect_edge_data.index.names = ["u","v","key"]
                    data_edge_target = gpd.GeoDataFrame(pd.concat([data_edge_target, connect_edge_data]))
                else:
                    data_edge_target = gpd.GeoDataFrame(pd.concat([data_edge_target, data_edge_subset]))
                return data_edge_target

            for i in set(need_connect_edge.cn_inf):
                need_connect_edge_subset = need_connect_edge.loc[need_connect_edge.cn_inf == i]
                if i != 9999:
                    merge_edge_data = merge_information(need_connect_edge_subset, data_nodes)
                else:
                    # Edges between two terminating nodes are kept unchanged.
                    merge_edge_data = need_connect_edge_subset
                need_non_connect_edge = pd.concat([need_non_connect_edge, merge_edge_data])
            need_non_connect_edge = need_non_connect_edge.drop(["cn_inf"], axis=1)
            return need_non_connect_edge

        #########################
        ## Build the connection information
        data_nodes, data_edges, connect_node_inf = data

        if simple_extension:
            need_connect_edge = data_edges  # every edge is a merge candidate
            need_non_connect_edge = need_connect_edge.iloc[:0]  # empty placeholder
        else:
            need_connect_edge, need_non_connect_edge = split_use_edge_inf(data_edges, connect_node_inf)
        #########################
        ### Linestring extension
        if len(need_connect_edge) == 0:
            # Nothing to merge; the helpers below cannot index an empty frame.
            edges = need_non_connect_edge
        else:
            need_connect_node = data_nodes.loc[list(edge_endpoint_ids(need_connect_edge))]
            need_connect_edge = connect_inf(need_connect_node, need_connect_edge, connect_node_inf, simple_extension)
            edges = merge_inf(data_nodes, need_connect_edge, need_non_connect_edge)
        # Keep only the nodes still referenced by an edge.
        mask = set(data_nodes.index) & edge_endpoint_ids(edges)
        nodes = data_nodes.loc[list(mask)]
        ###########################
        return nodes, edges

    ###########################################################################
    ### Redefine node ids (0..n-1) so that new nodes can be appended later
    def generate_new_node_id(data_nodes, data_edges):
        if "connect_inf" in data_nodes.columns:
            # Nodes flagged connect_inf == 1 receive the lowest new ids.
            data_nodes = data_nodes.sort_values(by='connect_inf', ascending=False)
        node_osmid_dict = dict()
        for idx, i in enumerate(list(data_nodes.index)):
            node_osmid_dict[i] = idx
        ### data_nodes -> redefine node ids
        # pd.Index replaces pd.Int64Index, which newer pandas no longer has.
        data_nodes.index = pd.Index([node_osmid_dict[i] for i in data_nodes.index], dtype="int64", name="osmid")

        ### data_edges -> redefine node ids
        edges_multi_index = np.array([[node_osmid_dict[i[0]] for i in data_edges.index],
                                    [node_osmid_dict[i[1]] for i in data_edges.index],
                                    [i[2] for i in data_edges.index]])
        data_edges.index = pd.MultiIndex.from_arrays(edges_multi_index, names=["u", "v", "key"])
        return data_nodes, data_edges, node_osmid_dict

    ###########################################################################
    ### Classify main roads (main_road) and the remaining roads (non_main_road)
    def split_based_on_mainroad(data_nodes, data_edges):
        print("\t || split_based_on_mainroad")
        ### edges filter
        # ``tag in highway`` is a substring test for strings and a membership
        # test for lists, as in the per-tag filters this replaces.
        main_tags = ("primary", "secondary", "tertiary")
        is_main = np.array(
            [any(tag in highway for tag in main_tags) for highway in data_edges.highway],
            dtype=bool,
        )
        main_road_edges = data_edges.loc[is_main]
        non_main_road_edges = data_edges.loc[~is_main]

        ### nodes filter
        main_road_nodes_mask = edge_endpoint_ids(main_road_edges)
        non_main_road_nodes_mask = edge_endpoint_ids(non_main_road_edges)
        # Nodes where a main road meets a non-main road.
        connect_node_inf = main_road_nodes_mask & non_main_road_nodes_mask

        main_road_nodes = data_nodes.loc[list(main_road_nodes_mask)]
        non_main_road_nodes = data_nodes.loc[list(non_main_road_nodes_mask)]

        non_main_road_nodes["connect_inf"] = [1 if node in connect_node_inf else 0 for node in non_main_road_nodes.index]
        return [main_road_nodes, main_road_edges, connect_node_inf], [non_main_road_nodes, non_main_road_edges]

    ### Change the state of end points
    def end_point_check(data_nodes, data_edges):
        if len(data_edges) == 0:
            return data_nodes
        endpoints = np.array(data_edges.index.tolist())[:, :2].reshape(-1)
        node_ids, degrees = np.unique(endpoints, return_counts=True)
        # Nodes used by exactly one edge endpoint.
        end_point = set(node_ids[degrees == 1])
        # Only nodes whose state is 1 are changed to -1.
        data_nodes["state"] = [
            -1 if idx in end_point and state == 1 else state
            for idx, state in zip(data_nodes.index, data_nodes.state)
        ]
        return data_nodes

    ###########################################################################
    # Merge the tunnel node information and reset the node ids
    def combine_data(data_1, data_2):
        basenodes, baseedges = data_1
        tunnelnodes, tunneledges = data_2

        # Drop tunnel edges that do not lie within any road edge.
        # ``predicate`` is the current name of the deprecated ``op`` argument.
        mask = set(tunneledges[["geometry"]].sjoin(baseedges[["geometry"]], how="left", predicate="within").dropna().index)
        tunneledges = tunneledges.loc[list(mask)]
        tunnelnodes = tunnelnodes.loc[list(edge_endpoint_ids(tunneledges))]

        # Ids present in both tables: the tunnel row replaces the road row.
        mask = set(basenodes.index) & set(tunnelnodes.index)
        basenodes = basenodes.loc[list(set(basenodes.index) - set(tunnelnodes.index))]
        basenodes = pd.concat([basenodes, tunnelnodes])
        nodes, edges, id_dict = generate_new_node_id(basenodes, baseedges)
        mask = set([id_dict[i] for i in mask])
        return [nodes, edges, mask]

    ### Remove road nodes and extend links (roads left after motorway removal)
    print("2. Data Preparation")

    road_nodes, road_edges = linestring_extension(road_gdfs, simple_extension=False)
    ### Tunnel layer: merge around motorway connection nodes when there are
    ### any, then merge again around the tunnel/non-tunnel boundary nodes.
    if len(tunnel_gdfs[-1]) == 0:
        tunnel_nodes, tunnel_edges, _ = tunnel_gdfs
    else:
        tunnel_nodes, tunnel_edges = linestring_extension(tunnel_gdfs, simple_extension=False)
    tunnel_nodes, tunnel_edges = linestring_extension([tunnel_nodes, tunnel_edges, tunnel_mask], simple_extension=True)
    ### Split main roads from the remaining roads
    main_road, non_main_road = split_based_on_mainroad(road_nodes, road_edges)
    main_tunnel, non_main_tunnel = split_based_on_mainroad(tunnel_nodes, tunnel_edges)
    ### Extend main roads that were split by non-main roads
    main_road_nodes, main_road_edges = linestring_extension(main_road, simple_extension=False)
    ### Add the detailed tunnel node information
    main_road_nodes, main_road_edges, main_road_mask= combine_data([main_road_nodes, main_road_edges], main_tunnel[:2])
    non_main_road_nodes, non_main_road_edges, non_main_road_mask = combine_data(non_main_road, non_main_tunnel)
    ### Change the state of end points
    main_road_nodes = end_point_check(main_road_nodes, main_road_edges)
    non_main_road_nodes = end_point_check(non_main_road_nodes, non_main_road_edges)
    ### Fix the column set and order
    main_road_nodes = main_road_nodes[['y', 'x', 'street_count', 'ref', 'highway', 'geometry', 'state']]
    main_road_edges = main_road_edges[['osmid', 'oneway', 'lanes', 'highway', 'maxspeed', 'length', 'geometry',
                                       'name', 'ref', 'bridge', 'tunnel', 'service', 'access', 'junction','width']]
    non_main_road_nodes = non_main_road_nodes[['y', 'x', 'street_count', 'ref', 'highway', 'geometry', 'state', 'connect_inf']]
    non_main_road_edges = non_main_road_edges[['osmid', 'oneway', 'lanes', 'highway', 'maxspeed', 'length', 'geometry',
                                               'name', 'ref', 'bridge', 'tunnel', 'service', 'access', 'junction','width']]

    return [main_road_nodes, main_road_edges, main_road_mask], [non_main_road_nodes, non_main_road_edges, non_main_road_mask]

> 노드생성, 노드 기반 엣지 생성 (Nodes_Edges_Segmentation)
- **Mode**
    - **cross** : *교차로를 중심으로 노드생성* | *엣지 분할* 
    - **single_way** : *단일로 노드 생성* | *엣지 분할* 
    - **dummy** : 단일로 노드 생성과 같은 원리 *dummy 노드만 생성*
    - **dummy_cross** : *간선도로 교차로 cluster를 위한 dummy 노드만 생성*

In [5]:
def Nodes_Edges_Segmentation(data_nodes, data_edges, mask=None, meter = 150, mode = None, tunnel=False, dummy = None):
    """Add split nodes along edges and, unless only dummies are wanted, cut the edges there.

    Parameters
    ----------
    data_nodes, data_edges :
        Node and edge GeoDataFrames. Nodes are indexed by an integer id and
        edges by ``(u, v, key)``.
    mask :
        Only read when ``tunnel`` is true: ids of ``state == 2`` nodes that
        must not be used as cut points.
    meter :
        Spacing in metres between the points interpolated along each edge.
    mode :
        ``"cross"`` keeps only the interpolated points next to the edge ends
        (new nodes get ``state`` 3); any other value keeps every interior
        point (``state`` 4).
    tunnel :
        When true no node is created; edges with a non-null ``tunnel`` value
        are cut at the existing ``state == 2`` nodes.
    dummy :
        ``"dummy"`` or ``"dummy_cross"`` only appends helper nodes
        (``state`` 999) and leaves the edges untouched.

    Returns
    -------
    ``data_nodes`` alone for dummy runs, otherwise ``(data_nodes, data_edges)``.
    """
    def euclid_distance_cal(meter):
        # Ratio of the planar (coordinate-unit) distance to the great-circle
        # distance in metres for one fixed pair of points, scaled by `meter`.
        dis_1 = ox.distance.euclidean_dist_vec(36.367658 , 127.447499, 36.443928, 127.419678)
        dis_2 = ox.distance.great_circle_vec(36.367658 , 127.447499, 36.443928, 127.419678)

        return dis_1/dis_2 * meter
    ######################################################################################################################
    def generate_new_nodes(data_nodes, data_edges, meter, mode = None, dummy = None):
        dummy_run = dummy == "dummy" or dummy == "dummy_cross"
        segment_node = []  # one list of new points per processed edge
        # [edge position, number of new nodes, edge (u, v, key)] per split edge;
        # handed to the edge-splitting step so it only looks at edges that
        # really received a node.
        edge_mask = []
        # Dead ends (node ids used by exactly one edge endpoint) are not
        # treated as junctions in "cross" processing.
        if mode == "cross" or dummy == "dummy_cross":
            endpoint_ids, endpoint_counts = np.unique(
                np.array(data_edges.index.tolist())[:, :2], return_counts=True)
            not_cross_node = set(endpoint_ids[endpoint_counts == 1].tolist())
        # The spacing does not depend on the edge, so compute it once.
        step = euclid_distance_cal(meter)
        for idx, i in enumerate(data_edges["geometry"]):
            if dummy == "dummy_cross":
                # Short "link" edges get every interior point, all other
                # edges only the points next to their ends.
                edge_row = data_edges.iloc[idx]
                if edge_row.length < 180 and "link" in edge_row.highway:
                    mode = None
                else:
                    mode = "cross"
            elif dummy != "dummy" and mode == "cross":
                # Edges that already belong to a cluster, and tunnel edges
                # flagged 21, are not split in "cross" mode.
                edge_row = data_edges.iloc[idx]
                if edge_row.cluster != -1 or edge_row.tunnel == 21:
                    continue
            new_node = list(ox.utils_geo.interpolate_points(i, step))
            # Two points are just the edge's own ends: nothing to add.
            if len(new_node) <= 2:
                continue
            if dummy_run:
                # Dummy runs only collect points; no edge bookkeeping.
                if len(new_node) == 3:
                    segment_node.append([new_node[1]])
                elif mode == "cross":
                    endpoints = data_edges.index[idx][:2]
                    if len(set(endpoints) & not_cross_node) == 1:
                        # Keep only the point next to the end that is not a dead end.
                        keep = -2 if endpoints[0] in not_cross_node else 1
                        segment_node.append([new_node[keep]])
                    else:
                        segment_node.append([new_node[1], new_node[-2]])
                else:
                    segment_node.append(new_node[1:-1])
            else:
                edge_id = data_edges.index[idx]
                if len(new_node) == 3:
                    segment_node.append([new_node[1]])
                    edge_mask.append([idx, 1, edge_id])
                elif mode == "cross":
                    endpoints = set(edge_id[:2])
                    junction_ends = endpoints - not_cross_node
                    if len(junction_ends) == 1 and len(endpoints) == 2:
                        # One end is a dead end: no node is created on that side.
                        keep = -2 if edge_id[0] in not_cross_node else 1
                        segment_node.append([new_node[keep]])
                        edge_mask.append([idx, 1, edge_id])
                    elif len(junction_ends) == 0:
                        # Both ends are dead ends: leave the edge alone.
                        continue
                    else:
                        segment_node.append([new_node[1], new_node[-2]])
                        edge_mask.append([idx, 2, edge_id])
                else:
                    # Single-way split: every point except the edge's own ends.
                    segment_node.append(new_node[1:-1])
                    edge_mask.append([idx, len(new_node) - 2, edge_id])

        # Flatten the per-edge lists into one list of points.
        segment_node = list(itertools.chain(*segment_node))
        # interpolate_points yields (x, y); the frame stores y first.
        segment_xy = [[pt[1], pt[0]] for pt in segment_node]
        segment_nodes = gpd.GeoDataFrame(segment_xy, columns=["y","x"])
        segment_nodes["geometry"] = [Point(pt) for pt in segment_node]
        # state 3: junction split node / 4: single-way split node / 999: dummy node
        if dummy_run:
            segment_nodes["state"] = 999
        else:
            mask = [entry[0] for entry in edge_mask]
            segment_nodes["state"] = 3 if mode == "cross" else 4
            # Remember which edge (u, v, key) every new node lies on.
            segment_edge_id = [[entry[2]] * entry[1] for entry in edge_mask]
            segment_nodes["edge_id"] = list(itertools.chain(*segment_edge_id))
        # New ids continue after the largest existing id, so they stay unique
        # even when data_nodes is not sorted by id.
        first_new_id = int(data_nodes.index.max()) + 1
        segment_nodes.index = pd.Index(
            np.arange(first_new_id, first_new_id + len(segment_nodes), dtype="int64"), name="osmid")
        data_nodes = pd.concat([data_nodes,segment_nodes])
        if dummy_run:
            return data_nodes
        else:
            return data_nodes, mask
    ################################################################################################################################################
    ### Step6(utils) - linestring split, select_data, link length calculation, find the node
    def Line_split(line_string, point):
        # Both arguments are one-row frames; cut the row's line at the row's point.
        line_string = line_string.geometry.values[0]
        point = point.geometry.values[0]
        # The point has to lie on the line.
        assert point.distance(line_string) < 1e-12, "point가 linestring 위에 있지 않습니다!!"
        if len(line_string.coords) == 2:
            Line1,Line2 = LineString([Point(line_string.coords[0]), point]),\
                LineString([point, Point(line_string.coords[1])])
            return Line1, Line2
        else:
            # Locate the piece of the line that carries the point; when
            # several pieces qualify the last one is used.
            check_num = None
            for i in range(len(line_string.coords)-1):
                check_Line = LineString([Point(line_string.coords[i]),Point(line_string.coords[i+1])])
                if point.distance(check_Line) < 1e-10: check_num = i
            L1_point = [Point(line_string.coords[i]) for i in range(0, check_num+1)]
            L2_point = [Point(line_string.coords[i]) for i in range(check_num+1, len(line_string.coords))]
            L1_point.append(point)
            L2_point.insert(0, point)
            Line1,Line2 = LineString(L1_point),LineString(L2_point)
            return Line1, Line2

    def select_data(data, point, index, tunnel=False):
        # Separate the edges that touch `point` from the rest. Outside tunnel
        # mode, when several edges touch the point, only the one whose index
        # equals `index` is taken out.
        MASK = [point.geometry.iloc[0].distance(data.geometry.iloc[i]) <1e-10 for i  in range(len(data))]
        using_data_1 = data.iloc[[not i for i  in MASK]]
        deleted_data_1 = data.iloc[MASK]
        if tunnel:
            return using_data_1, deleted_data_1
        if len(deleted_data_1) > 1:
            using_data_2 = deleted_data_1.loc[[index != i for i in deleted_data_1.index]]
            deleted_data_2 = deleted_data_1.loc[[index == i for i in deleted_data_1.index]]
            using_data = pd.concat([using_data_1, using_data_2])
            return using_data, deleted_data_2
        return using_data_1, deleted_data_1

    def linstring_length(linestring):
        # Length in metres: sum of great-circle distances between consecutive
        # vertices (coords are (x, y), great_circle_vec takes lat first).
        linestring = linestring.coords
        return sum([ox.distance.great_circle_vec(linestring[i][1],linestring[i][0],linestring[i+1][1],linestring[i+1][0]) for i in range(len(linestring)-1)])

    def find_node(data_nodes, data_edge_target):
        # Rebuild the (u, v, key) index of every edge from the nodes that
        # intersect its first and last vertex; the key is kept.
        index = []
        for i in tqdm(range(len(data_edge_target))):
            data_edge_target_uv = data_edge_target.iloc[[i]]
            data_edge_target_uv["geometry"] = Point(data_edge_target.geometry.values[i].coords[0])#U
            u = data_nodes[["geometry"]].sjoin(data_edge_target_uv[["geometry"]], how="left", predicate="intersects").dropna(axis=0).index[0]
            data_edge_target_uv["geometry"] = Point(data_edge_target.geometry.values[i].coords[-1]) #V
            v = data_nodes[["geometry"]].sjoin(data_edge_target_uv[["geometry"]], how="left", predicate="intersects").dropna(axis=0).index[0]
            key = data_edge_target.iloc[[i]].index[0][2] #key
            index.append([u,v,key])
        index = np.array(index).T
        data_edge_target.index = pd.MultiIndex.from_arrays(index, names=["u", "v", "key"])
        return data_edge_target

    ### Step6 - Create edges based on conditions
    def generate_new_edges(data_nodes,data_edges, mask, mode = None , tunnel = False):
        # Separate the edges that will be cut from the ones that will not, so
        # the loop below only has to search the former.
        if tunnel:
            has_tunnel_value = data_edges.tunnel.notnull()
            data_edge_target = data_edges.loc[has_tunnel_value]
            data_edge_non_target = data_edges.loc[~has_tunnel_value]
        else:
            split_positions = set(mask)
            data_edge_target = data_edges.iloc[sorted(mask)]
            data_edge_non_target = data_edges.loc[[pos not in split_positions for pos in range(len(data_edges))]]

        if mode == "cross":
            # Junction split: cut at the state-3 nodes.
            data_nodes_target = data_nodes.loc[data_nodes["state"] == 3]
        elif tunnel:
            # Tunnel split: cut at the state-2 nodes that are not in `mask`.
            tunnel_node_ids = set(data_nodes.loc[data_nodes.state == 2].index) - set(mask)
            data_nodes_target = data_nodes.loc[list(tunnel_node_ids)]
        else:
            # Single-way split: cut at the state-4 nodes.
            data_nodes_target = data_nodes.loc[data_nodes["state"] == 4]

        # Look the two rewritten columns up by name rather than by position.
        length_pos = data_edges.columns.get_loc("length")
        geometry_pos = data_edges.columns.get_loc("geometry")
        for i in range(len(data_nodes_target)):
            new_node = data_nodes_target.iloc[[i]]
            if tunnel:
                data_edge_target, deleted_data = select_data(data_edge_target, new_node, None, tunnel)
            else:
                UVKEY = new_node["edge_id"].values[0]
                data_edge_target, deleted_data = select_data(data_edge_target, new_node, UVKEY)
            Line1, Line2 = Line_split(deleted_data,new_node)
            for line in [Line1, Line2]:
                # Both halves temporarily keep the parent's (u, v, key);
                # find_node assigns the real endpoints afterwards.
                idxs = [deleted_data.index[0][0], deleted_data.index[0][1], deleted_data.index[0][2]]

                # Copy the parent's attributes, replacing length and geometry.
                features = deleted_data.iloc[[0]].values[0]
                features[length_pos], features[geometry_pos] = linstring_length(line), line
                # Reshape the object row instead of rebuilding it from a list,
                # so list-valued cells are not re-parsed as nested arrays.
                added_edge_data = gpd.GeoDataFrame(features.reshape(1, -1), columns= deleted_data.columns, index=np.array([idxs]).T.tolist())
                added_edge_data.index.names = ["u","v","key"]
                data_edge_target = gpd.GeoDataFrame(pd.concat([data_edge_target, added_edge_data]))
        data_edge_target = find_node(data_nodes, data_edge_target)
        data_edges = gpd.GeoDataFrame(pd.concat([data_edge_target, data_edge_non_target]))
        if not(tunnel):
            data_nodes = data_nodes.drop(["edge_id"],axis=1)
        return data_nodes, data_edges
    #####################################################################################################################################################
    if tunnel:
        data_nodes, data_edges = generate_new_edges(data_nodes, data_edges, mask = mask, tunnel = tunnel)
        return data_nodes, data_edges
    else:
        if dummy == "dummy" or dummy == "dummy_cross":
            data_nodes = generate_new_nodes(data_nodes, data_edges, meter = meter, dummy = dummy)
            return data_nodes
        else:
            data_nodes, mask = generate_new_nodes(data_nodes, data_edges, meter = meter, mode = mode)
            data_nodes, data_edges = generate_new_edges(data_nodes, data_edges, mask = mask, mode = mode)
            return data_nodes, data_edges

---
- 터널 종류 분류
    - 간선도로
        - 교차로 내 터널 state : 21
        - 도로 연장 터널 state : 22
    - 집산 도로   
        - 도로 연장 터널, 지하 주차장 터널 state : 2
---

- 군집화

In [6]:
def generate_cluster_data(data_nodes, data_edges, cluster_meter = None, split_meter = None, min_sample = None, mode=None):
    """Cluster nodes with DBSCAN and copy the cluster ids onto the edges.

    Parameters
    ----------
    data_nodes, data_edges :
        Node frame (integer index; ``y``, ``x``, ``state`` columns) and edge
        frame (``(u, v, key)`` index; ``tunnel`` and ``geometry`` columns).
    cluster_meter, split_meter, min_sample :
        Only used with ``mode="cross"``: DBSCAN radius in metres, spacing in
        metres of the helper nodes, and DBSCAN ``min_samples``.
    mode :
        ``"cross"`` clusters junctions, ``"tunnel_classification"`` labels
        tunnel edges and their nodes 21 or 22, anything else clusters the
        tunnel nodes (``state == 2``).

    Returns
    -------
    ``(data_nodes, data_edges)`` rebuilt from the processed and untouched parts.
    """
    def euclid_distance_cal(meter):
        # Ratio of the planar (coordinate-unit) distance to the great-circle
        # distance in metres for one fixed pair of points, scaled by `meter`.
        dis_1 = ox.distance.euclidean_dist_vec(36.367658 , 127.447499, 36.443928, 127.419678)
        dis_2 = ox.distance.great_circle_vec(36.367658 , 127.447499, 36.443928, 127.419678)

        return dis_1/dis_2 * meter

    def generate_cluster(data_nodes, cluster_meter, min_sample = 4):
        # DBSCAN on the (y, x) columns; labels are written to a "cluster" column.
        from sklearn.cluster import DBSCAN

        X_main_road = data_nodes[["y","x"]]

        main_road_dbscan = DBSCAN(eps=euclid_distance_cal(cluster_meter), min_samples=min_sample)
        main_road_cluster = main_road_dbscan.fit_predict(X_main_road)
        data_nodes["cluster"] = main_road_cluster
        return data_nodes

    def redefine_cluster_id(data_nodes, data_edges, mode = None):
        if mode == "cross":
            # A cluster with a single remaining node is an outlier: back to -1.
            cluster_sizes = data_nodes.cluster.value_counts()
            singletons = set(cluster_sizes.index[cluster_sizes == 1])
            labels = [-1 if cls in singletons else cls for cls in data_nodes.cluster]
            # Renumber the surviving clusters 0, 1, 2, ...; -1 stays -1.
            renumber = {old: new for new, old in enumerate(sorted(set(labels) - {-1}))}
            renumber[-1] = -1
            data_nodes.cluster = [renumber[cls] for cls in labels]
        else:
            # Tunnel cluster ids start at 20000. Item assignment is required
            # for "tunnel_cluster": attribute assignment would not create a
            # new column.
            data_nodes["tunnel_cluster"] = data_nodes.cluster.values + 20000
            data_nodes.cluster = data_nodes.cluster.values + 20000

        # An edge gets its endpoints' cluster when both agree, otherwise -1.
        cluster_mask = dict(zip(data_nodes.cluster.index, data_nodes.cluster.values))
        edge_endpoints = np.array(data_edges.index.tolist())[:,:2].tolist()
        data_edges["cluster"] = [cluster_mask[u] if cluster_mask[u] == cluster_mask[v] else -1
                                 for u, v in edge_endpoints]
        return data_nodes, data_edges

    def tunnel_data_prepared(data_nodes, data_edges):
        # An edge is flagged 1 when both of its endpoints are state-2 nodes, else 0.
        tunnel_node_idx = set(data_nodes.loc[data_nodes.state == 2].index)
        data_edges["tunnel"] = [1 if (uv[0] in tunnel_node_idx and uv[1] in tunnel_node_idx) else 0
                                for uv in np.array(data_edges.index.tolist())[:,:2]]
        return data_nodes, data_edges

    def split_tunnel_data(data_nodes, data_edges):
        tunnel_edges = data_edges.loc[data_edges.tunnel == 1]
        tunnel_nodes = data_nodes.loc[data_nodes.state == 2]
        non_tunnel_edges = data_edges.loc[data_edges.tunnel != 1]
        non_tunnel_nodes = data_nodes.loc[data_nodes.state != 2]
        return tunnel_nodes, tunnel_edges, non_tunnel_nodes, non_tunnel_edges

    def split_cross_data(data_nodes, data_edges):
        # Edges flagged 21 are kept out of junction clustering, together with
        # the nodes that only those edges use.
        cross_edges = data_edges.loc[data_edges.tunnel != 21]
        non_cross_edges = data_edges.loc[data_edges.tunnel == 21]

        cross_node_mask = set(np.array(cross_edges.index.tolist())[:,:2].reshape(-1))
        all_edge_nodes = set(np.array(data_edges.index.tolist())[:,:2].reshape(-1))
        # .loc needs a list-like indexer; a set is not accepted by recent pandas.
        cross_nodes = data_nodes.loc[list(cross_node_mask)]
        non_cross_nodes = data_nodes.loc[list(all_edge_nodes - cross_node_mask)]
        return cross_nodes, cross_edges, non_cross_nodes, non_cross_edges

    ###################################################################################################################################################
    if mode == "cross":
        cross_nodes, cross_edges, non_cross_nodes, non_cross_edges = split_cross_data(data_nodes, data_edges)
        # Helper nodes (state 999) take part in the clustering and are
        # dropped again right after it.
        cross_dummy = Nodes_Edges_Segmentation(cross_nodes, cross_edges, meter=split_meter, dummy = "dummy_cross")
        cross_dummy = generate_cluster(cross_dummy, cluster_meter=cluster_meter,min_sample=min_sample)
        cross_nodes = cross_dummy.loc[cross_dummy.state != 999]
        cross_nodes, cross_edges = redefine_cluster_id(cross_nodes, cross_edges, mode=mode)

        data_nodes = pd.concat([cross_nodes, non_cross_nodes])
        data_edges = pd.concat([cross_edges, non_cross_edges])
        return data_nodes, data_edges

    elif mode == "tunnel_classification":
        tunnel_nodes, tunnel_edges, non_tunnel_nodes, non_tunnel_edges = split_tunnel_data(data_nodes, data_edges)
        # Temporarily represent each tunnel edge by its centroid so it can be
        # clustered together with the non-tunnel nodes.
        tunnel_edges["x"] = tunnel_edges.geometry.centroid.x
        tunnel_edges["y"] = tunnel_edges.geometry.centroid.y
        geometry = tunnel_edges["geometry"].values
        tunnel_edges["geometry"] = tunnel_edges.geometry.centroid

        tunnel_edges_nodes = pd.concat([tunnel_edges[["y","x","geometry"]], non_tunnel_nodes[["y","x","geometry"]]])

        # Restore the real edge geometry and drop the helper columns.
        tunnel_edges["geometry"] = geometry
        tunnel_edges = tunnel_edges.drop(["x","y"],axis=1)

        tunnel_nodes_cluster = generate_cluster(tunnel_edges_nodes, 30, min_sample = 4)
        # Rows that came from edges carry a tuple index; keep only those.
        tunnel_nodes_cluster = tunnel_nodes_cluster.loc[[isinstance(i, tuple) for i in tunnel_nodes_cluster.index]]
        # Cluster labels that contain exactly two tunnel edges.
        # NOTE(review): the noise label -1 also qualifies when exactly two
        # tunnel edges are noise - confirm that this is intended.
        cluster_sizes = tunnel_nodes_cluster.cluster.value_counts()
        mask = cluster_sizes.index[cluster_sizes == 2]

        mask_21 = set(tunnel_nodes_cluster.loc[tunnel_nodes_cluster.cluster.isin(mask)].index)
        tunnel_edges["tunnel"] = [21 if edge_id in mask_21 else 22 for edge_id in tunnel_edges.index]

        # Copy the 21/22 label of each tunnel edge onto both of its endpoints.
        tunnel_state = dict()
        for i,j in zip(tunnel_edges.index,tunnel_edges.tunnel):
            tunnel_state[i[0]] = j
            tunnel_state[i[1]] = j

        # Tunnel nodes that no tunnel edge touches keep their current state.
        tunnel_nodes['state'] = [tunnel_state.get(node_id, state)
                                 for node_id, state in zip(tunnel_nodes.index, tunnel_nodes.state)]
        data_nodes = pd.concat([non_tunnel_nodes, tunnel_nodes])
        data_edges = pd.concat([non_tunnel_edges, tunnel_edges])
        return data_nodes, data_edges

    else: # plain tunnel clustering
        data_nodes, data_edges = tunnel_data_prepared(data_nodes, data_edges)
        tunnel_nodes, tunnel_edges, non_tunnel_nodes, non_tunnel_edges = split_tunnel_data(data_nodes, data_edges)
        tunnel_dummy = Nodes_Edges_Segmentation(tunnel_nodes, tunnel_edges, meter = 20, dummy = "dummy")
        tunnel_dummy = generate_cluster(tunnel_dummy, cluster_meter=50, min_sample = 2)
        assert sum(tunnel_dummy.cluster == -1) == 0, "터널 군집이 제대로 되지 않았습니다."
        tunnel_nodes = tunnel_dummy.loc[tunnel_dummy.state != 999]
        tunnel_nodes, tunnel_edges = redefine_cluster_id(tunnel_nodes, tunnel_edges)

        data_nodes = pd.concat([non_tunnel_nodes, tunnel_nodes])
        data_edges = pd.concat([non_tunnel_edges, tunnel_edges])
        return data_nodes, data_edges


---

### 간선도로 군집 전파

In [7]:
def Advance_main_road_cluster(data_nodes, data_edges):
    """Propagate junction cluster ids from nodes onto neighbouring edges.

    Parameters
    ----------
    data_nodes :
        Node frame with ``cluster`` and ``state`` columns, indexed by node id.
    data_edges :
        Edge frame with ``cluster`` and ``length`` columns, indexed by
        ``(u, v, key)``.

    Returns
    -------
    ``(data_nodes, data_edges)``. An edge's ``cluster`` cell is a single id,
    or a list of two ids when the edge joins two different clusters. The
    returned nodes are the ones produced by the first pass; node changes
    made during the second pass are discarded.
    """
    def nodes_set_cluster_of_based_on_edges(data_nodes, data_edges, mode="base"):
        if mode == "base":
            edge_endpoints = [(idx[0], idx[1]) for idx in data_edges.index.tolist()]
            edge_clusters = data_edges.cluster.tolist()

            # Node id -> cluster taken from its edges. All start nodes are
            # processed before all end nodes; a -1 never overwrites a value
            # that is already stored. Plain Python iteration is used because
            # the cluster column may mix scalars and lists.
            based_on_edges_cluster = dict()
            for side in (0, 1):
                for endpoints, cls in zip(edge_endpoints, edge_clusters):
                    node_id = endpoints[side]
                    unassigned = not isinstance(cls, list) and cls == -1
                    if node_id in based_on_edges_cluster and unassigned:
                        continue
                    based_on_edges_cluster[node_id] = cls

            # Nodes with state 21 keep their own cluster; all others take the
            # cluster of their edges.
            data_nodes.cluster = [based_on_edges_cluster[node_id] if st != 21 else data_nodes.cluster.loc[node_id]
                                  for node_id, st in zip(data_nodes.index.tolist(), data_nodes.state)]
        else:
            data_nodes.cluster = [-1 if i == 9999 else i for i in data_nodes.cluster]
        return data_nodes

    def node_cluster_lookup(data_nodes):
        # Node id -> cluster; scalar ids are normalised to int, lists are kept.
        return {idx: (cls if isinstance(cls, list) else int(cls))
                for idx, cls in zip(data_nodes.index, data_nodes.cluster)}

    def joined_cluster(first, second):
        # Combine two differing endpoint clusters, ignoring -1: one id when
        # only one is left, otherwise a list of both.
        cls = list({first, second} - {-1})
        return cls[0] if len(cls) == 1 else cls

    def advance_cluster(data_nodes, data_edges, step = 1):
        if step == 1:
            cluster_max_num = max(data_nodes.cluster)

            # Every state-1 node without a cluster gets a fresh cluster id.
            regenerate_cluster = list()
            for cls, st in zip(data_nodes.cluster, data_nodes.state):
                if st == 1 and cls == -1:
                    cluster_max_num = cluster_max_num + 1
                    regenerate_cluster.append(cluster_max_num)
                else:
                    regenerate_cluster.append(cls)

            data_nodes.cluster = regenerate_cluster

            cluster_id = node_cluster_lookup(data_nodes)

            # Edges whose endpoints disagree take the endpoints' cluster(s);
            # the others keep their own value.
            one_advance_cluster = []
            for idx, cls in zip(data_edges.index.tolist(), data_edges.cluster):
                first, second = cluster_id[idx[0]], cluster_id[idx[1]]
                if first != second:
                    one_advance_cluster.append(joined_cluster(first, second))
                else:
                    one_advance_cluster.append(cls)

            data_edges.cluster = one_advance_cluster
        else:
            cluster_id = node_cluster_lookup(data_nodes)

            # Second pass: only edges that are still -1, at most 150 long and
            # between two differing scalar clusters are updated.
            two_advance_cluster = []
            for idx, cls, lng in zip(data_edges.index.tolist(), data_edges.cluster, data_edges["length"]):
                if isinstance(cls, list) or lng > 150:
                    two_advance_cluster.append(cls)
                    continue
                first, second = cluster_id[idx[0]], cluster_id[idx[1]]
                if first == second or isinstance(first, list) or isinstance(second, list) or cls != -1:
                    two_advance_cluster.append(cls)
                else:
                    two_advance_cluster.append(joined_cluster(first, second))

            data_edges.cluster = two_advance_cluster
        return data_nodes, data_edges
    #step 1
    data_nodes = nodes_set_cluster_of_based_on_edges(data_nodes, data_edges)
    data_nodes, data_edges = advance_cluster(data_nodes, data_edges)
    #step 2
    # The node frame is only a scratch input for the second pass; the result
    # of the first pass is what gets returned.
    data_nodes_copy = data_nodes.copy()
    data_nodes = nodes_set_cluster_of_based_on_edges(data_nodes, data_edges)
    data_nodes, data_edges = advance_cluster(data_nodes, data_edges, step = 2)
    data_nodes = data_nodes_copy
    return data_nodes, data_edges

### 집산도로 연결 군집

In [8]:
def generate_non_main_road_cluster(data_nodes, data_edges):
    """Label every edge with the connected group of nodes it belongs to.

    Groups are found by walking the edge graph; the walk does not continue
    past nodes whose ``connect_inf`` is 1. The label is written to a new
    ``M_category`` column of ``data_edges``; an edge whose two endpoints both
    have ``connect_inf == 1`` gets 9999.

    Parameters
    ----------
    data_nodes :
        Node frame indexed by node id, with a ``connect_inf`` column.
    data_edges :
        Edge frame indexed by ``(u, v, key)``.

    Returns
    -------
    ``(data_nodes, data_edges)`` with ``data_nodes`` sorted by index.
    """
    def start_point_and_graph_inf(data_nodes, data_edges):
        edges_index = np.array(data_edges.index.tolist())[:,:2].tolist()

        # Nodes used by exactly one edge endpoint are the dead ends the walks
        # start from (ascending id order).
        endpoint_ids, endpoint_counts = np.unique(np.array(edges_index).reshape(-1), return_counts=True)
        start_end_point = endpoint_ids[endpoint_counts == 1].tolist()

        # Adjacency keyed by node id, so ids need not be 0..n-1.
        graph = {node_idx: [] for node_idx in data_nodes.index.tolist()}
        for src,dst in edges_index:
            graph[src].append(dst)
            graph[dst].append(src)
        return graph, start_end_point

    def dfs(graph, start_node, main_road_connect_nodes):
        # Collect everything reachable from start_node. Nodes in
        # main_road_connect_nodes are recorded but never expanded.
        visited = [start_node]
        seen = {start_node}
        stack = [] if start_node in main_road_connect_nodes else [start_node]
        while stack:
            current = stack.pop()
            for neighbour in graph[current]:
                if neighbour in seen:
                    continue
                seen.add(neighbour)
                visited.append(neighbour)
                if neighbour not in main_road_connect_nodes:
                    stack.append(neighbour)

        # Visited nodes leave the graph so that later walks start elsewhere.
        for node in visited:
            graph.pop(node, None)
        return graph, visited

    def connect_point_list(data_nodes, data_edges):
        graph_inf, start_and_end = start_point_and_graph_inf(data_nodes, data_edges)
        main_road_connect_nodes = set(data_nodes.loc[data_nodes.connect_inf == 1].index)
        end_point_list = []
        n = len(graph_inf)

        # First walk from every dead end ...
        while start_and_end:
            graph_inf, visited = dfs(graph_inf, start_and_end[0], main_road_connect_nodes)
            end_point_list.append(visited)
            reached = set(visited)
            start_and_end = [point for point in start_and_end if point not in reached]
            print(f"{n-len(graph_inf)}/{n}",end="\r")
        # ... then from whatever is left in the graph.
        while graph_inf:
            graph_inf, visited = dfs(graph_inf, next(iter(graph_inf)), main_road_connect_nodes)
            end_point_list.append(visited)
            print(f"{n-len(graph_inf)}/{n}",end="\r")
        return end_point_list

    def cluster_check(main_road_connect_mask, data, mask):
        # Both endpoints have connect_inf == 1 -> 9999; otherwise the group of
        # the endpoint that does not (the first endpoint when neither does).
        if data[0] in main_road_connect_mask:
            if data[1] in main_road_connect_mask:
                return 9999
            return mask[data[1]]
        return mask[data[0]]

    def generate_connect_inf(data_nodes, data_edges):
        end = connect_point_list(data_nodes, data_edges)

        # Node id -> number of the group it was visited in (the last group
        # wins for nodes that appear in several).
        mc = {node: group for group, members in enumerate(end) for node in members}

        main_road_connect_mask = set(data_nodes.loc[data_nodes.connect_inf == 1].index)
        data_edges["M_category"] = [cluster_check(main_road_connect_mask, data, mc)
                                    for data in np.array(data_edges.index.tolist())[:,:2].tolist()]
        return data_edges

    data_nodes = data_nodes.sort_index()
    data_edges = generate_connect_inf(data_nodes, data_edges)
    return data_nodes, data_edges

In [9]:
#### Data loader: returns the road frames, the tunnel frames and a tunnel mask
road_gdfs, tunnel_gdfs, tunnel_mask = Graph_Loader("대전 대한민국")

#### Basic preparation: returns a main-road group and a non-main-road group
main_road, non_main_road = Basic_Preparation(road_gdfs, tunnel_gdfs, tunnel_mask)

1. Graph_Load -START-
	road | create_undirected : rink 24219개 삭제
	road | select motorway or city road : rink 334개, node : 180 삭제
	tunnel | create_undirected : rink 200개 삭제
	tunnel | select motorway or city road : rink 81개, node : 108 삭제
2. Data Preparation
	 | Linestring_extraction
	 | Linestring_extraction
	 | Linestring_extraction
	 || split_based_on_mainroad
	 || split_based_on_mainroad
	 | Linestring_extraction
	 5844/5844

In [20]:
# Tunnel-mode segmentation (tunnel=True) of the main roads and of the non-main roads;
# element [2] of each group is passed as the mask.
main_road_nodes, main_road_edges = Nodes_Edges_Segmentation(main_road[0], main_road[1], mask = main_road[2], tunnel = True, dummy = False)

non_main_road_nodes, non_main_road_edges = Nodes_Edges_Segmentation(non_main_road[0], non_main_road[1], mask = non_main_road[2], tunnel=True, dummy = False)

100%|██████████| 269/269 [00:41<00:00,  6.51it/s]
100%|██████████| 216/216 [02:47<00:00,  1.29it/s]


In [23]:
### Split the linestrings at the exact tunnel nodes
main_road_nodes, main_road_edges = Nodes_Edges_Segmentation(main_road[0], main_road[1], mask = main_road[2], tunnel = True, dummy = False)
non_main_road_nodes, non_main_road_edges = Nodes_Edges_Segmentation(non_main_road[0], non_main_road[1], mask = non_main_road[2], tunnel=True, dummy = False)

100%|██████████| 269/269 [00:42<00:00,  6.35it/s]
100%|██████████| 216/216 [02:49<00:00,  1.27it/s]


In [28]:
##################################################################################################################################################
###### main road
### Tunnel clustering, main_road
main_road_nodes, main_road_edges = generate_cluster_data(main_road_nodes, main_road_edges)
### Tunnel clustering, non_main_road
non_main_road_nodes, non_main_road_edges = generate_cluster_data(non_main_road_nodes, non_main_road_edges)
### Classify tunnels: road-extension tunnels vs. tunnels inside junctions
main_road_nodes, main_road_edges = generate_cluster_data(main_road_nodes, main_road_edges, mode = "tunnel_classification")
### Junction clustering
main_road_nodes, main_road_edges = generate_cluster_data(main_road_nodes, main_road_edges, cluster_meter = 55, split_meter = 30, min_sample = 4, mode = "cross")

In [11]:
###### main road segmentation
# Junction ("cross") segmentation, 150 m spacing
main_road_nodes, main_road_edges = Nodes_Edges_Segmentation(main_road_nodes, main_road_edges, meter = 150, mode = "cross")
# Single-way segmentation, 600 m spacing
main_road_nodes, main_road_edges = Nodes_Edges_Segmentation(main_road_nodes, main_road_edges, meter = 600, mode = "single_way")
### Fill missing cluster ids of nodes and edges with 9999
main_road_nodes.cluster = main_road_nodes.cluster.fillna(9999);  main_road_edges.cluster = main_road_edges.cluster.fillna(9999)
### Fill missing tunnel flags with 0
main_road_edges.tunnel = main_road_edges.tunnel.fillna(0)
### Propagate the clusters along the main roads
main_road_nodes, main_road_edges = Advance_main_road_cluster(main_road_nodes, main_road_edges)

100%|██████████| 1841/1841 [04:03<00:00,  7.55it/s]
100%|██████████| 31/31 [00:04<00:00,  7.61it/s]


In [16]:
#######################################################################################################################################################
### Non-main-road clustering. Each step rebinds the non-main-road tables, so order matters.
non_main_road_nodes, non_main_road_edges = generate_non_main_road_cluster(non_main_road_nodes, non_main_road_edges)
## Edges without a cluster label get -1 before segmentation
non_main_road_edges.cluster = non_main_road_edges.cluster.fillna(-1)
# Cross (intersection) segmentation, called with meter = 150
non_main_road_nodes, non_main_road_edges = Nodes_Edges_Segmentation(non_main_road_nodes, non_main_road_edges, meter = 150, mode = "cross")
# Single-way segmentation, called with meter = 600
non_main_road_nodes, non_main_road_edges = Nodes_Edges_Segmentation(non_main_road_nodes, non_main_road_edges, meter = 600, mode = "single_way")
### Fill the remaining gaps on the nodes: connect_inf -> 0, cluster -> -1
non_main_road_nodes.connect_inf = non_main_road_nodes.connect_inf.fillna(0)
non_main_road_nodes.cluster = non_main_road_nodes.cluster.fillna(-1)

100%|██████████| 1423/1423 [06:49<00:00,  3.47it/s]
100%|██████████| 2/2 [00:00<00:00,  3.46it/s]


---
- **state**
    - 1 : 교차로 
    - 2 : 터널 (집산도로)
    - 3 : 교차로 분할
    - 4 : 단일로 분할 
    - 21 : 교차로 내 터널 (간선도로)
    - 22 : 도로 연결 터널 (간선도로)
---

# Persist the main-road and non-main-road node/edge tables as pickle files
# in the current working directory (main road first, then non-main road).
for frame, path in (
    (main_road_nodes, 'main_road_nodes.pkl'),
    (main_road_edges, 'main_road_edges.pkl'),
    (non_main_road_nodes, 'non_main_road_nodes.pkl'),
    (non_main_road_edges, 'non_main_road_edges.pkl'),
):
    frame.to_pickle(path)

In [24]:
# Restore the main-road and non-main-road node/edge tables from their pickle files.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
main_road_nodes, main_road_edges, non_main_road_nodes, non_main_road_edges = (
    pd.read_pickle(path)
    for path in (
        'main_road_nodes.pkl',
        'main_road_edges.pkl',
        'non_main_road_nodes.pkl',
        'non_main_road_edges.pkl',
    )
)

---

crash hotspots using kernel density estimation

---

In [10]:
# Colour lookup table used when drawing clusters on the folium maps.
# Keys are the integers -1 through 549; -1 maps to the named colour 'red' and every
# other key maps to a hex colour string. The map cells index this table as
# colors[number(<cluster id>)].
colors = {-1: 'red', 0: '#FAEBD7', 1: '#FFEFDB', 2: '#EEDFCC', 3: '#CDC0B0', 4: '#8B8378', 5: '#00FFFF', 6: '#7FFFD4', 7: '#76EEC6', 8: '#66CDAA', 9: '#458B74', 10: '#F0FFFF', 
          11: '#E0EEEE', 12: '#C1CDCD', 13: '#838B8B', 14: '#E3CF57', 15: '#F5F5DC', 16: '#FFE4C4', 17: '#EED5B7', 18: '#CDB79E', 19: '#8B7D6B', 20: '#000000', 
          21: '#FFEBCD', 22: '#0000FF', 23: '#0000EE', 24: '#0000CD', 25: '#00008B', 26: '#8A2BE2', 27: '#9C661F', 28: '#A52A2A', 29: '#FF4040', 30: '#EE3B3B', 
          31: '#CD3333', 32: '#8B2323', 33: '#DEB887', 34: '#FFD39B', 35: '#EEC591', 36: '#CDAA7D', 37: '#8B7355', 38: '#8A360F', 39: '#8A3324', 40: '#5F9EA0', 
          41: '#98F5FF', 42: '#8EE5EE', 43: '#7AC5CD', 44: '#53868B', 45: '#FF6103', 46: '#FF9912', 47: '#ED9121', 48: '#7FFF00', 49: '#76EE00', 50: '#66CD00',
          51: '#458B00', 52: '#D2691E', 53: '#FF7F24', 54: '#EE7621', 55: '#CD661D', 56: '#8B4513', 57: '#3D59AB', 58: '#3D9140', 59: '#808A87', 60: '#FF7F50', 
          61: '#FF7256', 62: '#EE6A50', 63: '#CD5B45', 64: '#8B3E2F', 65: '#6495ED', 66: '#FFF8DC', 67: '#EEE8CD', 68: '#CDC8B1', 69: '#8B8878', 70: '#DC143C', 
          71: '#00EEEE', 72: '#00CDCD', 73: '#008B8B', 74: '#B8860B', 75: '#FFB90F', 76: '#EEAD0E', 77: '#CD950C', 78: '#8B6508', 79: '#A9A9A9', 80: '#006400', 
          81: '#BDB76B', 82: '#556B2F', 83: '#CAFF70', 84: '#BCEE68', 85: '#A2CD5A', 86: '#6E8B3D', 87: '#FF8C00', 88: '#FF7F00', 89: '#EE7600', 90: '#CD6600', 
          91: '#8B4500', 92: '#9932CC', 93: '#BF3EFF', 94: '#B23AEE', 95: '#9A32CD', 96: '#68228B', 97: '#E9967A', 98: '#8FBC8F', 99: '#C1FFC1', 100: '#B4EEB4', 
          101: '#9BCD9B', 102: '#698B69', 103: '#483D8B', 104: '#2F4F4F', 105: '#97FFFF', 106: '#8DEEEE', 107: '#79CDCD', 108: '#528B8B', 109: '#00CED1', 110: '#9400D3', 
          111: '#FF1493', 112: '#EE1289', 113: '#CD1076', 114: '#8B0A50', 115: '#00BFFF', 116: '#00B2EE', 117: '#009ACD', 118: '#00688B', 119: '#696969', 120: '#1E90FF',
          121: '#1C86EE', 122: '#1874CD', 123: '#104E8B', 124: '#FCE6C9', 125: '#00C957', 126: '#B22222', 127: '#FF3030', 128: '#EE2C2C', 129: '#CD2626', 130: '#8B1A1A',
          131: '#FF7D40', 132: '#FFFAF0', 133: '#228B22', 134: '#DCDCDC', 135: '#F8F8FF', 136: '#FFD700', 137: '#EEC900', 138: '#CDAD00', 139: '#8B7500', 140: '#DAA520',
          141: '#FFC125', 142: '#EEB422', 143: '#CD9B1D', 144: '#8B6914', 145: '#808080', 146: '#030303', 147: '#1A1A1A', 148: '#1C1C1C', 149: '#1F1F1F', 150: '#212121',
          151: '#242424', 152: '#262626', 153: '#292929', 154: '#2B2B2B', 155: '#2E2E2E', 156: '#303030', 157: '#050505', 158: '#333333', 159: '#363636', 160: '#383838',
          161: '#3B3B3B', 162: '#3D3D3D', 163: '#404040', 164: '#424242', 165: '#454545', 166: '#474747', 167: '#4A4A4A', 168: '#080808', 169: '#4D4D4D', 170: '#4F4F4F',
          171: '#525252', 172: '#545454', 173: '#575757', 174: '#595959', 175: '#5C5C5C', 176: '#5E5E5E', 177: '#616161', 178: '#636363', 179: '#0A0A0A', 180: '#666666',
          181: '#6B6B6B', 182: '#6E6E6E', 183: '#707070', 184: '#737373', 185: '#757575', 186: '#787878', 187: '#7A7A7A', 188: '#7D7D7D', 189: '#0D0D0D', 190: '#7F7F7F',
          191: '#828282', 192: '#858585', 193: '#878787', 194: '#8A8A8A', 195: '#8C8C8C', 196: '#8F8F8F', 197: '#919191', 198: '#949494', 199: '#969696', 200: '#0F0F0F',
          201: '#999999', 202: '#9C9C9C', 203: '#9E9E9E', 204: '#A1A1A1', 205: '#A3A3A3', 206: '#A6A6A6', 207: '#A8A8A8', 208: '#ABABAB', 209: '#ADADAD', 210: '#B0B0B0',
          211: '#121212', 212: '#B3B3B3', 213: '#B5B5B5', 214: '#B8B8B8', 215: '#BABABA', 216: '#BDBDBD', 217: '#BFBFBF', 218: '#C2C2C2', 219: '#C4C4C4', 220: '#C7C7C7',
          221: '#C9C9C9', 222: '#141414', 223: '#CCCCCC', 224: '#CFCFCF', 225: '#D1D1D1', 226: '#D4D4D4', 227: '#D6D6D6', 228: '#D9D9D9', 229: '#DBDBDB', 230: '#DEDEDE',
          231: '#E0E0E0', 232: '#E3E3E3', 233: '#171717', 234: '#E5E5E5', 235: '#E8E8E8', 236: '#EBEBEB', 237: '#EDEDED', 238: '#F0F0F0', 239: '#F2F2F2', 240: '#F7F7F7',
          241: '#FAFAFA', 242: '#FCFCFC', 243: '#008000', 244: '#00FF00', 245: '#00EE00', 246: '#00CD00', 247: '#008B00', 248: '#ADFF2F', 249: '#F0FFF0', 250: '#E0EEE0',
          251: '#C1CDC1', 252: '#838B83', 253: '#FF69B4', 254: '#FF6EB4', 255: '#EE6AA7', 256: '#CD6090', 257: '#8B3A62', 258: '#CD5C5C', 259: '#FF6A6A', 260: '#EE6363',
          261: '#CD5555', 262: '#8B3A3A', 263: '#4B0082', 264: '#FFFFF0', 265: '#EEEEE0', 266: '#CDCDC1', 267: '#8B8B83', 268: '#292421', 269: '#F0E68C', 270: '#FFF68F',
          271: '#EEE685', 272: '#CDC673', 273: '#8B864E', 274: '#E6E6FA', 275: '#FFF0F5', 276: '#EEE0E5', 277: '#CDC1C5', 278: '#8B8386', 279: '#7CFC00', 280: '#FFFACD',
          281: '#EEE9BF', 282: '#CDC9A5', 283: '#8B8970', 284: '#ADD8E6', 285: '#BFEFFF', 286: '#B2DFEE', 287: '#9AC0CD', 288: '#68838B', 289: '#F08080', 290: '#E0FFFF',
          291: '#D1EEEE', 292: '#B4CDCD', 293: '#7A8B8B', 294: '#FFEC8B', 295: '#EEDC82', 296: '#CDBE70', 297: '#8B814C', 298: '#FAFAD2', 299: '#D3D3D3', 300: '#FFB6C1',
          301: '#FFAEB9', 302: '#EEA2AD', 303: '#CD8C95', 304: '#8B5F65', 305: '#FFA07A', 306: '#EE9572', 307: '#CD8162', 308: '#8B5742', 309: '#20B2AA', 310: '#87CEFA',
          311: '#B0E2FF', 312: '#A4D3EE', 313: '#8DB6CD', 314: '#607B8B', 315: '#8470FF', 316: '#778899', 317: '#B0C4DE', 318: '#CAE1FF', 319: '#BCD2EE', 320: '#A2B5CD',
          321: '#6E7B8B', 322: '#FFFFE0', 323: '#EEEED1', 324: '#CDCDB4', 325: '#8B8B7A', 326: '#32CD32', 327: '#FAF0E6', 328: '#FF00FF', 329: '#EE00EE', 330: '#CD00CD',
          331: '#8B008B', 332: '#03A89E', 333: '#800000', 334: '#FF34B3', 335: '#EE30A7', 336: '#CD2990', 337: '#8B1C62', 338: '#BA55D3', 339: '#E066FF', 340: '#D15FEE',
          341: '#B452CD', 342: '#7A378B', 343: '#9370DB', 344: '#AB82FF', 345: '#9F79EE', 346: '#8968CD', 347: '#5D478B', 348: '#3CB371', 349: '#7B68EE', 350: '#00FA9A',
          351: '#48D1CC', 352: '#C71585', 353: '#E3A869', 354: '#191970', 355: '#BDFCC9', 356: '#F5FFFA', 357: '#FFE4E1', 358: '#EED5D2', 359: '#CDB7B5', 360: '#8B7D7B',
          361: '#FFE4B5', 362: '#FFDEAD', 363: '#EECFA1', 364: '#CDB38B', 365: '#8B795E', 366: '#000080', 367: '#FDF5E6', 368: '#808000', 369: '#6B8E23', 370: '#C0FF3E',
          371: '#B3EE3A', 372: '#9ACD32', 373: '#698B22', 374: '#FF8000', 375: '#FFA500', 376: '#EE9A00', 377: '#CD8500', 378: '#8B5A00', 379: '#FF4500', 380: '#EE4000',
          381: '#CD3700', 382: '#8B2500', 383: '#DA70D6', 384: '#FF83FA', 385: '#EE7AE9', 386: '#CD69C9', 387: '#8B4789', 388: '#EEE8AA', 389: '#98FB98', 390: '#9AFF9A',
          391: '#90EE90', 392: '#7CCD7C', 393: '#548B54', 394: '#BBFFFF', 395: '#AEEEEE', 396: '#96CDCD', 397: '#668B8B', 398: '#DB7093', 399: '#FF82AB', 400: '#EE799F',
          401: '#CD6889', 402: '#8B475D', 403: '#FFEFD5', 404: '#FFDAB9', 405: '#EECBAD', 406: '#CDAF95', 407: '#8B7765', 408: '#33A1C9', 409: '#FFC0CB', 410: '#FFB5C5',
          411: '#EEA9B8', 412: '#CD919E', 413: '#8B636C', 414: '#DDA0DD', 415: '#FFBBFF', 416: '#EEAEEE', 417: '#CD96CD', 418: '#8B668B', 419: '#B0E0E6', 420: '#800080',
          421: '#9B30FF', 422: '#912CEE', 423: '#7D26CD', 424: '#551A8B', 425: '#872657', 426: '#C76114', 427: '#FF0000', 428: '#EE0000', 429: '#CD0000', 430: '#8B0000',
          431: '#BC8F8F', 432: '#FFC1C1', 433: '#EEB4B4', 434: '#CD9B9B', 435: '#8B6969', 436: '#4169E1', 437: '#4876FF', 438: '#436EEE', 439: '#3A5FCD', 440: '#27408B',
          441: '#FA8072', 442: '#FF8C69', 443: '#EE8262', 444: '#CD7054', 445: '#8B4C39', 446: '#F4A460', 447: '#308014', 448: '#54FF9F', 449: '#4EEE94', 450: '#43CD80',
          451: '#2E8B57', 452: '#FFF5EE', 453: '#EEE5DE', 454: '#CDC5BF', 455: '#8B8682', 456: '#5E2612', 457: '#8E388E', 458: '#C5C1AA', 459: '#71C671', 460: '#555555',
          461: '#1E1E1E', 462: '#282828', 463: '#515151', 464: '#5B5B5B', 465: '#848484', 466: '#8E8E8E', 467: '#B7B7B7', 468: '#C1C1C1', 469: '#EAEAEA', 470: '#F4F4F4',
          471: '#7D9EC0', 472: '#AAAAAA', 473: '#8E8E38', 474: '#C67171', 475: '#7171C6', 476: '#388E8E', 477: '#A0522D', 478: '#FF8247', 479: '#EE7942', 480: '#CD6839',
          481: '#8B4726', 482: '#C0C0C0', 483: '#87CEEB', 484: '#87CEFF', 485: '#7EC0EE', 486: '#6CA6CD', 487: '#4A708B', 488: '#6A5ACD', 489: '#836FFF', 490: '#7A67EE',
          491: '#6959CD', 492: '#473C8B', 493: '#708090', 494: '#C6E2FF', 495: '#B9D3EE', 496: '#9FB6CD', 497: '#6C7B8B', 498: '#FFFAFA', 499: '#EEE9E9', 500: '#CDC9C9',
          501: '#8B8989', 502: '#00FF7F', 503: '#00EE76', 504: '#00CD66', 505: '#008B45', 506: '#4682B4', 507: '#63B8FF', 508: '#5CACEE', 509: '#4F94CD', 510: '#36648B',
          511: '#D2B48C', 512: '#FFA54F', 513: '#EE9A49', 514: '#CD853F', 515: '#8B5A2B', 516: '#008080', 517: '#D8BFD8', 518: '#FFE1FF', 519: '#EED2EE', 520: '#CDB5CD',
          521: '#8B7B8B', 522: '#FF6347', 523: '#EE5C42', 524: '#CD4F39', 525: '#8B3626', 526: '#40E0D0', 527: '#00F5FF', 528: '#00E5EE', 529: '#00C5CD', 530: '#00868B',
          531: '#00C78C', 532: '#EE82EE', 533: '#D02090', 534: '#FF3E96', 535: '#EE3A8C', 536: '#CD3278', 537: '#8B2252', 538: '#808069', 539: '#F5DEB3', 540: '#FFE7BA',
          541: '#EED8AE', 542: '#CDBA96', 543: '#8B7E66', 544: '#FFFFFF', 545: '#F5F5F5', 546: '#FFFF00', 547: '#EEEE00', 548: '#CDCD00', 549: '#8B8B00'}

def number(num):
    """Fold ``num`` into the key range used by the ``colors`` table.

    Values below 549 (negatives such as -1 included) are returned unchanged.
    For 549 and above, the largest whole multiple of 549 that fits is
    subtracted, so 549 -> 0, 550 -> 1, and so on.
    """
    wraps = num // 549  # how many whole times 549 fits into num
    if wraps >= 1:
        return num - 549 * wraps
    return num

- Main Road

In [29]:
# Keep only the main-road edges whose `tunnel` value is not null.
t = main_road_edges.loc[main_road_edges.tunnel.notnull()]

In [17]:
# Keep only the rows of main_road[1] whose `tunnel` value is not null.
test = main_road[1].loc[main_road[1].tunnel.notnull()]

In [None]:
### Daejeon Metropolitan City: legal (administrative) boundary data
# Builds a folium map centred on the geocoded place and overlays the tunnel edges held in `test`.
place = "대전 대한민국"

places = ox.geocode_to_gdf([place])
places = ox.project_gdf(places)

# lat, lon of the geocoded place, used as the map centre
latitude = places.lat.values[0]; longitude = places.lon.values[0]
# Define the base map
m = folium.Map(location=[latitude, longitude],
               zoom_start=11)

# Draw the administrative boundary
folium.Choropleth(geo_data=places.geometry,
                  fill_color="white",
                  ).add_to(m)

# Nodes (disabled: per-node cluster markers)
# for i in range(len(main_road_nodes)):
#     folium.CircleMarker([main_road_nodes.iloc[[i]]["geometry"].values[0].y,main_road_nodes.iloc[[i]]["geometry"].values[0].x],
#                         color = colors[number(main_road_nodes.iloc[[i]].cluster.values[0])],
#                         radius = 3
#                     ).add_to(m)

# Wide red layer drawn first
folium.Choropleth(
    test.geometry,
    line_weight = 9,
    line_color = "red"
).add_to(m)


# Narrower blue layer drawn on top.
# NOTE(review): both layers draw `test.geometry`, and the variable `t` is never used in the
# visible cells. Possibly one layer was meant to show `t.geometry` for comparison; confirm.
folium.Choropleth(
    test.geometry,
    line_weight = 5,
    line_color = "blue"
).add_to(m)

# Display the map as the cell output
m


In [28]:
### Daejeon Metropolitan City: legal (administrative) boundary data
# Renders the segmented main-road network, coloured by cluster, to "final_main_road.html".
place = "대전 대한민국"

places = ox.geocode_to_gdf([place])
places = ox.project_gdf(places)

# lat, lon of the geocoded place, used as the map centre
latitude = places.lat.values[0]
longitude = places.lon.values[0]
# Define the base map
m = folium.Map(location=[latitude, longitude], zoom_start=11)

# Draw the administrative boundary
folium.Choropleth(geo_data=places.geometry, fill_color="white").add_to(m)

# Nodes: one circle marker per node, coloured by its cluster id.
# Iterating the two columns in lockstep avoids building a one-row frame per access.
for point, cluster in zip(main_road_nodes["geometry"], main_road_nodes.cluster):
    folium.CircleMarker(
        [point.y, point.x],
        color=colors[number(cluster)],
        radius=3,
    ).add_to(m)

# Edges: one layer per edge. An edge whose cluster value is a list is drawn in
# black; every other edge takes its colour from the lookup table.
edge_geometry = main_road_edges["geometry"]
for pos, cluster in enumerate(main_road_edges.cluster):
    folium.Choropleth(
        edge_geometry.iloc[[pos]],  # one-row GeoSeries, as folium expects geo data
        line_weight=3,
        line_color="black" if isinstance(cluster, list) else colors[number(cluster)],
    ).add_to(m)

m.save("final_main_road.html")

---
- Non Main Road 

In [29]:
### Daejeon Metropolitan City: legal (administrative) boundary data
# Renders the non-main-road edges, coloured by M_category, to "final_non_main_road.html".
place = "대전 대한민국"

places = ox.geocode_to_gdf([place])
places = ox.project_gdf(places)

# lat, lon of the geocoded place, used as the map centre
latitude = places.lat.values[0]
longitude = places.lon.values[0]
# Define the base map
m = folium.Map(location=[latitude, longitude], zoom_start=11)

# Draw the administrative boundary
folium.Choropleth(geo_data=places.geometry, fill_color="white").add_to(m)

# Edges: one layer per edge. Edges whose M_category equals 9999 are drawn thin
# (weight 1); all others are drawn with weight 4. Colour comes from the lookup table.
# Iterating the column directly avoids building a one-row frame per access.
edge_geometry = non_main_road_edges["geometry"]
for pos, category in enumerate(non_main_road_edges.M_category):
    folium.Choropleth(
        edge_geometry.iloc[[pos]],  # one-row GeoSeries, as folium expects geo data
        line_weight=1 if category == 9999 else 4,
        line_color=colors[number(category)],
    ).add_to(m)

m.save("final_non_main_road.html")

---

In [2]:
# Download the simplified "drive_service" network for the place and convert the
# graph into node and edge tables for inspection.
G1 = ox.graph_from_place("대전 대한민국", network_type="drive_service", simplify=True)
base_nodes, base_edges = ox.graph_to_gdfs(G1)

In [5]:
# Number of rows in the edge table of the downloaded graph
len(base_edges)

52798

In [7]:
# Display the edge table (indexed by u, v, key)
base_edges

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,oneway,lanes,highway,maxspeed,length,geometry,name,ref,bridge,tunnel,service,access,junction,width
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
274940070,8964537399,0,32078273,True,1,motorway_link,40,442.681,"LINESTRING (127.38054 36.41399, 127.38012 36.4...",,,,,,,,
274940138,8234413963,0,124013597,True,1,motorway_link,40,260.691,"LINESTRING (127.37828 36.41666, 127.37915 36.4...",,,,,,,,
275898514,8964537398,0,124013597,True,1,motorway_link,40,17.568,"LINESTRING (127.37769 36.41697, 127.37781 36.4...",,,,,,,,
275898514,8234571686,0,37397677,False,,residential,,47.920,"LINESTRING (127.37769 36.41697, 127.37764 36.4...",대덕대로989번길,,,,,,,
275898514,8234413994,0,124013611,True,,primary,,650.453,"LINESTRING (127.37769 36.41697, 127.37767 36.4...",대덕대로,32;57,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9494750817,9494750821,0,1029974235,False,,residential,,11.120,"LINESTRING (127.32154 36.37254, 127.32154 36.3...",,,,,,,,
9494750817,9494746094,0,1029974226,False,,residential,,125.356,"LINESTRING (127.32154 36.37254, 127.32076 36.3...",,,,,,,,
9494750821,9494750817,0,1029974235,False,,residential,,11.120,"LINESTRING (127.32154 36.37264, 127.32154 36.3...",,,,,,,,
9494750821,9494750821,0,1029974235,False,,residential,,104.289,"LINESTRING (127.32154 36.37264, 127.32154 36.3...",,,,,,,,
