In [18]:
import osmnx as ox
from numpy import random 
from shapely.geometry import Point

import geopandas as gpd
import pandas as pd
import numpy as np
import folium
import itertools
import re 
from tqdm import tqdm
import warnings 


# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices raised by the libraries used below.
warnings.filterwarnings("ignore")

In [2]:
# Places whose road network is downloaded (Seoul, Incheon and Gyeonggi-do, written in Korean).
place_names = [
    "서울특별시, 대한민국",
    "인천광역시, 대한민국",
    "경기도, 대한민국"]

# Build a single road graph for all listed places, then convert it to node / edge GeoDataFrames.
# `edges` is the frame later passed to main_random_location.
G = ox.graph_from_place(place_names, network_type="drive_service", simplify=True)
nodes, edges = ox.graph_to_gdfs(G)

In [2]:
## Taxi trip records (the CSV is decoded as cp949)
handicapped_move_data = pd.read_csv("1_100_20210901.csv", encoding="cp949")

## Administrative-district boundary data
hjd_20121210 = gpd.read_file("HangJeongDong_ver20121210.geojson")

# Keep only the service regions: Seoul, Incheon and Gyeonggi.
def hjd_filter(data):
    """Return True when the district code ``data`` belongs to a service region.

    The first two characters of the code are compared against the region
    prefixes 11 (Seoul), 23 (Incheon) and 31 (Gyeonggi).
    """
    region_prefix = data[:2]
    return region_prefix in ("11", "23", "31")

# One boolean per district row (from its adm_cd code); rows outside the service regions are dropped.
hjd_20121210 = hjd_20121210.loc[[hjd_filter(code) for code in hjd_20121210.adm_cd]]

---

In [3]:
# "방화 제3동' 처럼 숫자 앞 "제"가 들어가 있는 것을 "방화3동"으로 변경, 
# 정규표현식으로 [제+숫자] 패턴이 있으면 "제"를 제거 후 위치 변수를 생성해준다 
def generate_places_name(main_category, middle_category, name_map=None):
    """Build the lookup label ``"<dong> <district>"`` for one trip endpoint.

    The ordinal prefix "제" that sits directly in front of a digit is removed
    ("방화제3동" -> "방화3동"), the resulting name is translated through
    ``name_map`` when it has an entry there, and the district name is appended
    after a single space.

    Parameters
    ----------
    main_category : str
        District part of the label; appended unchanged.
    middle_category : str
        Dong name, possibly carrying the "제<digit>" ordinal prefix.
    name_map : dict, optional
        Old-name -> new-name mapping. Defaults to the module-level
        ``change_dict``.

    Returns
    -------
    str
        ``"<normalised dong> <main_category>"``.

    Notes
    -----
    Only the one "제" immediately before the first digit is dropped, so a name
    that itself ends in "제" keeps it ("홍제제1동" -> "홍제1동").
    NOTE(review): a name ending in "제" that arrives *without* the ordinal
    prefix (e.g. "홍제1동") still loses that syllable, exactly as before;
    confirm the input always carries the prefix for such names.
    """
    # A single anchored substitution replaces the previous findall/split logic,
    # which raised IndexError when two "제<digit>" tokens were present and lost
    # the digit for runs such as "제제제1".
    middle_category = re.sub("제([0-9])", r"\1", middle_category, count=1)

    if name_map is None:
        name_map = change_dict
    # Names without a mapping are kept as they are (no blanket exception handling).
    middle_category = name_map.get(middle_category, middle_category)

    return middle_category + " " + main_category

In [4]:
# Return, for every location label, the geometry of the matching administrative district.
def get_location_bjd_geometry(location_list, bjd_df):
    """Look up the district geometry for each ``"<dong> <district>"`` label.

    Matching uses the space-separated tokens of ``bjd_df.adm_nm``: the last
    token is compared with the dong part of the label. When exactly one row has
    that dong name it is taken as is (the district part is not checked in that
    case). Otherwise the second token of ``adm_nm`` must also equal the
    district part of the label, and the first such row in frame order is used.

    Parameters
    ----------
    location_list : iterable of str
        Labels of the form ``"<dong> <district>"``.
    bjd_df : DataFrame-like
        Must expose an ``adm_nm`` column and a ``geometry`` column.

    Returns
    -------
    list
        One geometry per entry of ``location_list``, in the same order.

    Raises
    ------
    IndexError
        When a label cannot be matched to any row; the message names the label.
    """
    # Token arrays are built once instead of once per label.
    name_tokens = [full_name.split(" ") for full_name in bjd_df.adm_nm]
    dong_names = np.array([tokens[-1] for tokens in name_tokens])
    district_names = np.array([tokens[1] for tokens in name_tokens])

    row_of_label = {}  # label -> row position; repeated labels are resolved once
    bjd_geometry = []
    for location in location_list:
        if location not in row_of_label:
            place = location.split(" ")
            dong_rows = np.where(dong_names == place[0])[0].tolist()
            if len(dong_rows) == 1:
                row_of_label[location] = dong_rows[0]
            else:
                if len(place) < 2:
                    raise IndexError(
                        f"cannot disambiguate {location!r}: label has no district part")
                district_rows = set(np.where(district_names == place[1])[0].tolist())
                # Keep frame order so the chosen row is deterministic.
                candidates = [row for row in dong_rows if row in district_rows]
                if not candidates:
                    raise IndexError(
                        f"no administrative district matches {location!r}")
                row_of_label[location] = candidates[0]
        bjd_geometry.append(bjd_df.iloc[row_of_label[location]].geometry)

    return bjd_geometry

In [5]:
# Per administrative district: how many random coordinates are needed.
def generate_location_cnt_df(move_data, where):
    """Return one row per distinct ``{where}pos`` with its geometry and trip count.

    The result has the columns ``{where}pos``, ``{where}_geometry`` (taken from
    the first row in which the place appears) and ``cnt`` (number of rows of
    ``move_data`` with that place).
    """
    pos_col = f"{where}pos"
    geom_col = f"{where}_geometry"

    trips_per_place = (
        move_data[pos_col]
        .value_counts()
        .rename_axis(pos_col)
        .reset_index(name="cnt")
    )
    first_geometry = move_data[[pos_col, geom_col]].drop_duplicates([pos_col])

    return pd.merge(first_geometry, trips_per_place, on=pos_col)

In [6]:
# Generate random coordinates on road links.
def Generate_random_location(data, CNT):
    """Sample ``CNT`` random points that lie on the road links in ``data``.

    Parameters
    ----------
    data : DataFrame-like
        Road links; each row must provide ``"length"`` and ``"geometry"``.
    CNT : int
        Number of points to generate.

    Returns
    -------
    list of shapely.geometry.Point
        One point per requested location.

    Raises
    ------
    ValueError
        When points are requested but ``data`` contains no links.

    Notes
    -----
    Links are drawn without replacement while ``data`` has at least ``CNT``
    rows. When it has fewer, links are drawn with replacement instead of
    failing, so several points may then fall on the same link.
    """
    link_count = len(data)
    if CNT > 0 and link_count == 0:
        raise ValueError("cannot generate locations: no road links were supplied")

    # Conversion factor from metres to coordinate units, estimated from one
    # fixed reference pair of points (Euclidean distance / great-circle distance).
    # It does not depend on the link, so it is computed once per call.
    reference_pair = (36.367658, 127.447499, 36.443928, 127.419678)
    units_per_meter = (ox.distance.euclidean_dist_vec(*reference_pair)
                       / ox.distance.great_circle_vec(*reference_pair))

    allow_repeats = CNT > link_count
    locations = []
    for i in random.choice(range(link_count), size=CNT, replace=allow_repeats):
        link = data.iloc[i]
        # Spacing is a random fraction of the link's own length rather than a
        # fixed distance, so points are not pinned to the intersections.
        random_num = random.choice([0.1, 0.2, 0.3, 0.4, 0.5])
        random_meter = link["length"] * random_num
        new_node = list(ox.utils_geo.interpolate_points(link["geometry"],
                                                        units_per_meter * random_meter))
        # The first and last interpolated points are the link's end nodes.
        interior = new_node[1:-1]
        if not interior:
            # No interior vertex was produced: fall back to the link midpoint.
            midpoint = link["geometry"].interpolate(0.5, normalized=True)
            interior = [(midpoint.x, midpoint.y)]
        # One random interior point per selected link.
        idx = random.choice(len(interior), size=1)
        locations.append(Point(interior[idx[0]]))

    return locations

In [7]:
# Extract the road links that intersect one administrative-district boundary.
def generate_subset(geometry, data_edges):
    """Return the rows of ``data_edges`` whose geometry intersects ``geometry``.

    Parameters
    ----------
    geometry : shapely geometry
        Boundary of the area of interest.
    data_edges : GeoDataFrame
        Road links; only its spatial index and rows are read.

    Returns
    -------
    GeoDataFrame
        Matching rows in their original order, with an ``idx`` column holding
        each row's position in ``data_edges``. Empty when nothing intersects.

    Notes
    -----
    The spatial index is queried directly. Unlike the earlier spatial-join
    version, this does not add an ``idx`` column to the caller's
    ``data_edges``, and an area without any link yields an empty frame instead
    of failing on a NaN position.
    """
    hit_positions = np.sort(data_edges.sindex.query(geometry, predicate="intersects"))
    subset = data_edges.iloc[hit_positions].copy()
    subset["idx"] = hit_positions
    return subset

In [8]:
def main_random_location(data_edges, pos_cnt, move_data, where):
    """Generate random road points for every place and map them back to trips.

    For each row of ``pos_cnt`` the links intersecting its ``{where}_geometry``
    are selected with ``generate_subset`` and ``cnt`` points are drawn from
    them with ``Generate_random_location``. The per-place point lists are
    stored in a new ``{where}_random_location`` column of ``pos_cnt`` (the
    frame passed in is modified).

    Returns
    -------
    tuple
        ``(pos_cnt, node_mask_dict)`` where ``node_mask_dict`` maps the row
        position of a trip in ``move_data`` to the point assigned to it.
    """
    pos_col = f"{where}pos"
    geom_col = f"{where}_geometry"
    points_col = f"{where}_random_location"

    sampled_per_place = []
    for row_no in tqdm(range(len(pos_cnt))):
        place_row = pos_cnt.iloc[row_no]
        area_links = generate_subset(place_row[geom_col], data_edges)
        sampled_per_place.append(Generate_random_location(area_links, place_row.cnt))
    pos_cnt[points_col] = sampled_per_place

    # Pair each trip row of a place with one of the points sampled for that place.
    trip_places = np.array(move_data[pos_col])
    node_mask_dict = {}
    for place_name, place_points in zip(pos_cnt[pos_col], pos_cnt[points_col]):
        trip_rows = np.where(trip_places == place_name)[0].tolist()
        node_mask_dict.update(zip(trip_rows, place_points))

    return pos_cnt, node_mask_dict

In [9]:
# Renamed administrative districts: old name -> name to use instead
# (looked up by generate_places_name).
# "장안3동" used to be listed twice ("장안2동", then "장안1동"); a dict literal keeps
# only the last value, so the overwritten first entry has been removed. The
# effective mapping is unchanged.
change_dict = {
    "명륜1가동": "혜화동",
    "명륜2가동": "혜화동",
    "명륜3가동": "혜화동",
    "명륜4가동": "혜화동",
    "명륜5가동": "혜화동",
    "답십리3동": "답십리1동",
    "답십리4동": "답십리2동",
    "장안3동": "장안1동",
    "제기1동": "제기동",
    "제기2동": "제기동",
    "장안4동": "장안2동",
    "신설동": "용신동",
    "전농3동": "전농2동",
    "공릉1.3동": "공릉1동",
    "이문3동": "이문2동",
    "고촌면": "고촌읍",
    "소사본1동": "소사본동",
    "양촌면": "양촌읍",
    "용두동": "용신동",
}

# Origin and destination labels, one per trip.
start_location = [
    generate_places_name(district, dong)
    for district, dong in zip(handicapped_move_data["startpos1"],
                              handicapped_move_data["startpos2"])
]
end_location = [
    generate_places_name(district, dong)
    for district, dong in zip(handicapped_move_data["endpos1"],
                              handicapped_move_data["endpos2"])
]

# Replace "." in the labels with the middle dot "·".
handicapped_move_data["startpos"] = [label.replace(".", "·") for label in start_location]
handicapped_move_data["endpos"] = [label.replace(".", "·") for label in end_location]

# Attach the district geometry of each endpoint.
handicapped_move_data["start_geometry"] = get_location_bjd_geometry(
    handicapped_move_data["startpos"], hjd_20121210)
handicapped_move_data["end_geometry"] = get_location_bjd_geometry(
    handicapped_move_data["endpos"], hjd_20121210)

# Number of random points needed per district.
startpos_cnt = generate_location_cnt_df(handicapped_move_data, "start")
endpos_cnt = generate_location_cnt_df(handicapped_move_data, "end")

# Sample the points on the road network and map them back to trip rows.
startpos_cnt, start_dict = main_random_location(
    edges, startpos_cnt, handicapped_move_data, "start")
endpos_cnt, end_dict = main_random_location(
    edges, endpos_cnt, handicapped_move_data, "end")

# One point per trip, in row order.
trip_rows = range(len(handicapped_move_data))
handicapped_move_data["start_point"] = [start_dict[row] for row in trip_rows]
handicapped_move_data["end_point"] = [end_dict[row] for row in trip_rows]

100%|██████████| 426/426 [03:07<00:00,  2.27it/s]
100%|██████████| 534/534 [03:17<00:00,  2.71it/s]


In [11]:
handicapped_move_data

Unnamed: 0,no,cartype,startpos1,startpos2,endpos1,endpos2,receipttime_date,receipttime_time,settime_date,settime_time,ridetime_date,ridetime_time,startpos,endpos,start_geometry,end_geometry,start_point,end_point
0,3689,중형 승합,종로구,종로5.6가동,양주시,회천1동,2021-09-01,00:12:00,2021-09-01,01:03:13,2021-09-01,01:36:49,종로5·6가동 종로구,회천1동 양주시,"POLYGON ((127.0102890609625 37.57159066363359,...","POLYGON ((127.0878778147551 37.87207882440664,...",POINT (127.00199556 37.57001),POINT (127.0674019990707 37.84094717240174)
1,1305,중형승합,광진구,자양제2동,성동구,송정동,2021-09-01,00:27:00,2021-09-01,01:04:26,2021-09-01,01:32:35,자양2동 광진구,송정동 성동구,"POLYGON ((127.0922506385372 37.52678613714284,...","POLYGON ((127.0674990184424 37.54831146208115,...",POINT (127.0821916413784 37.52979667121378),POINT (127.0675301666667 37.55175463333334)
2,8231,중형승합,마포구,합정동,강서구,가양제3동,2021-09-01,01:47:43,2021-09-01,01:53:19,2021-09-01,02:09:28,합정동 마포구,가양3동 강서구,"POLYGON ((126.914510036218 37.54015554119607, ...","POLYGON ((126.8630358161493 37.55688024880158,...",POINT (126.9086467370915 37.55150486373537),POINT (126.8624837745973 37.56372021552721)
3,8239,중형승합,성동구,금호1가동,중랑구,면목제3.8동,2021-09-01,02:33:00,2021-09-01,02:41:55,2021-09-01,03:15:49,금호1가동 성동구,면목3·8동 중랑구,"POLYGON ((127.0295104353031 37.5539112311458, ...","POLYGON ((127.1101433618115 37.58741647964422,...",POINT (127.0237689891256 37.55649278896422),POINT (127.0885793 37.58239913333333)
4,7652,중형승합,강서구,방화제3동,강서구,발산제1동,2021-09-01,02:50:00,2021-09-01,02:58:21,2021-09-01,03:12:06,방화3동 강서구,발산1동 강서구,"POLYGON ((126.8220808436308 37.57328002465316,...","POLYGON ((126.83629886656 37.54797289672825, 1...",POINT (126.81137304 37.5800377),POINT (126.8375903358619 37.55805645424785)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4356,7864,중형승합,도봉구,창제4동,도봉구,도봉제2동,2021-09-01,23:54:41,2021-09-01,23:56:51,2021-09-02,00:08:57,창4동 도봉구,도봉2동 도봉구,"POLYGON ((127.0512474326915 37.64519562889613,...","POLYGON ((127.048932710723 37.67120303004936, ...",POINT (127.0534945117659 37.64594172834541),POINT (127.0506880030711 37.68188122595153)
4357,7658,중형승합,강남구,논현2동,성동구,왕십리제2동,2021-09-01,23:55:00,2021-09-01,23:57:05,2021-09-02,00:10:06,논현2동 강남구,왕십리2동 성동구,"POLYGON ((127.0339086404381 37.50734622044237,...","POLYGON ((127.033510251187 37.56251805894114, ...",POINT (127.0311101333333 37.51820643333333),POINT (127.0270730331541 37.56307870498879)
4358,1182,중형승합,강남구,일원1동,송파구,거여2동,2021-09-01,23:56:12,2021-09-02,00:03:57,2021-09-02,00:06:42,일원1동 강남구,거여2동 송파구,"POLYGON ((127.0818013615625 37.48867131398421,...","POLYGON ((127.1493650686218 37.48026085292566,...",POINT (127.08206855 37.49070665),POINT (127.1444185202036 37.4919008941525)
4359,7658,중형승합,용산구,원효로제2동,동작구,사당제5동,2021-09-01,23:57:00,2021-09-02,00:28:18,2021-09-02,00:52:30,원효로2동 용산구,사당5동 동작구,"POLYGON ((126.9530081726797 37.53784747995292,...","POLYGON ((126.9653211528005 37.47956427036225,...",POINT (126.95519345 37.53331445),POINT (126.9637375220185 37.48938508060856)


In [None]:
### Base map centred on the geocoded place, with pick-up and drop-off points
place = "서울 대한민국"

places = ox.geocode_to_gdf([place])
places = ox.project_gdf(places)

# Map centre (lat, lon) read from the geocoding result
latitude, longitude = places.lat.values[0], places.lon.values[0]

# Base map
m = folium.Map(location=[latitude, longitude], zoom_start=11)

# # Draw the boundary
# folium.Choropleth(geo_data=places.geometry,
#                   fill_color="white",
#                   ).add_to(m)

# Pick-up points are drawn first in red, then drop-off points in blue.
for point_column, marker_color in (("start_point", "red"), ("end_point", "blue")):
    for point in handicapped_move_data[point_column]:
        folium.CircleMarker([point.y, point.x],
                            color=marker_color,
                            radius=2,
                            ).add_to(m)

m