- 해당 노트북 파일은 주피터 노트북을 통해 작성되었습니다.

### 패키지 불러오기

In [119]:
from haversine import haversine
import pandas as pd

import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString

from tqdm import tqdm

### 데이터 불러오기

In [128]:
# Load the three point-of-interest tables (subway stations, schools, large
# marts). All three files are read with the cp949 encoding.
subway_xy, school_xy, big_mart = (
    pd.read_csv(csv_path, encoding='cp949')
    for csv_path in ('subway_xy.csv', 'school_xy.csv', 'big_mart.csv')
)

In [129]:
# Load the building table (cp949-encoded CSV). Later cells read its
# '위도' (latitude) and '경도' (longitude) columns.
building = pd.read_csv('data_road2.csv', encoding='cp949')

- 위경도 튜플 생성

In [130]:
# Attach a (latitude, longitude) tuple column to every table. The building
# table names its coordinate columns '위도' / '경도'; the others use 'lat' / 'lon'.
for frame, lat_col, lon_col in (
    (subway_xy, 'lat', 'lon'),
    (school_xy, 'lat', 'lon'),
    (big_mart, 'lat', 'lon'),
    (building, '위도', '경도'),
):
    frame['xy_tuple'] = list(zip(frame[lat_col], frame[lon_col]))

### 1. GeoPandas 형식에 맞춰서 좌표계 수정

In [131]:
# One shapely Point per building, built in (x, y) = (longitude, latitude) order.
building['geometry'] = [
    Point([lon, lat]) for lon, lat in zip(building['경도'], building['위도'])
]

In [132]:
# Wrap the building table as a GeoDataFrame whose points are geographic
# longitude/latitude coordinates (EPSG:4326).
# The CRS is passed as an authority string: the {'init': 'epsg:...'} dict form
# is deprecated and makes pyproj emit a FutureWarning on every use.
building_geo = gpd.GeoDataFrame(building, geometry='geometry', crs='EPSG:4326')

# Reproject to EPSG:5179 so that the buffer radii used in later cells are
# expressed in the projected CRS's linear unit rather than in degrees.
building_geo = building_geo.to_crs('EPSG:5179')

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [7]:
# Display the reprojected building GeoDataFrame as the cell output.
building_geo

Unnamed: 0,도로명주소,위도,경도,xy_tuple,geometry
0,서울특별시 강동구 양재대로 1540,37.542920,127.142450,"(37.5429196, 127.1424496)",POINT (968414.138 1949348.008)
1,서울특별시 강동구 양재대로 1449,37.535248,127.138685,"(37.5352477, 127.13868470000001)",POINT (968078.277 1948498.129)
2,서울특별시 강동구 양재대로128길,37.544508,127.144746,"(37.5445084, 127.14474569999999)",POINT (968617.642 1949523.507)
3,서울특별시 강동구 천호대로 1156,37.533434,127.139603,"(37.533434299999996, 127.13960279999999)",POINT (968158.619 1948296.632)
4,서울특별시 강동구 천호대로 1238,37.535540,127.148192,"(37.5355401, 127.14819159999999)",POINT (968918.323 1948527.384)
...,...,...,...,...,...
119967,서울특별시 영등포구 영중로 155,37.530283,126.905429,"(37.5302831, 126.9054287)",POINT (947466.785 1948052.068)
119968,서울특별시 영등포구 영중로 163,37.530840,126.905173,"(37.5308403, 126.9051727)",POINT (947444.557 1948114.030)
119969,서울특별시 영등포구 영중로 155-8,37.530245,126.904834,"(37.5302452, 126.9048341)",POINT (947414.222 1948048.196)
119970,서울특별시 영등포구 영중로 157-11,37.530342,126.904867,"(37.530341899999996, 126.90486659999999)",POINT (947417.161 1948058.906)


In [133]:
# One shapely Point per row of each point-of-interest table, built in
# (x, y) = (longitude, latitude) order.
for poi in (subway_xy, school_xy, big_mart):
    poi['geometry'] = [
        Point([lon, lat]) for lon, lat in zip(poi['lon'], poi['lat'])
    ]

In [135]:
# Convert each point-of-interest table to a GeoDataFrame declared as
# longitude/latitude (EPSG:4326), then reproject it to EPSG:5179.
# CRS values are given as authority strings: the {'init': 'epsg:...'} dict
# form is deprecated and makes pyproj emit a FutureWarning on every use.
subway_geo = gpd.GeoDataFrame(subway_xy, geometry='geometry', crs='EPSG:4326')
subway_geo = subway_geo.to_crs('EPSG:5179')

school_geo = gpd.GeoDataFrame(school_xy, geometry='geometry', crs='EPSG:4326')
school_geo = school_geo.to_crs('EPSG:5179')

mart_geo = gpd.GeoDataFrame(big_mart, geometry='geometry', crs='EPSG:4326')
mart_geo = mart_geo.to_crs('EPSG:5179')

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


- 위경도 좌표는 구면 위의 좌표이므로, 가장 가까운 지하철역, 학교, 마트, 상권까지의 거리를 도출할 때는 유클리디안 거리보다 하버사인 거리가 적합
- 모든 건물과 모든 지하철역까지의 거리를 계산해서 최소값을 도출해내도 되지만 많은 시간이 소모되므로 건물별 반경을 그리고 해당 반경에 들어오는 지하철역 중 가장 가까운 거리를 도출

### 2. 건물별 10km 반경 내 지하철역 중 가장 가까운 지하철역과의 하버사인 거리 추출

In [98]:
# Haversine distance from every building to its nearest subway station,
# in haversine()'s default unit (kilometres).
haver_dis_subway = []

# Hoisted out of the loop: these columns do not change between buildings.
subway_points = subway_geo['geometry']
subway_coords = subway_geo['xy_tuple']

for building_index in building_geo.index:

    # (lat, lon) tuple of the building -- the argument order haversine() expects.
    origin = building_geo['xy_tuple'][building_index]

    # Pre-filter: keep only the stations inside a buffer of 10,000 CRS units
    # around the building (10 km, assuming the metre-based EPSG:5179 CRS).
    search_area = building_geo['geometry'][building_index].buffer(10000)
    candidates = subway_coords[subway_points.within(search_area)]

    if candidates.empty:
        # No station in range: search every station instead of reporting the
        # distance to whichever station happens to be stored in row 0.
        candidates = subway_coords

    haver_dis_subway.append(
        min(
            (haversine(origin, station) for station in candidates),
            default=float('nan'),  # only reached when there are no stations at all
        )
    )

In [158]:
# Turn the list of distances into a one-column DataFrame so it can be
# concatenated onto the building table.
haver_dis_subway = pd.DataFrame({'haver_dis_subway': haver_dis_subway})

In [159]:
# Append the subway-distance column; rows are matched on the index.
building_geo = pd.concat(
    objs=[building_geo, haver_dis_subway],
    axis='columns',
)

In [160]:
# Sanity check: (largest, smallest) nearest-subway distance.
subway_dist = building_geo['haver_dis_subway']
(subway_dist.max(), subway_dist.min())

(5.666785409042729, 0.004532649339048083)

In [126]:
# Display the building table, now including 'haver_dis_subway'.
building_geo

Unnamed: 0,도로명주소,위도,경도,xy_tuple,geometry,haver_dis_subway
0,서울특별시 강동구 양재대로 1540,37.542920,127.142450,"(37.5429196, 127.1424496)",POINT (968414.138 1949348.008),0.316026
1,서울특별시 강동구 양재대로 1449,37.535248,127.138685,"(37.5352477, 127.13868470000001)",POINT (968078.277 1948498.129),0.307749
2,서울특별시 강동구 양재대로128길,37.544508,127.144746,"(37.5445084, 127.14474569999999)",POINT (968617.642 1949523.507),0.207319
3,서울특별시 강동구 천호대로 1156,37.533434,127.139603,"(37.533434299999996, 127.13960279999999)",POINT (968158.619 1948296.632),0.487983
4,서울특별시 강동구 천호대로 1238,37.535540,127.148192,"(37.5355401, 127.14819159999999)",POINT (968918.323 1948527.384),0.765237
...,...,...,...,...,...,...
119967,서울특별시 영등포구 영중로 155,37.530283,126.905429,"(37.5302831, 126.9054287)",POINT (947466.785 1948052.068),0.571565
119968,서울특별시 영등포구 영중로 163,37.530840,126.905173,"(37.5308403, 126.9051727)",POINT (947444.557 1948114.030),0.505860
119969,서울특별시 영등포구 영중로 155-8,37.530245,126.904834,"(37.5302452, 126.9048341)",POINT (947414.222 1948048.196),0.555776
119970,서울특별시 영등포구 영중로 157-11,37.530342,126.904867,"(37.530341899999996, 126.90486659999999)",POINT (947417.161 1948058.906),0.546632


### 3. 건물별 5km 반경 내 학교 중 가장 가까운 학교와의 하버사인 거리 추출

- 지하철역에 비해 학교의 수가 많으므로 좁은 반경을 설정해서 거리 추출

In [142]:
# Haversine distance from every building to its nearest school,
# in haversine()'s default unit (kilometres).
haver_dis_school = []

# Hoisted out of the loop: these columns do not change between buildings.
school_points = school_geo['geometry']
school_coords = school_geo['xy_tuple']

for building_index in tqdm(building_geo.index, desc="건물별 5km 반경 내 가장 가까운 학교와의 거리 추출 중"):

    # (lat, lon) tuple of the building -- the argument order haversine() expects.
    origin = building_geo['xy_tuple'][building_index]

    # Pre-filter: keep only the schools inside a buffer of 5,000 CRS units
    # around the building (5 km, assuming the metre-based EPSG:5179 CRS).
    search_area = building_geo['geometry'][building_index].buffer(5000)
    candidates = school_coords[school_points.within(search_area)]

    if candidates.empty:
        # No school in range: search every school instead of reporting the
        # distance to whichever school happens to be stored in row 0.
        candidates = school_coords

    haver_dis_school.append(
        min(
            (haversine(origin, school) for school in candidates),
            default=float('nan'),  # only reached when there are no schools at all
        )
    )

건물별 5km 반경 내 가장 가까운 학교와의 거리 추출 중: 100%|██████████████████| 119972/119972 [1:22:05<00:00, 24.36it/s]


In [161]:
# Turn the list of distances into a one-column DataFrame so it can be
# concatenated onto the building table.
haver_dis_school = pd.DataFrame({'haver_dis_school': haver_dis_school})

In [162]:
# Append the school-distance column; rows are matched on the index.
building_geo = pd.concat(
    objs=[building_geo, haver_dis_school],
    axis='columns',
)

In [163]:
# Sanity check: (largest, smallest) nearest-school distance.
school_dist = building_geo['haver_dis_school']
(school_dist.max(), school_dist.min())

(3.0007216570346427, 0.0045077194153888755)

In [164]:
# Display the building table, now including 'haver_dis_school'.
building_geo

Unnamed: 0,도로명주소,위도,경도,xy_tuple,geometry,haver_dis_subway,haver_dis_school
0,서울특별시 강동구 양재대로 1540,37.542920,127.142450,"(37.5429196, 127.1424496)",POINT (968414.138 1949348.008),0.316026,0.459123
1,서울특별시 강동구 양재대로 1449,37.535248,127.138685,"(37.5352477, 127.13868470000001)",POINT (968078.277 1948498.129),0.307749,0.416656
2,서울특별시 강동구 양재대로128길,37.544508,127.144746,"(37.5445084, 127.14474569999999)",POINT (968617.642 1949523.507),0.207319,0.498036
3,서울특별시 강동구 천호대로 1156,37.533434,127.139603,"(37.533434299999996, 127.13960279999999)",POINT (968158.619 1948296.632),0.487983,0.432536
4,서울특별시 강동구 천호대로 1238,37.535540,127.148192,"(37.5355401, 127.14819159999999)",POINT (968918.323 1948527.384),0.765237,0.442360
...,...,...,...,...,...,...,...
119967,서울특별시 영등포구 영중로 155,37.530283,126.905429,"(37.5302831, 126.9054287)",POINT (947466.785 1948052.068),0.571565,0.273858
119968,서울특별시 영등포구 영중로 163,37.530840,126.905173,"(37.5308403, 126.9051727)",POINT (947444.557 1948114.030),0.505860,0.252851
119969,서울특별시 영등포구 영중로 155-8,37.530245,126.904834,"(37.5302452, 126.9048341)",POINT (947414.222 1948048.196),0.555776,0.315615
119970,서울특별시 영등포구 영중로 157-11,37.530342,126.904867,"(37.530341899999996, 126.90486659999999)",POINT (947417.161 1948058.906),0.546632,0.306686


### 4. 건물별 5km 반경 내 마트 중 가장 가까운 마트와의 하버사인 거리 추출

In [165]:
# Haversine distance from every building to its nearest large mart,
# in haversine()'s default unit (kilometres).
haver_dis_mart = []

# Hoisted out of the loop: these columns do not change between buildings.
mart_points = mart_geo['geometry']
mart_coords = mart_geo['xy_tuple']

for building_index in tqdm(building_geo.index, desc="건물별 5km 반경 내 가장 가까운 마트와의 거리 추출 중"):

    # (lat, lon) tuple of the building -- the argument order haversine() expects.
    origin = building_geo['xy_tuple'][building_index]

    # Pre-filter: keep only the marts inside a buffer of 5,000 CRS units
    # around the building (5 km, assuming the metre-based EPSG:5179 CRS).
    search_area = building_geo['geometry'][building_index].buffer(5000)
    candidates = mart_coords[mart_points.within(search_area)]

    if candidates.empty:
        # No mart in range: search every mart instead of reporting the
        # distance to whichever mart happens to be stored in row 0.
        candidates = mart_coords

    haver_dis_mart.append(
        min(
            (haversine(origin, mart) for mart in candidates),
            default=float('nan'),  # only reached when there are no marts at all
        )
    )

건물별 5km 반경 내 가장 가까운 마트와의 거리 추출 중: 100%|███████████████████| 119972/119972 [19:29<00:00, 102.60it/s]


In [170]:
# Turn the list of distances into a one-column DataFrame so it can be
# concatenated onto the building table.
haver_dis_mart = pd.DataFrame({'haver_dis_mart': haver_dis_mart})

In [171]:
# Append the mart-distance column; rows are matched on the index.
building_geo = pd.concat(
    objs=[building_geo, haver_dis_mart],
    axis='columns',
)

In [178]:
# Sanity check: (largest, smallest) nearest-mart distance.
mart_dist = building_geo['haver_dis_mart']
(mart_dist.max(), mart_dist.min())

(4.671044765841274, 0.0001888427599679319)

In [179]:
# Display the building table, now including 'haver_dis_mart'.
building_geo

Unnamed: 0,도로명주소,위도,경도,xy_tuple,geometry,haver_dis_subway,haver_dis_school,haver_dis_mart
0,서울특별시 강동구 양재대로 1540,37.542920,127.142450,"(37.5429196, 127.1424496)",POINT (968414.138 1949348.008),0.316026,0.459123,0.110586
1,서울특별시 강동구 양재대로 1449,37.535248,127.138685,"(37.5352477, 127.13868470000001)",POINT (968078.277 1948498.129),0.307749,0.416656,0.139283
2,서울특별시 강동구 양재대로128길,37.544508,127.144746,"(37.5445084, 127.14474569999999)",POINT (968617.642 1949523.507),0.207319,0.498036,0.232098
3,서울특별시 강동구 천호대로 1156,37.533434,127.139603,"(37.533434299999996, 127.13960279999999)",POINT (968158.619 1948296.632),0.487983,0.432536,0.346491
4,서울특별시 강동구 천호대로 1238,37.535540,127.148192,"(37.5355401, 127.14819159999999)",POINT (968918.323 1948527.384),0.765237,0.442360,0.746099
...,...,...,...,...,...,...,...,...
119967,서울특별시 영등포구 영중로 155,37.530283,126.905429,"(37.5302831, 126.9054287)",POINT (947466.785 1948052.068),0.571565,0.273858,0.283304
119968,서울특별시 영등포구 영중로 163,37.530840,126.905173,"(37.5308403, 126.9051727)",POINT (947444.557 1948114.030),0.505860,0.252851,0.340109
119969,서울특별시 영등포구 영중로 155-8,37.530245,126.904834,"(37.5302452, 126.9048341)",POINT (947414.222 1948048.196),0.555776,0.315615,0.271392
119970,서울특별시 영등포구 영중로 157-11,37.530342,126.904867,"(37.530341899999996, 126.90486659999999)",POINT (947417.161 1948058.906),0.546632,0.306686,0.282294


### 5. 건물별 500m 반경 내 상권

In [442]:
# Load the commercial-district table (cp949-encoded CSV). Later cells read its
# '상권_코드' (district code), 'lat' and 'lon' columns.
commercial = pd.read_csv('commercial_code.csv', encoding='cp949')

In [224]:
# Load per-district sales and living-population figures (cp949-encoded CSV).
sales = pd.read_csv('com_sales_pop.csv', encoding='cp949')

In [225]:
# Row labels of districts whose total sales ('총_매출액') value is missing.
sales.loc[sales['총_매출액'].isna()].index

Int64Index([804, 1271, 1304, 1310, 1343, 1371, 1400, 1424, 1433, 1434], dtype='int64')

In [226]:
# Rows of districts whose total living population ('총_생활인구_수') is missing.
sales.loc[sales['총_생활인구_수'].isna()]

Unnamed: 0,상권_코드,총_매출액,총_건수,건당_매출액,총_생활인구_수
1277,1001278,3262668.0,288.0,11328.708333,


- 결측치(총 매출액 또는 총 생활인구 수가 비어 있는 값)를 보유한 상권 제거

In [443]:
# Order the districts by district code and renumber the rows from 0.
commercial_sort = commercial.sort_values(by='상권_코드')
commercial_sort = commercial_sort.reset_index(drop=True)

In [445]:
# Remove the districts whose sales record lacks total sales or total living
# population. The districts are identified by their code ('상권_코드') taken from
# `sales`, instead of a hard-coded list of row positions, so the filter stays
# correct if either file changes or the two tables are not in the same order.
incomplete_rows = sales[['총_매출액', '총_생활인구_수']].isna().any(axis=1)
incomplete_codes = sales.loc[incomplete_rows, '상권_코드']
commercial_sort = commercial_sort[
    ~commercial_sort['상권_코드'].isin(incomplete_codes)
].reset_index(drop=True)

In [447]:
# Build (latitude, longitude) tuples for both district tables.
for districts in (commercial, commercial_sort):
    districts['xy_tuple'] = list(zip(districts['lat'], districts['lon']))

In [448]:
# Prepare the GeoPandas geometry column: one shapely Point per district,
# built in (x, y) = (longitude, latitude) order.
for districts in (commercial, commercial_sort):
    districts['geometry'] = [
        Point([lon, lat]) for lon, lat in zip(districts['lon'], districts['lat'])
    ]

In [449]:
# Convert both district tables to GeoDataFrames declared as longitude/latitude
# (EPSG:4326), then reproject them to EPSG:5179.
# CRS values are given as authority strings: the {'init': 'epsg:...'} dict
# form is deprecated and makes pyproj emit a FutureWarning on every use.
commercial_geo = gpd.GeoDataFrame(commercial, geometry='geometry', crs='EPSG:4326')
commercial_geo = commercial_geo.to_crs('EPSG:5179')

commercial_sort_geo = gpd.GeoDataFrame(commercial_sort, geometry='geometry', crs='EPSG:4326')
commercial_sort_geo = commercial_sort_geo.to_crs('EPSG:5179')

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


#### 건물별 500m 반경 내 상권코드 추출 --> 인덱스 활용

In [451]:
# For every building, list the row labels of commercial_sort_geo whose point
# lies within a buffer of 500 CRS units around the building. Note that the
# stored values are row labels of commercial_sort_geo, not district codes.
district_points = commercial_sort_geo['geometry']

com_code_index = [
    list(
        district_points[
            district_points.within(building_geo['geometry'][building_index].buffer(500))
        ].index
    )
    for building_index in tqdm(building_geo.index, desc="건물별 500m 반경 내 상권코드 추출 중")
]

건물별 500m 반경 내 상권코드 추출 중: 100%|███████████████████████████████████| 119972/119972 [19:05<00:00, 104.75it/s]


In [453]:
# Inspect the district row labels collected for the second building.
com_code_index[1]

[975, 984, 989, 990, 1003, 1154]

### 6. 건물별 500m 반경 내 상권들의 평균 매출액 및 유동인구 산정

In [457]:
# Keep only the plain attribute columns for the final table.
# errors='ignore' is needed because no visible cell creates a 'com_code'
# column; without it the drop raises KeyError when that column is absent.
building_sim = building_geo.drop(
    ['xy_tuple', 'geometry', 'com_code'], axis=1, errors='ignore'
)

In [458]:
# Display the simplified building table.
building_sim

Unnamed: 0,도로명주소,위도,경도,haver_dis_subway,haver_dis_school,haver_dis_mart
0,서울특별시 강동구 양재대로 1540,37.542920,127.142450,0.316026,0.459123,0.110586
1,서울특별시 강동구 양재대로 1449,37.535248,127.138685,0.307749,0.416656,0.139283
2,서울특별시 강동구 양재대로128길,37.544508,127.144746,0.207319,0.498036,0.232098
3,서울특별시 강동구 천호대로 1156,37.533434,127.139603,0.487983,0.432536,0.346491
4,서울특별시 강동구 천호대로 1238,37.535540,127.148192,0.765237,0.442360,0.746099
...,...,...,...,...,...,...
119967,서울특별시 영등포구 영중로 155,37.530283,126.905429,0.571565,0.273858,0.283304
119968,서울특별시 영등포구 영중로 163,37.530840,126.905173,0.505860,0.252851,0.340109
119969,서울특별시 영등포구 영중로 155-8,37.530245,126.904834,0.555776,0.315615,0.271392
119970,서울특별시 영등포구 영중로 157-11,37.530342,126.904867,0.546632,0.306686,0.282294


In [460]:
# Number of per-building district lists; one list is appended per building.
len(com_code_index)

119972

- 건물 500m 반경 내 상권 총매출액의 평균 = 평균 매출액 / 건물 500m 반경 내 상권 총 생활인구의 평균 = 평균 생활인구

In [328]:
# Re-read the per-district sales / living-population CSV into `sales`.
# NOTE(review): this is the unfiltered file, so its row positions include the
# districts with missing values -- confirm before indexing it by position.
sales = pd.read_csv('com_sales_pop.csv', encoding='cp949')

In [454]:
# Show the district rows (all columns) collected for the first building.
commercial_sort_geo.iloc[com_code_index[0],:]

Unnamed: 0,기준_년월_코드,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,엑스좌표_값,와이좌표_값,시군구_코드,행정동_코드,형태정보,lon,lat,xy_tuple,geometry
985,201810,A,골목상권,1000987,양재대로116길,212534,448842,11740,11740685,,127.142613,37.541751,"(37.5417510819111, 127.14261274142063)",POINT (968428.057 1949218.313)
986,201810,A,골목상권,1000988,양재대로124길,212784,449174,11740,11740685,,127.145447,37.544739,"(37.54473895318282, 127.14544727621728)",POINT (968679.714 1949548.852)
998,201810,A,골목상권,1001000,진황도로47길,212452,448612,11740,11740685,,127.141681,37.53968,"(37.539679909928466, 127.14168102091898)",POINT (968344.872 1948988.843)
999,201810,A,골목상권,1001001,천중로39길,212368,449352,11740,11740600,,127.140743,37.546348,"(37.546348357396155, 127.14074297239245)",POINT (968264.829 1949728.983)
1478,201810,R,전통시장,1001490,길동복조리시장,212698,448781,11740,11740685,,127.144467,37.541199,"(37.5411992465174, 127.14446737504005)",POINT (968591.667 1949156.469)


In [456]:
# Show the sales rows at the positions collected for the first building.
# NOTE(review): `sales_sort` is not defined in any visible cell -- presumably
# `sales` with the missing-value rows removed and the index reset; confirm.
sales_sort.iloc[com_code_index[0], :]

Unnamed: 0,상권_코드,총_매출액,총_건수,건당_매출액,총_생활인구_수
985,1000987,14528820000.0,735740,19747.22564,5781902
986,1000988,15308670000.0,816384,18751.80399,21656275
998,1001000,23377770000.0,1141791,20474.64965,13896862
999,1001001,17573980000.0,923591,19027.88068,36070591
1478,1001490,56778810000.0,2718464,20886.35692,13721829


In [465]:
# Per-building mean of total sales and of total living population over the
# commercial districts found near the building.
mean_sales = []
mean_pop = []

# com_code_index stores row positions of commercial_sort_geo, from which the
# districts with missing values were removed. `sales` is the unfiltered file,
# so indexing it with those positions would pick the wrong districts. The
# positions are therefore translated to district codes, and `sales` is
# filtered by code.
district_codes = commercial_sort_geo['상권_코드']

for building_index in tqdm(building_sim.index, desc='평균 매출액과 평균 생활인구 수 구하기'):

    nearby_codes = district_codes.iloc[com_code_index[building_index]]
    df = sales[sales['상권_코드'].isin(nearby_codes)]

    # With no nearby district the selection is empty and both means are NaN.
    mean_sales.append(df['총_매출액'].mean())
    mean_pop.append(df['총_생활인구_수'].mean())

평균 매출액과 평균 생활인구 수 구하기: 100%|█████████████████████████████████| 119972/119972 [01:37<00:00, 1224.86it/s]


In [478]:
# Wrap both result lists as one-column DataFrames for concatenation.
mean_sales_df = pd.DataFrame({'mean_sales': mean_sales})
mean_pop_df = pd.DataFrame({'mean_pop': mean_pop})

In [479]:
# Start the final table: building attributes plus the mean-sales column.
building_total = pd.concat(
    objs=[building_sim, mean_sales_df],
    axis='columns',
)

In [480]:
# Add the mean living-population column to the final table.
building_total = pd.concat(
    objs=[building_total, mean_pop_df],
    axis='columns',
)

In [483]:
# Write the final table to disk, euc-kr encoded, without the index column.
building_total.to_csv('building_total.csv', encoding='euc-kr', index=False)

In [484]:
# Display the final table.
building_total

Unnamed: 0,도로명주소,위도,경도,haver_dis_subway,haver_dis_school,haver_dis_mart,mean_sales,mean_pop
0,서울특별시 강동구 양재대로 1540,37.542920,127.142450,0.316026,0.459123,0.110586,2.551361e+10,18225491.8
1,서울특별시 강동구 양재대로 1449,37.535248,127.138685,0.307749,0.416656,0.139283,4.715353e+10,20419730.5
2,서울특별시 강동구 양재대로128길,37.544508,127.144746,0.207319,0.498036,0.232098,2.896793e+10,22305878.2
3,서울특별시 강동구 천호대로 1156,37.533434,127.139603,0.487983,0.432536,0.346491,3.770563e+10,15761681.4
4,서울특별시 강동구 천호대로 1238,37.535540,127.148192,0.765237,0.442360,0.746099,3.966936e+10,12552664.0
...,...,...,...,...,...,...,...,...
119967,서울특별시 영등포구 영중로 155,37.530283,126.905429,0.571565,0.273858,0.283304,3.073583e+10,9318388.0
119968,서울특별시 영등포구 영중로 163,37.530840,126.905173,0.505860,0.252851,0.340109,3.073583e+10,9318388.0
119969,서울특별시 영등포구 영중로 155-8,37.530245,126.904834,0.555776,0.315615,0.271392,3.073583e+10,9318388.0
119970,서울특별시 영등포구 영중로 157-11,37.530342,126.904867,0.546632,0.306686,0.282294,3.073583e+10,9318388.0
