# 서울시 격자별 인구수 데이터 전처리
- [국토 정보 플랫폼](http://map.ngii.go.kr/ms/map/NlipMap.do) - 국토 통계지도에서 추출

![입지분석](../../이미지/입지분석/인구수_추출.png)

- 시군구 전체의 경우에는 500m 격자단위로 추출할 수가 없어 각 구별로 500m 격자단위로 추출한 뒤 병합하여 중복치 제거

## 사전 설치 라이브러리
geopandas의 경우에는 GDAL, Fiona, pyproj, Shapely의 whl파일이 필요함.

[링크](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely)를 통해서 각자 파이썬, 운영체제 사양에 맞는 파일을 다운받은 뒤 아래 코드 실행

In [None]:
# geopandas를 설치하기 위한 사전 필요 파일들
!pip install pyproj
!pip install shapely
!pip install GDAL
!pip install Fiona

!pip install geopandas

!pip install pandas

# 지도 시각화
!pip install pydeck

필요 라이브러리 호출

In [108]:
import pandas as pd
import geopandas as gpd
import shapely

import pydeck as pdk
from shapely.geometry import Polygon, Point

### 전기차 충전소 접근성 데이터 불러오기
- 전기차 충전소 데이터를 먼저 불러오는 이유는 인구수 데이터의 경우 서울 외곽 지역의 경계선을 모두 포함하는 데이터이지만 전기차 충전소 데이터는 경계선을 포함하지 않는 데이터이기 때문에, 데이터를 전기차 충전소 데이터에 맞춰주기 위함임.

국토 정보 플랫폼에서 제공하는 데이터는 shp파일 형식이므로 geopandas의 read_file로 불러와야 함.

shp파일은 함께 다운로드 된 dbf, prj, shx파일과 같은 위치에 있어야 파일 읽기 가능

In [109]:
charger = gpd.read_file('../../데이터/charger/시범 전기차충전소(시군구격자) 접근성.shp', encoding = 'utf-8')

charger

Unnamed: 0,gid,sgg_nm_k,sgg_nm_e,sgg_cd,sido_nm_k,sido_nm_e,sido_cd,value,stats_year,raw_d_year,geometry
0,다사55a53b,종로구,Jongno-gu,11110,서울특별시,Seoul,11,1.609,2021,202112,"POLYGON ((955000.000 1953500.000, 955000.000 1..."
1,다사53a53a,종로구,Jongno-gu,11110,서울특별시,Seoul,11,0.402,2021,202112,"POLYGON ((953000.000 1953000.000, 953000.000 1..."
2,다사52b53a,종로구,Jongno-gu,11110,서울특별시,Seoul,11,0.535,2021,202112,"POLYGON ((952500.000 1953000.000, 952500.000 1..."
3,다사53a57b,종로구,Jongno-gu,11110,서울특별시,Seoul,11,1.357,2021,202112,"POLYGON ((953000.000 1957500.000, 953000.000 1..."
4,다사53a54b,종로구,Jongno-gu,11110,서울특별시,Seoul,11,0.701,2021,202112,"POLYGON ((953000.000 1954500.000, 953000.000 1..."
...,...,...,...,...,...,...,...,...,...,...,...
419167,나바98a83b,,,,,,,7.706,2021,202112,"POLYGON ((898000.000 1883500.000, 898000.000 1..."
419168,마바76a57a,,,,,,,3.802,2021,202112,"POLYGON ((1176000.000 1857000.000, 1176000.000..."
419169,나바97a87b,,,,,,,4.755,2021,202112,"POLYGON ((897000.000 1887500.000, 897000.000 1..."
419170,나바97a25b,,,,,,,0.662,2021,202112,"POLYGON ((897000.000 1825500.000, 897000.000 1..."


서울시 외의 지역도 같이 있으므로 서울시만 뽑아내 데이터프레임을 만든다.

In [110]:
charger = charger[charger['sido_nm_k'] == '서울특별시']

charger['gid']

0       다사55a53b
1       다사53a53a
2       다사52b53a
3       다사53a57b
4       다사53a54b
          ...   
2415    다사70b52a
2416    다사67b48a
2417    다사68a47a
2418    다사68a51a
2419    다사68a49b
Name: gid, Length: 2420, dtype: object

받은 데이터의 geometry열을 보면 POLYGON 형식으로 되어있으므로 이를 좌표로 변환해주어야 함.

In [111]:
def polygon_to_coordinates(x): 
    lon, lat = x.exterior.xy 
    return [[x, y] for x, y in zip(lon, lat)]

In [112]:
# 좌표계를 변환해주기 위함
charger = charger.to_crs(epsg=4326)
charger['coordinates'] = charger['geometry'].apply(polygon_to_coordinates)

# 인덱스 정리
charger.index = range(1, len(charger) + 1)

# 격자의 중심점 구하기
center_list = list()
for i in charger['coordinates']:
    center = [[(i[0][0] + i[1][0] + i[2][0] + i[3][0])/4, (i[0][1] + i[1][1] + i[2][1] + i[3][1])/4]]
    center_list.append(center)

charger['coord_center'] = pd.DataFrame(center_list)

# 데이터 시각화를 위해 정규화
charger['regularize_value'] = charger['value'] / charger['value'].max()

layer = pdk.Layer(
    'PolygonLayer', # 사용할 Layer 타입
    charger, # 시각화에 쓰일 데이터프레임
    get_polygon='coordinates', # geometry 정보를 담고있는 컬럼 이름
    get_fill_color='[255*regularize_value, 100,255*regularize_value]', # 각 데이터 별 rgb 또는 rgba 값 (0~255)
    pickable=True, # 지도와 interactive 한 동작 on
    auto_highlight=True # 마우스 오버(hover) 시 박스 출력
    ,coverage = 0
)

center_seoul = [127.001699, 37.564214]

view_state = pdk.ViewState(
    longitude=center_seoul[0],
    latitude=center_seoul[1],
    zoom=10)

r = pdk.Deck(layers=[layer], initial_view_state=view_state)
r.show() # 전기차 충전소 데이터 시각화

DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{"initialViewState": {"lat…

### 격자별 인구수 데이터 불러오기
500m 단위 격자는 시군구 전체에서는 불러올 수 없으므로 각 구별 데이터를 따로 병합해주어야 함.

In [132]:
main_population = gpd.read_file('../../데이터/population/dobong_pop.shp', encoding = 'utf-8')
dongdaemoon_pop = gpd.read_file('../../데이터/population/dongdaemoon_pop.shp', encoding = 'utf-8')
dongjak_pop = gpd.read_file('../../데이터/population/dongjak_pop.shp', encoding = 'utf-8')
eunpyeong_pop = gpd.read_file('../../데이터/population/eunpyeong_pop.shp', encoding = 'utf-8')
gangbook_pop = gpd.read_file('../../데이터/population/gangbook_pop.shp', encoding = 'utf-8')
gangdong_pop =gpd.read_file('../../데이터/population/gangdong_pop.shp', encoding = 'utf-8')
gangnam_pop = gpd.read_file('../../데이터/population/gangnam_pop.shp', encoding = 'utf-8')
gangseo_pop = gpd.read_file('../../데이터/population/gangseo_pop.shp', encoding = 'utf-8')
geumcheon_pop = gpd.read_file('../../데이터/population/geumcheon_pop.shp', encoding = 'utf-8')
guro_pop = gpd.read_file('../../데이터/population/guro_pop.shp', encoding = 'utf-8')
gwanak_pop = gpd.read_file('../../데이터/population/gwanak_pop.shp', encoding = 'utf-8')
gwangjin_pop = gpd.read_file('../../데이터/population/gwangjin_pop.shp', encoding = 'utf-8')
jonglo_pop = gpd.read_file('../../데이터/population/jonglo_pop.shp', encoding = 'utf-8')
joonggoo_pop = gpd.read_file('../../데이터/population/joonggoo_pop.shp', encoding = 'utf-8')
joongrang_pop = gpd.read_file('../../데이터/population/joongrang_pop.shp', encoding = 'utf-8')
mapo_pop = gpd.read_file('../../데이터/population/mapo_pop.shp', encoding = 'utf-8')
nowon_pop = gpd.read_file('../../데이터/population/nowon_pop.shp', encoding = 'utf-8')
seocho_pop = gpd.read_file('../../데이터/population/seocho_pop.shp', encoding = 'utf-8')
seodaemun_pop = gpd.read_file('../../데이터/population/seodaemun_pop.shp', encoding = 'utf-8')
songpa_pop = gpd.read_file('../../데이터/population/songpa_pop.shp', encoding = 'utf-8')
sungbook_pop = gpd.read_file('../../데이터/population/sungbook_pop.shp', encoding = 'utf-8')
sungdong_pop = gpd.read_file('../../데이터/population/sungdong_pop.shp', encoding = 'utf-8')
yangcheon_pop = gpd.read_file('../../데이터/population/yangcheon_pop.shp', encoding = 'utf-8')
yeongdeungpo_pop = gpd.read_file('../../데이터/population/yeongdeungpo_pop.shp', encoding = 'utf-8')
yongsan_pop = gpd.read_file('../../데이터/population/yongsan_pop.shp', encoding = 'utf-8')


모두 하나의 데이터프레임에 추가한 뒤, 'gid' 컬럼을 기준으로 중복된 데이터를 모두 제거한다.

In [133]:
main_population = main_population.append(dongdaemoon_pop)
main_population = main_population.append(dongjak_pop)
main_population = main_population.append(eunpyeong_pop)
main_population = main_population.append(gangbook_pop)
main_population = main_population.append(gangdong_pop)
main_population = main_population.append(gangnam_pop)
main_population = main_population.append(gangseo_pop)
main_population = main_population.append(geumcheon_pop)
main_population = main_population.append(guro_pop)
main_population = main_population.append(gwanak_pop)
main_population = main_population.append(gwangjin_pop)
main_population = main_population.append(jonglo_pop)
main_population = main_population.append(joonggoo_pop)
main_population = main_population.append(joongrang_pop)
main_population = main_population.append(mapo_pop)
main_population = main_population.append(nowon_pop)
main_population = main_population.append(seocho_pop)
main_population = main_population.append(seodaemun_pop)
main_population = main_population.append(songpa_pop)
main_population = main_population.append(sungbook_pop)
main_population = main_population.append(sungdong_pop)
main_population = main_population.append(yangcheon_pop)
main_population = main_population.append(yeongdeungpo_pop)
main_population = main_population.append(yongsan_pop)

main_population = main_population.drop_duplicates(subset = 'gid')

main_population

  main_population = main_population.append(dongdaemoon_pop)
  main_population = main_population.append(dongjak_pop)
  main_population = main_population.append(eunpyeong_pop)
  main_population = main_population.append(gangbook_pop)
  main_population = main_population.append(gangdong_pop)
  main_population = main_population.append(gangnam_pop)
  main_population = main_population.append(gangseo_pop)
  main_population = main_population.append(geumcheon_pop)
  main_population = main_population.append(guro_pop)
  main_population = main_population.append(gwanak_pop)
  main_population = main_population.append(gwangjin_pop)
  main_population = main_population.append(jonglo_pop)
  main_population = main_population.append(joonggoo_pop)
  main_population = main_population.append(joongrang_pop)
  main_population = main_population.append(mapo_pop)
  main_population = main_population.append(nowon_pop)
  main_population = main_population.append(seocho_pop)
  main_population = main_population.append(se

Unnamed: 0,gid,lbl,val,geometry
0,다사58b60a,4336.00,4336.0,"POLYGON ((958500.000 1960000.000, 958500.000 1..."
1,다사60b61b,295.00,295.0,"POLYGON ((960500.000 1961500.000, 960500.000 1..."
2,다사58b59b,120.00,120.0,"POLYGON ((958500.000 1959500.000, 958500.000 1..."
3,다사59b65b,442.00,442.0,"POLYGON ((959500.000 1965500.000, 959500.000 1..."
4,다사60a62a,9367.00,9367.0,"POLYGON ((960000.000 1962000.000, 960000.000 1..."
...,...,...,...,...
100,다사54b47b,,,"POLYGON ((954500.000 1947500.000, 954500.000 1..."
102,다사53a46a,,,"POLYGON ((953000.000 1946000.000, 953000.000 1..."
103,다사54a48b,,,"POLYGON ((954000.000 1948500.000, 954000.000 1..."
109,다사54a47b,,,"POLYGON ((954000.000 1947500.000, 954000.000 1..."


In [134]:
j = list() # 인구수 데이터 중 전기차 충전소 데이터의 위치와 일치하지 않는 데이터의 'gid' 추출
for i in main_population['gid']:
    if i not in list(charger['gid']):
        j.append(i)

not_in_population = main_population.copy()

not_in_population = not_in_population.set_index('gid')
not_in_population = not_in_population.loc[j,:]

not_in_population = not_in_population.reset_index()

In [135]:
not_in_population = not_in_population.to_crs(epsg=4326)
not_in_population['coordinates'] = not_in_population['geometry'].apply(polygon_to_coordinates)

not_in_population_list = list()
for i in not_in_population['coordinates']:
    center = [[(i[0][0] + i[1][0] + i[2][0] + i[3][0])/4, (i[0][1] + i[1][1] + i[2][1] + i[3][1])/4]]
    not_in_population_list.append(center)

not_in_population['coord_center'] = pd.DataFrame(not_in_population_list)
not_in_population['regularize_val'] = not_in_population['val'] / not_in_population['val'].max()
not_in_population['regularize_val'] = not_in_population['regularize_val'].fillna(0)
layer = pdk.Layer(
    'PolygonLayer', # 사용할 Layer 타입
    not_in_population, # 시각화에 쓰일 데이터프레임
    get_polygon='coordinates', # geometry 정보를 담고있는 컬럼 이름
    get_fill_color='[255, 100,255]', # 각 데이터 별 rgb 또는 rgba 값 (0~255)
    pickable=True, # 지도와 interactive 한 동작 on
    auto_highlight=True # 마우스 오버(hover) 시 박스 출력
    ,coverage = 0
)

center_seoul = [127.001699, 37.564214]

view_state = pdk.ViewState(
    longitude=center_seoul[0],
    latitude=center_seoul[1],
    zoom=10)

r = pdk.Deck(layers=[layer], initial_view_state=view_state)
r.show()

DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{"initialViewState": {"lat…

경계지점 격자 제거

In [136]:
main_population = main_population.set_index('gid')
not_in_population = not_in_population.set_index('gid')

main_population = main_population.drop(not_in_population.index)
main_population = main_population.reset_index()

In [138]:
main_population = main_population.to_crs(epsg=4326)
main_population['coordinates'] = main_population['geometry'].apply(polygon_to_coordinates)

main_population

Unnamed: 0,gid,lbl,val,geometry,coordinates
0,다사58b60a,4336.00,4336.0,"POLYGON ((127.02962 37.63854, 127.02959 37.643...","[[127.02962122758167, 37.638535251970794], [12..."
1,다사60b61b,295.00,295.0,"POLYGON ((127.05221 37.65214, 127.05218 37.656...","[[127.05220827725105, 37.652143202902444], [12..."
2,다사58b59b,120.00,120.0,"POLYGON ((127.02965 37.63403, 127.02962 37.638...","[[127.0296496365157, 37.63402867517412], [127...."
3,다사59b65b,442.00,442.0,"POLYGON ((127.04065 37.68815, 127.04062 37.692...","[[127.04064994099917, 37.68815209082502], [127..."
4,다사60a62a,9367.00,9367.0,"POLYGON ((127.04651 37.65663, 127.04649 37.661...","[[127.04651278276538, 37.6566281247312], [127...."
...,...,...,...,...,...
2415,다사54b47b,,,"POLYGON ((126.98506 37.52568, 126.98503 37.530...","[[126.98506171609876, 37.525681093742804], [12..."
2416,다사53a46a,,,"POLYGON ((126.96818 37.51209, 126.96815 37.516...","[[126.96818235085506, 37.51208598634863], [126..."
2417,다사54a48b,,,"POLYGON ((126.97934 37.53467, 126.97931 37.539...","[[126.9793406172334, 37.53466953408817], [126...."
2418,다사54a47b,,,"POLYGON ((126.97940 37.52566, 126.97937 37.530...","[[126.97940326638944, 37.52565628661525], [126..."


In [140]:
main_population.index = range(1, len(charger) + 1)

# 격자의 중심점 구하기
center_list = list()
for i in charger['coordinates']:
    center = [[(i[0][0] + i[1][0] + i[2][0] + i[3][0])/4, (i[0][1] + i[1][1] + i[2][1] + i[3][1])/4]]
    center_list.append(center)

main_population['coord_center'] = pd.DataFrame(center_list)

# 데이터 시각화를 위해 정규화
main_population['regularize_val'] = main_population['val'] / main_population['val'].max()

layer = pdk.Layer(
    'PolygonLayer', # 사용할 Layer 타입
    main_population, # 시각화에 쓰일 데이터프레임
    get_polygon='coordinates', # geometry 정보를 담고있는 컬럼 이름
    get_fill_color='[255*regularize_val, 100,255*regularize_val]', # 각 데이터 별 rgb 또는 rgba 값 (0~255)
    pickable=True, # 지도와 interactive 한 동작 on
    auto_highlight=True # 마우스 오버(hover) 시 박스 출력
    ,coverage = 0
)

center_seoul = [127.001699, 37.564214]

view_state = pdk.ViewState(
    longitude=center_seoul[0],
    latitude=center_seoul[1],
    zoom=10)

r = pdk.Deck(layers=[layer], initial_view_state=view_state)
r.show()

DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{"initialViewState": {"lat…

In [142]:
main_population.to_excel('../../데이터/입지분석/인구수_완.xlsx')