## 기본 세팅

In [None]:
import pathlib
import random
from functools import reduce
from collections import defaultdict

import pandas as pd
import geopandas as gpd
import folium

import shapely
import numpy as np
import requests
import json

from IPython.display import display
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import xgboost
import sklearn.cluster
import tensorflow as tf
import webbrowser 

import glob
import zipfile
import math

from sklearn.preprocessing import MinMaxScaler
from geoband import API
from math import *

## 데이터셋 다운로드

In [None]:
from geoband.API import *

# Download every input file of COMPAS subject 'SBJ_2206_001'.
# Each call passes (subject id, resource id, file name).

# 1. Analysis grid (100 m x 100 m) plus land, river and park layers
GetCompasData('SBJ_2206_001', '5', '5.김해시_격자(100X100).geojson')
GetCompasData('SBJ_2206_001', '27', '16.김해시_토지소유정보.geojson')
GetCompasData('SBJ_2206_001', '9', '9.김해시_하천현황.geojson')
GetCompasData('SBJ_2206_001', '26', '8.김해시_공원현황.geojson')

# 2. Surveillance factor
# Surveillance vulnerability index (CCTV installations)
GetCompasData('SBJ_2206_001', '1', '1.김해시_CCTV설치현황.csv')

# 3. Crime factor
# Crime occurrence index (112 emergency-call history mapped to the grid)
GetCompasData('SBJ_2206_001', '25', '4.김해시_112신고이력(격자매핑).csv')

# 4. Commercial factor
# Adult-entertainment business index
GetCompasData('SBJ_2206_001', '24', '20.김해시_치안_유관업종_현황.csv')

# 5. Population factor
# Floating population index
GetCompasData('SBJ_2206_001', '3', '3.김해시_성연령별_요일별_유동인구.zip')
# Dependent population index
GetCompasData('SBJ_2206_001', '22', '10.김해시_성연령별_거주인구격자.geojson')
# Foreigner risk index
GetCompasData('SBJ_2206_001', '11', '11.김해시_외국인_읍면동별_격자.geojson')

# 6. Housing and location factor
GetCompasData('SBJ_2206_001', '23', '12.김해시_건물노후도.geojson')
# Building characteristics index
GetCompasData('SBJ_2206_001', '13', '13.김해시_도로명주소(건물).geojson')
# Child facility index
GetCompasData('SBJ_2206_001', '18', '18.김해시_유치원현황.csv')
GetCompasData('SBJ_2206_001', '17', '17.김해시_어린이집현황.csv')
GetCompasData('SBJ_2206_001', '19', '19.김해시_학교(초,중,고)현황.csv')

# 7. Safety and security facility factor
# Public surveillance vulnerability index
# Alternative surveillance vulnerability index
GetCompasData('SBJ_2206_001', '6', '6.김해시_보안등설치현황.csv')
GetCompasData('SBJ_2206_001', '28', '7.김해시_안전비상벨설치현황.csv')
GetCompasData('SBJ_2206_001', '21', '21.김해시_아동안전지킴이집_현황.csv')

In [None]:
import zipfile

# Extract the floating-population archive into the working directory.
# zipfile decodes member names as CP437 unless the entry's UTF-8 flag
# (general-purpose bit 11, 0x800) is set. The names are re-decoded as EUC-KR
# (presumably the encoding the archive was written with) only for entries
# without that flag; re-encoding an already-correct UTF-8 name to CP437
# would raise UnicodeEncodeError.
with zipfile.ZipFile('3.김해시_성연령별_요일별_유동인구.zip', 'r') as zf:
    zipinfo = zf.infolist()
    for info in zipinfo:
        if not info.flag_bits & 0x800:
            info.filename = info.filename.encode('cp437').decode('euc-kr')
        zf.extract(info, './')

## 1) 분석 단위 (격자)

In [None]:
# Load the analysis grid, land-ownership parcels, rivers and the resident
# population grid.
grid_gdf = gpd.read_file('5.김해시_격자(100X100).geojson')
land_gdf = gpd.read_file('16.김해시_토지소유정보.geojson')
river_gdf = gpd.read_file('9.김해시_하천현황.geojson')
pop_gdf = gpd.read_file('10.김해시_성연령별_거주인구격자.geojson')

# Total resident population per cell = sum of the numeric columns of the row.
# numeric_only=True keeps the string 'gid' and the geometry out of the sum;
# without it current pandas raises a TypeError.
pop_gdf['pop_sum'] = pop_gdf.sum(axis=1, numeric_only=True)
pop_gdf = pop_gdf[['gid', 'geometry', 'pop_sum']]

# Attach land, river and population attributes to every grid cell.
# NOTE(review): `op=` is the older spelling of sjoin's `predicate=` argument;
# switch it when the installed geopandas no longer accepts `op`.
set_gdf = gpd.sjoin(grid_gdf, land_gdf, how='left', op='intersects')
set_gdf = set_gdf.drop(['index_right'], axis = 1)
set_gdf = gpd.sjoin(set_gdf, river_gdf, how='left', op='intersects')
set_gdf = set_gdf.drop(['index_right'], axis = 1)
set_gdf = gpd.sjoin(set_gdf, pop_gdf, how='left', op='contains')

# Rows to exclude: land_area >= 9000 (original note: a parcel covering 90 %
# or more of the cell), rows touching a river (RIVLEN2 > 0), or a summed
# resident population of 5 or fewer. Comparisons with NaN are False, so rows
# without a match on an attribute are not excluded by that attribute.
condition = (set_gdf.land_area >= 9000) | (set_gdf.RIVLEN2 > 0) | (set_gdf.pop_sum <= 5)
subset_gdf = set_gdf[condition]

# (all rows) - (rows meeting the condition). subset_gdf is exactly the rows
# where `condition` holds, so the complement is a boolean filter; this avoids
# an outer merge keyed on every column, geometry included.
result_grid_gdf = set_gdf[~condition].reset_index(drop=True)

result_grid_gdf

In [None]:
# Keep one row per grid cell: only the grid's own id and geometry survive,
# duplicates produced by the spatial joins are removed, and the id column
# gets its plain name back.
result_grid_gdf = (
    result_grid_gdf[['gid_left', 'geometry']]
    .drop_duplicates()
    .rename(columns={'gid_left': 'gid'})
)
result_grid_gdf

In [None]:
# NOTE: this binds a second name to the same object; it is not a copy.
# Columns later added in place to result_grid_gdf (or to any other alias of
# it) are therefore visible through `backup` as well.
backup = result_grid_gdf
backup

In [None]:
# Initial map centre, passed as `location` to every folium.Map below.
center = [35.2285451, 128.8893517]

In [None]:
# Grid without its 'gid' column (drop returns a new frame; grid_gdf is untouched).
grid_gdf_copy = grid_gdf.drop(columns=['gid'])
grid_gdf_copy

### 전체 Grid Map 확인하기

In [None]:
# Base map; note that folium.Map takes its location as (lat, lng).
total_map = folium.Map(location=center, zoom_start=11)

# Draw the complete grid as a single GeoJSON layer.
_grid_layer = folium.GeoJson(grid_gdf_copy, name='Grid')
_grid_layer.add_to(total_map)

total_map.save('total_map.html')
total_map

In [None]:
# Base map; note that folium.Map takes its location as (lat, lng).
result_grid_map = folium.Map(location=center, zoom_start=11)

# Draw only the cells that survived the filtering as a GeoJSON layer.
_filtered_layer = folium.GeoJson(result_grid_gdf, name='Grid')
_filtered_layer.add_to(result_grid_map)

result_grid_map.save('result_grid_map.html')
result_grid_map

## 2) 감시 취약 지수

In [None]:
# Load the CCTV installation list.
cctv_df = pd.read_csv('1.김해시_CCTV설치현황.csv', encoding='utf-8')

# Turn it into a GeoDataFrame of points built from the lon/lat columns.
_cctv_points = gpd.points_from_xy(cctv_df.lon, cctv_df.lat)
cctv_gdf = gpd.GeoDataFrame(cctv_df, geometry=_cctv_points)

cctv_gdf

In [None]:
# Count the CCTV records that fall in each grid cell. The left join keeps
# cells without any CCTV; they get a count of 0 because `count` skips NaN.
cctv_set_gdf = (
    gpd.sjoin(result_grid_gdf, cctv_gdf, how='left', op='intersects')
    .groupby('gid')['cctv_nm']
    .agg(['count'])
)
cctv_set_gdf

In [None]:
# _1: one minus (count * pi * 50^2) / 10000, i.e. presumably the share of a
#     100 m x 100 m cell left uncovered when each CCTV covers a 50 m radius.
_covered_area = cctv_set_gdf['count'] * 50 * 50 * pi
cctv_set_gdf['감시취약지수_1'] = 1 - (_covered_area / 10000)

# _2: min-max normalisation of _1, _3: scaled to 0-100, _4: weighted by 0.357.
_raw = cctv_set_gdf['감시취약지수_1']
_lo = _raw.min()
_hi = _raw.max()
cctv_set_gdf['감시취약지수_2'] = (_raw - _lo) / (_hi - _lo)
cctv_set_gdf['감시취약지수_3'] = cctv_set_gdf['감시취약지수_2'] * 100
cctv_set_gdf['감시취약지수_4'] = cctv_set_gdf['감시취약지수_3'] * 0.357

cctv_set_gdf = cctv_set_gdf.sort_values(by=['감시취약지수_4'], ascending=False)
cctv_set_gdf

In [None]:
# Weighted CCTV index (column _4 = 0-100 score * 0.357), indexed by gid;
# this is the frame merged into the final score.
cctv_result = cctv_set_gdf[['감시취약지수_4']]
cctv_result

In [None]:
# 0-100 CCTV score joined back to the grid geometry (used for the map).
cctv_score = pd.merge(
    cctv_set_gdf[['감시취약지수_3']],
    result_grid_gdf,
    on='gid',
    how='left',
)
cctv_score

In [None]:
# Choropleth of the 0-100 CCTV surveillance vulnerability score.
cctv_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=result_grid_gdf,
    data=cctv_score,
    columns=['gid', '감시취약지수_3'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="CCTV 감시 취약 지수",
).add_to(cctv_m)

cctv_m.save('cctv_map.html')

cctv_m

## 3) 범죄 발생 지수

In [None]:
# Load the 112 emergency-call history (already mapped to grid ids).
police_df = pd.read_csv('4.김해시_112신고이력(격자매핑).csv', encoding='utf-8')
police_df

In [None]:
# NOTE: alias, not a copy. In-place changes made through result_grid_gdf2
# also change result_grid_gdf.
result_grid_gdf2 = result_grid_gdf
result_grid_gdf2

In [None]:
# Plain-DataFrame view of the filtered grid.
police_tmp = pd.DataFrame(result_grid_gdf2)

# Left-join the reports onto the grid (no key is given, so pandas joins on
# the columns both frames share) and remove exact duplicate rows.
police_df = police_tmp.merge(police_df, how='left').drop_duplicates()
police_df

In [None]:
police_df['case_type'].unique()

In [None]:
# Report types grouped into severity tiers (the next cell scores them 5..0).

# Murder
crime1 = ['살인']

# Robbery / rape / indecent assault
crime2 = ['성폭력', '주거침입', '납치감금', '강도']

# Theft / violence
crime3 = ['가정폭력', '폭력', '절도', '데이트 폭력', '아동학대(가정내)', 
          '협박', '스토킹', '아동학대(기타)', '공갈', '학교폭력']
# Reports that led to a complaint
crime4 = ['위험방지', '기타형사범', '시비', '소음', 
          '주취자', '기타_타기관', '서비스요청', '상담문의', '교통사고', '교통위반', 
          '실종(실종아동 등)', '행패소란', '청소년비행', '무전취식승차', '보호조치',
            '교통불편', '기타경범', '화재', '비상벨', '재물손괴', '가출 등', '자살', 
          '사기', '풍속영업', '도박', '경비업체요청', '수배불심자', '구조요청',
          '인피도주', '치기', '노점상', '위험동물', '사망.대형사고', '재해재난', '청탁금지법',
            '보이스 피싱', '음주운전', '마약', '동물학대', '변사자']

# Reports that led to no complaint
crime5 = ['FTX', '분실습득',  '내용확인불가']

# Missing values (NaN is replaced by 0 before scoring)
crime6 = [0]

In [None]:
# Grid cells without any report have NaN case_type after the left join;
# turn those into 0 so they match the crime6 tier.
police_df = police_df.fillna(0)

# Severity score per report type: crime1 -> 5 ... crime5 -> 1, crime6 -> 0.
# One lookup replaces six copy-pasted loops that assigned to slices
# (SettingWithCopyWarning) and re-concatenated the result on every pass.
_score_tiers = [
    (crime1, 5),
    (crime2, 4),
    (crime3, 3),
    (crime4, 2),
    (crime5, 1),
    (crime6, 0),
]
_score_by_type = {}
for _case_types, _tier_score in _score_tiers:
    for _case_type in _case_types:
        _score_by_type.setdefault(_case_type, _tier_score)

police_df['police_score'] = police_df['case_type'].map(_score_by_type)

# As before, rows whose case_type is in none of the lists are discarded.
# Row order is no longer grouped by tier; later cells only aggregate.
police_df = police_df.dropna(subset=['police_score']).copy()
police_df['police_score'] = police_df['police_score'].astype(int)
police_df

In [None]:
# Attach the scored reports to the grid, then total the scores per cell.
police_set_gdf = pd.merge(left=result_grid_gdf2, right=police_df, how="left", on="gid")

# Per (cell, report type): number of reports and summed score. Rows with a
# missing case_type are left out by groupby.
police_set_gdf = police_set_gdf.groupby(['gid', 'case_type'])['police_score'].agg(['count', 'sum'])

# Per cell: total score over all report types. The former intermediate
# `groupby(...)['count', 'sum'].mean()` step is gone: it regrouped on the same
# keys (a no-op) and its tuple-style column selection is rejected by current
# pandas. astype(float) keeps the dtype that step used to produce.
police_set_gdf = police_set_gdf.groupby(level='gid')[['sum']].sum().astype(float)
police_set_gdf

In [None]:
# _1: min-max normalised total score, _2: scaled to 0-100, _3: weighted by 0.201.
_total = police_set_gdf['sum']
_lo = _total.min()
_hi = _total.max()
police_set_gdf['범죄발생지수_1'] = (_total - _lo) / (_hi - _lo)
police_set_gdf['범죄발생지수_2'] = police_set_gdf['범죄발생지수_1'] * 100
police_set_gdf['범죄발생지수_3'] = police_set_gdf['범죄발생지수_2'] * 0.201

police_set_gdf = police_set_gdf.sort_values(by=['범죄발생지수_3'], ascending=False)
police_set_gdf

In [None]:
# Weighted crime index (column _3 = 0-100 score * 0.201), indexed by gid;
# this is the frame merged into the final score.
call_result = police_set_gdf[['범죄발생지수_3']]
call_result

In [None]:
# 0-100 crime score joined to the grid geometry; the right join keeps every
# grid cell, including those without a score.
call_score = pd.merge(
    police_set_gdf[['범죄발생지수_2']],
    result_grid_gdf,
    on='gid',
    how='right',
)
call_score

In [None]:
# Choropleth of the 0-100 crime occurrence score.
call_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=result_grid_gdf,
    data=call_score,
    columns=['gid', '범죄발생지수_2'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="범죄 발생 지수",
).add_to(call_m)

call_m.save('call_map.html')

call_m

## 4) 풍속 업소 지수

In [None]:
# Load the public-safety-related business list and work on a copy of it.
busi_tmp_df = pd.read_csv('20.김해시_치안_유관업종_현황.csv')
busi_df = busi_tmp_df.copy()

# Point geometry from the lon/lat columns.
_busi_points = gpd.points_from_xy(busi_df.lon, busi_df.lat)
busi_gdf = gpd.GeoDataFrame(busi_df, geometry=_busi_points)

busi_gdf

In [None]:
busi_gdf['open_gbn'].unique()

In [None]:
# Remove businesses whose status is '폐업' (closed down).
_still_listed = busi_gdf['open_gbn'] != '폐업'
busi_gdf = busi_gdf[_still_listed]
busi_gdf

In [None]:
# Businesses per grid cell; only cells with more than 5 are kept.
_busi_counts = (
    gpd.sjoin(result_grid_gdf2, busi_gdf, how='left', op='intersects')
    .groupby('gid')['store_gbn']
    .agg(['count'])
)
busi_set_gdf = _busi_counts[_busi_counts['count'] > 5]
busi_set_gdf

In [None]:
# Bring back every grid cell; cells dropped by the threshold get a count of 0.
busi_set_gdf = pd.merge(
    busi_set_gdf, result_grid_gdf, how='right', on='gid'
).fillna(0)
busi_set_gdf

In [None]:
# _1: the cell's share of all counted businesses.
_count_total = busi_set_gdf['count'].sum()
busi_set_gdf['풍속업소지수_1'] = busi_set_gdf['count'] / _count_total

# _2: min-max normalised, _3: scaled to 0-100, _4: weighted by 0.133.
_share = busi_set_gdf['풍속업소지수_1']
_lo = _share.min()
_hi = _share.max()
busi_set_gdf['풍속업소지수_2'] = (_share - _lo) / (_hi - _lo)
busi_set_gdf['풍속업소지수_3'] = busi_set_gdf['풍속업소지수_2'] * 100
busi_set_gdf['풍속업소지수_4'] = busi_set_gdf['풍속업소지수_3'] * 0.133

busi_set_gdf = busi_set_gdf.sort_values(by=['풍속업소지수_4'], ascending=False)
busi_set_gdf

In [None]:
# Weighted business index per cell.
# NOTE: busi_result is re-bound two cells below, so this value is replaced
# before the final merge.
busi_result = busi_set_gdf[['gid', '풍속업소지수_4']]
busi_result

In [None]:
# 0-100 business score per cell.
busi_score = busi_set_gdf[['gid', '풍속업소지수_3']]
busi_score

In [None]:
# Weighted (x 0.133) business index used in the final score. The final
# column list refers to it as '풍속업소지수_3', so that name is kept.
# Working on a copy leaves busi_score at its 0-100 scale (like the other
# per-index score frames) and makes the cell safe to re-run; previously
# busi_result was only another name for busi_score, so the weighting changed
# both and was applied again on every re-run.
busi_result = busi_score.copy()
busi_result['풍속업소지수_3'] = busi_result['풍속업소지수_3'] * 0.133
busi_score

In [None]:
# Choropleth of the adult-entertainment business score.
busi_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=result_grid_gdf,
    data=busi_score,
    columns=['gid', '풍속업소지수_3'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="풍속 업소 지수",
).add_to(busi_m)

busi_m.save('business_map.html')

busi_m

## 5) 유동 인구 지수

In [None]:
# Floating-population measurements and the cell-point table they refer to.
# NOTE(review): these two CSVs are presumably the files extracted from the
# floating-population zip above — confirm the names match.
moving_gdf = pd.read_csv('김해시_요일별_성연령별_유동인구.csv', encoding = 'utf-8')
moving_cell_gdf = pd.read_csv('김해시_유동인구_CELLPOINT.csv', encoding = 'utf-8')

In [None]:
# Average the per-day records of each recordid. numeric_only=True skips any
# non-numeric column instead of raising a TypeError on current pandas.
moving_gdf = moving_gdf.groupby('recordid').mean(numeric_only=True)

# Average across the sex/age-band columns to get one value per recordid.
columns = ['m_0009','m_1019','m_2029','m_3039','m_4049','m_5059','m_6069','m_7000','w_0009','w_1019','w_2029','w_3039','w_4049','w_5059','w_6069','w_7000']
moving_gdf['id_mean'] = moving_gdf[columns].mean(axis = 1)
moving_gdf

In [None]:
# Keep only the averaged value and turn the 'recordid' index back into a
# column. The original called reset_index() without storing its result, so
# the call had no effect.
moving_gdf = moving_gdf[['id_mean']].reset_index()

# Join the averaged population with the cell-point table on recordid.
moving_gdf = pd.merge(moving_gdf, moving_cell_gdf, on = 'recordid', how = 'inner')
moving_gdf

In [None]:
# Point geometry from lon/lat; the raw coordinate columns are then dropped.
_cell_points = gpd.points_from_xy(moving_gdf.lon, moving_gdf.lat)
moving_gdf = gpd.GeoDataFrame(moving_gdf, geometry=_cell_points)
moving_gdf = moving_gdf.drop(columns=['lon', 'lat'])
moving_gdf

In [None]:
# Assign a gid to every cell point: left-join the filtered grid with the
# points each grid polygon contains (grid cells without a point are kept).
moving_cell_gdf = gpd.sjoin(left_df=result_grid_gdf, right_df=moving_gdf, how='left', op = 'contains') #Join
moving_cell_gdf


In [None]:
# One grid cell (gid) can contain several recordids, so the floating
# population is averaged per gid.

moving_cell_gdf = moving_cell_gdf.drop(['index_right', 'recordid'], axis = 1 )
# Average only 'id_mean': averaging the whole frame also tries to average the
# geometry column, which current pandas rejects with a TypeError.
moving_cell_gdf = moving_cell_gdf.groupby('gid')[['id_mean']].mean()
# Cells without any cell point have no value; treat them as 0.
moving_cell_gdf = moving_cell_gdf.fillna(0)

In [None]:
# Min-max normalise the per-cell floating population.
transformer = MinMaxScaler()
moving_cell_scaled_gdf = transformer.fit_transform(moving_cell_gdf)
moving_cell_gdf['score'] = moving_cell_scaled_gdf

# Rescale the 0-1 result to a 0-100 score and rank the cells by it.
moving_cell_gdf['score'] = moving_cell_gdf['score'] * 100
moving_score = moving_cell_gdf.sort_values(by='score', ascending=False)
moving_score

In [None]:
# Attach the grid geometry to the score and discard the raw average.
moving_score = pd.merge(
    moving_score, result_grid_gdf, on='gid', how='left'
).drop(columns=['id_mean'])

In [None]:
# Weighted (x 0.048) floating-population index for the final score.
# The weighting is applied to a copy: previously moving_result was only
# another name for moving_score, so the weight also changed the map data and
# was applied again every time the cell was re-run.
moving_result = moving_score.copy()
moving_result['score'] = moving_result['score'] * 0.048
moving_result = moving_result.drop(columns = ['geometry'])
moving_result = moving_result.rename(columns = {'score': '유동인구_지수'})
moving_result

In [None]:

# Choropleth of the floating-population score.
moving_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=result_grid_gdf,
    data=moving_score,
    columns=['gid', 'score'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="유동 인구 지수",
).add_to(moving_m)

moving_m.save('moving_map.html')

moving_m

## 6) 부양 인구 지수

In [None]:
# Resident population by sex and age band, per grid cell.
popu_gdf = gpd.read_file('10.김해시_성연령별_거주인구격자.geojson')

# Keep only the cells that are part of the filtered grid (inner join on gid).
popu_gdf = pd.merge(popu_gdf, result_grid_gdf, on = 'gid', how = 'inner')

# Show the available columns.
popu_gdf.columns

In [None]:
# The 2021 population distribution is used as the reference.
# total_list : 2021 columns taken as the whole population.
# deppop_list: 2021 columns taken as the dependent population.
# Dependent population is normally children (0-14) and the elderly (65+);
# for convenience the notebook uses 0-19 and 60+ instead.
# NOTE(review): neither list contains a '10g' column although the note above
# speaks of ages 0-19 — confirm against popu_gdf.columns.

total_list = ['2021_m_0g_pop',
       '2021_w_0g_pop', '2021_m_20g_pop', '2021_w_20g_pop', '2021_m_30g_pop',
       '2021_w_30g_pop', '2021_m_40g_pop', '2021_w_40g_pop', '2021_m_50g_pop',
       '2021_w_50g_pop', '2021_m_60g_pop', '2021_w_60g_pop', '2021_m_70g_pop',
       '2021_w_70g_pop']
deppop_list = ['2021_m_0g_pop',
       '2021_w_0g_pop', '2021_m_60g_pop', '2021_w_60g_pop', '2021_m_70g_pop',
       '2021_w_70g_pop']

In [None]:
# NOTE: the following cell computes these same three columns again (and
# additionally fills NaN), so this cell is redundant.
# Total 2021 population per grid cell.
popu_gdf['total_pop'] = popu_gdf.loc[:, total_list].sum(axis = 1)

# Dependent 2021 population per grid cell.
popu_gdf['deppop_list'] = popu_gdf.loc[:, deppop_list].sum(axis = 1)

# Dependent population ratio = dependent population / total population.
popu_gdf['deppop_ratio'] = popu_gdf['deppop_list'] / popu_gdf['total_pop']


In [None]:
# Total and dependent 2021 population per grid cell.
popu_gdf['total_pop'] = popu_gdf[total_list].sum(axis=1)
popu_gdf['deppop_list'] = popu_gdf[deppop_list].sum(axis=1)

# Dependent population ratio = dependent population / total population.
popu_gdf['deppop_ratio'] = popu_gdf['deppop_list'].div(popu_gdf['total_pop'])

# Replace missing values (e.g. the 0 / 0 ratio of empty cells) with 0.
popu_gdf = popu_gdf.fillna(0)

In [None]:
# Dependent population ratio x 100 = the 0-100 score.
# assign() builds a new frame, so no column is written onto a slice of
# popu_gdf (the original triggered SettingWithCopyWarning).
popu_score = popu_gdf[['gid', 'deppop_ratio']].assign(
    score=lambda frame: frame['deppop_ratio'] * 100
)
popu_score = popu_score[['gid', 'score']]

In [None]:
popu_score

In [None]:
# Attach the grid geometry to the score; the sorted frame on the second line
# is only displayed, not stored.
popu_score = pd.merge(popu_score, result_grid_gdf, on = 'gid', how = 'left')
popu_score.sort_values(ascending = False, by = 'score')


In [None]:
# Weighted (x 0.013) dependent-population index for the final score.
# The weighting is applied to a copy so popu_score keeps its 0-100 scale and
# re-running the cell does not apply the weight a second time.
popu_result = popu_score.copy()
popu_result['score'] = popu_result['score'] * 0.013
popu_result = popu_result.rename(columns = {'score' : '부양인구지수'})
popu_result

In [None]:

# Choropleth of the (weighted) dependent-population index.
popu_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=result_grid_gdf,
    data=popu_result,
    columns=['gid', '부양인구지수'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="부양 인구 지수",
).add_to(popu_m)

popu_m.save('popu_map.html')

popu_m

## 7) 외국인 위험 지수

In [None]:
# Foreign-resident grid restricted to the filtered cells. Both inputs carry
# a geometry column, so the merge yields geometry_x / geometry_y; the copy
# coming from the filtered grid (geometry_y) is dropped.
_foreigner_raw = gpd.read_file('11.김해시_외국인_읍면동별_격자.geojson')
for_gdf = pd.merge(
    _foreigner_raw, result_grid_gdf, on='gid', how='inner'
).drop(columns=['geometry_y'])

In [None]:
# Yearly foreign-resident columns for 2018-2021.
index_list = [f'{year}_foreigner_pop' for year in range(2018, 2022)]

# Missing years count as 0; the per-cell mean over the four years follows.
_yearly = for_gdf[index_list].fillna(0).astype(float)
for_gdf[index_list] = _yearly
for_gdf['foreigner_pop_mean'] = for_gdf[index_list].mean(axis=1)
for_gdf

In [None]:
# Min-max normalise the mean foreign population and scale it to 0-100.
transformer = MinMaxScaler()
transformed_X = transformer.fit_transform(for_gdf[['foreigner_pop_mean']])
for_gdf['foreigner_score'] = transformed_X * 100
for_gdf

In [None]:
# 0-100 foreigner score with the grid geometry attached; the sorted frame is
# only displayed.
for_score = pd.merge(
    for_gdf[['gid', 'foreigner_score']],
    result_grid_gdf,
    on='gid',
    how='left',
)
for_score.sort_values(by='foreigner_score', ascending=False)

In [None]:
# Weighted (x 0.022) foreigner index for the final score.
# .copy() avoids writing onto a slice of for_score (SettingWithCopyWarning).
for_result = for_score[['gid', 'foreigner_score']].copy()
for_result['foreigner_score'] = for_result['foreigner_score'] * 0.022
# The original ended with rename(columns={"Foreigner_score": ...}); the key
# had the wrong case and the result was discarded, so it only displayed the
# unchanged frame. The column is still 'foreigner_score' after this cell.
for_result

In [None]:
# Give the weighted column the name used in the final score table.
for_result = for_result.rename(columns = {"foreigner_score" : "외국인 위험 지수"})
for_result

In [None]:
# Choropleth of the 0-100 foreigner score.
for_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=result_grid_gdf,
    data=for_score,
    columns=['gid', 'foreigner_score'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="외국인 지수",
).add_to(for_m)

for_m.save('foreigner_map.html')

for_m

## 8) 건물 특성 지수

In [None]:
# Building-age layer; build_gdf is a working copy of the loaded frame.
build_tmp_gdf = gpd.read_file('12.김해시_건물노후도.geojson')
build_gdf = build_tmp_gdf.copy()

build_gdf

In [None]:
build_gdf['OLD_LEVEL'].unique()

In [None]:
# Sum of OLD_LEVEL over the buildings intersecting each cell; only cells
# with a positive sum are kept.
_old_level_sum = (
    gpd.sjoin(result_grid_gdf2, build_gdf, how='left', op='intersects')
    .groupby('gid')['OLD_LEVEL']
    .agg(['sum'])
)
build_set_gdf = _old_level_sum[_old_level_sum['sum'] > 0]
build_set_gdf

In [None]:
# Bring back every grid cell; cells without buildings get a sum of 0.
build_set_gdf = pd.merge(
    build_set_gdf, result_grid_gdf, how='right', on='gid'
).fillna(0)
build_set_gdf

In [None]:
# _1: min-max normalised OLD_LEVEL sum, _2: scaled to 0-100, _3: weighted by 0.064.
_level_sum = build_set_gdf['sum']
_lo = _level_sum.min()
_hi = _level_sum.max()
build_set_gdf['건물특성지수_1'] = (_level_sum - _lo) / (_hi - _lo)
build_set_gdf['건물특성지수_2'] = build_set_gdf['건물특성지수_1'] * 100
build_set_gdf['건물특성지수_3'] = build_set_gdf['건물특성지수_2'] * 0.064

build_set_gdf = build_set_gdf.sort_values(by=['건물특성지수_3'], ascending=False)
build_set_gdf

In [None]:
# Weighted building index (column _3 = 0-100 score * 0.064); this is the
# frame merged into the final score.
build_result = build_set_gdf[['gid', '건물특성지수_3']]
build_result

In [None]:
# 0-100 building score (used for the map).
build_score = build_set_gdf[['gid', '건물특성지수_2']]
build_score

In [None]:
result_grid_gdf

In [None]:
# Choropleth of the 0-100 building characteristics score.
building_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=result_grid_gdf,
    data=build_score,
    columns=['gid', '건물특성지수_2'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="건물 특성 지수",
).add_to(building_m)

building_m.save('Building_map.html')

building_m

## 9) 아동 시설 지수

In [None]:
# Add each cell's centroid as a 'center' column.
# NOTE: result_grid_gdf2 was bound to the same object as result_grid_gdf, so
# this in-place assignment adds 'center' to result_grid_gdf (and to `backup`)
# as well; later cells drop it again before mapping.
result_grid_gdf2['center'] = result_grid_gdf2.geometry.centroid
# drop_duplicates returns a new frame, so from here on result_grid_gdf2 is a
# separate object.
result_grid_gdf2 = result_grid_gdf2.drop_duplicates()
result_grid_gdf2

In [None]:
# Kindergarten list as a GeoDataFrame of lon/lat points.
child_tmp_df = pd.read_csv('18.김해시_유치원현황.csv')
child_df = child_tmp_df.copy()

_child_points = gpd.points_from_xy(child_df.lon, child_df.lat)
child_gdf = gpd.GeoDataFrame(child_df, geometry=_child_points)

child_gdf

In [None]:
# Kindergartens per grid cell. Rows with any missing value are dropped right
# after the join, which removes the cells without a kindergarten.
# NOTE(review): unlike the day-care and school cells, dropna runs before the
# column selection here, so a NaN in any kindergarten attribute also drops
# the row — confirm this is intended.
_cell_cols = ['gid', 'center', 'fac_nm']
_joined = gpd.sjoin(result_grid_gdf2, child_gdf, how='left', op='intersects')
child_set_gdf = _joined.dropna(axis=0)[_cell_cols]

# Look the facility point back up (merge on the shared columns) so every row
# carries both the cell centroid and the facility geometry.
child_set_gdf = pd.merge(child_set_gdf, child_gdf, how='left')
child_set_gdf = child_set_gdf[_cell_cols + ['geometry']]
child_set_gdf

In [None]:
# Distance from the cell centroid ('center') to the kindergarten point
# ('geometry'), computed for every row. The original looped over a
# hard-coded range(0, 32) and wrote through chained indexing
# (df[col].loc[i] = ...), which breaks when the row count differs and is
# not guaranteed to write into the frame.
# NOTE(review): the points are built from lon/lat, so the distance is
# presumably in degrees rather than metres — confirm.
child_set_gdf['child_dist'] = [
    cell_center.distance(facility_point)
    for cell_center, facility_point in zip(
        child_set_gdf['center'], child_set_gdf['geometry']
    )
]

child_set_gdf = child_set_gdf[['gid', 'fac_nm','child_dist']]
child_dist_gdf = child_set_gdf
child_dist_gdf

In [None]:
# Day-care centre list as a GeoDataFrame of lon/lat points.
kid_tmp_df = pd.read_csv('17.김해시_어린이집현황.csv')
kid_df = kid_tmp_df.copy()

_kid_points = gpd.points_from_xy(kid_df.lon, kid_df.lat)
kid_gdf = gpd.GeoDataFrame(kid_df, geometry=_kid_points)

# Leave out centres whose operating status is '폐지' (abolished).
_operating = kid_gdf['oper_status'] != '폐지'
kid_gdf = kid_gdf[_operating]
kid_gdf

In [None]:
# Day-care centres per grid cell; cells without one are dropped.
_cell_cols = ['gid', 'center', 'fac_nm']
_joined = gpd.sjoin(result_grid_gdf2, kid_gdf, how='left', op='intersects')
kid_set_gdf = _joined[_cell_cols].dropna(axis=0)

# Look the facility point back up (merge on the shared columns).
kid_set_gdf = pd.merge(kid_set_gdf, kid_gdf, how='left')
kid_set_gdf = kid_set_gdf[_cell_cols + ['geometry']]
kid_set_gdf

In [None]:
# Distance from the cell centroid ('center') to the day-care point
# ('geometry'), computed for every row. The original looped over a
# hard-coded range(0, 276) and wrote through chained indexing
# (df[col].loc[i] = ...), which breaks when the row count differs and is
# not guaranteed to write into the frame.
# NOTE(review): the points are built from lon/lat, so the distance is
# presumably in degrees rather than metres — confirm.
kid_set_gdf['kid_dist'] = [
    cell_center.distance(facility_point)
    for cell_center, facility_point in zip(
        kid_set_gdf['center'], kid_set_gdf['geometry']
    )
]

kid_set_gdf = kid_set_gdf[['gid', 'fac_nm','kid_dist']]
kid_dist_gdf = kid_set_gdf
kid_dist_gdf

In [None]:
# School list (elementary / middle / high) as a GeoDataFrame of lon/lat points.
stu_tmp_df = pd.read_csv('19.김해시_학교(초,중,고)현황.csv')
stu_df = stu_tmp_df.copy()

_school_points = gpd.points_from_xy(stu_df.lon, stu_df.lat)
stu_gdf = gpd.GeoDataFrame(stu_df, geometry=_school_points)

stu_gdf

In [None]:
# Schools per grid cell; cells without one are dropped.
_cell_cols = ['gid', 'center', 'school_nm']
_joined = gpd.sjoin(result_grid_gdf2, stu_gdf, how='left', op='intersects')
stu_set_gdf = _joined[_cell_cols].dropna(axis=0)

# Look the school point back up (merge on the shared columns).
stu_set_gdf = pd.merge(stu_set_gdf, stu_gdf, how='left')
stu_set_gdf = stu_set_gdf[_cell_cols + ['geometry']]
stu_set_gdf

In [None]:
# Distance from the cell centroid ('center') to the school point
# ('geometry'), computed for every row. The original looped over a
# hard-coded range(0, 20) and wrote through chained indexing
# (df[col].loc[i] = ...), which breaks when the row count differs and is
# not guaranteed to write into the frame.
# NOTE(review): the points are built from lon/lat, so the distance is
# presumably in degrees rather than metres — confirm.
stu_set_gdf['stu_dist'] = [
    cell_center.distance(school_point)
    for cell_center, school_point in zip(
        stu_set_gdf['center'], stu_set_gdf['geometry']
    )
]

stu_set_gdf = stu_set_gdf[['gid', 'school_nm','stu_dist']]
stu_dist_gdf = stu_set_gdf
stu_dist_gdf

In [None]:
# Stack the kindergarten, day-care and school distance tables. Each row
# carries exactly one of the three distance columns; the others are NaN.
_dist_cols = ['child_dist', 'kid_dist', 'stu_dist']
children_set_gdf = pd.concat(
    [child_dist_gdf, kid_dist_gdf, stu_dist_gdf], ignore_index=True
)
children_set_gdf = children_set_gdf[['gid'] + _dist_cols].fillna(0)

# With the gaps filled by 0 the sum is simply the row's own distance.
children_set_gdf['dist_total'] = (
    children_set_gdf['child_dist']
    + children_set_gdf['kid_dist']
    + children_set_gdf['stu_dist']
)

# Mean facility distance per grid cell.
children_set_gdf = children_set_gdf.groupby(['gid'])['dist_total'].agg(['mean'])
children_set_gdf

In [None]:
# Kindergarten distances joined onto every grid cell, missing values as 0.
# NOTE: this re-binds set_gdf (previously the grid/land/river/population
# join); no later cell in this notebook reads the new value.
set_gdf = pd.merge(child_set_gdf, result_grid_gdf2, how = 'right', on = 'gid')
set_gdf = set_gdf.fillna(0)
set_gdf

In [None]:
# _1: min-max normalised mean distance, _2: scaled to 0-100.
_mean_dist = children_set_gdf['mean']
_lo = _mean_dist.min()
_hi = _mean_dist.max()
children_set_gdf['아동시설지수_1'] = (_mean_dist - _lo) / (_hi - _lo)
children_set_gdf['아동시설지수_2'] = children_set_gdf['아동시설지수_1'] * 100

# _3 and _4 together invert the scale (_4 = 100 - _2), so a smaller distance
# gives a higher score; _5 is _4 weighted by 0.03.
children_set_gdf['아동시설지수_3'] = children_set_gdf['아동시설지수_2'] - 100
children_set_gdf['아동시설지수_4'] = children_set_gdf['아동시설지수_3'] * (-1)
children_set_gdf['아동시설지수_5'] = children_set_gdf['아동시설지수_4'] * 0.03

children_set_gdf = children_set_gdf.sort_values(
    by=['아동시설지수_5'], ascending=False
).reset_index()
children_set_gdf

In [None]:
# Weighted child-facility index joined onto every grid cell; reset_index
# also adds an 'index' column holding the previous row positions.
kid_result = gpd.GeoDataFrame(
    children_set_gdf[['gid', '아동시설지수_5']]
).reset_index()
kid_result = pd.merge(result_grid_gdf, kid_result, on='gid', how='left')
kid_result

In [None]:
# Keep id and index only; cells without any child facility score 0.
kid_result = kid_result[['gid', '아동시설지수_5']].fillna(0)
kid_result

In [None]:
# 0-100 child-facility score joined onto every grid cell (used for the map).
kid_score = pd.merge(
    result_grid_gdf,
    children_set_gdf[['gid', '아동시설지수_4']],
    on='gid',
    how='left',
)
kid_score

In [None]:
# Remove the helper 'center' column from `backup` before it is handed to
# folium as geo_data. errors='ignore' lets the cell be re-run (or run when
# the column is absent) without raising KeyError.
backup = backup.drop(columns = ['center'], errors = 'ignore')

In [None]:
# Choropleth of the 0-100 child facility score.
kid_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=backup,
    data=kid_score,
    columns=['gid', '아동시설지수_4'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="아동 시설 지수",
).add_to(kid_m)

kid_m.save('kid_map.html')

kid_m

## 10) 공적 감시 취약 지수

In [None]:
# Police station locations.
# NOTE(review): this workbook is not among the files fetched in the download
# cell, so it presumably has to be placed next to the notebook by hand.
police_sta_gdf = pd.read_excel('경찰청_경찰관서 위치, 주소 위경도 현황_20210924.xlsx')
police_sta_gdf

In [None]:
# Offices belonging to the two Gimhae police stations (west first, then central).
police_w_gdf, police_c_gdf = (
    police_sta_gdf[police_sta_gdf['경찰서'] == station_name]
    for station_name in ('김해서부', '김해중부')
)
police_gdf = pd.concat([police_w_gdf, police_c_gdf])

# Point geometry from Longitude/Latitude; the raw columns are then dropped.
_station_points = gpd.points_from_xy(police_gdf.Longitude, police_gdf.Latitude)
police_gdf = gpd.GeoDataFrame(police_gdf, geometry=_station_points)
police_gdf = police_gdf.drop(columns=['Longitude', 'Latitude'])
police_gdf

In [None]:
# NOTE: alias, not a copy. Columns added in place to police_state in the
# following cells are added to result_grid_gdf as well.
police_state = result_grid_gdf
police_state

In [None]:
# Centroid of every grid cell, stored in a 'centroid' column (in place, so it
# also shows up on result_grid_gdf).
police_state['centroid'] = police_state['geometry'].centroid
police_state

In [None]:
# Distance from every cell centroid to its nearest police office.
# For each centroid the distances to all offices are computed in a single
# vectorised call and reduced with min(); the result is the same as the
# former nested loop of scalar .loc lookups, without one Python-level
# iteration per (cell, office) pair.
# NOTE: with an empty police_gdf the value is now NaN rather than the old
# 1000000000000 sentinel.
# NOTE(review): the office points come from Longitude/Latitude, so the
# distance is presumably in degrees rather than metres — confirm.
_office_points = police_gdf.geometry
police_state['minimum'] = [
    _office_points.distance(cell_centroid).min()
    for cell_centroid in police_state['centroid']
]

police_state

In [None]:
police_dist_score = police_state
police_dist_score[['gid', 'minimum']].sort_values(by = 'minimum', ascending = False)
police_dist_score['minimum']

In [None]:
# Min-max normalise the nearest-office distance (one column in, one column out).
transformer = MinMaxScaler()
transformed_X = transformer.fit_transform(police_dist_score[['minimum']])

In [None]:
# 0-100 score: the farther the nearest police office, the higher the score.
# police_dist_score is another name for police_state / result_grid_gdf, so the
# column is added to that shared frame. The sorted frame is only displayed.
police_dist_score['police_score'] = transformed_X * 100
police_dist_score.sort_values(by = 'police_score', ascending = False)

In [None]:
# 0-100 public-surveillance score per cell; the sorted frame is only displayed.
police_score = police_dist_score[['gid','police_score']]
police_score.sort_values(by='police_score', ascending = False)

In [None]:
# Weighted (x 0.073) public-surveillance index for the final score.
# The weighting is applied to a copy: previously police_result was only
# another name for police_score (itself a slice), so the weight also changed
# the map data and was applied again every time the cell was re-run.
police_result = police_score.copy()
police_result['police_score'] = police_result['police_score'] * 0.073
police_result = police_result.rename(columns = {'police_score':'공적감시취약지수'})
police_result

In [None]:
# drop() returns a new frame, so only the name police_state loses the helper
# columns here; result_grid_gdf still carries 'centroid' and 'minimum'.
police_state = police_state.drop(columns = ['centroid', 'minimum'])
police_state

In [None]:
# Choropleth of the public surveillance vulnerability score.
police_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=backup,
    data=police_score,
    columns=['gid', 'police_score'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="공적감시취약지수",
).add_to(police_m)

police_m.save('police_map.html')

police_m

## 11) 대체 감시 취약 지수

In [None]:
# Security-light installations; the sorted frame is only displayed.
security_gdf = pd.read_csv('6.김해시_보안등설치현황.csv')
security_gdf.sort_values(by ='securitylight_cnt', ascending =False)

In [None]:
# Drop the address/year attributes and build point geometry from lon/lat.
security_gdf = security_gdf.drop(columns=['emd_nm', 'address', 'year'])
_light_points = gpd.points_from_xy(security_gdf.lon, security_gdf.lat)
security_gdf = gpd.GeoDataFrame(security_gdf, geometry=_light_points)
security_gdf

In [None]:
# Tag every light with the grid cell it lies within; lights outside the
# filtered grid keep a missing gid.
security_gdf = gpd.sjoin(security_gdf, backup, how = 'left', op = 'within')
security_gdf

In [None]:
# Discard the lights that fell in no grid cell.
security_gdf = security_gdf.dropna(subset = ['gid'])
security_gdf

In [None]:
# Total per grid cell. numeric_only=True keeps the geometry column out of the
# sum; current pandas raises a TypeError when asked to add geometries.
security_gdf = security_gdf.groupby('gid').sum(numeric_only=True)
# Summed coordinates and join indices are meaningless, so drop them.
security_gdf = security_gdf.drop(columns = ['lat','lon','index_right'])
security_gdf.sort_values(by = 'securitylight_cnt', ascending = False)

In [None]:
# Bring in every grid cell; cells without lights get a count of 0.
security_gdf = pd.merge(security_gdf, backup, how='outer', on='gid').fillna(0)

# Vulnerability ratio: one minus the cell's share of all security lights.
_light_total = security_gdf['securitylight_cnt'].sum()
security_gdf['security_ratio'] = 1 - (security_gdf['securitylight_cnt'] / _light_total)
security_gdf

In [None]:
# Columns needed for scoring: cell id, light count and vulnerability ratio.
security_score = security_gdf[['gid', 'securitylight_cnt', 'security_ratio']]
security_score

In [None]:
# security_score was taken as a slice of security_gdf; copy it first so the
# new column is written to an independent frame (the original triggered
# SettingWithCopyWarning).
security_score = security_score.copy()

# Min-max normalise the vulnerability ratio and scale it to 0-100.
transformer = MinMaxScaler()
transformed_X = transformer.fit_transform(security_score[['security_ratio']])
security_score['security_score'] = transformed_X * 100
security_score.sort_values(by = 'security_score', ascending = False)

In [None]:
# Weighted (x 0.058) alternative-surveillance index for the final score.
# .copy() avoids writing onto a slice of security_score.
security_result = security_score[['gid','security_score']].copy()
security_result['security_score'] = security_result['security_score'] * 0.058
security_result = security_result.rename(columns = {'security_score' : '대체감시취약지수'})
security_result

In [None]:
# Choropleth of the 0-100 alternative surveillance score.
security_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=backup,
    data=security_score,
    columns=['gid', 'security_score'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="대체감시시설지수",
).add_to(security_m)

security_m.save('security_map.html')

security_m

## 12) 최종 우선 설치 지수 등급화

In [None]:
# Inner-join the ten weighted index tables on gid, in this order, so only
# cells present in every table remain.
_index_tables = [
    cctv_result,
    call_result,
    busi_result,
    moving_result,
    for_result,
    build_result,
    kid_result,
    popu_result,
    police_result,
    security_result,
]
final_result = reduce(
    lambda merged, table: pd.merge(merged, table, how='inner', on='gid'),
    _index_tables,
)
final_result

In [None]:
list = ['감시취약지수_4','범죄발생지수_3','풍속업소지수_3','유동인구_지수','외국인 위험 지수','건물특성지수_3','아동시설지수_5','부양인구지수','공적감시취약지수','대체감시취약지수']

In [None]:
final_result[list].astype(float)

In [None]:
final_result['sum'] = final_result[list].sum(axis = 1)
final_result.sort_values(by='sum', ascending = False)

In [None]:
final_result

In [None]:
# Cell id and final score only (data for the final choropleth).
finale = final_result[['gid','sum']]
finale

In [None]:
# Remove helper columns that earlier cells added to the shared grid frame.
# errors='ignore' lets the cell be re-run (or run when a column is already
# gone) without raising KeyError.
result_grid_gdf = result_grid_gdf.drop(columns = ['center', 'police_score'], errors = 'ignore')

In [None]:
finale

### 최종 지수 시각화

In [None]:
# Choropleth of the final priority score over the whole filtered grid.
final_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=backup,
    data=finale,
    columns=['gid', 'sum'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="최종 지수",
).add_to(final_m)

final_m.save('final_map.html')

final_m

## CCTV 설치 우선지역 상위 50개소 산출

In [None]:
# The 50 cells with the highest final score, with their grid geometry.
final_top50 = final_result[['gid','sum']].sort_values(by='sum', ascending = False).head(50)
final_50_grid_gdf = pd.merge(result_grid_gdf, final_top50, how = 'right', on = 'gid')
# Strip the helper columns left on the grid frame by the police-distance
# cells; errors='ignore' keeps the cell working when they are absent.
final_50_grid_gdf = final_50_grid_gdf.drop(columns = ['centroid', 'minimum'], errors = 'ignore')
final_50_grid_gdf

In [None]:
# Choropleth restricted to the top-50 cells.
final_50_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=final_50_grid_gdf,
    data=final_top50,
    columns=['gid', 'sum'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="최종 지수",
).add_to(final_50_m)

final_50_m.save('final_50_map.html')

final_50_m

## CCTV 우선설치 지역 Top 5 도출

In [None]:
# The 5 cells with the highest final score, with their grid geometry.
final_top5 = final_result[['gid','sum']].sort_values(by='sum', ascending = False).head(5)
final_5_grid_gdf = pd.merge(result_grid_gdf, final_top5, how = 'right', on = 'gid')
# Strip the helper columns left on the grid frame by the police-distance
# cells; errors='ignore' keeps the cell working when they are absent.
final_5_grid_gdf = final_5_grid_gdf.drop(columns = ['centroid', 'minimum'], errors = 'ignore')
final_5_grid_gdf

In [None]:
# Choropleth restricted to the top-5 cells.
final_5_m = folium.Map(
    location=center,
    zoom_start=11,
    tiles='cartodbpositron'
)

# Map.choropleth() is deprecated; folium.Choropleth takes the same arguments
# and is attached to the map with add_to().
folium.Choropleth(
    geo_data=final_5_grid_gdf,
    data=final_top5,
    columns=['gid', 'sum'],
    fill_color='YlOrRd',
    key_on='properties.gid',
    legend_name="최종 지수",
).add_to(final_5_m)

final_5_m.save('final_5_map.html')

final_5_m

## CCTV 우선설치 상위 50개소 결과

In [None]:
final_result.sort_values(by = 'sum', ascending = False).head(50)

## Top 3 지역 분석

### Top 1 지역 마라244972 (북부동사무소 앞) 심층분석

In [None]:
# 신고 횟수 분석
police_df[police_df['gid'] == '마라244972'].count()

In [None]:
# 신고 유형별 분석
police_df[police_df['gid'] == '마라244972'].groupby('case_type').count()

In [None]:
# Grid 내 CCTV 개수 분석
cctv_set_gdf[cctv_set_gdf.index == '마라244972']

In [None]:
# Grid 내 보안등 개수 분석
security_gdf[security_gdf['gid'] == '마라244972']

### Top 2 지역 마라184895 (대청동 롯데마트 장유점 앞) 심층분석

In [None]:
# Grid 내 유흥업소 개수 분석
busi_set_gdf[busi_set_gdf['gid'] == '마라184895']

In [None]:
# Grid 내 CCTV 개수 분석
cctv_set_gdf[cctv_set_gdf.index == '마라184895']

In [None]:
# Grid 내 보안등 개수 분석
security_gdf[security_gdf['gid'] == '마라184895']

### Top 3 지역 마라244947 (내동 연지사거리) 심층분석

In [None]:
# 신고 횟수 분석
police_df[police_df['gid'] == '마라244947'].count()

In [None]:
# 신고 유형별 분석
police_df[police_df['gid'] == '마라244947'].groupby('case_type').count()

In [None]:
# Grid 내 CCTV 개수 분석
cctv_set_gdf[cctv_set_gdf.index == '마라244947']