## 기본설정 및 함수정의

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.graph_objs as go
import plotly.offline as offline
from folium.plugins import HeatMapWithTime
from plotly.subplots import make_subplots
import folium
from folium import plugins
from folium.plugins import HeatMap
from folium import FeatureGroup
import json
import math
import re
from datetime import datetime
import os
import glob
import subprocess
from bs4 import BeautifulSoup as bs
from shapely.geometry import Point, Polygon, LineString
from shapely.ops import unary_union
import geopandas as gpd
from geopandas import GeoSeries
import pyproj
from tqdm import tqdm
from keplergl import KeplerGl

# 모든 열이 생략되지 않도록 설정
pd.set_option('display.max_columns', None)

# tqdm의 pandas 적용
tqdm.pandas()

# 아산시청 위도, 경도
Asan = [36.789882248764656, 127.00274491353838]

# Point를만드는 함수
def make_point(x):
    try:
        return Point(x)
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
# Polygon을 만드는 함수
def make_pol(x):
    try:
        return Polygon(x[0])
    except:
        return Polygon(x[0][0])
    
# Linestring을 만드는 함수
def make_lin(x):
    try:
        return LineString(x)
    except:
        return LineString(x[0])

# 데이터프레임을 GeoPandas 데이터프레임으로 변환하는 함수 정의
def geo_transform(DataFrame) :
    # csv to geopandas
    # lon, lat data를 geometry로 변경
    DataFrame['lat'] = DataFrame['lat'].astype(float)
    DataFrame['lon'] = DataFrame['lon'].astype(float)
    DataFrame['geometry'] = DataFrame.apply(lambda row : Point([row['lon'], row['lat']]), axis=1) # 위도 및 경도롤 GeoPandas Point 객체로 변환
    DataFrame = gpd.GeoDataFrame(DataFrame, geometry='geometry')
    DataFrame.crs = {'init':'epsg:4326'} # geopandas 데이터프레임의 좌표계를 EPSG 4326으로 설정
    DataFrame = DataFrame.to_crs({'init':'epsg:4326'}) # 데이터프레임의 좌표계를 자체 좌표계에서 EPSG 4326으로 변환
    return DataFrame


The Shapely GEOS version (3.11.2-CAPI-1.17.2) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow.


Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:

import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://sha

#### 아산시 행정경계 (출처 - 통계지리정보서비스 2023년 센서스용 행정구역경계(읍면동))

In [2]:
# shp to geodataframe convert
shapefile_path = "SBJ_2405_001/_census_data_2023_bnd_dong_bnd_dong_34040_2023_2023/bnd_dong_34040_2023_2023_2Q.shp"
asan_gdf = gpd.read_file(shapefile_path)
asan_gdf = asan_gdf.to_crs(epsg=4326) #EPSG4326 형식으로 변환

# 행정동 구분에 따라 색 구분하는 함수
def hjd_color(name):
    if name[-1] == '읍':
        return 'green'
    elif name[-1] == '면':
        return 'yellow'
    elif name[-1] == '동':
        return 'red'

asan_gdf['color'] = asan_gdf['ADM_NM'].apply(hjd_color)

#### 격자(매핑용)

In [3]:
# GeoJSON 파일 불러오기
with open('SBJ_2405_001/12.아산시_격자(매핑용).geojson', encoding="UTF8") as geojson_file:
    geojson_data = json.load(geojson_file)
grid_map_df = pd.json_normalize(geojson_data['features'])
grid_map_df['geometry'] = grid_map_df['geometry.coordinates'].apply(lambda x : make_pol(x))
# grid_map_df 데이터프레임을 GeoDataFrame으로 변환
grid_map_df = gpd.GeoDataFrame(grid_map_df, geometry='geometry')

#### 아산시 상세 도로망

In [4]:
# GeoJSON 파일 불러오기
with open('SBJ_2405_001/4.아산시_상세도로망.geojson', encoding="UTF8") as geojson_file:
    geojson_data = json.load(geojson_file)
roadsystem_df = pd.json_normalize(geojson_data['features'])
roadsystem_df['geometry'] = roadsystem_df['geometry.coordinates'].apply(lambda x : make_lin(x))
roadsystem_df['properties.link_id'] = roadsystem_df['properties.link_id'].astype(str)
# roadsystem_df 데이터프레임을 GeoDataFrame으로 변환
roadsystem_df = gpd.GeoDataFrame(roadsystem_df, geometry='geometry')

#### 아산시 약국현황

In [5]:
# 약국 데이터프레임을 GeoPandas 데이터프레임으로 변환
pharmacy_df = pd.read_csv('SBJ_2405_001/23.아산시_약국현황.csv')
pharmacy_df = geo_transform(pharmacy_df)


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6



#### 아산시 AED위치정보

In [6]:
AED_df = pd.read_csv('SBJ_2405_001/8.아산시_자동심장충격기(AED)위치정보.csv')
AED_df = geo_transform(AED_df)


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6



#### 아산시 119 안전센터

In [7]:
df_119 = pd.read_csv('SBJ_2405_001/24.아산시_119안전센터정보.csv')
df_119 = geo_transform(df_119)


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6



#### 아산시 병원진료과목정보

In [8]:
# 병원 데이터프레임을
treatment_df = pd.read_csv('SBJ_2405_001/22.아산시_병원진료과목정보.csv')

In [9]:
# 병원별 진료과목 list mapping한 dict생성
hos_dic = {}
for hos in tqdm(treatment_df['mdcl_inst'].unique()):
    subset = treatment_df[treatment_df['mdcl_inst'] == hos]
    hos_dic[hos] = subset['mdcl_mjr_nm'].tolist()

100%|██████████| 331/331 [00:00<00:00, 3464.40it/s]


#### 아산시 병원정보

In [10]:
# 병원 데이터프레임을 GeoPandas 데이터프레임으로 변환
hospital_df = pd.read_csv('SBJ_2405_001/21.아산시_병원정보.csv')
hospital_df = geo_transform(hospital_df)
# 병원별 진료과목 mapping
hospital_df['mjr'] = hospital_df['mdcl_inst'].map(hos_dic)
# Update 'mjr' column where it is null with 'mdcl_gbn' column value as a list
hospital_df.loc[hospital_df['mjr'].isnull(), 'mjr'] = hospital_df['mdcl_gbn'].apply(lambda x: [x])


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6



#### 119구급출동이력

In [11]:
# 격자 ID와 격자 폴리곤 각각을 매핑한 딕셔너리 생성
map_dic = {}
for _, row in grid_map_df.iterrows():
    map_dic[row['properties.gid']] = row['geometry']

# 'geometry' 열 값이 특정 폴리곤 내에 있는지 확인하고 'emd_nm' 값을 변경하는 함수
def update_emd_nm(row, polygons, names):
    for polygon, name in zip(polygons, names):
        if row.geometry.intersects(polygon):
            return name
    return row.emd_nm

# 119구급출동이력 데이터프레임
df_emer119 = pd.read_csv('SBJ_2405_001/28.아산시_119구급출동이력.csv')
df_emer119 = df_emer119[df_emer119['sgg_nm'] == '아산시']
df_emer119['emd_nm'] = df_emer119['emd_nm'].replace('배방면', '배방읍') # 배방면 -> 배방읍 변경됨

# map_dic(격자:폴리곤)에 df_emer119의 gid를 매핑하여 'geometry' column 생성
geometry_lst = []
for _, row in df_emer119.iterrows():
    try:
        geometry_lst.append(map_dic[row['gid']])
    except:
        geometry_lst.append(None)
df_emer119['geometry'] = geometry_lst
df_emer119 = df_emer119[~df_emer119['geometry'].isnull()]
df_emer119 = gpd.GeoDataFrame(df_emer119, geometry='geometry')

# 'geometry' 열 값이 특정 폴리곤 내에 있는지 확인하고 'emd_nm' 값 변경
polygons = asan_gdf['geometry'].tolist()
names = asan_gdf['ADM_NM'].tolist()
df_emer119['emd_nm'] = df_emer119.progress_apply(update_emd_nm, axis=1, polygons=polygons, names=names)

# NaN 값을 0으로 대체
df_emer119 = df_emer119.fillna(0)

# 날짜 컬럼을 정수로 변환
df_emer119['arrv_ymd'] = df_emer119['arrv_ymd'].astype(int)
df_emer119['cntct_ymd'] = df_emer119['cntct_ymd'].astype(int)
df_emer119['hm_ymd'] = df_emer119['hm_ymd'].astype(int)
df_emer119['hpt_arrv_ymd'] = df_emer119['hpt_arrv_ymd'].astype(int)

# 날짜와 시간 컬럼을 문자열로 변환
for col in df_emer119.columns[:-1]:
    df_emer119[col] = df_emer119[col].astype(str)

# 시간 형식을 'HH:MM:SS'로 변환하는 함수
def format_time(time_str):
    if time_str == '0':
        return '00:00:00'
    parts = time_str.split(':')
    if len(parts) == 2:
        return f"{int(parts[0]):02}:{int(parts[1]):02}:00"
    return f"{int(parts[0]):02}:{int(parts[1]):02}:{int(parts[2]):02}"

# 날짜와 시간 컬럼을 통합하여 새로운 시간 컬럼 생성 함수
def combine_datetime(date_col, time_col):
    formatted_date = date_col.apply(lambda x: pd.to_datetime(x, format='%Y%m%d', errors='coerce'))
    formatted_time = time_col.apply(lambda x: format_time(x))
    combined = formatted_date.astype(str) + ' ' + formatted_time
    combined = combined.replace('NaT 00:00:00', np.nan)
    return pd.to_datetime(combined, errors='coerce')

df_emer119['신고시각'] = combine_datetime(df_emer119['rpt_ymd'], df_emer119['rpt_tm'])
df_emer119['출동시각'] = combine_datetime(df_emer119['mv_ymd'], df_emer119['mv_tm'])
df_emer119['도착시각'] = combine_datetime(df_emer119['arrv_ymd'], df_emer119['arrv_tm'])
df_emer119['접촉시각'] = combine_datetime(df_emer119['cntct_ymd'], df_emer119['cntct_tm'])
df_emer119['귀소시각'] = combine_datetime(df_emer119['hm_ymd'], df_emer119['hm_tm'])
df_emer119['병원도착시각'] = combine_datetime(df_emer119['hpt_arrv_ymd'], df_emer119['hpt_arrv_tm'])
df_emer119['신고시각'] = df_emer119['신고시각'].astype(str)
df_emer119['출동시각'] = df_emer119['출동시각'].astype(str)
df_emer119['도착시각'] = df_emer119['도착시각'].astype(str)
df_emer119['접촉시각'] = df_emer119['접촉시각'].astype(str)
df_emer119['귀소시각'] = df_emer119['귀소시각'].astype(str)
df_emer119['병원도착시각'] = df_emer119['병원도착시각'].astype(str)

# 불필요한 컬럼 제거
df_emer119.drop(columns=['rpt_ymd', 'rpt_tm', 'mv_ymd', 'mv_tm', 'arrv_ymd', 'arrv_tm', 'cntct_ymd', 'cntct_tm', 'hm_ymd', 'hm_tm', 'hpt_arrv_ymd', 'hpt_arrv_tm'], inplace=True)

df_emer119

100%|██████████| 53598/53598 [00:22<00:00, 2410.85it/s]


Unnamed: 0,frstn_nm,sfcntr_nm,rslt_gbn,ptnt_type,smptm1,sgg_nm,emd_nm,accdnt_plc,ptnt_age,ptnt_gndr,trnsfr_gbn,hpt_nm,gid,geometry,신고시각,출동시각,도착시각,접촉시각,귀소시각,병원도착시각
0,아산소방서,아산119구조구급센터,취소,질병외,0,아산시,온양6동,0,미확인,미확인,미이송,0,다바569639,"POLYGON ((127.01704 36.77224, 127.01703 36.773...",2021-01-01 00:47:00,2021-01-01 00:53:00,NaT,NaT,2021-01-01 00:55:00,NaT
1,아산소방서,아산119구조구급센터,정상,질병,두통,아산시,온양1동,집,60대,여,이송,아산충무병원,다바552660,"POLYGON ((126.99787 36.79109, 126.99786 36.791...",2021-01-01 10:08:00,2021-01-01 10:10:00,2021-01-01 10:16:00,2021-01-01 10:20:00,2021-01-01 10:55:00,2021-01-01 10:40:00
2,아산소방서,아산119구조구급센터,정상,질병,기타,아산시,온양5동,기타(생활치료센터),20대,여,이송,서울특별시서남병원,다바543609,"POLYGON ((126.98808 36.74507, 126.98808 36.745...",2021-01-01 11:26:00,2021-01-01 12:15:00,2021-01-01 12:28:00,2021-01-01 12:31:00,2021-01-01 16:10:00,2021-01-01 14:10:00
3,아산소방서,아산119구조구급센터,정상,질병외,0,아산시,온양4동,상업시설,미확인,미확인,소방활동,0,다바533669,"POLYGON ((126.97652 36.79911, 126.97651 36.800...",2021-01-01 11:41:00,2021-01-01 11:43:00,2021-01-01 11:50:00,NaT,2021-01-01 12:30:00,NaT
4,신창119안전센터,선장119지역대,정상,질병,기타,아산시,온양5동,기타(생활치료센터),20대,남,이송,충청남도 천안의료원,다바543609,"POLYGON ((126.98808 36.74507, 126.98808 36.745...",2021-01-01 12:11:00,2021-01-01 12:16:00,2021-01-01 12:31:00,2021-01-01 12:35:00,2021-01-01 14:50:00,2021-01-01 13:10:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57773,아산소방서,탕정119안전센터,정상,질병외,0,아산시,배방읍,집,미확인,미확인,미이송,0,다바597638,"POLYGON ((127.04842 36.77146, 127.04841 36.772...",2023-12-31 21:39:00,2023-12-31 21:43:00,2023-12-31 21:55:00,NaT,2023-12-31 22:17:00,NaT
57777,신창119안전센터,선장119지역대,정상,질병외,기타,아산시,선장면,집,70대,여,이송,아산충무병원,다바443671,"POLYGON ((126.87562 36.80042, 126.87561 36.801...",2023-12-31 23:11:00,2023-12-31 23:13:00,2023-12-31 23:17:00,2023-12-31 23:19:00,2024-01-01 00:32:00,2023-12-31 23:50:00
57784,아산소방서,아산119구조구급센터,정상,질병,흉통,아산시,신창면,집,60대,남,이송,순천향대학교부속 천안병원,다바512657,"POLYGON ((126.95305 36.78819, 126.95305 36.789...",2023-12-31 06:58:00,2023-12-31 06:59:00,2023-12-31 07:10:00,2023-12-31 07:10:00,2023-12-31 08:12:00,2023-12-31 07:33:00
57786,아산소방서,배방119안전센터,취소,질병외,0,아산시,온양5동,0,미확인,미확인,미이송,0,다바550635,"POLYGON ((126.99577 36.76854, 126.99577 36.769...",2023-12-31 08:27:00,2023-12-31 08:27:00,NaT,NaT,2023-12-31 08:28:00,NaT


In [12]:
# 'emd_nm' 값의 개수 세기
emd_nm_counts = df_emer119['emd_nm'].value_counts().reset_index()
emd_nm_counts.columns = ['emd_nm', 'count']
hjd_119_dic = {key:value for key, value in zip(emd_nm_counts['emd_nm'], emd_nm_counts['count'])}
asan_gdf['119'] = asan_gdf['ADM_NM'].map(hjd_119_dic)

In [13]:
# 진료과목을 개별 행으로 분리
hospital_exploded = hospital_df.explode('mjr').reset_index(drop=True)
hospital_exploded['mjr'] = hospital_exploded['mjr'].astype(str)

### 의료불균형 분석

#### 격자별 고령화정도 가중치 변환

In [14]:
respop_df = pd.read_csv('SBJ_2405_001/1.아산시_거주인구(격자).csv')

#아산시 grid ID 리스트 와 매핑 & respop_df 필터링
grid_id = grid_map_df['properties.gid'].tolist() # gid는 전부 unique한 값
grid_respop = respop_df[respop_df['gid'].isin(grid_id)]
grid_respop = grid_respop.fillna(0)
grid_respop = pd.concat([grid_respop.iloc[:, :2], grid_respop.iloc[:, 2:].astype(int)], axis=1)
grid_respop['year'] = grid_respop['year'].astype(str)

# 성별구분 없이 통합 및 세대별 인구수 데이터프레임으로 변환
columns = ['gid', 'year']
for i in range(2, len(grid_respop.columns)):
    age = f'{grid_respop.columns[i][2:4]}대' if grid_respop.columns[i][2] != '1' else f'{grid_respop.columns[i][2:5]}대'
    sex = '남' if grid_respop.columns[i][0] != 'm' else '여'
    columns.append('%s_%s'%(age, sex))
grid_respop.columns = columns
# 인구 column & 고령인구 column & 고령인구 비율(%) column 생성
grid_respop['인구'] = grid_respop.iloc[:, 2:].sum(axis=1)
grid_respop =  grid_respop[grid_respop['인구'] != 0] # 거주인구가 없는 격자 필터링(제외)
grid_respop['고령인구'] = grid_respop.iloc[:, 10:-1].sum(axis=1)
grid_respop['고령인구_비율'] = grid_respop.iloc[:, -1] / grid_respop.iloc[:, -2]
grid_respop = grid_respop.fillna(0) # NaN값 0으로 대체
grid_respop['고령인구_비율'] = grid_respop['고령인구_비율'].apply(lambda x : round(x*100, 2))

# 격자별 평균 고령인구 비율 계산
avg_old_age_ratio = grid_respop.groupby('gid')['고령인구_비율'].mean()

# 가중치 계산
weights = avg_old_age_ratio / 100

grid_map_df_c = grid_map_df.copy()
# grid_map_df_c에 '고령인구비율_가중치' column 추가
grid_map_df_c['고령인구비율_가중치'] = grid_map_df_c['properties.gid'].map(lambda x: weights[x] if x in weights.index else float(0))

#### 격자별 의료시설간 거리 가중치 변환

##### 약국

In [16]:
# 격자의 중심점 좌표 추출
grid_coords = np.array(list(zip(grid_map_df.geometry.centroid.x, grid_map_df.geometry.centroid.y)))

In [17]:
from sklearn.neighbors import NearestNeighbors
# 약국의 좌표 추출
pharmacy_coords = np.array(list(zip(pharmacy_df.geometry.x, pharmacy_df.geometry.y)))

# KNN 모델 생성 및 학습 (k=1 사용하여 가장 가까운 약국 찾기)
knn = NearestNeighbors(n_neighbors=1)
knn.fit(pharmacy_coords)

# 각 격자와 가장 가까운 약국 간의 거리 계산
distances, indices = knn.kneighbors(grid_coords)

# 거리를 km 단위로 변환 (EPSG:4326의 경우, 1도 ≈ 111,000m이므로 111을 곱하여 km단위로 변환)
distances_km = distances * 111

# 결과를 DataFrame에 추가
grid_map_df_c['nearest_pharmacy_distance_weight'] = distances_km

##### AED

In [18]:
# AED의 좌표 추출
AED_coords = np.array(list(zip(AED_df.geometry.x, AED_df.geometry.y)))

# KNN 모델 생성 및 학습 (k=1 사용하여 가장 가까운 약국 찾기)
knn = NearestNeighbors(n_neighbors=1)
knn.fit(AED_coords)

# 각 격자와 가장 가까운 약국 간의 거리 계산
distances, indices = knn.kneighbors(grid_coords)

# 거리를 km 단위로 변환 (EPSG:4326의 경우, 1도 ≈ 111,000m이므로 111을 곱하여 km단위로 변환)
distances_km = distances * 111

# 결과를 DataFrame에 추가
grid_map_df_c['nearest_AED_distance_weight'] = distances_km

##### 119안전센터

In [19]:
# 119안전센터의 좌표 추출
coords_119 = np.array(list(zip(df_119.geometry.x, df_119.geometry.y)))

# KNN 모델 생성 및 학습 (k=1 사용하여 가장 가까운 약국 찾기)
knn = NearestNeighbors(n_neighbors=1)
knn.fit(coords_119)

# 각 격자와 가장 가까운 약국 간의 거리 계산
distances, indices = knn.kneighbors(grid_coords)

# 거리를 km 단위로 변환 (EPSG:4326의 경우, 1도 ≈ 111,000m이므로 111을 곱하여 km단위로 변환)
distances_km = distances * 111

# 결과를 DataFrame에 추가
grid_map_df_c['nearest_119_distance_weight'] = distances_km

#### 격자별 병원진료과목별 거리계산 알고리즘

In [20]:
# 격자의 중심점 추출
grid_coords = np.array(list(zip(grid_map_df_c.geometry.centroid.x, grid_map_df_c.geometry.centroid.y)))

# 각 진료분과별로 병원희소성을 고려한 'Neighbors' 열 생성
unique_departments = dict(hospital_exploded['mjr'].value_counts())
hospital_exploded['Neighbors'] = hospital_exploded['mjr'].map(lambda x: min(unique_departments[x], 3))

for dept in unique_departments:
    # 해당 진료분과의 의료시설 좌표 추출
    dept_hospitals = hospital_exploded[hospital_exploded['mjr'] == dept]
    dept_coords = np.array(list(zip(dept_hospitals.geometry.x, dept_hospitals.geometry.y)))
    
    if len(dept_coords) == 0:
        continue
    
    # KNN 모델 생성 및 학습 (Neighbors 열을 사용하여 가장 가까운 Neighbors개 의료시설 찾기)
    knn = NearestNeighbors(n_neighbors=dept_hospitals['Neighbors'].iloc[0])
    knn.fit(dept_coords)
    
    # 각 격자와 해당 진료분과 의료시설 간의 거리 계산
    distances, indices = knn.kneighbors(grid_coords)
    
    # 각 격자에 대해 Neighbors 개수만큼 거리의 평균 계산
    avg_distances = np.mean(distances[:, :dept_hospitals['Neighbors'].iloc[0]], axis=1)
    
    # 거리를 km 단위로 변환 (EPSG:4326의 경우, 1도 ≈ 111,000m이므로 111을 곱하여 km단위로 변환)
    avg_distances_km = avg_distances * 111
    
    # 결과를 DataFrame에 추가
    column_name = f'distance_to_{dept}'
    grid_map_df_c[column_name] = avg_distances_km

#### 격자별 도로 접근성 가중치 변환

In [21]:
# GeoJSON 파일 불러오기
with open('SBJ_2405_001/14.아산시_표준노드정보.geojson', encoding="UTF8") as geojson_file:
    geojson_data = json.load(geojson_file)
node_df = pd.json_normalize(geojson_data['features'])
node_df['geometry'] = node_df['geometry.coordinates'].apply(lambda x : make_point(x))
# node_df 데이터프레임을 GeoDataFrame으로 변환
node_df = gpd.GeoDataFrame(node_df, geometry='geometry')

In [22]:
# 도로 데이터에서 도로(노드)에 대한 LineString 정보 추출
coords_node = np.array(list(zip(node_df.geometry.x, node_df.geometry.y)))

# KNN 모델 생성 및 학습 (주도로까지의 최단 거리 계산)
knn = NearestNeighbors(n_neighbors=1)
knn.fit(coords_node)

# 각 격자의 중심점과 가장 가까운 주도로까지의 거리 계산
distances, indices = knn.kneighbors(grid_coords)

# 거리를 km 단위로 변환 (예시에서는 간단하게 유클리디안 거리를 사용했으므로, 다른 메트릭을 사용할 경우에는 단위 변환을 조정해야 할 수 있습니다)
distances_km = distances * 111

# 결과를 격자 데이터프레임에 추가
grid_map_df_c['distance_to_nearest_road'] = distances_km

#### 격자별 응급환자 신고 & 출동 & 병원도착시간 계산 및 가중치 변환 알고리즘

In [149]:
# NaT 문자열을 NaT로 변환
for col in df_emer119.columns[14:]:
    df_emer119[col] = pd.to_datetime(df_emer119[col], errors='coerce')

# datetime값이 NaT(결측값)인 row들 제거
datetime_columns = ['신고시각', '출동시각', '도착시각', '접촉시각', '귀소시각', '병원도착시각']

# 모든 datetime 열에서 NaT 값을 가진 행 제거
datetime_columns = df_emer119.columns.tolist()
df_emer119_cleaned = df_emer119.dropna(subset=datetime_columns)

# timedelta 계산하여 의료불균형 가중치로 변환
# 신고시각과 접촉시각의 시간차이 계산하여 가중치 구하기(시간단위)
df_emer119_cleaned['신고시각_접촉시각_시간차이'] = (df_emer119_cleaned['접촉시각'] - df_emer119_cleaned['신고시각']).dt.total_seconds() / 3600
# 접촉시각과 병원도착시각의 시간차이 계산하여 가중치 구하기(시간단위)
df_emer119_cleaned['접촉시각_병원도착시각_시간차이'] = (df_emer119_cleaned['병원도착시각'] - df_emer119_cleaned['접촉시각']).dt.total_seconds() / 3600

# gid 별 평균 계산
avg_times = df_emer119_cleaned.groupby('gid').agg({
    '신고시각_접촉시각_시간차이': 'mean',
    '접촉시각_병원도착시각_시간차이': 'mean'
}).to_dict(orient='index')

# avg_times을 기반으로 '신고-접촉시간_가중치'와 '접촉-병원도착시간_가중치' column 추가
grid_map_df_c['신고-접촉시간_가중치'] = grid_map_df_c['properties.gid'].map(lambda x: avg_times[x]['신고시각_접촉시각_시간차이'] if x in avg_times else float('nan'))
grid_map_df_c['접촉-병원도착시간_가중치'] = grid_map_df_c['properties.gid'].map(lambda x: avg_times[x]['접촉시각_병원도착시각_시간차이'] if x in avg_times else float('nan'))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



#### 모델활용 격자단위 의료취약지역 도출

In [None]:
# 시간_가중치 column을 기준으로 하위 75% 이하인 값을 NaN 값을 포함하여 75% 값으로 대체
threshold = grid_map_df_c['신고-접촉시간_가중치'].quantile(0.75)
grid_map_df_c['신고-접촉시간_가중치'] = grid_map_df_c['신고-접촉시간_가중치'].apply(lambda x: threshold if pd.isna(x) or x <= threshold else x)
threshold = grid_map_df_c['접촉-병원도착시간_가중치'].quantile(0.75)
grid_map_df_c['접촉-병원도착시간_가중치'] = grid_map_df_c['접촉-병원도착시간_가중치'].apply(lambda x: threshold if pd.isna(x) or x <= threshold else x)

In [None]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# 데이터 준비 (예시로 간단하게 특성을 설정합니다)
features = grid_map_df_c[grid_map_df_c.columns[5:]]

# 데이터 정규화
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# 클러스터 개수 범위 설정
k_range = range(1, 15)

# 각 클러스터 개수에 대한 WCSS 계산
wcss = []
for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(scaled_features)
    wcss.append(kmeans.inertia_)

# 엘보우 방법을 위한 그래프 그리기
plt.figure(figsize=(10, 8))
plt.plot(k_range, wcss, marker='o')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('WCSS')
plt.title('Elbow Method for Optimal K')
plt.show()


In [None]:
# 적절한 클러스터 개수를 엘보우 그래프를 통해 확인
optimal_k = 8

# K-means 모델 생성 및 학습
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
kmeans.fit(scaled_features)

# 클러스터링 결과 확인
grid_map_df_c['cluster'] = kmeans.labels_

In [191]:
#### 격자별 가중치 시각화
# 맵 객체 생성 및 데이터 로드
weight_map = KeplerGl(height=800)
weight_map.add_data(data=grid_map_df_c, name="가중치데이터")
weight_map.add_data(data=asan_gdf, name="아산시 데이터")
# 맵 출력 및 상세설정
weight_map.save_to_html(file_name="visualization/4. 아산시 의료불균형 분석/격자별 가중치 시각화 keplerGL.html")

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to visualization/4. 아산시 의료불균형 분석/격자별 가중치 시각화 keplerGL.html!


In [195]:
# 의료취약 군집번호는 4,5,6,7 -> 4이상인 군집들만 필터링
grid_map_vulnerable = grid_map_df_c[grid_map_df_c['cluster'] >= 4]
grid_map_vulnerable = grid_map_vulnerable[['properties.gid', 'geometry']]
grid_map_vulnerable.rename(columns={'properties.gid': 'gid'}, inplace=True)
grid_map_vulnerable.to_csv("아산시_의료취약지역.csv")