## 기본설정 및 함수정의

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.graph_objs as go
import plotly.offline as offline
from folium.plugins import HeatMapWithTime
from plotly.subplots import make_subplots
import folium
from folium import plugins
from folium.plugins import HeatMap
from folium import FeatureGroup
import json
import math
import re
from datetime import datetime
import os
import glob
import subprocess
from bs4 import BeautifulSoup as bs
from shapely.geometry import Point, Polygon, LineString
from shapely.ops import unary_union
import geopandas as gpd
from geopandas import GeoSeries
import pyproj
from tqdm import tqdm
from keplergl import KeplerGl

# tqdm의 pandas전용 메소드를 호출
tqdm.pandas()
# 모든 열이 생략되지 않도록 설정
pd.set_option('display.max_columns', None)

# 영등포역 위도, 경도
sejong = [36.51430420729354, 127.29034973889941 ]

# Point를만드는 함수
def make_point(x):
    try:
        return Point(x)
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
# Polygon을 만드는 함수
def make_pol(x):
    try:
        return Polygon(x[0])
    except:
        return Polygon(x[0][0])
    
# Linestring을 만드는 함수
def make_lin(x):
    try:
        return LineString(x)
    except:
        return LineString(x[0])

# 데이터프레임을 GeoPandas 데이터프레임으로 변환하는 함수 정의
def geo_transform(DataFrame) :
    # csv to geopandas
    # lon, lat data를 geometry로 변경
    DataFrame['lat'] = DataFrame['lat'].astype(float)
    DataFrame['lon'] = DataFrame['lon'].astype(float)
    DataFrame['geometry'] = DataFrame.progress_apply(lambda row : Point([row['lon'], row['lat']]), axis=1) # 위도 및 경도롤 GeoPandas Point 객체로 변환
    DataFrame = gpd.GeoDataFrame(DataFrame, geometry='geometry')
    DataFrame.crs = {'init':'epsg:4326'} # geopandas 데이터프레임의 좌표계를 EPSG 4326으로 설정
    DataFrame = DataFrame.to_crs({'init':'epsg:4326'}) # 데이터프레임의 좌표계를 자체 좌표계에서 EPSG 4326으로 변환
    return DataFrame


The Shapely GEOS version (3.11.2-CAPI-1.17.2) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow.


Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:

import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://sha

#### 세종시 행정경계(대상구역)

In [48]:
# GeoJSON 파일 불러오기
with open('SBJ_2406_002/11.세종시_대상구역도.geojson', encoding="UTF8") as geojson_file:
    geojson_data = json.load(geojson_file)
object_area_df = pd.json_normalize(geojson_data['features'])
object_area_df['geometry'] = object_area_df['geometry.coordinates'].apply(lambda x : make_pol(x))
object_area_df.drop(columns="geometry.coordinates", inplace=True)
object_area_df.rename(columns = {'properties.EMD_NM' : 'ADM_NM'}, inplace = True)
object_area_df = object_area_df[['ADM_NM', 'geometry']]

# shp to geodataframe convert
shapefile_path = "SBJ_2406_002/_census_data_2023_bnd_dong_bnd_dong_29010_2023_2023"
sejong_gdf = gpd.read_file(shapefile_path)
sejong_gdf = sejong_gdf.to_crs(epsg=4326) #EPSG4326 형식으로 변환
sejong_gdf = sejong_gdf[['ADM_NM', 'geometry']]
sejong_gdf = sejong_gdf[~sejong_gdf['ADM_NM'].str.endswith('동')]
sejong_gdf =  pd.concat([object_area_df, sejong_gdf], ignore_index=True)
# sejong_gdf 데이터프레임을 GeoDataFrame으로 변환
sejong_gdf = gpd.GeoDataFrame(sejong_gdf, geometry='geometry')
# 조건에 따라 'newtown' 열 생성 및 값 지정
sejong_gdf['newtown'] = sejong_gdf['ADM_NM'].apply(lambda x: '신도시' if x.endswith('동') else '그외지역')

#### 격자(매핑용)

In [50]:
# GeoJSON 파일 불러오기
with open('SBJ_2406_002/4.세종시_격자(매핑용).geojson', encoding="UTF8") as geojson_file:
    geojson_data = json.load(geojson_file)
grid_map_df = pd.json_normalize(geojson_data['features'])
grid_map_df['geometry'] = grid_map_df['geometry.coordinates'].apply(lambda x : make_pol(x))
grid_map_df.drop(columns="geometry.coordinates", axis=1, inplace=True)
# grid_map_df 데이터프레임을 GeoDataFrame으로 변환
grid_map_df = gpd.GeoDataFrame(grid_map_df, geometry='geometry')

In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_map = KeplerGl(height=1000, width=1500)
sejong_map.add_data(data=sejong_gdf, name="세종시 행정동 데이터")
sejong_map.add_data(data=grid_map_df, name="세종시 격자 데이터")

# 맵 출력 및 상세설정
sejong_map

In [40]:
# 맵 저장
sejong_map.save_to_html(file_name="visualization/세종시 현황/세종시 기본정보 map.html")

Map saved to visualization/세종시 현황/세종시 기본정보 map.html!


#### 연속지적도

In [4]:
cadastral_dic = {'전' : '밭',
                '답' : '논',
                '과' : '과수원',
                '목' : '목장',
                '임' : '임야',
                '광' : '광천지',
                '염' : '염전',
                '대' : '대지',
                '장' : '공장용지',
                '학' : '학교용지',
                '차' : '주차장',
                '주' : '주유소용지',
                '창' : '창고용지',
                '도' : '도로',
                '철' : '철도용지',
                '제' : '제방',
                '천' : '하천',
                '구' : '도랑',
                '유' : '유지',
                '양' : '양어장',
                '수' : '수도용지',
                '공' : '공원용지',
                '체' : '체육용지',
                '원' : '유원지',
                '종' : '종교용지',
                '사' : '사적지',
                '묘' : '묘지',
                '잡' : '잡종지',
                '기타' : '기타'}

In [5]:
# GeoJSON 파일 불러오기
with open('SBJ_2406_002/8.세종시_연속지적도.geojson', encoding="UTF8") as geojson_file:
    geojson_data = json.load(geojson_file)
cadastral_map_df = pd.json_normalize(geojson_data['features'])
cadastral_map_df['geometry'] = cadastral_map_df['geometry.coordinates'].apply(lambda x : make_pol(x))
cadastral_map_df.drop(columns="geometry.coordinates", inplace=True)
# cadastral_map_df 데이터프레임을 GeoDataFrame으로 변환
cadastral_map_df = gpd.GeoDataFrame(cadastral_map_df, geometry='geometry')
# 정규식을 사용하여 'properties.JIBUN'을 지번과 용도로 분리
# 지번은 숫자 또는 숫자-숫자 형식, 용도는 공백 뒤에 오는 한글 문자로 가정
cadastral_map_df['JIBUN'] = cadastral_map_df['properties.JIBUN'].str.extract(r'(\d+-?\d*)')
cadastral_map_df['USAGE'] = cadastral_map_df['properties.JIBUN'].apply(lambda x:x[-1])
# USAGE가 빈 문자열이거나 숫자로만 되어 있는 경우 '기타'로 설정
cadastral_map_df['USAGE'] = cadastral_map_df['USAGE'].apply(lambda x: '기타' if x == '' or x.isdigit() else x)
cadastral_map_df['USAGE'] = cadastral_map_df['USAGE'].apply(lambda x: cadastral_dic.get(x, x))
cadastral_map_df = cadastral_map_df[['JIBUN', 'USAGE', 'geometry']]
cadastral_map_df

Unnamed: 0,JIBUN,USAGE,geometry
0,59-14,임야,"POLYGON ((127.26892 36.48192, 127.26901 36.481..."
1,223-1,논,"POLYGON ((127.30726 36.46622, 127.30716 36.466..."
2,295-4,밭,"POLYGON ((127.18762 36.69594, 127.18766 36.695..."
3,63,임야,"POLYGON ((127.31121 36.47124, 127.31127 36.471..."
4,296,밭,"POLYGON ((127.18777 36.69513, 127.18783 36.695..."
...,...,...,...
205429,450-10,밭,"POLYGON ((127.27924 36.54751, 127.27915 36.547..."
205430,450-7,밭,"POLYGON ((127.27924 36.54751, 127.27926 36.547..."
205431,450-11,밭,"POLYGON ((127.27953 36.54758, 127.27926 36.547..."
205432,451-3,밭,"POLYGON ((127.27953 36.54758, 127.27965 36.547..."


In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_cadastral_map = KeplerGl(height=1000, width=1500)
sejong_cadastral_map.add_data(data=sejong_gdf, name="세종시 행정동 데이터")
sejong_cadastral_map.add_data(data=cadastral_map_df, name="세종시 연속지적도")

# 맵 출력 및 상세설정
sejong_cadastral_map

In [None]:
# 맵 저장
sejong_cadastral_map.save_to_html(file_name="visualization/세종시 현황/세종시 연속지적도 map.html")

Map saved to visualization/세종시 현황/세종시 연속지적도 map.html!


#### 격자인구통계

In [52]:
respop_df = pd.read_csv('SBJ_2406_002/10.세종시_성연령별_거주인구(격자).csv')

#grid ID 리스트 와 매핑 & respop_df 필터링
grid_id = grid_map_df['properties.gid'].tolist() # gid는 전부 unique한 값
grid_respop = respop_df[respop_df['gid'].isin(grid_id)]
grid_respop = grid_respop.fillna(0)
grid_respop = pd.concat([grid_respop.iloc[:, :2], grid_respop.iloc[:, 2:].astype(int)], axis=1)
grid_respop['year'] = grid_respop['year'].astype(str)

# 성별구분 없이 통합 및 세대별 인구수 데이터프레임으로 변환
columns = ['gid', 'year']
for i in range(2, len(grid_respop.columns)):
    age = f'{grid_respop.columns[i][2:4]}대' if grid_respop.columns[i][2] != '1' else f'{grid_respop.columns[i][2:5]}대'
    sex = '남' if grid_respop.columns[i][0] != 'm' else '여'
    columns.append('%s_%s'%(age, sex))
grid_respop.columns = columns
# 인구 column & 고령인구 column & 고령인구 비율(%) column 생성
grid_respop['인구'] = grid_respop.iloc[:, 2:].sum(axis=1)

In [None]:
# 연령대별로 성별 통합
grid_respop['20대'] = grid_respop['20대_여'] + grid_respop['20대_남']
grid_respop['30대'] = grid_respop['30대_여'] + grid_respop['30대_남']
grid_respop['40대'] = grid_respop['40대_여'] + grid_respop['40대_남']
grid_respop['50대'] = grid_respop['50대_여'] + grid_respop['50대_남']
grid_respop['60대'] = grid_respop['60대_여'] + grid_respop['60대_남']
grid_respop['70대'] = grid_respop['70대_여'] + grid_respop['70대_남']
grid_respop['80대'] = grid_respop['80대_여'] + grid_respop['80대_남']
grid_respop['90대'] = grid_respop['90대_여'] + grid_respop['90대_남']
grid_respop['100대'] = grid_respop['100대_여'] + grid_respop['100대_남']

# 필요한 열만 선택
age_totals = grid_respop[['year', '20대', '30대', '40대', '50대', '60대', '70대', '80대', '90대', '100대']]

# 데이터프레임을 긴 형식으로 변환
age_totals_melted = age_totals.melt(id_vars='year', var_name='Age Group', value_name='Count')

# 연도별 연령대 분포 계산
age_totals_yearly = age_totals_melted[['Age Group', 'Count']].groupby(['Age Group']).sum().reset_index()
# 100대 연령대 제거
age_totals_yearly = age_totals_yearly[age_totals_yearly['Age Group'] != '100대']

# 연령대 종합 평균내기
age_totals_yearly['Count'] = age_totals_yearly['Count'] / 4
age_totals_yearly['Count'] = age_totals_yearly['Count'].astype(int)
# 시각화
fig = px.bar(
    age_totals_yearly,
    x='Age Group',
    y='Count',
    color='Count',
    color_continuous_scale='Plasma_r',  # Plasma 색상 맵 사용
    labels={'year': 'Year', 'Count': 'Population Count', 'Age Group': 'Age Group'},
)
fig.update_layout(height=600, width=1200)
fig.show()

In [56]:
gen_respop = grid_respop[['gid', '20대', '30대', '40대', '50대', '60대', '70대', '80대', '90대', '100대']]
gen_respop = gen_respop.groupby('gid').sum().reset_index()

# 격자 ID와 격자 폴리곤 각각을 매핑한 딕셔너리 생성
map_dic = dict(zip(grid_map_df['properties.gid'], grid_map_df['geometry']))

# 생성된 map_dic에 filtered_respop의 gid를 매핑하여 'geometry' column 생성
geometry_lst = []
for _, row in gen_respop.iterrows():
    geometry_lst.append(map_dic[row['gid']])
gen_respop['geometry'] = geometry_lst
gen_respop = gpd.GeoDataFrame(gen_respop, geometry='geometry')

In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_gridgenpop_map = KeplerGl(height=1000, width=1500)
sejong_gridgenpop_map.add_data(data=gen_respop, name="세종시 세대별 격자거주인구 데이터")
sejong_gridgenpop_map.add_data(data=sejong_gdf, name="세종시 행정구역 데이터")

# 맵 출력 및 상세설정
sejong_gridgenpop_map

In [46]:
# 맵 저장
sejong_gridgenpop_map.save_to_html(file_name="visualization/세종시 현황/세종시 세대별 격자거주인구 map.html")

Map saved to visualization/세종시 현황/세종시 세대별 격자거주인구 map.html!


In [None]:
age_totals_gen = age_totals.groupby('year').sum().reset_index()
# 각 세대별 총 인구수 계산 및 정렬
total_pop = age_totals_gen.drop(columns='year').sum().sort_values(ascending=False)
sorted_columns = total_pop.index.tolist()

# 각 세대별로 데이터를 분리하여 선과 마커를 동시에 표시하는 그래프 생성
fig = go.Figure()

for column in sorted_columns:  # 'year' 열을 제외한 각 세대별 열에 대해 반복
    fig.add_trace(go.Scatter(
        x=age_totals_gen['year'], 
        y=age_totals_gen[column], 
        mode='lines+markers', 
        name=column
    ))
# 색상 업데이트
colors =px.colors.sequential.Plasma # 컬러맵
for i, trace in enumerate(fig.data):
    trace.line.color = colors[i % len(colors)]
# 그래프 레이아웃 설정
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Population',
    width=800,
    height=600
)

# 그래프 시각화
fig.show()

In [None]:
# 연도별 인구수 합계 계산
yearly_totals = age_totals_gen.drop(columns='year').sum(axis=1)

# 각 세대의 비율 계산
age_totals_ratio = age_totals_gen.copy()
for column in age_totals_gen.columns[1:]:
    age_totals_ratio[column] = age_totals_gen[column] / yearly_totals

# 누적 비율 그래프 생성
fig_cumulative = go.Figure()

# 색상 팔레트 가져오기
colors = px.colors.sequential.Plasma

for idx, column in enumerate(age_totals_gen.columns[1:]):  # 세대 순서에 따라 누적 비율 그래프 생성
    fig_cumulative.add_trace(go.Scatter(
        x=age_totals_ratio['year'], 
        y=age_totals_ratio[column], 
        mode='lines+markers', 
        stackgroup='one', 
        name=column,
        line=dict(color=colors[idx % len(colors)])  # 색상 적용
    ))

# 누적 비율 그래프 레이아웃 설정
fig_cumulative.update_layout(
    xaxis_title='Year',
    yaxis_title='Proportion',
    width=800,
    height=600
)

# 누적 비율 그래프 시각화
fig_cumulative.show()

In [59]:
year_lst = range(2020, 2024)
year_pop = [] # 인구수 종합
for year in year_lst:
    # 해당 year의 인구수 종합
    year_pop.append(grid_respop[grid_respop['year'] == str(year)]['인구'].sum())

pop_df = pd.DataFrame({'연도':year_lst, '종합인구':year_pop})
pop_df

Unnamed: 0,연도,종합인구
0,2020,216288
1,2021,224599
2,2022,223073
3,2023,223989


In [None]:
fig = px.line(pop_df, x='연도', y='종합인구', labels={'x':'연도', 'y':'인구수'})
fig.update_traces(mode='lines+markers')

fig.update_xaxes(
        showticklabels=True, 
        showgrid=False, 
        tickmode='array',
        range=[2019.5, 2023.5],
        tickvals=[2020, 2021, 2022, 2023],  # x축에 표시할 값
        ticktext=['2020', '2021', '2022', '2023'],  # x축에 표시할 텍스트
    )
fig.update_yaxes(
    showticklabels=True, 
    showgrid=True,
    )
fig.update_layout(height=800, width=1200)
fig.show()

In [61]:
grid_pop_df = grid_respop[['gid', 'year', '인구']]
geometry_lst = []
for _, row in grid_pop_df.iterrows():
    geometry_lst.append(map_dic[row['gid']])
grid_pop_df['geometry'] = geometry_lst
grid_pop_df = grid_pop_df[grid_pop_df['인구'] != 0]
grid_pop_df = gpd.GeoDataFrame(grid_pop_df, geometry='geometry')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_gridpop_map = KeplerGl(height=1000, width=1500)
sejong_gridpop_map.add_data(data=grid_pop_df, name="세종시 격자거주인구 데이터")
sejong_gridpop_map.add_data(data=sejong_gdf, name="세종시 행정구역 데이터")

# 맵 출력 및 상세설정
sejong_gridpop_map

In [75]:
# 맵 저장
sejong_gridpop_map.save_to_html(file_name="visualization/세종시 현황/세종시 격자거주인구 map.html")

Map saved to visualization/세종시 현황/세종시 격자거주인구 map.html!


#### 행정구역별 인구(격자거주인구 활용)

In [62]:
# 공간 조인 수행
joined_df = gpd.sjoin(grid_pop_df, sejong_gdf, how='inner', op='intersects')

# 연도별로 행정구역의 인구수를 합산
sejong_pop_gdf = joined_df.groupby(['year', 'ADM_NM', 'newtown']).agg({'인구': 'sum'}).reset_index()
sejong_pop_gdf = pd.merge(sejong_pop_gdf, sejong_gdf[['ADM_NM', 'geometry']], on='ADM_NM')
sejong_pop_gdf = gpd.GeoDataFrame(sejong_pop_gdf, geometry='geometry')


The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.



In [77]:
# 연도별로 행정구역의 인구수 합산
pop_trend_df = sejong_pop_gdf.groupby(['year', 'ADM_NM']).agg({'인구': 'sum'}).reset_index()
# 각 행정구역의 총 인구수 계산
total_pop_df = pop_trend_df.groupby('ADM_NM')['인구'].sum().reset_index()

# 총 인구수를 기준으로 행정구역 정렬
total_pop_df = total_pop_df.sort_values(by='인구', ascending=False)

# 각 행정구역별로 데이터를 분리하여 선과 마커를 동시에 표시하는 그래프 생성
fig = go.Figure()

for adm_nm in total_pop_df['ADM_NM'].unique():
    df = pop_trend_df[pop_trend_df['ADM_NM'] == adm_nm]
    fig.add_trace(go.Scatter(x=df['year'], y=df['인구'], mode='lines+markers', name=adm_nm))
# 색상 업데이트
colors =px.colors.sequential.Plasma # 컬러맵
for i, trace in enumerate(fig.data):
    trace.line.color = colors[i % len(colors)]
# 그래프 레이아웃 설정
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Population',
    width=1000,
    height=800
)

# 그래프 시각화
fig.show()

In [82]:
newtown_lst = sejong_gdf[sejong_gdf['newtown'] == '신도시']['ADM_NM'].tolist()
# 연도별로 행정구역의 인구수 합산
pop_trend_df = sejong_pop_gdf.groupby(['year', 'ADM_NM']).agg({'인구': 'sum'}).reset_index()
pop_trend_df = pop_trend_df[pop_trend_df['ADM_NM'].isin(newtown_lst)]
# 각 행정구역의 총 인구수 계산
total_pop_df = pop_trend_df.groupby('ADM_NM')['인구'].sum().reset_index()

# 총 인구수를 기준으로 행정구역 정렬
total_pop_df = total_pop_df.sort_values(by='인구', ascending=False)

# 각 행정구역별로 데이터를 분리하여 선과 마커를 동시에 표시하는 그래프 생성
fig = go.Figure()

for adm_nm in total_pop_df['ADM_NM'].unique():
    df = pop_trend_df[pop_trend_df['ADM_NM'] == adm_nm]
    fig.add_trace(go.Scatter(x=df['year'], y=df['인구'], mode='lines+markers', name=adm_nm))
# 색상 업데이트
colors =px.colors.sequential.Plasma # 컬러맵
for i, trace in enumerate(fig.data):
    trace.line.color = colors[i % len(colors)]
# 그래프 레이아웃 설정
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Population',
    width=800,
    height=600
)

# 그래프 시각화
fig.show()

In [85]:
pop_trend_df.to_csv("행정동별_거주인구.csv", index=False)

In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_pop_map = KeplerGl(height=1000, width=1500)
sejong_pop_map.add_data(data=sejong_pop_gdf, name="세종시 거주인구 데이터")

# 맵 출력 및 상세설정
sejong_pop_map

In [97]:
# 맵 저장
sejong_pop_map.save_to_html(file_name="visualization/세종시 현황/세종시 행정구역거주인구 map.html")

Map saved to visualization/세종시 현황/세종시 행정구역거주인구 map.html!


#### 세대별 유동인구

In [112]:
# 유동인구 df 로드
floating_population_df = pd.read_csv('SBJ_2406_002/1.세종시_성연령별_유동인구.csv')
floating_population_df['STD_YM'] = floating_population_df['STD_YM'].astype(str)

# 성별구분 없이 통합 및 세대별 인구수 데이터프레임으로 변환
columns = ['year']
for i in range(1, 13):
    floating_population_df[floating_population_df.columns[i]] = floating_population_df[floating_population_df.columns[i]].astype(float)
    new_col = f'{floating_population_df.columns[i][2:4]}대_남' if floating_population_df.columns[i][0] != 'm' else f'{floating_population_df.columns[i][2:4]}대_여'
    columns.append(new_col)
floating_population_df.columns = columns + floating_population_df.columns[13:15].tolist()
floating_population_df['pop'] = floating_population_df.iloc[:, 1:-3].sum(axis=1)

# 연령대별로 성별 통합
floating_population_df['10대'] = floating_population_df['10대_남'] + floating_population_df['10대_여']
floating_population_df['20대'] = floating_population_df['20대_남'] + floating_population_df['20대_여']
floating_population_df['30대'] = floating_population_df['30대_남'] + floating_population_df['30대_여']
floating_population_df['40대'] = floating_population_df['40대_남'] + floating_population_df['40대_여']
floating_population_df['50대'] = floating_population_df['50대_남'] + floating_population_df['50대_여']
floating_population_df['60대'] = floating_population_df['60대_남'] + floating_population_df['60대_여']
flopop_df = floating_population_df[['year', 'pop'] + list(floating_population_df.columns[13:15])]
col_lst = ['year'] + list(floating_population_df.columns[1:13]) + list(floating_population_df.columns[-6:]) + ['pop'] + list(floating_population_df.columns[13:15])
floating_population_df = floating_population_df[col_lst]
# 연도 월 정보를 "YYYY-MM-DD HH:MM:SS" 형식으로 변환하는 함수
def convert_to_datetime_format(year_month_str):
    year = year_month_str[:4]
    month = year_month_str[4:6]
    return f"{year}-{month}-01 00:00:00"
# 변환 적용
floating_population_df['year'] = floating_population_df['year'].apply(convert_to_datetime_format)

floating_population_df

Unnamed: 0,year,10대_여,20대_여,30대_여,40대_여,50대_여,60대_여,10대_남,20대_남,30대_남,40대_남,50대_남,60대_남,10대,20대,30대,40대,50대,60대,pop,lon,lat
0,2020-01-01 00:00:00,0.00,0.00,0.00,0.04,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.04,0.00,0.00,0.04,127.130339,36.708900
1,2020-01-01 00:00:00,0.00,0.04,0.02,0.04,0.04,0.02,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.04,0.02,0.04,0.04,0.02,0.16,127.136491,36.710271
2,2020-01-01 00:00:00,0.00,0.04,0.02,0.04,0.04,0.02,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.04,0.02,0.04,0.04,0.02,0.16,127.137050,36.710273
3,2020-01-01 00:00:00,0.00,0.04,0.02,0.04,0.04,0.02,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.04,0.02,0.04,0.04,0.02,0.16,127.137048,36.710724
4,2020-01-01 00:00:00,0.04,0.07,0.19,0.28,0.23,0.15,0.04,0.04,0.06,0.09,0.09,0.04,0.08,0.11,0.25,0.37,0.32,0.19,1.28,127.137633,36.705317
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1839168,2022-12-01 00:00:00,23.45,13.10,41.50,53.50,33.96,31.89,25.90,14.37,43.79,55.62,25.83,27.48,49.35,27.47,85.29,109.12,59.79,59.37,362.91,127.244107,36.501854
1839169,2022-12-01 00:00:00,20.15,8.48,26.43,31.47,16.17,18.70,22.32,9.92,33.76,39.27,17.57,19.28,42.47,18.40,60.19,70.74,33.74,37.98,244.24,127.244106,36.502305
1839170,2022-12-01 00:00:00,11.74,5.21,21.98,26.22,11.48,12.29,11.44,6.34,24.60,27.48,10.75,12.43,23.18,11.55,46.58,53.70,22.23,24.72,169.53,127.244104,36.502755
1839171,2022-12-01 00:00:00,23.75,11.46,37.68,50.17,26.01,24.16,22.55,14.07,49.64,61.01,25.75,26.17,46.30,25.53,87.32,111.18,51.76,50.33,346.25,127.244666,36.501855


In [116]:
flopop_df['year'] = flopop_df['year'].apply(lambda x : x[:4])
flopop_df = geo_transform(flopop_df)
# 공간 조인 수행
joined_df = gpd.sjoin(flopop_df, sejong_gdf, how='inner', op='intersects')
# 연도별로 행정구역의 인구수를 합산
flopop_df_d = joined_df.groupby(['year', 'ADM_NM']).agg({'pop': 'sum'}).reset_index()
flopop_df_d.to_csv('행정동별_유동인구.csv', index=False)


The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.


CRS mismatch between the CRS of left geometries and the CRS of right geometries.
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: None




In [25]:
# 상관계수 계산
correlation_matrix1 = floating_population_df[['10대', '20대', '30대', '40대', '50대', '60대']].corr()
correlation_matrix1

Unnamed: 0,10대,20대,30대,40대,50대,60대
10대,1.0,0.645954,0.698082,0.710936,0.576563,0.555442
20대,0.645954,1.0,0.82189,0.832186,0.844595,0.813548
30대,0.698082,0.82189,1.0,0.967819,0.881735,0.813822
40대,0.710936,0.832186,0.967819,1.0,0.941483,0.869169
50대,0.576563,0.844595,0.881735,0.941483,1.0,0.956468
60대,0.555442,0.813548,0.813822,0.869169,0.956468,1.0


In [26]:
# 상관계수 계산
correlation_matrix2 = floating_population_df[floating_population_df.columns[1:13]].corr()
correlation_matrix2

Unnamed: 0,10대_여,20대_여,30대_여,40대_여,50대_여,60대_여,10대_남,20대_남,30대_남,40대_남,50대_남,60대_남
10대_여,1.0,0.619419,0.643857,0.632051,0.517254,0.510785,0.983147,0.703025,0.745245,0.785834,0.669435,0.63062
20대_여,0.619419,1.0,0.778377,0.778041,0.781756,0.768196,0.551491,0.912304,0.666468,0.691751,0.779966,0.760108
30대_여,0.643857,0.778377,1.0,0.974716,0.901772,0.839755,0.623865,0.888391,0.924135,0.90889,0.929106,0.850981
40대_여,0.632051,0.778041,0.974716,1.0,0.954359,0.884096,0.614521,0.87148,0.872777,0.90793,0.951431,0.878742
50대_여,0.517254,0.781756,0.901772,0.954359,1.0,0.954554,0.497251,0.823579,0.728551,0.784854,0.919848,0.889701
60대_여,0.510785,0.768196,0.839755,0.884096,0.954554,1.0,0.485941,0.791374,0.685483,0.738704,0.902102,0.94436
10대_남,0.983147,0.551491,0.623865,0.614521,0.497251,0.485941,1.0,0.665992,0.726383,0.774654,0.656033,0.60691
20대_남,0.703025,0.912304,0.888391,0.87148,0.823579,0.791374,0.665992,1.0,0.838753,0.848877,0.885523,0.836031
30대_남,0.745245,0.666468,0.924135,0.872777,0.728551,0.685483,0.726383,0.838753,1.0,0.951686,0.867621,0.784877
40대_남,0.785834,0.691751,0.90889,0.90793,0.784854,0.738704,0.774654,0.848877,0.951686,1.0,0.930324,0.845711


In [27]:
# 첫 번째 상관계수 히트맵
fig1 = go.Figure(data=go.Heatmap(
    z=correlation_matrix1.values,
    x=correlation_matrix1.columns,
    y=correlation_matrix1.index,
    colorscale='Plasma'
))
fig1.update_layout(
    xaxis_nticks=36
)
# 히트맵 출력
fig1.show()

In [28]:
# 두 번째 상관계수 히트맵
fig2 = go.Figure(data=go.Heatmap(
    z=correlation_matrix2.values,
    x=correlation_matrix2.columns,
    y=correlation_matrix2.index,
    colorscale='Plasma'
))
fig2.update_layout(
    xaxis_nticks=36
)

# 히트맵 출력
fig2.show()

In [31]:
# 'year' 열을 datetime 형식으로 변환
df = floating_population_df[['year', '10대', '20대', '30대', '40대', '50대', '60대']].groupby(['year']).sum().reset_index()
df['year'] = pd.to_datetime(df['year'])

# 'year' 열을 인덱스로 설정
df.set_index('year', inplace=True)

# Plotly를 사용하여 시간적 패턴 시각화
fig = go.Figure()

# 각 세대별로 데이터를 추가
for column in ['40대', '50대', '60대', '30대', '20대', '10대']:
    fig.add_trace(go.Scatter(x=df.index, y=df[column], mode='lines', name=column))
# 색상 업데이트
colors =px.colors.sequential.Plasma # 컬러맵
for i, trace in enumerate(fig.data):
    trace.line.color = colors[i % len(colors)]
# 레이아웃 설정
fig.update_layout(
    xaxis_title='날짜',
    yaxis_title='유동인구 수',
    template='plotly'
)

# 그래프 표시
fig.show()

In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_floatingpop_map = KeplerGl(height=1000, width=1500)
sejong_floatingpop_map.add_data(data=sejong_gdf, name="세종시 행정동 데이터")
sejong_floatingpop_map.add_data(data=floating_population_df, name="세종시 세대별 유동인구 데이터")

# 맵 출력 및 상세설정
sejong_floatingpop_map

In [148]:
# 맵 저장
sejong_floatingpop_map.save_to_html(file_name="visualization/세종시 현황/세종시 세대별 유동인구 map.html")

Map saved to visualization/세종시 현황/세종시 세대별 유동인구 map.html!


#### 시간대별 유동인구

In [87]:
# 유동인구 df 로드
floating_population_df_T = pd.read_csv('SBJ_2406_002/2.세종시_시간대별_유동인구.csv')
floating_population_df_T['STD_YM'] = floating_population_df_T['STD_YM'].astype(str)

# 열 이름 변경 과정
columns_to_rename = floating_population_df_T.columns[1:-2] # 시계열 정보를 포함한 column만 선택
new_column_names = []
# 각 열 이름을 처리하여 새로운 열 이름을 생성
for column_name in columns_to_rename:
    numeric_part = column_name.split('_')[-1] # 숫자 부분 추출
    new_column_name = f"2020-01-01 {numeric_part}:00" # '시'를 붙여 새로운 열 이름 생성
    new_column_names.append(new_column_name) # new_column_names에 추가
# 열 이름을 변경합니다.
floating_population_df_T.rename(columns=dict(zip(columns_to_rename, new_column_names)), inplace=True)
floating_population_df_T =floating_population_df_T.iloc[:,1:]
floating_population_df_T = floating_population_df_T.groupby(['lon', 'lat']).mean().reset_index()

# 시간대별 유동인구 정보를 'timestamp' 형식으로 변환
def melt_time_columns(df):
    id_vars = ['lon', 'lat']
    value_vars = [col for col in df.columns if col not in id_vars]
    df_melted = df.melt(id_vars=id_vars, value_vars=value_vars, var_name='time', value_name='population')
    return df_melted

floating_population_df_T = melt_time_columns(floating_population_df_T)
floating_population_df_T

Unnamed: 0,lon,lat,time,population
0,127.128657,36.709346,2020-01-01 00:00,0.000000
1,127.130339,36.708900,2020-01-01 00:00,0.000000
2,127.136488,36.710722,2020-01-01 00:00,0.000000
3,127.136491,36.710271,2020-01-01 00:00,0.000000
4,127.137046,36.711174,2020-01-01 00:00,0.000000
...,...,...,...,...
1771675,127.408820,36.496234,2020-01-01 23:00,0.000000
1771676,127.408821,36.495783,2020-01-01 23:00,0.003333
1771677,127.408821,36.495332,2020-01-01 23:00,0.009444
1771678,127.409379,36.496234,2020-01-01 23:00,0.000000


In [38]:
# 'year' 열을 datetime 형식으로 변환
df = floating_population_df_T[['time', 'population']].groupby('time').sum().reset_index()
df['time'] = pd.to_datetime(df['time'])

# 'time' 열을 인덱스로 설정
df.set_index('time', inplace=True)

# Plotly를 사용하여 시간적 패턴 시각화
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['population'], mode='lines', name='population'))
# 색상 업데이트
colors =px.colors.sequential.Plasma # 컬러맵
for i, trace in enumerate(fig.data):
    trace.line.color = colors[i % len(colors)]
# 레이아웃 설정
fig.update_layout(
    xaxis_title='시간대',
    yaxis_title='유동인구 수',
    template='plotly'
)

# 그래프 표시
fig.show()

In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_floatingpop_T_map = KeplerGl(height=1000, width=1500)
sejong_floatingpop_T_map.add_data(data=sejong_gdf, name="영등포구 행정동 데이터")
sejong_floatingpop_T_map.add_data(data=floating_population_df_T, name="영등포구 시간대별 유동인구 데이터")

# 맵 출력 및 상세설정
sejong_floatingpop_T_map

In [23]:
# 맵 저장
sejong_floatingpop_T_map.save_to_html(file_name="visualization/세종시 현황/세종시 시간대별 유동인구 map.html")

Map saved to visualization/세종시 현황/세종시 시간대별 유동인구 map.html!


#### 신도시 부분만 나타내기

In [None]:
# 공간 조인 수행
floating_population_df_T = geo_transform(floating_population_df_T)
joined_df = gpd.sjoin(floating_population_df_T, sejong_gdf, how='inner', op='intersects')

# 연도별로 행정구역의 인구수를 합산
sejong_floatingpop_T_gdf = joined_df.groupby(['time', 'newtown', 'geometry']).agg({'population': 'sum'}).reset_index()
sejong_floatingpop_T_gdf = sejong_floatingpop_T_gdf[sejong_floatingpop_T_gdf['newtown'] == '신도시']
sejong_floatingpop_T_gdf = gpd.GeoDataFrame(sejong_floatingpop_T_gdf, geometry='geometry')

In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_floatingpop_T_map_NT = KeplerGl(height=1000, width=1500)
sejong_floatingpop_T_map_NT.add_data(data=sejong_gdf, name="영등포구 행정동 데이터")
sejong_floatingpop_T_map_NT.add_data(data=sejong_floatingpop_T_gdf, name="영등포구 시간대별 유동인구 데이터")

# 맵 출력 및 상세설정
sejong_floatingpop_T_map_NT

In [61]:
# 맵 저장
sejong_floatingpop_T_map_NT.save_to_html(file_name="visualization/세종시 현황/세종시(신도시) 시간대별 유동인구 map.html")

Map saved to visualization/세종시 현황/세종시(신도시) 시간대별 유동인구 map.html!


#### 요일별 유동인구

In [65]:
floating_population_df_D = pd.read_csv('SBJ_2406_002/3.세종시_요일별_유동인구.csv')

floating_population_df_D['STD_YM'] = floating_population_df_D['STD_YM'].astype(str)
floating_population_df_D['STD_YM'] = floating_population_df_D['STD_YM'].apply(lambda x:x[:4])

# 열 이름 변경 과정
columns_to_rename = floating_population_df_D.columns[1:-2] # 시계열 정보를 포함한 column만 선택
new_column_names = []
# 각 열 이름을 처리하여 새로운 열 이름을 생성
for column_name in columns_to_rename:
    day_part = column_name.split('_')[0] # 숫자 부분 추출
    new_column_names.append(day_part) # new_column_names에 추가
# 열 이름을 변경합니다.
floating_population_df_D.rename(columns=dict(zip(columns_to_rename, new_column_names)), inplace=True)
floating_population_df_D =floating_population_df_D.iloc[:,1:]
floating_population_df_D = floating_population_df_D.groupby(['lon', 'lat']).mean().reset_index()
floating_population_df_cor = floating_population_df_D.copy()

# 평일 및 주말 평균 계산
def calculate_weekday_weekend_means(df):
    # 평일 평균 = 1
    df['1'] = df[['mon', 'tue', 'wed', 'thu', 'fri']].mean(axis=1)
    # 주말 평균 = 2
    df['2'] = df[['sat', 'sun']].mean(axis=1)
    return df[['lon', 'lat', '1', '2']]

floating_population_df_D = calculate_weekday_weekend_means(floating_population_df_D)

# long 형식으로 변환
floating_population_df_D = floating_population_df_D.melt(
    id_vars=['lon', 'lat'],
    value_vars=['1', '2'],
    var_name='day_type',
    value_name='population'
)

floating_population_df_D

Unnamed: 0,lon,lat,day_type,population
0,127.128100,36.708893,1,0.036000
1,127.128657,36.709346,1,0.079800
2,127.129215,36.709798,1,0.073667
3,127.129775,36.709800,1,0.040000
4,127.130335,36.709802,1,0.077200
...,...,...,...,...
164425,127.408821,36.495332,2,1.252639
164426,127.409378,36.496685,2,0.061250
164427,127.409379,36.496234,2,0.136667
164428,127.409936,36.496685,2,0.164375


##### 요일별 유동인구의 상관관계 분석

In [88]:
# 'year' 열을 datetime 형식으로 변환
df = floating_population_df_cor[['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']].sum().reset_index()

# 'time' 열을 인덱스로 설정
df.set_index('index', inplace=True)

# Plotly를 사용하여 시간적 패턴 시각화
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df[0], mode='lines', name='population'))
# 색상 업데이트
colors =px.colors.sequential.Plasma # 컬러맵
for i, trace in enumerate(fig.data):
    trace.line.color = colors[i % len(colors)]
# 레이아웃 설정
fig.update_layout(
    xaxis_title='요일',
    yaxis_title='유동인구 수',
    template='plotly'
)

# 그래프 표시
fig.show()

In [89]:
# 상관계수 계산
correlation_matrix3 = floating_population_df_cor[['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']].corr()
correlation_matrix3

Unnamed: 0,mon,tue,wed,thu,fri,sat,sun
mon,1.0,0.999209,0.999462,0.999414,0.998362,0.927462,0.929759
tue,0.999209,1.0,0.999641,0.999768,0.996923,0.917737,0.921486
wed,0.999462,0.999641,1.0,0.999652,0.997275,0.923979,0.927455
thu,0.999414,0.999768,0.999652,1.0,0.997899,0.92007,0.923005
fri,0.998362,0.996923,0.997275,0.997899,1.0,0.933622,0.933926
sat,0.927462,0.917737,0.923979,0.92007,0.933622,1.0,0.989952
sun,0.929759,0.921486,0.927455,0.923005,0.933926,0.989952,1.0


In [90]:
# 첫 번째 상관계수 히트맵
fig1 = go.Figure(data=go.Heatmap(
    z=correlation_matrix3.values,
    x=correlation_matrix3.columns,
    y=correlation_matrix3.index,
    colorscale='Plasma'
))
fig1.update_layout(
    xaxis_nticks=36
)
# 히트맵 출력
fig1.show()

In [91]:
# 공간 조인 수행
floating_population_df_D = geo_transform(floating_population_df_D)
joined_df = gpd.sjoin(floating_population_df_D, sejong_gdf, how='inner', op='intersects')

# 연도별로 행정구역의 인구수를 합산
sejong_floatingpop_D_gdf = joined_df.groupby(['day_type', 'newtown', 'geometry']).agg({'population': 'sum'}).reset_index()
sejong_floatingpop_D_gdf = gpd.GeoDataFrame(sejong_floatingpop_D_gdf, geometry='geometry')

100%|██████████| 164430/164430 [00:10<00:00, 15644.84it/s]

'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6


'+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6


The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.


CRS mismatch between the CRS of left geometries and the CRS of right geometries.
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: None




In [None]:
# 맵 객체 생성 및 데이터 로드
sejong_floatingpop_D_map = KeplerGl(height=1000, width=1500)
sejong_floatingpop_D_map.add_data(data=sejong_gdf, name="세종시 행정동 데이터")
sejong_floatingpop_D_map.add_data(data=sejong_floatingpop_D_gdf, name="세종시 평일 및 주말 유동인구 데이터")

# 맵 출력 및 상세설정
sejong_floatingpop_D_map

In [70]:
# 맵 저장
sejong_floatingpop_D_map.save_to_html(file_name="visualization/세종시 현황/세종시 주중 및 주말 유동인구 map.html")

Map saved to visualization/세종시 현황/세종시 주중 및 주말 유동인구 map.html!


##### 행정동별 주중 및 주말 유동인구 계산

In [93]:
# 연도별로 행정구역의 인구수를 합산
sejong_floatingpop_D_hjd = joined_df.groupby(['day_type', 'ADM_NM']).agg({'population': 'sum'}).reset_index()
# 행정동별로 주중 유동 인구와 주말 유동 인구를 집계
weekday_population = sejong_floatingpop_D_hjd[sejong_floatingpop_D_hjd['day_type'] == '1'].groupby('ADM_NM')['population'].mean().reset_index()
weekend_population = sejong_floatingpop_D_hjd[sejong_floatingpop_D_hjd['day_type'] == '2'].groupby('ADM_NM')['population'].mean().reset_index()

# 행정동별 주중과 주말 유동 인구를 하나의 데이터프레임으로 병합
population_diff = weekday_population.merge(weekend_population, on='ADM_NM', suffixes=('_weekday', '_weekend'))

# 주중과 주말의 유동 인구 차이를 계산
population_diff['population_diff'] = population_diff['population_weekend'] - population_diff['population_weekday']

# 주중과 주말 유동 인구 차이를 바 차트로 시각화, plasma 색상 맵 사용
fig = px.bar(
    population_diff,
    x='ADM_NM',
    y='population_diff',
    color='population_diff',
    color_continuous_scale='plasma',
    labels={'population_diff': '유동 인구 차이', 'ADM_NM': '행정동'}
)

# 그래프 출력
fig.show()