In [1]:
import folium
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import koreanize_matplotlib

# 기본 서울지도 표시하기

In [2]:
seoul = folium.Map(location=[37.55, 126.98], zoom_start=12)

In [3]:
seoul

In [4]:
# 서울 지하철 이용객수 정보 파일
data = pd.read_csv('./data/seoul-metro-2021.logs.csv')
data.head()

Unnamed: 0,timestamp,station_code,people_in,people_out
0,2021-01-01T05:00:00.000+09:00,150,86,85
1,2021-01-01T06:00:00.000+09:00,150,111,355
2,2021-01-01T07:00:00.000+09:00,150,157,438
3,2021-01-01T08:00:00.000+09:00,150,306,592
4,2021-01-01T09:00:00.000+09:00,150,333,841


In [5]:
# 서울 지하철 역 정보 파일
station_info = pd.read_csv('./data/seoul-metro-station-info.csv')
station_info.head(2)

Unnamed: 0,station.code,station.fr_code,line.num,line.name,line.name_sub,line.station_seq,station.name_full,station.name,station.name_chc,station.name_chn,station.name_en,station.name_jp,geo.latitude,geo.longitude,geo.sigungu_code,geo.sigungu_name,geo.addres_road,geo.address_land,geo.phone
0,158,124,1,1호선,지하철1호선,1,청량리(서울시립대입구),청량리|서울시립대입구,祭基洞,祭基洞,Jegidong,チェギドン,37.580178,127.046835,11060,동대문구,서울특별시 동대문구 왕산로 지하205(전농동),서울특별시 동대문구 전농동 620-69 청량리역(1호선),02-6110-1241
1,157,125,1,1호선,지하철1호선,2,제기동,제기동,新設洞,新设洞,Sinseoldong,シンソルトン,37.578103,127.034893,11060,동대문구,서울특별시 동대문구 왕산로 지하93(제기동),서울특별시 동대문구 제기동 65 제기동역(1호선),02-6110-1251


In [6]:
station_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 285 entries, 0 to 284
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   station.code       285 non-null    int64  
 1   station.fr_code    285 non-null    object 
 2   line.num           285 non-null    int64  
 3   line.name          285 non-null    object 
 4   line.name_sub      285 non-null    object 
 5   line.station_seq   285 non-null    int64  
 6   station.name_full  285 non-null    object 
 7   station.name       285 non-null    object 
 8   station.name_chc   285 non-null    object 
 9   station.name_chn   285 non-null    object 
 10  station.name_en    285 non-null    object 
 11  station.name_jp    285 non-null    object 
 12  geo.latitude       285 non-null    float64
 13  geo.longitude      285 non-null    float64
 14  geo.sigungu_code   285 non-null    int64  
 15  geo.sigungu_name   285 non-null    object 
 16  geo.addres_road    285 non

In [7]:
station_info.isna().sum()

station.code         0
station.fr_code      0
line.num             0
line.name            0
line.name_sub        0
line.station_seq     0
station.name_full    0
station.name         0
station.name_chc     0
station.name_chn     0
station.name_en      0
station.name_jp      0
geo.latitude         0
geo.longitude        0
geo.sigungu_code     0
geo.sigungu_name     0
geo.addres_road      0
geo.address_land     0
geo.phone            0
dtype: int64

In [8]:
# 각 역별 승하차 인원
station_sum = data.groupby('station_code').sum()

In [9]:
station_sum.head()

Unnamed: 0_level_0,timestamp,people_in,people_out
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
150,2021-01-01T05:00:00.000+09:002021-01-01T06:00:...,12697273,12109991
151,2021-01-01T05:00:00.000+09:002021-01-01T06:00:...,5997344,6030491
152,2021-01-01T05:00:00.000+09:002021-01-01T06:00:...,9638952,9284693
153,2021-01-01T05:00:00.000+09:002021-01-01T06:00:...,7704599,7090896
154,2021-01-01T05:00:00.000+09:002021-01-01T06:00:...,6651283,6609055


In [10]:
station_sum2 = station_sum.drop('timestamp', axis=1)
station_sum2

Unnamed: 0_level_0,people_in,people_out
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1
150,12697273,12109991
151,5997344,6030491
152,9638952,9284693
153,7704599,7090896
154,6651283,6609055
...,...,...
2824,3206211,3097722
2825,1395919,1495603
2826,1614590,1475010
2827,1444073,1147602


In [11]:
# 지하철 역 정보파일에서 필요한 컬럼만 추출하기
station_info = station_info[['station.code', 'line.name_sub', 'geo.latitude', 'geo.longitude']]
station_info

Unnamed: 0,station.code,line.name_sub,geo.latitude,geo.longitude
0,158,지하철1호선,37.580178,127.046835
1,157,지하철1호선,37.578103,127.034893
2,156,지하철1호선,37.575297,127.025087
3,159,지하철1호선,37.572627,127.016429
4,155,지하철1호선,37.571420,127.009745
...,...,...,...,...
280,2823,지하철8호선,37.451535,127.159816
281,2824,지하철8호선,37.445210,127.156866
282,2825,지하철8호선,37.440918,127.147564
283,2826,지하철8호선,37.437428,127.140722


In [12]:
station_info = station_info.set_index('station.code')
station_info

Unnamed: 0_level_0,line.name_sub,geo.latitude,geo.longitude
station.code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
158,지하철1호선,37.580178,127.046835
157,지하철1호선,37.578103,127.034893
156,지하철1호선,37.575297,127.025087
159,지하철1호선,37.572627,127.016429
155,지하철1호선,37.571420,127.009745
...,...,...,...
2823,지하철8호선,37.451535,127.159816
2824,지하철8호선,37.445210,127.156866
2825,지하철8호선,37.440918,127.147564
2826,지하철8호선,37.437428,127.140722


In [13]:
station_sum2

Unnamed: 0_level_0,people_in,people_out
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1
150,12697273,12109991
151,5997344,6030491
152,9638952,9284693
153,7704599,7090896
154,6651283,6609055
...,...,...
2824,3206211,3097722
2825,1395919,1495603
2826,1614590,1475010
2827,1444073,1147602


In [14]:
joined_data = station_sum2.join(station_info)
joined_data

Unnamed: 0_level_0,people_in,people_out,line.name_sub,geo.latitude,geo.longitude
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
150,12697273,12109991,지하철1호선,37.554648,126.972559
151,5997344,6030491,지하철1호선,37.564718,126.977108
152,9638952,9284693,지하철1호선,37.570161,126.982923
153,7704599,7090896,지하철1호선,37.571607,126.991806
154,6651283,6609055,지하철1호선,37.570926,127.001849
...,...,...,...,...,...
2824,3206211,3097722,지하철8호선,37.445210,127.156866
2825,1395919,1495603,지하철8호선,37.440918,127.147564
2826,1614590,1475010,지하철8호선,37.437428,127.140722
2827,1444073,1147602,지하철8호선,37.432130,127.129087


# 지도에 표시하기

In [15]:
# 승차용 서울 지도 만들기
seoul_in = folium.Map(location=[37.55,126.98], zoom_start=12)
seoul_in

In [16]:
# 히트맵 플러그인 모듈 
from folium.plugins import HeatMap

In [17]:
joined_data.columns

Index(['people_in', 'people_out', 'line.name_sub', 'geo.latitude',
       'geo.longitude'],
      dtype='object')

In [18]:
# 히트맵 플러그인을 서울 지도에 추가하기
HeatMap(data= joined_data[['geo.latitude', 'geo.longitude', 'people_in']]).add_to(seoul_in)
seoul_in

In [19]:
# 하차용 지도 만들고 하차 인원 히트맵 지도에 표시하기
# 하차용 서울 지도 만들기
seoul_out = folium.Map(location=[37.55,126.98], zoom_start=12)
# 히트맵 플러그인을 서울 지도에 추가하기
HeatMap(data= joined_data[['geo.latitude', 'geo.longitude', 'people_out']]).add_to(seoul_out)
seoul_out

# 시간을 추출해서 출퇴근 시간대 혼잡 역 히트맵 그리기

In [20]:
data.head()

Unnamed: 0,timestamp,station_code,people_in,people_out
0,2021-01-01T05:00:00.000+09:00,150,86,85
1,2021-01-01T06:00:00.000+09:00,150,111,355
2,2021-01-01T07:00:00.000+09:00,150,157,438
3,2021-01-01T08:00:00.000+09:00,150,306,592
4,2021-01-01T09:00:00.000+09:00,150,333,841


In [21]:
# timestamp를 시간 데이터로 변환하기 pd.to_datetiome
data['timestamp'] = pd.to_datetime(data['timestamp'])

In [22]:
from datetime import datetime

In [23]:
today = datetime.today()

In [24]:
str(today.day) + str(today.month) + str(today.year)

'30102023'

In [25]:
# dt.year, month, day, hour, minute, second

# 오전 9시 이전 데이터
morning_data = data[data['timestamp'].dt.hour < 9]
morning_data

Unnamed: 0,timestamp,station_code,people_in,people_out
0,2021-01-01 05:00:00+09:00,150,86,85
1,2021-01-01 06:00:00+09:00,150,111,355
2,2021-01-01 07:00:00+09:00,150,157,438
3,2021-01-01 08:00:00+09:00,150,306,592
19,2021-01-01 05:00:00+09:00,151,43,40
...,...,...,...,...
1941518,2021-12-31 08:00:00+09:00,2827,391,311
1941534,2021-12-31 05:00:00+09:00,2828,20,10
1941535,2021-12-31 06:00:00+09:00,2828,83,41
1941536,2021-12-31 07:00:00+09:00,2828,279,119


In [26]:
# 퇴근용 데이터 추출
evening_data = data[data['timestamp'].dt.hour >= 18]
evening_data

Unnamed: 0,timestamp,station_code,people_in,people_out
13,2021-01-01 18:00:00+09:00,150,658,437
14,2021-01-01 19:00:00+09:00,150,579,425
15,2021-01-01 20:00:00+09:00,150,479,354
16,2021-01-01 21:00:00+09:00,150,510,307
17,2021-01-01 22:00:00+09:00,150,445,134
...,...,...,...,...
1941548,2021-12-31 19:00:00+09:00,2828,74,263
1941549,2021-12-31 20:00:00+09:00,2828,73,145
1941550,2021-12-31 21:00:00+09:00,2828,95,209
1941551,2021-12-31 22:00:00+09:00,2828,54,138


In [27]:
morning_data.drop('timestamp', axis=1, inplace=True)
evening_data.drop('timestamp', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  morning_data.drop('timestamp', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evening_data.drop('timestamp', axis=1, inplace=True)


In [33]:
morning_station_sum = morning_data.groupby('station_code').sum()
evening_station_sum = evening_data.groupby('station_code').sum()

In [34]:
morning_station_sum.head(2)

Unnamed: 0_level_0,people_in,people_out
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1
150,1280861,3455240
151,215072,2744871


In [35]:
evening_station_sum.head(2)

Unnamed: 0_level_0,people_in,people_out
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1
150,4945020,2317244
151,3203211,425727


In [36]:
morning_joined_data = morning_station_sum.join(station_info)
evening_joined_data = evening_station_sum.join(station_info)

In [37]:
morning_joined_data

Unnamed: 0_level_0,people_in,people_out,line.name_sub,geo.latitude,geo.longitude
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
150,1280861,3455240,지하철1호선,37.554648,126.972559
151,215072,2744871,지하철1호선,37.564718,126.977108
152,221218,3643820,지하철1호선,37.570161,126.982923
153,218777,1222960,지하철1호선,37.571607,126.991806
154,185510,1776080,지하철1호선,37.570926,127.001849
...,...,...,...,...,...
2824,1132603,457685,지하철8호선,37.445210,127.156866
2825,414540,164925,지하철8호선,37.440918,127.147564
2826,544243,174746,지하철8호선,37.437428,127.140722
2827,282770,180392,지하철8호선,37.432130,127.129087


In [38]:
evening_joined_data

Unnamed: 0_level_0,people_in,people_out,line.name_sub,geo.latitude,geo.longitude
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
150,4945020,2317244,지하철1호선,37.554648,126.972559
151,3203211,425727,지하철1호선,37.564718,126.977108
152,5196284,805785,지하철1호선,37.570161,126.982923
153,3149067,730651,지하철1호선,37.571607,126.991806
154,2436684,525173,지하철1호선,37.570926,127.001849
...,...,...,...,...,...
2824,543714,1380576,지하철8호선,37.445210,127.156866
2825,244250,606686,지하철8호선,37.440918,127.147564
2826,235273,609286,지하철8호선,37.437428,127.140722
2827,329366,308355,지하철8호선,37.432130,127.129087


In [39]:
# 출근시간용 지도 만들고 하차 인원 히트맵 지도에 표시하기
# 출근시간용 서울 지도 만들기
seoul_morning_in = folium.Map(location=[37.55,126.98], zoom_start=12)
# 히트맵 플러그인을 서울 지도에 추가하기
HeatMap(data= morning_joined_data[['geo.latitude', 'geo.longitude', 'people_in']]).add_to(seoul_morning_in)
seoul_morning_in

In [40]:
# 출근시간용 지도 만들고 하차 인원 히트맵 지도에 표시하기
# 출근시간용 서울 지도 만들기
seoul_morning_out = folium.Map(location=[37.55,126.98], zoom_start=12)
# 히트맵 플러그인을 서울 지도에 추가하기
HeatMap(data= morning_joined_data[['geo.latitude', 'geo.longitude', 'people_in']]).add_to(seoul_morning_out)
seoul_morning_out

In [41]:
# 퇴근시간용 지도 만들고 하차 인원 히트맵 지도에 표시하기
# 퇴근시간용 서울 지도 만들기
seoul_evening_in = folium.Map(location=[37.55,126.98], zoom_start=12)
# 히트맵 플러그인을 서울 지도에 추가하기
HeatMap(data= evening_joined_data[['geo.latitude', 'geo.longitude', 'people_in']]).add_to(seoul_evening_in)
seoul_evening_in

In [42]:
# 퇴근시간용 지도 만들고 하차 인원 히트맵 지도에 표시하기
# 퇴근시간용 서울 지도 만들기
seoul_evening_out = folium.Map(location=[37.55,126.98], zoom_start=12)
# 히트맵 플러그인을 서울 지도에 추가하기
HeatMap(data= evening_joined_data[['geo.latitude', 'geo.longitude', 'people_in']]).add_to(seoul_evening_out)
seoul_evening_out

In [54]:
# 점심시간에 가장 많이 탑승/하차 하는곳 히트맵 11-13
# 점심시간(11시~ 13시)
lunch_data = data.loc[(data['timestamp'].dt.hour >= 11) & (data['timestamp'].dt.hour <= 13),[ 'station_code', 'people_in', 'people_out']]
lunch_data

Unnamed: 0,station_code,people_in,people_out
6,150,579,625
7,150,737,838
8,150,696,788
25,151,117,208
26,151,190,223
...,...,...,...
1941522,2827,232,194
1941523,2827,277,219
1941540,2828,240,112
1941541,2828,192,124


In [48]:
lunch_data_sum = lunch_data.groupby('station_code').sum()
lunch_data_sum

Unnamed: 0_level_0,people_in,people_out
station_code,Unnamed: 1_level_1,Unnamed: 2_level_1
150,9232174,5474977
151,5247637,1507544
152,8563224,2709954
153,6375501,3161351
154,5455558,2510372
...,...,...
2824,1337443,2231780
2825,659707,1090946
2826,676523,1055348
2827,847928,701708


In [49]:
lunch_joined_data = lunch_data_sum.join(station_info)

In [52]:
# 점심시간용 지도 만들고 승차인원 히트맵 지도에 표시하기
# 점심시간용 서울지도 만들기
seoul_lunch_in = folium.Map(location=[37.55, 126.98], zoom_start=12)
# 히트맵 플러그인을 서울 지도에 추가하기
HeatMap(data = lunch_joined_data[['geo.latitude','geo.longitude','people_in']]).add_to(seoul_lunch_in)
seoul_lunch_in

In [53]:
# 점심시간용 지도 만들고 하차인원 히트맵 지도에 표시하기
# 점심시간용 서울지도 만들기
seoul_lunch_out = folium.Map(location=[37.55, 126.98], zoom_start=12)
# 히트맵 플러그인을 서울 지도에 추가하기
HeatMap(data = lunch_joined_data[['geo.latitude','geo.longitude','people_out']]).add_to(seoul_lunch_out)
seoul_lunch_out