In [19]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

df_bike_station = gpd.read_file('../../data/bike_station/bike_station.geojson')
df_bike_station

Unnamed: 0,station_id,address,address_detail,latitude,longitude,dist_to_subway,geometry
0,ST-997,서울특별시 양천구 목동중앙로 49,목동3단지 시내버스정류장,37.534390,126.869598,1011.885895,POINT (188404.676 448023.915)
1,ST-996,서울특별시 양천구 남부순환로88길5-16,양강중학교앞 교차로,37.524334,126.850548,504.224092,POINT (186719.138 446910.345)
2,ST-995,서울특별시 양천구 중앙로 153 공중화장실,,37.510597,126.857323,777.296433,POINT (187315.694 445384.759)
3,ST-994,서울특별시 양천구 목동서로161,SBS방송국,37.529163,126.872749,560.822569,POINT (188682.385 447443.391)
4,ST-993,서울특별시 양천구 신월로 342-1 구두수선대19,,37.521511,126.857384,401.021544,POINT (187322.943 446596.077)
...,...,...,...,...,...,...,...
3279,ST-1003,서울특별시 양천구 화곡로 59,신월동 이마트,37.539551,126.828300,1096.610445,POINT (184755.579 448602.649)
3280,ST-1002,서울특별시 양천구 목동동로 316-6,서울시 도로환경관리센터,37.529900,126.876541,611.711570,POINT (189017.675 447524.738)
3281,ST-1000,서울특별시 양천구 신정동 236,서부식자재마트 건너편,37.510380,126.866798,240.117803,POINT (188153.380 445359.433)
3282,ST-100,서울특별시 광진구 아차산로 262,더샵스타시티 C동 앞,37.536667,127.073593,510.952220,POINT (206434.705 448270.900)


## Import top 20 community datasets

In [2]:
community_dir = '../../data/bike_rental_history/communities/weekday/'

list_df = []
for i in range(0,20):
    df = pd.read_csv(community_dir + str(i) + '.csv')
    df.loc[:,'community'] = i
    list_df.append(df)

df_bike_rental = pd.concat(list_df, ignore_index=True)

In [3]:
df_bike_rental

Unnamed: 0,birth_year,gender,user_type,origin_station_id,desti_station_id,rent_hour,return_hour,rent_minute_of_day,return_minute_of_day,community
0,1969,M,내국인,ST-3128,ST-2031,9,9,589,596,0
1,1991,M,내국인,ST-2040,ST-1727,23,0,1437,10,0
2,\N,F,내국인,ST-2943,ST-1250,11,11,692,696,0
3,1994,\N,내국인,ST-2031,ST-2056,23,23,1406,1416,0
4,1980,M,내국인,ST-2052,ST-1063,8,8,513,515,0
...,...,...,...,...,...,...,...,...,...,...
25891,2001,F,내국인,ST-2087,ST-2070,15,15,931,942,19
25892,2007,F,내국인,ST-1284,ST-1473,7,7,447,454,19
25893,1973,M,내국인,ST-914,ST-1284,19,19,1147,1154,19
25894,1901,\N,내국인,ST-1640,ST-1642,14,14,871,878,19


## Merge bike rental data with bike station data

In [4]:
geometry_columns = ['station_id','latitude','longitude','dist_to_subway']
df_bike_rental_merged = df_bike_rental.merge(df_bike_station.loc[:,geometry_columns],
                                             left_on='origin_station_id',
                                             right_on='station_id').rename(columns={'latitude':'origin_lat',
                                                                                    'longitude':'origin_lng',
                                                                                    'dist_to_subway':'origin_dist_to_subway'}).drop(columns='station_id')

df_bike_rental_merged = df_bike_rental_merged.merge(df_bike_station.loc[:,geometry_columns],
                                                    left_on='desti_station_id',
                                                    right_on='station_id').rename(columns={'latitude':'desti_lat',
                                                                                           'longitude':'desti_lng',
                                                                                           'dist_to_subway':'desti_dist_to_subway'}).drop(columns='station_id')

In [5]:
df_bike_rental_merged

Unnamed: 0,birth_year,gender,user_type,origin_station_id,desti_station_id,rent_hour,return_hour,rent_minute_of_day,return_minute_of_day,community,origin_lat,origin_lng,origin_dist_to_subway,desti_lat,desti_lng,desti_dist_to_subway
0,1969,M,내국인,ST-3128,ST-2031,9,9,589,596,0,37.563648,126.834541,624.891215,37.566925,126.827438,19.851351
1,1983,F,내국인,ST-3128,ST-2031,17,17,1056,1062,0,37.563648,126.834541,624.891215,37.566925,126.827438,19.851351
2,1987,F,내국인,ST-3128,ST-2031,16,16,965,970,0,37.563648,126.834541,624.891215,37.566925,126.827438,19.851351
3,1997,\N,내국인,ST-3128,ST-2031,11,11,690,696,0,37.563648,126.834541,624.891215,37.566925,126.827438,19.851351
4,1989,\N,내국인,ST-3128,ST-2031,17,17,1024,1027,0,37.563648,126.834541,624.891215,37.566925,126.827438,19.851351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26070,1974,M,내국인,ST-1640,ST-1476,19,19,1164,1169,19,37.665249,127.057892,39.278598,37.661140,127.058052,423.075758
26071,1972,M,내국인,ST-1640,ST-1476,16,16,987,991,19,37.665249,127.057892,39.278598,37.661140,127.058052,423.075758
26072,1979,F,내국인,ST-1640,ST-1476,15,15,924,935,19,37.665249,127.057892,39.278598,37.661140,127.058052,423.075758
26073,1992,F,내국인,ST-1640,ST-1476,12,12,745,774,19,37.665249,127.057892,39.278598,37.661140,127.058052,423.075758


In [21]:
df_bike_rental_merged.to_csv('../../data/bike_rental_history_weekday_community.csv', index=False)
df_bike_rental_merged.to_json('../../data/bike_rental_history_weekday_community.json', orient='records')
df_bike_rental_merged.loc[np.logical_and(df_bike_rental_merged.loc[:,'rent_hour']==8,
                                         df_bike_rental_merged.loc[:,'community'] < 3)].to_json('../../data/bike_rental_history_weekday_top3_peak.json', orient='records')

In [9]:
## merge with aggregated data

df_bike_rental_agg = df_bike_rental.groupby(['origin_station_id','desti_station_id','community']).size().reset_index().rename(columns={0:'count'})

df_bike_rental_agg_merged = df_bike_rental_agg.merge(df_bike_station.loc[:,geometry_columns],
                                             left_on='origin_station_id',
                                             right_on='station_id').rename(columns={'latitude':'origin_lat',
                                                                                    'longitude':'origin_lng',
                                                                                    'dist_to_subway':'origin_dist_to_subway'}).drop(columns='station_id')

df_bike_rental_agg_merged = df_bike_rental_agg_merged.merge(df_bike_station.loc[:,geometry_columns],
                                                            left_on='desti_station_id',
                                                            right_on='station_id').rename(columns={'latitude':'desti_lat',
                                                                                                    'longitude':'desti_lng',
                                                                                                    'dist_to_subway':'desti_dist_to_subway'}).drop(columns='station_id')

In [10]:
df_bike_rental_agg_merged

Unnamed: 0,origin_station_id,desti_station_id,community,count,origin_lat,origin_lng,origin_dist_to_subway,desti_lat,desti_lng,desti_dist_to_subway
0,ST-1000,ST-1000,4,5,37.510380,126.866798,240.117803,37.510380,126.866798,240.117803
1,ST-1996,ST-1000,4,2,37.525715,126.864868,39.243172,37.510380,126.866798,240.117803
2,ST-2005,ST-1000,4,1,37.518543,126.853882,190.334705,37.510380,126.866798,240.117803
3,ST-2011,ST-1000,4,3,37.517475,126.866325,565.261182,37.510380,126.866798,240.117803
4,ST-2012,ST-1000,4,2,37.520073,126.868301,728.718283,37.510380,126.866798,240.117803
...,...,...,...,...,...,...,...,...,...,...
7482,ST-66,ST-47,13,1,37.522079,126.930367,532.562489,37.528816,126.924530,593.610074
7483,ST-67,ST-47,13,3,37.522675,126.937790,654.031763,37.528816,126.924530,593.610074
7484,ST-70,ST-47,13,1,37.520088,126.932365,423.038363,37.528816,126.924530,593.610074
7485,ST-72,ST-47,13,2,37.517765,126.928413,54.501081,37.528816,126.924530,593.610074


In [11]:
df_bike_rental_agg_merged.to_csv('../../data/bike_rental_history_weekday_community_agg.csv', index=False)
df_bike_rental_agg_merged.to_json('../../data/bike_rental_history_weekday_community_agg.json', orient='records')
df_bike_rental_agg_merged.loc[df_bike_rental_agg_merged.loc[:,'community']<3].to_json('../../data/bike_rental_history_weekday_community_agg_top3.json', orient='records')

## merge with hourly aggregated sample data

In [15]:
## merge with aggregated data

df_bike_rental_agg = df_bike_rental.groupby(['origin_station_id','desti_station_id','community', 'rent_hour']).size().reset_index().rename(columns={0:'count'})

df_bike_rental_agg_merged = df_bike_rental_agg.merge(df_bike_station.loc[:,geometry_columns],
                                             left_on='origin_station_id',
                                             right_on='station_id').rename(columns={'latitude':'origin_lat',
                                                                                    'longitude':'origin_lng',
                                                                                    'dist_to_subway':'origin_dist_to_subway'}).drop(columns='station_id')

df_bike_rental_agg_merged = df_bike_rental_agg_merged.merge(df_bike_station.loc[:,geometry_columns],
                                                            left_on='desti_station_id',
                                                            right_on='station_id').rename(columns={'latitude':'desti_lat',
                                                                                                    'longitude':'desti_lng',
                                                                                                    'dist_to_subway':'desti_dist_to_subway'}).drop(columns='station_id')

In [20]:
df_bike_rental_agg_merged.loc[np.logical_and(df_bike_rental_agg_merged.loc[:,'community']<3,
                                             df_bike_rental_agg_merged.loc[:,'rent_hour']==8)].to_json('../../data/bike_rental_history_weekday_community_top3_agg_peak.json', orient='records')

## merge with aggregated sample data for visualization

In [10]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

df_bike_station = gpd.read_file('../../data/bike_station/bike_station.geojson')

df_bike_rental_agg_sample = pd.read_csv('../../data/bike_rental_history/processed_30min_aggregated/2023_processed_weekday_agg_sample.csv')

In [11]:
geometry_columns = ['station_id','latitude','longitude','dist_to_subway']
df_bike_rental_sample_merged = df_bike_rental_agg_sample.merge(df_bike_station.loc[:,geometry_columns],
                                                               left_on='origin_station_id',
                                                               right_on='station_id').rename(columns={'latitude':'origin_lat',
                                                                                                      'longitude':'origin_lng',
                                                                                                      'dist_to_subway':'origin_dist_to_subway'}).drop(columns='station_id')

df_bike_rental_agg_merged = df_bike_rental_sample_merged.merge(df_bike_station.loc[:,geometry_columns],
                                                               left_on='desti_station_id',
                                                               right_on='station_id').rename(columns={'latitude':'desti_lat',
                                                                                                      'longitude':'desti_lng',
                                                                                                       'dist_to_subway':'desti_dist_to_subway'}).drop(columns='station_id')

In [12]:
df_bike_rental_agg_merged.to_json('../../data/bike_rental_history_sample.json', orient='records')