In [10]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Load the bike-station layer from GeoJSON; the trailing bare expression
# renders the resulting frame as the cell output.
bike_station_geojson = '../../data/bike_station/bike_station.geojson'
df_bike_station = gpd.read_file(bike_station_geojson)
df_bike_station

Unnamed: 0,station_id,address,address_detail,latitude,longitude,dist_to_subway,geometry
0,ST-997,서울특별시 양천구 목동중앙로 49,목동3단지 시내버스정류장,37.534390,126.869598,1011.885895,POINT (188404.676 448023.915)
1,ST-996,서울특별시 양천구 남부순환로88길5-16,양강중학교앞 교차로,37.524334,126.850548,504.224092,POINT (186719.138 446910.345)
2,ST-995,서울특별시 양천구 중앙로 153 공중화장실,,37.510597,126.857323,777.296433,POINT (187315.694 445384.759)
3,ST-994,서울특별시 양천구 목동서로161,SBS방송국,37.529163,126.872749,560.822569,POINT (188682.385 447443.391)
4,ST-993,서울특별시 양천구 신월로 342-1 구두수선대19,,37.521511,126.857384,401.021544,POINT (187322.943 446596.077)
...,...,...,...,...,...,...,...
3279,ST-1003,서울특별시 양천구 화곡로 59,신월동 이마트,37.539551,126.828300,1096.610445,POINT (184755.579 448602.649)
3280,ST-1002,서울특별시 양천구 목동동로 316-6,서울시 도로환경관리센터,37.529900,126.876541,611.711570,POINT (189017.675 447524.738)
3281,ST-1000,서울특별시 양천구 신정동 236,서부식자재마트 건너편,37.510380,126.866798,240.117803,POINT (188153.380 445359.433)
3282,ST-100,서울특별시 광진구 아차산로 262,더샵스타시티 C동 앞,37.536667,127.073593,510.952220,POINT (206434.705 448270.900)


## Import top 10 community datasets

In [11]:
community_dir = '../../data/bike_rental_history/communities/weekday/'

# Read the ten per-community CSVs (0.csv ... 9.csv) and tag every row with
# the index of the file it came from, so the source community survives the
# concatenation below.
list_df = [
    pd.read_csv(f'{community_dir}{community_id}.csv').assign(community=community_id)
    for community_id in range(10)
]

# Stack all communities into one frame with a fresh 0..n-1 index.
df_bike_rental = pd.concat(list_df, ignore_index=True)

In [12]:
# Display the concatenated rental records as the cell output for a visual check.
df_bike_rental

Unnamed: 0,rent_datetime_30min,origin_station_id,desti_station_id,count,community
0,2023-01-02 06:30:00,ST-1253,ST-1063,3,0
1,2023-01-02 06:30:00,ST-2419,ST-1064,4,0
2,2023-01-02 06:30:00,ST-2965,ST-2871,5,0
3,2023-01-02 07:00:00,ST-1253,ST-1718,5,0
4,2023-01-02 07:00:00,ST-2965,ST-1718,3,0
...,...,...,...,...,...
226526,2023-12-29 18:30:00,ST-1264,ST-882,3,9
226527,2023-12-29 20:00:00,ST-1548,ST-3105,3,9
226528,2023-12-29 21:30:00,ST-723,ST-723,3,9
226529,2023-12-29 22:30:00,ST-1546,ST-1546,3,9


## Merge bike rental data with bike station data

In [13]:
# Station attributes attached to each end of a trip. `geometry_columns` is
# reused by the aggregated merge in a later cell, so the name is kept.
geometry_columns = ['station_id','latitude','longitude','dist_to_subway']
station_attrs = df_bike_station.loc[:, geometry_columns]

# Attach the station attributes twice: once for the origin station and once
# for the destination station. Both passes are identical apart from the
# column prefix, so they share one loop instead of two copy-pasted chains.
df_bike_rental_merged = df_bike_rental
for endpoint in ('origin', 'desti'):
    df_bike_rental_merged = (
        df_bike_rental_merged
        .merge(
            station_attrs,
            left_on=f'{endpoint}_station_id',
            right_on='station_id',
            # Fail loudly if a station_id appears more than once in the
            # station table; otherwise the join would silently duplicate
            # rental rows.
            validate='many_to_one',
        )
        .rename(columns={
            'latitude': f'{endpoint}_lat',
            'longitude': f'{endpoint}_lng',
            'dist_to_subway': f'{endpoint}_dist_to_subway',
        })
        # The join key from the station table duplicates
        # `<endpoint>_station_id`, so it is dropped after each pass.
        .drop(columns='station_id')
    )

In [14]:
# Display the rental records with origin/destination station attributes attached.
df_bike_rental_merged

Unnamed: 0,rent_datetime_30min,origin_station_id,desti_station_id,count,community,origin_lat,origin_lng,origin_dist_to_subway,desti_lat,desti_lng,desti_dist_to_subway
0,2023-01-02 06:30:00,ST-1253,ST-1063,3,0,37.562679,126.820473,519.576002,37.560207,126.824142,115.422897
1,2023-01-06 07:30:00,ST-1253,ST-1063,3,0,37.562679,126.820473,519.576002,37.560207,126.824142,115.422897
2,2023-01-12 06:30:00,ST-1253,ST-1063,3,0,37.562679,126.820473,519.576002,37.560207,126.824142,115.422897
3,2023-01-12 07:30:00,ST-1253,ST-1063,3,0,37.562679,126.820473,519.576002,37.560207,126.824142,115.422897
4,2023-01-18 07:30:00,ST-1253,ST-1063,4,0,37.562679,126.820473,519.576002,37.560207,126.824142,115.422897
...,...,...,...,...,...,...,...,...,...,...,...
226526,2023-12-04 05:00:00,ST-635,ST-635,4,9,37.475319,126.898888,1126.148023,37.475319,126.898888,1126.148023
226527,2023-03-16 00:30:00,ST-1269,ST-1269,3,9,37.488564,126.928482,210.842304,37.488564,126.928482,210.842304
226528,2023-06-19 20:00:00,ST-1269,ST-1269,3,9,37.488564,126.928482,210.842304,37.488564,126.928482,210.842304
226529,2023-07-06 17:30:00,ST-1269,ST-1269,3,9,37.488564,126.928482,210.842304,37.488564,126.928482,210.842304


In [15]:
# Persist the merged rental table in two formats: CSV without the index
# column, and JSON as a list of one object per row.
merged_csv_path = '../../data/bike_rental_history_top10_community.csv'
merged_json_path = '../../data/bike_rental_history_top10_community.json'

df_bike_rental_merged.to_csv(merged_csv_path, index=False)
df_bike_rental_merged.to_json(merged_json_path, orient='records')

In [16]:
## merge with aggregated data

# Total rental count per (origin, destination, community) triple, summed
# over every 30-minute time slot.
df_bike_rental_agg = (
    df_bike_rental
    .groupby(['origin_station_id','desti_station_id','community'])
    .agg({'count':'sum'})
    .reset_index()
)

# `geometry_columns` is defined in the earlier merge cell.
station_attrs = df_bike_station.loc[:, geometry_columns]

# Attach station attributes for both ends of each aggregated flow. The two
# passes differ only in the column prefix, so they share one loop.
df_bike_rental_agg_merged = df_bike_rental_agg
for endpoint in ('origin', 'desti'):
    df_bike_rental_agg_merged = (
        df_bike_rental_agg_merged
        .merge(
            station_attrs,
            left_on=f'{endpoint}_station_id',
            right_on='station_id',
            # Fail loudly if a station_id appears more than once in the
            # station table; otherwise the join would silently duplicate
            # aggregated rows and inflate the summed counts downstream.
            validate='many_to_one',
        )
        .rename(columns={
            'latitude': f'{endpoint}_lat',
            'longitude': f'{endpoint}_lng',
            'dist_to_subway': f'{endpoint}_dist_to_subway',
        })
        # The join key from the station table duplicates
        # `<endpoint>_station_id`, so it is dropped after each pass.
        .drop(columns='station_id')
    )

In [17]:
# Display the aggregated origin/destination flows with station attributes attached.
df_bike_rental_agg_merged

Unnamed: 0,origin_station_id,desti_station_id,community,count,origin_lat,origin_lng,origin_dist_to_subway,desti_lat,desti_lng,desti_dist_to_subway
0,ST-1000,ST-1000,1,52,37.510380,126.866798,240.117803,37.510380,126.866798,240.117803
1,ST-1523,ST-1000,1,3,37.506050,126.860786,833.316694,37.510380,126.866798,240.117803
2,ST-1731,ST-1000,1,3,37.524776,126.875481,40.853816,37.510380,126.866798,240.117803
3,ST-1996,ST-1000,1,9,37.525715,126.864868,39.243172,37.510380,126.866798,240.117803
4,ST-2005,ST-1000,1,8,37.518543,126.853882,190.334705,37.510380,126.866798,240.117803
...,...,...,...,...,...,...,...,...,...,...
14988,ST-667,ST-667,8,4,37.497505,126.884377,650.156726,37.497505,126.884377,650.156726
14989,ST-873,ST-667,8,8,37.503117,126.880386,139.975763,37.497505,126.884377,650.156726
14990,ST-702,ST-885,9,3,37.484085,126.926888,250.364987,37.472294,126.933411,56.572056
14991,ST-721,ST-721,9,3,37.469055,126.958145,1002.383212,37.469055,126.958145,1002.383212


In [18]:
# Persist the aggregated flow table in two formats: CSV without the index
# column, and JSON as a list of one object per row.
agg_csv_path = '../../data/bike_rental_history_top10_community_agg.csv'
agg_json_path = '../../data/bike_rental_history_top10_community_agg.json'

df_bike_rental_agg_merged.to_csv(agg_csv_path, index=False)
df_bike_rental_agg_merged.to_json(agg_json_path, orient='records')