In [1]:
import os
import pandas as pd
import json


def parse_data(filename, stations_file='車站基本資料集.json'):
    """Load one ridership CSV and attach the station name to every row.

    Args:
        filename: Path of a CSV file that provides the columns ``trnOpDate``,
            ``staCode``, ``gateInComingCnt`` and ``gateOutGoingCnt``.
        stations_file: Path of the UTF-8 JSON file holding station records
            with ``stationCode`` and ``stationName`` fields. The default keeps
            the previous behaviour of reading ``車站基本資料集.json`` from the
            current working directory.

    Returns:
        A DataFrame with the columns ``乘車日`` (datetime), ``車站名稱``,
        ``進站人數`` and ``出站人數``.

    Raises:
        FileNotFoundError: If ``filename`` or ``stations_file`` does not exist.
    """
    ridership = pd.read_csv(filename).rename(columns={"trnOpDate": "乘車日",
                                                      "staCode": "車站代碼",
                                                      "gateInComingCnt": "進站人數",
                                                      "gateOutGoingCnt": "出站人數"})

    # The handle is only needed while decoding the JSON, so release it before
    # building the lookup table.
    with open(stations_file, encoding='utf-8') as file:
        station_records = json.load(file)

    # Keep only the two fields used below; the code is cast to an integer so
    # it can be matched against the station-code column read from the CSV.
    stations_info = pd.DataFrame(
        station_records, columns=['stationCode', 'stationName']
    ).astype({'stationCode': 'int32'})

    # pd.merge defaults to an inner join, so rows whose station code is absent
    # from the station file are dropped from the result.
    merged = pd.merge(ridership, stations_info,
                      left_on='車站代碼', right_on='stationCode')

    result = (merged
              .rename(columns={'stationName': '車站名稱'})
              .reindex(columns=['乘車日', '車站名稱', '進站人數', '出站人數']))

    # Convert via str so the value is parsed as date text.
    # NOTE(review): assumes compact dates such as 20190423 - confirm against the CSVs.
    result['乘車日'] = pd.to_datetime(result['乘車日'].astype(str))
    return result


def main():
    """Combine every ridership CSV in the data folder into one frame.

    Looks in the ``每日各站進出站人數`` folder under the current working
    directory, keeps the entries whose name contains ``每日各站進出站人數``,
    parses them in sorted name order with ``parse_data`` and concatenates
    the results.

    Returns:
        The concatenated DataFrame indexed by ``乘車日``.
    """
    data_folder = os.path.join(os.getcwd(), '每日各站進出站人數')
    # Sorting the names fixes the order in which the files are concatenated.
    matching_names = sorted(
        entry for entry in os.listdir(data_folder)
        if '每日各站進出站人數' in entry)
    frames = [parse_data(os.path.join(data_folder, entry))
              for entry in matching_names]
    return pd.concat(frames).set_index('乘車日')


# Run the full load only when this code executes as the top-level program
# (which is also the case inside a notebook kernel). The result is bound to a
# module-level name so the following cells can use it.
if __name__ == '__main__': 
    df_done1 = main()
    

In [2]:
# Summarise the combined frame: index range, column dtypes, non-null counts
# and memory usage.
df_done1.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 406761 entries, 2019-04-23 to 2023-12-31
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   車站名稱    406761 non-null  object
 1   進站人數    406761 non-null  int64 
 2   出站人數    406761 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 12.4+ MB


In [3]:
# Group the combined records by station name; len() of the groupby object
# reports how many groups there are.
gb1 = df_done1.groupby(by = '車站名稱')
len(gb1)

241

In [4]:
# Total the entry and exit counts within each station group, giving one row
# per station name, then display the result.
Sum = gb1.sum()
Sum

Unnamed: 0_level_0,進站人數,出站人數
車站名稱,Unnamed: 1_level_1,Unnamed: 2_level_1
七堵,9044563,9115329
三坑,2080517,2140894
三塊厝,786111,824381
三姓橋,1760760,1594509
三民,12535,18652
...,...,...
鼓山,469414,476151
龍井,596686,589722
龍泉,24965,30795
龍港,30146,38335


In [8]:
# Select the totals of five stations by index label, in the listed order.
Sum.loc[['臺北','桃園','臺中','臺南','高雄']]

Unnamed: 0_level_0,進站人數,出站人數
車站名稱,Unnamed: 1_level_1,Unnamed: 2_level_1
臺北,88421749,87261561
桃園,40453667,41594793
臺中,36872010,36827889
臺南,37454585,38168096
高雄,21784563,21879440
