# Combine data


In [1]:
import pandas as pd
import os

### Combine monthly data for each year

In [2]:
years = [108, 109, 110, 111, 112, 113]

for year in years:
    # Each year's monthly A2 files sit in raw_data/<year>/ and carry a
    # numeric suffix 1..12; the names are constructed, not discovered by
    # scanning the directory, so a missing month raises on read.
    folder_path = f"raw_data/{year}"
    csv_files = [f"{year}年度A2交通事故資料_{i}.csv" for i in range(1, 13)]

    # Load the twelve files in suffix order.
    df_list = [
        pd.read_csv(os.path.join(folder_path, file), encoding="utf-8", low_memory=False)
        for file in csv_files
    ]

    # Stack them into one frame for the year with a fresh 0..n-1 index.
    df_combined = pd.concat(df_list, ignore_index=True)

    # Write the yearly file alongside the monthly ones (utf-8-sig adds a BOM).
    output_filename = f"raw_data/{year}/{year}年度A2交通事故資料.csv"
    df_combined.to_csv(output_filename, index=False, encoding="utf-8-sig")

    print(f"Combined CSV for {year} saved to: {output_filename}")

Combined CSV for 108 saved to: raw_data/108/108年度A2交通事故資料.csv
Combined CSV for 109 saved to: raw_data/109/109年度A2交通事故資料.csv
Combined CSV for 110 saved to: raw_data/110/110年度A2交通事故資料.csv
Combined CSV for 111 saved to: raw_data/111/111年度A2交通事故資料.csv
Combined CSV for 112 saved to: raw_data/112/112年度A2交通事故資料.csv
Combined CSV for 113 saved to: raw_data/113/113年度A2交通事故資料.csv


### Combine data from different years

In [3]:
years = [108, 109, 110, 111, 112, 113]

# Build one multi-year CSV per accident category: i = 1 -> A1, i = 2 -> A2.
for i in [1, 2]:
    # Derive the yearly paths from `years` (previously a separate hard-coded
    # range(108, 114)), so the list above is the single place to edit when
    # a year is added or removed.
    # NOTE(review): only the A2 yearly files are produced by the visible
    # cells; the A1 yearly files are presumably already present — confirm.
    csv_files = [f"raw_data/{year}/{year}年度A{i}交通事故資料.csv" for year in years]

    # Read the yearly files in the order given by `years`.
    df_list = [pd.read_csv(file, encoding="utf-8", low_memory=False) for file in csv_files]

    # Concatenate all years with a fresh 0..n-1 index.
    df_combined = pd.concat(df_list, ignore_index=True)

    # Write the combined category file (utf-8-sig adds a BOM).
    output_filename = f"raw_data/A{i}_raw.csv"
    df_combined.to_csv(output_filename, index=False, encoding="utf-8-sig")

    print(f"Combined CSV for A{i} saved to: {output_filename}")

Combined CSV for A1 saved to: raw_data/A1_raw.csv
Combined CSV for A2 saved to: raw_data/A2_raw.csv
