In [1]:
import pandas as pd
import os
import re

In [2]:
def _read_total_cost(file_path):
    """
    Return the total annual cost stored in one result file, or None if absent.

    The file is scanned line by line for the first line that contains the
    '--- TOTAL ANNUAL COST ---' marker and has at least two comma-separated
    fields; the second field is parsed as a float.

    Raises:
        OSError: the file cannot be opened or read.
        ValueError: the file is not valid UTF-8, or the value is not a number.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if '--- TOTAL ANNUAL COST ---' in line:
                parts = line.strip().split(',')
                # A marker line without a value field is ignored; keep scanning.
                if len(parts) > 1:
                    return float(parts[1])
    return None


def process_folder(folder_path, cost_column_name):
    """
    Collect the total annual cost of every '<lat>_<lon>_best_cost.csv' file in a folder.

    Latitude and longitude are parsed from each matching file name; the cost
    is read from the file's '--- TOTAL ANNUAL COST ---' line. Files that
    cannot be read or parsed, or that contain no such line, are reported on
    stdout and skipped.

    Parameters:
        folder_path (str): Folder that holds the CSV files.
        cost_column_name (str): Name of the column that receives the cost values.

    Returns:
        pandas.DataFrame: One row per successfully parsed file with the columns
        'lat', 'lon' and `cost_column_name`, sorted by (lat, lon) so the result
        does not depend on the directory listing order. When the folder is
        missing or yields no data, an empty frame with those same three
        columns is returned (so `.empty` is True and the schema is stable).
    """
    columns = ['lat', 'lon', cost_column_name]
    file_pattern = re.compile(r'^(-?\d+\.?\d*)_(-?\d+\.?\d*)_best_cost\.csv$')

    if not os.path.isdir(folder_path):
        print(f"警告：文件夹 '{folder_path}' 不存在，将跳过。")
        return pd.DataFrame(columns=columns)

    print(f"正在从文件夹 '{folder_path}' 中读取文件...")

    all_data = []
    for filename in os.listdir(folder_path):
        match = file_pattern.match(filename)
        if not match:
            continue

        file_path = os.path.join(folder_path, filename)
        try:
            total_cost = _read_total_cost(file_path)
        except (OSError, ValueError) as e:
            # Best effort: one unreadable or malformed file must not abort the folder.
            # UnicodeDecodeError is a ValueError subclass, so bad encodings land here too.
            print(f"处理文件 '{filename}' 时发生错误: {e}")
            continue

        if total_cost is None:
            # Surface files that silently contributed nothing to the result.
            print(f"警告：文件 '{filename}' 中未找到总年度成本，已跳过。")
            continue

        all_data.append({
            'lat': float(match.group(1)),
            'lon': float(match.group(2)),
            cost_column_name: total_cost,  # stored under the caller-supplied column name
        })

    if not all_data:
        print(f"在 '{folder_path}' 中未找到匹配的数据。")
        return pd.DataFrame(columns=columns)

    # os.listdir returns entries in arbitrary order; sort numerically by coordinate
    # so repeated runs produce identical frames.
    all_data.sort(key=lambda row: (row['lat'], row['lon']))
    return pd.DataFrame(all_data)

def combine_costs_from_folders(folders_to_process):
    """
    Build one DataFrame holding the cost columns of several result folders.

    Each folder is passed to `process_folder` together with its column name;
    folders that produce an empty frame are dropped. The remaining frames are
    outer-merged one after another on ('lat', 'lon'), so every coordinate that
    appears in any folder is kept.

    Parameters:
        folders_to_process (dict): Maps a folder path to the name of the cost
            column that folder should contribute.

    Returns:
        pandas.DataFrame: The merged frame, or an empty DataFrame when no
        folder produced any data.
    """
    # Process folders in mapping order and keep only frames that carry rows.
    frames = [
        frame
        for frame in (
            process_folder(path, column)
            for path, column in folders_to_process.items()
        )
        if not frame.empty
    ]

    if len(frames) == 0:
        print("所有文件夹均未产出有效数据，返回一个空的DataFrame。")
        return pd.DataFrame()

    # The first frame seeds the result; every later frame is joined onto it.
    combined, *remaining = frames
    for frame in remaining:
        # Outer join keeps coordinates that are missing from either side.
        combined = combined.merge(frame, on=['lat', 'lon'], how='outer')

    return combined



In [3]:
# Map each scenario output folder to the column that will hold its total annual cost.
folders = {
    'output_0/': 'total_cost_0',
    'output_2020/': 'total_cost_2020',
    'output_2050/': 'total_cost_2050',
    'output_future_2030/': 'total_cost_future_2030',
    'output_future_2040/': 'total_cost_future_2040',
    'output_future_2050/': 'total_cost_future_2050',
}

# Gather the per-folder costs into a single frame keyed on (lat, lon).
df_cost = combine_costs_from_folders(folders)

# Load island_data_origin.csv and align its coordinate column names with the cost frame.
df_pop = pd.read_csv('island_data_origin.csv')
df_pop_renamed = df_pop.rename(columns={'Lat': 'lat', 'Long': 'lon'})

# Left join: keep every cost row and attach 'pop' and 'Country' where the
# coordinates match exactly; only the needed columns are taken from the right side.
df_merged = df_cost.merge(
    df_pop_renamed[['lat', 'lon', 'pop', 'Country']],
    how='left',
    on=['lat', 'lon'],
).reset_index(drop=True)
df_merged

正在从文件夹 'output_0/' 中读取文件...
正在从文件夹 'output_2020/' 中读取文件...
正在从文件夹 'output_2050/' 中读取文件...
正在从文件夹 'output_future_2030/' 中读取文件...
正在从文件夹 'output_future_2040/' 中读取文件...
正在从文件夹 'output_future_2050/' 中读取文件...


Unnamed: 0,lat,lon,total_cost_0,total_cost_2020,total_cost_2050,total_cost_future_2030,total_cost_future_2040,total_cost_future_2050,pop,Country
0,-55.246549,-68.960532,2.147850e+05,2.148243e+05,2.148036e+05,2.147350e+05,2.145823e+05,2.144068e+05,129.0,Chile
1,-55.084701,-67.644515,4.639495e+05,4.639495e+05,4.639495e+05,4.609817e+05,4.599571e+05,4.587735e+05,1682.0,Chile
2,-54.798325,-64.315773,1.748499e+05,2.452969e+05,2.455406e+05,2.442885e+05,2.435094e+05,2.425503e+05,240.0,Argentina
3,-54.054777,-68.677673,6.110317e+05,6.110317e+05,6.110318e+05,6.110318e+05,6.110318e+05,6.108037e+05,167448.0,Argentina; Chile
4,-53.964415,-70.595284,1.161452e+05,1.161452e+05,1.161452e+05,1.146415e+05,1.143287e+05,1.139982e+05,129.0,Chile
...,...,...,...,...,...,...,...,...,...,...
1894,64.990673,-18.594070,1.072303e+06,1.123303e+06,1.068146e+06,1.064988e+06,1.061758e+06,1.057394e+06,328476.0,Iceland
1895,65.026437,24.757215,8.206930e+05,8.206930e+05,8.204118e+05,8.144930e+05,8.123485e+05,8.102801e+05,929.0,Finland
1896,65.077007,35.699304,3.596547e+05,3.596550e+05,3.380603e+05,3.370000e+05,3.363712e+05,3.356125e+05,189.0,Russia
1897,65.548737,-37.140346,2.849165e+05,4.815937e+05,4.652056e+05,4.645807e+05,4.638702e+05,4.630687e+05,176.0,Greenland


In [4]:
# Persist the merged scenario table to output_scenario.csv; index=False leaves the row index out of the file.
df_merged.to_csv('output_scenario.csv', index=False)