In [2]:
import pandas as pd
import re

def get_failed_islands_from_file(filepath):
    """从日志文件中解析失败的岛屿ID"""
    pattern = re.compile(r"Island (\d+): ONE OR MORE TASKS FAILED")
    failed_ids = set()
    
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                match = pattern.search(line)
                if match:
                    failed_ids.add(int(match.group(1)))
    except FileNotFoundError:
        print(f"错误: 文件 '{filepath}' 未找到。")
        return pd.DataFrame(columns=['Failed_Island_ID']) # 返回一个空的DataFrame

    df = pd.DataFrame(list(failed_ids), columns=['Failed_Island_ID'])
    return df

df_failed = get_failed_islands_from_file('main_parallel_log.log')
df_failed 

Unnamed: 0,Failed_Island_ID
0,83200
1,74754
2,68612
3,74500
4,59911
...,...
69,63994
70,76155
71,59132
72,84350


In [3]:
df = pd.read_csv('../demand_get/filtered_island_179.csv')


In [4]:
# 筛选出失败的岛屿行
failed_islands_df = df[df['ID'].isin(df_failed['Failed_Island_ID'])]

# 保存为新的CSV文件
failed_islands_df.to_csv('failed_islands.csv', index=False)

print(f"失败岛屿数量: {len(failed_islands_df)}")
print(f"已保存到 failed_islands.csv")
failed_islands_df.head()

失败岛屿数量: 74
已保存到 failed_islands.csv


Unnamed: 0,ID,Long,Lat,Country,Island,pop,geometry,id,cluster,Region
33,12526,-81.484967,21.634773,Cuba,Cayo Largo,1680,POINT (-81.484967 21.634773),173,15,Unknown
37,13230,-79.477052,22.715426,Cuba,Cayo Fragoso,65,POINT (-79.477052 22.715426),182,16,Unknown
47,19103,-28.701332,38.578217,Portugal,Ilha do Faial,13589,POINT (-28.701332 38.578217),258,25,Europe
108,59132,107.005822,20.800673,Vietnam,Dao Cat Ba,26029,POINT (107.005822 20.800673),940,80,APAC
109,59758,112.781316,21.68574,China,Shangchuan Dao,14981,POINT (112.781316 21.68574),949,81,APAC
