<a href="https://colab.research.google.com/github/HuiLinFDU/cruisefetch/blob/main/ForPythia_Group5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import re
import csv


def extract_core_data(log_file_path):
    """Collect the ``Core_0`` entries of a log file into a dictionary.

    Every line that starts with ``Core_0`` is split on whitespace: the first
    token is used as the key and the second token as the value. Any further
    tokens on the line are ignored. If the same key appears more than once,
    the last occurrence wins.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A dict mapping each key to:
        * a ``float`` when the value token is numeric,
        * ``0.0`` when the value token is the literal ``-nan``,
        * the raw string when the token cannot be parsed as a number,
        * ``None`` when the line carries a key but no value token.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    core_data = {}
    with open(log_file_path, 'r') as file:
        # Stream the file line by line instead of loading it all at once.
        for line in file:
            if not line.startswith('Core_0'):
                continue

            parts = line.split()
            key = parts[0]

            if len(parts) < 2:
                # Key without a value: record None explicitly. Passing None
                # to float() would raise TypeError, which the ValueError
                # handler below would not catch.
                core_data[key] = None
                continue

            value = parts[1]
            if value == '-nan':
                # Treat the "-nan" placeholder as zero.
                core_data[key] = 0.0
                continue

            try:
                core_data[key] = float(value)
            except ValueError:
                # Non-numeric value: keep the original string.
                core_data[key] = value
    return core_data


def save_multiple_logs_to_csv(log_files, csv_file_path):
    """Write the ``Core_0`` entries of several log files into one CSV file.

    Each log file becomes one CSV row. The header is ``LogFileName`` followed
    by the union of all keys found across the logs, sorted alphabetically.
    A key that is missing from a particular log is written as an empty cell.

    Args:
        log_files: Iterable of log file paths; each path is passed to
            ``extract_core_data`` and also stored in the ``LogFileName``
            column of its row.
        csv_file_path: Path of the CSV file to create (overwritten if it
            already exists).

    Raises:
        OSError: If a log file cannot be read or the CSV cannot be written.
    """
    rows = []
    stat_keys = set()

    # Parse every log and collect the union of keys for the header.
    for log_file in log_files:
        data = extract_core_data(log_file)
        # The log file name is stored alongside the parsed values.
        data['LogFileName'] = log_file
        rows.append(data)
        stat_keys.update(data)

    # Sorted keys give a stable header; LogFileName always comes first.
    has_name_column = 'LogFileName' in stat_keys
    stat_keys.discard('LogFileName')
    fieldnames = sorted(stat_keys)
    if has_name_column:
        fieldnames.insert(0, 'LogFileName')

    # Use the canonical codec name; the previous spelling 'utf - 8' only
    # worked because Python normalises codec names during lookup.
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        # restval fills in keys that a given log did not contain.
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(rows)


if __name__ == "__main__":
    # Destination CSV that receives one row per log file.
    csv_file_path = '/content/sample_data/log_statistics.csv'

    # Paths of all log files to process.
    log_files = [
        '/content/sample_data/605.mcf_s-994B_bingo.out',
        '/content/sample_data/605.mcf_s-994B_mlop.out',
        '/content/sample_data/605.mcf_s-994B_nopref.out',
        '/content/sample_data/605.mcf_s-994B_pythia.out',
        '/content/sample_data/605.mcf_s-994B_spp.out',
        '/content/sample_data/605.mcf_s-994B_pythia_MTPS150.out'
    ]

    save_multiple_logs_to_csv(
        log_files=log_files,
        csv_file_path=csv_file_path,
    )

File: /content/sample_data/logfiles/605.mcf_s-472B_mlop.out, Trace: None, Prefetcher: None
File: /content/sample_data/logfiles/605.mcf_s-472B_pythia.out, Trace: None, Prefetcher: None
File: /content/sample_data/logfiles/605.mcf_s-472B_nopref.out, Trace: None, Prefetcher: None
File: /content/sample_data/logfiles/605.mcf_s-472B_dspatch.out, Trace: None, Prefetcher: None
File: /content/sample_data/logfiles/605.mcf_s-472B_bingo.out, Trace: None, Prefetcher: None
File: /content/sample_data/logfiles/605.mcf_s-472B_spp.out, Trace: None, Prefetcher: None
