In [None]:
import os
import re
import numpy as np

# Print, for every method, one aligned text table holding mean±std of the
# metrics found in the tail of ./log/SD/<method>-42/<method>.log, followed by
# the mean±std of the logged total time.

# The patterns and the two header rows do not depend on the method, so they are
# built once instead of on every loop iteration.
metric_pattern = re.compile(r"(\d+|Avg)\s+(MAE|RMSE|MAPE)\s+([\d\.\s]+)")
total_time_pattern = re.compile(r"total time:\s+([\d\.]+)")

header = " "*22 + "3" + " " * 48 + "6" + " " * 46 + "12" + " " * 46 + "Avg" + " " * 27 + "Total Time"
sub_header = "    {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}".format(
    "MAE", "RMSE", "MAPE", "MAE", "RMSE", "MAPE", "MAE", "RMSE", "MAPE", "MAE", "RMSE", "MAPE"
)


def parse_log_file(file_path):
    """Collect metric values and total times from the tail of one log file.

    Only the last 22 lines are scanned (the last 15 when the global ``method``
    equals ``'pretrain_st_pems'``). For each line matching ``metric_pattern``
    whose row label is a key of the global ``metrics`` dict, the last number on
    the line is appended to ``metrics[label][metric]``. Each ``total time:``
    value is appended to the global ``total_times`` list.

    Args:
        file_path: Path of the log file to read.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the last token of a metric line is not a valid float.
    """
    with open(file_path, "r") as file:
        all_lines = file.readlines()

    # NOTE(review): none of the methods looped over below is named
    # 'pretrain_st_pems', so the 15-line window is currently never used.
    tail_length = 15 if method == 'pretrain_st_pems' else 22

    for line in all_lines[-tail_length:]:
        match = metric_pattern.search(line)
        if match:
            key, metric_type, values = match.groups()
            tokens = values.split()
            # The value group may capture whitespace only (e.g. "3 MAE  ");
            # such a line carries no number and is ignored instead of raising
            # IndexError.
            if tokens and key in metrics:
                metrics[key][metric_type].append(float(tokens[-1]))

        total_time_match = total_time_pattern.search(line)
        if total_time_match:
            total_times.append(float(total_time_match.group(1)))


def compute_statistics(metric_data):
    """Return ``(mean, std)`` of a sequence of numbers.

    The second value is the standard deviation as computed by ``np.std`` with
    its default arguments. For an empty sequence ``(nan, nan)`` is returned
    directly, which avoids NumPy's empty-slice RuntimeWarning.
    """
    metric_array = np.array(metric_data)
    if metric_array.size == 0:
        return float("nan"), float("nan")
    return np.mean(metric_array), np.std(metric_array)


# "online_st_an_sd" was previously misspelled "oneline_st_an_sd", so its log
# directory could never be found.
for method in ["online_st_an_sd", "trafficstream", "stkec", "eac"]:
    print(" " * 50 + "*" * 10 + method +"*" * 10)
    base_dir = f"./log/SD/{method}"

    # Directories to parse for this method.
    directories = [f"{base_dir}-42"]

    # Fresh accumulators for this method; parse_log_file fills them in place.
    metrics = {
        key: {"MAE": [], "RMSE": [], "MAPE": []}
        for key in ("3", "6", "12", "Avg")
    }
    total_times = []

    for directory in directories:
        log_file_path = os.path.join(directory, f"{method}.log")
        if os.path.exists(log_file_path):
            parse_log_file(log_file_path)
        else:
            # Say so explicitly; otherwise the row silently shows only "nan".
            print(f"[warning] log file not found: {log_file_path}")

    formatted_results = []
    for key in metrics:
        for metric_type in ["MAE", "RMSE", "MAPE"]:
            mean, std = compute_statistics(metrics[key][metric_type])
            formatted_results.append(f"{mean:.2f}±{std:.2f}\t")

    time_mean, time_std = compute_statistics(total_times)

    values_row = "{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}  {:.2f}±{:.2f}".format(*formatted_results, time_mean, time_std)

    print(header)
    print(sub_header)
    print(values_row)
    print('\n')

In [17]:
import os
import re
import numpy as np

# Print, for every method, one aligned text table holding the mean of the
# metrics found in the tail of ./log/SD/<method>-42/<method>.log, followed by
# the mean±std of the logged total time.

# Method-independent pieces are prepared once, outside the loop.
metric_pattern = re.compile(r"(\d+|Avg)\s+(MAE|RMSE|MAPE)\s+([\d\.\s]+)")
total_time_pattern = re.compile(r"total time:\s+([\d\.]+)")

header = " "*22 + "3" + " " * 48 + "6" + " " * 46 + "12" + " " * 46 + "Avg" + " " * 27 + "Total Time"
sub_header = "    {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}\t     {:<10}".format(
    "MAE", "RMSE", "MAPE", "MAE", "RMSE", "MAPE", "MAE", "RMSE", "MAPE", "MAE", "RMSE", "MAPE"
)


def parse_log_file(file_path):
    """Read the tail of a log file into the global ``metrics``/``total_times``.

    The last 22 lines are inspected (15 if the global ``method`` is
    ``'pretrain_st_pems'``). A line matching ``metric_pattern`` contributes the
    last number it lists to ``metrics[label][metric]``, provided ``label`` is
    one of the keys of ``metrics``. A line matching ``total_time_pattern``
    contributes its value to ``total_times``.

    Args:
        file_path: Path of the log file to read.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the last token of a metric line is not a valid float.
    """
    with open(file_path, "r") as file:
        content = file.readlines()

    # NOTE(review): no method in the list below is called 'pretrain_st_pems',
    # so this special case never triggers here.
    window = 15 if method == 'pretrain_st_pems' else 22

    for line in content[-window:]:
        metric_match = metric_pattern.search(line)
        if metric_match is not None:
            key, metric_type, values = metric_match.groups()
            numbers = values.split()
            # Guard against a capture made of whitespace only, which used to
            # raise IndexError when taking the last element.
            if numbers and key in metrics:
                metrics[key][metric_type].append(float(numbers[-1]))

        time_match = total_time_pattern.search(line)
        if time_match is not None:
            total_times.append(float(time_match.group(1)))


def compute_statistics(metric_data):
    """Return ``(mean, std)`` of ``metric_data``.

    ``std`` is ``np.std`` with default arguments. An empty input yields
    ``(nan, nan)`` without triggering NumPy's empty-slice RuntimeWarning.
    """
    metric_array = np.array(metric_data)
    if metric_array.size == 0:
        return float("nan"), float("nan")
    return np.mean(metric_array), np.std(metric_array)


for method in ["retrain_st_sd","pretrain_st_sd","online_st_nn_sd", "online_st_an_sd", "trafficstream", "stkec", "eac"]:
    print(" " * 50 + "*" * 10 + method +"*" * 10)
    base_dir = f"./log/SD/{method}"

    # Directories to parse for this method.
    directories = [f"{base_dir}-42"]

    # Per-method accumulators, filled in place by parse_log_file.
    metrics = {
        "3": {"MAE": [], "RMSE": [], "MAPE": []},
        "6": {"MAE": [], "RMSE": [], "MAPE": []},
        "12": {"MAE": [], "RMSE": [], "MAPE": []},
        "Avg": {"MAE": [], "RMSE": [], "MAPE": []}
    }
    total_times = []

    for directory in directories:
        log_file_path = os.path.join(directory, f"{method}.log")
        if os.path.exists(log_file_path):
            parse_log_file(log_file_path)
        else:
            # Report the gap instead of silently printing a row of "nan".
            print(f"[warning] log file not found: {log_file_path}")

    # This table shows means only; the std of each metric is not printed.
    formatted_results = []
    for key in metrics:
        for metric_type in ["MAE", "RMSE", "MAPE"]:
            mean, _ = compute_statistics(metrics[key][metric_type])
            formatted_results.append(f"{mean:.2f}\t")

    time_mean, time_std = compute_statistics(total_times)

    values_row = "{:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}  {:.2f}±{:.2f}".format(*formatted_results, time_mean, time_std)

    print(header)
    print(sub_header)
    print(values_row)
    print('\n')

                                                  **********retrain_st_sd**********
                      3                                                6                                              12                                              Avg                           Total Time
    MAE       	     RMSE      	     MAPE      	     MAE       	     RMSE      	     MAPE      	     MAE       	     RMSE      	     MAPE      	     MAE       	     RMSE      	     MAPE      
26.49	     36.81	     22.81	     28.24	     39.94	     24.51	     31.68	     46.07	     28.23	     34.20	     40.37	     24.88	      136.58±0.00


                                                  **********pretrain_st_sd**********
                      3                                                6                                              12                                              Avg                           Total Time
    MAE       	     RMSE      	     MAPE      	     MAE       	     RMSE      	

In [10]:
import os
import re
import numpy as np

# Maps each method name to a list of 13 (mean, std) tuples: the 12 metric
# cells in row order 3/6/12/Avg x MAE/RMSE/MAPE, then the total time.
all_results = {}

# The patterns are identical for every method, so compile them once.
metric_pattern = re.compile(r"(\d+|Avg)\s+(MAE|RMSE|MAPE)\s+([\d\.\s]+)")
total_time_pattern = re.compile(r"total time:\s+([\d\.]+)")


def parse_log_file(file_path):
    """Fill the global ``metrics``/``total_times`` from the tail of a log file.

    The last 22 lines are scanned (15 if the global ``method`` is
    ``'pretrain_st_pems'``). For a line matching ``metric_pattern`` whose row
    label is a key of ``metrics``, the last listed number is appended to
    ``metrics[label][metric]``. Every ``total time:`` value found is appended
    to ``total_times``.

    Args:
        file_path: Path of the log file to read.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the last token of a metric line is not a valid float.
    """
    with open(file_path, "r") as file:
        all_lines = file.readlines()

    # NOTE(review): 'pretrain_st_pems' is not among the methods collected
    # below, so the 15-line window is never selected in this cell.
    lines = all_lines[-22:] if method != 'pretrain_st_pems' else all_lines[-15:]

    for line in lines:
        match = metric_pattern.search(line)
        if match:
            key, metric_type, values = match.groups()
            tokens = values.split()
            # A whitespace-only capture has no number to take; skip it rather
            # than fail with IndexError.
            if tokens and key in metrics:
                metrics[key][metric_type].append(float(tokens[-1]))

        total_time_match = total_time_pattern.search(line)
        if total_time_match:
            total_times.append(float(total_time_match.group(1)))


def compute_statistics(metric_data):
    """Return ``(mean, std)`` of ``metric_data`` (``np.std`` defaults).

    An empty input gives ``(nan, nan)`` without NumPy's empty-slice
    RuntimeWarning.
    """
    metric_array = np.array(metric_data)
    if metric_array.size == 0:
        return float("nan"), float("nan")
    return np.mean(metric_array), np.std(metric_array)


# First collect the data of every method.
for method in ["retrain_st_sd","pretrain_st_sd","online_st_nn_sd", "online_st_an_sd", "trafficstream", "stkec", "eac"]:
    base_dir = f"./log/SD/{method}"
    directories = [f"{base_dir}-42"]

    # Per-method accumulators, filled in place by parse_log_file.
    metrics = {
        "3": {"MAE": [], "RMSE": [], "MAPE": []},
        "6": {"MAE": [], "RMSE": [], "MAPE": []},
        "12": {"MAE": [], "RMSE": [], "MAPE": []},
        "Avg": {"MAE": [], "RMSE": [], "MAPE": []}
    }
    total_times = []

    for directory in directories:
        log_file_path = os.path.join(directory, f"{method}.log")
        if os.path.exists(log_file_path):
            parse_log_file(log_file_path)
        else:
            # Emitted as a LaTeX comment so it stays harmless if the cell
            # output is pasted into a .tex file together with the table.
            print(f"% warning: log file not found: {log_file_path}")

    # Store the results of this method.
    method_results = [
        compute_statistics(metrics[key][metric_type])
        for key in ["3", "6", "12", "Avg"]
        for metric_type in ["MAE", "RMSE", "MAPE"]
    ]

    time_mean, time_std = compute_statistics(total_times)
    method_results.append((time_mean, time_std))

    all_results[method] = method_results

def build_latex_table(results_by_method):
    """Render the collected results as one LaTeX ``table*`` environment.

    Args:
        results_by_method: Mapping of method name to a list of ``(mean, std)``
            tuples. The last tuple (total time) is left out of the table; only
            the means of the remaining entries are printed, with two decimals.

    Returns:
        The LaTeX source as a single newline-joined string. The emitted markup
        uses ``\\toprule``/``\\midrule``/``\\cmidrule``/``\\bottomrule`` and
        ``\\multirow``, so the document must provide those commands.
    """
    # Raw strings keep every backslash literal; in particular "\#" is no longer
    # an invalid escape sequence inside a normal string literal.
    lines = [
        r"\begin{table*}[htbp]",
        r"\centering",
        r"\small",  # smaller font for a more compact table
        r"\setlength{\tabcolsep}{4pt}",  # reduce the column spacing
        r"\begin{tabular}{l|ccc|ccc|ccc|ccc}",
        r"\toprule",
        r"\multirow{2}{*}{Method} & \multicolumn{3}{c|}{3\#Step} & \multicolumn{3}{c|}{6\#Step} & \multicolumn{3}{c|}{12\#Step} & \multicolumn{3}{c}{Avg} \\",
        r"\cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(lr){11-13}",
        r"& MAE & RMSE & MAPE & MAE & RMSE & MAPE & MAE & RMSE & MAPE & MAE & RMSE & MAPE \\",
        r"\midrule",
    ]

    # One data row per method, with a \midrule between consecutive rows only.
    # The last row is closed by \bottomrule alone, which avoids the doubled
    # rule a trailing \midrule used to produce.
    data_rows = []
    for method, results in results_by_method.items():
        row_data = [f"{mean:.2f}" for mean, _ in results[:-1]]  # drop the trailing time entry
        label = method.replace('_', '-')
        cells = ' & '.join(row_data)
        data_rows.append(f"{label} & {cells} \\\\")
    for index, row in enumerate(data_rows):
        if index:
            lines.append(r"\midrule")
        lines.append(row)

    lines.extend([
        r"\bottomrule",
        r"\end{tabular}",
        r"\caption{Experimental Results Comparison}",
        r"\label{tab:comparison}",
        r"\end{table*}",
    ])
    return "\n".join(lines)


# Generate one large LaTeX table covering every collected method.
print(build_latex_table(all_results))

\begin{table*}[htbp]
\centering
\small
\setlength{\tabcolsep}{4pt}
\begin{tabular}{l|ccc|ccc|ccc|ccc}
\toprule
\multirow{2}{*}{Method} & \multicolumn{3}{c|}{3h} & \multicolumn{3}{c|}{6h} & \multicolumn{3}{c|}{12h} & \multicolumn{3}{c}{Avg} \\
\cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(lr){11-13}
& MAE & RMSE & MAPE & MAE & RMSE & MAPE & MAE & RMSE & MAPE & MAE & RMSE & MAPE \\
\midrule
retrain-st-sd & 26.49 & 36.81 & 22.81 & 28.24 & 39.94 & 24.51 & 31.68 & 46.07 & 28.23 & 34.20 & 40.37 & 24.88 \\
\midrule
pretrain-st-sd & 31.82 & 36.90 & 25.49 & 33.31 & 39.71 & 26.65 & 36.44 & 46.27 & 29.78 & 33.59 & 40.39 & 27.05 \\
\midrule
online-st-nn-sd & 28.97 & 39.33 & 27.09 & 31.15 & 43.04 & 28.80 & 42.55 & 50.16 & 31.99 & 40.14 & 43.53 & 29.01 \\
\midrule
online-st-an-sd & 28.97 & 39.33 & 27.09 & 31.15 & 43.04 & 28.80 & 42.55 & 50.16 & 31.99 & 40.14 & 43.53 & 29.01 \\
\midrule
trafficstream & 27.27 & 37.30 & 24.76 & 28.35 & 39.62 & 25.40 & 31.21 & 44.91 & 27.89 & 28.67