In [1]:
import os
import json
import pandas as pd

METRICS_ORDER = ['HR@1', 'HR@5', 'HR@10', 'HR@20', 'HR@50','NDCG@5', 'NDCG@10', 
                 'NDCG@20', 'NDCG@50','MRR@5', 'MRR@10', 'MRR@20', 'MRR@50',]

def parse_test_result(test_result_str):
    
    metrics = {}
    for item in test_result_str.strip().split(','):
        key, value = item.split(':')
        metrics[key.strip()] = float(value.strip())
    return metrics

def collect_results(root_dir):
    
    data = []
    
    for runner_name in os.listdir(root_dir):
        runner_path = os.path.join(root_dir, runner_name)
        if not os.path.isdir(runner_path):
            continue
        print(runner_path)
        for model_name in os.listdir(runner_path):
            model_path = os.path.join(runner_path, model_name)
            if not os.path.isdir(model_path):
                continue
            print(model_path)
            for ex_id in os.listdir(model_path):
                ex_id_path = os.path.join(model_path, ex_id)
                if not os.path.isdir(ex_id_path):
                    continue
                print(ex_id_path)
                # 
                json_path = os.path.join(ex_id_path, 'result.json')
                if not os.path.exists(json_path):
                    continue
                print(json_path)
                # JSON
                try:
                    with open(json_path, 'r', encoding='utf-8') as f:
                        results = json.load(f)
                        test_result = parse_test_result(results['test_result'])
                        
                        # 
                        test_record = {
                            'total_time': results.get('total_time',''),
                            'best_val_epoch': results['best_val_epoch'],
                            'runner': runner_name,
                            'model': model_name,
                            'ex_id': ex_id,
                            **test_result
                        }
                        data.append(test_record)
                except Exception as e:
                    print(f"Error processing {json_path}: {str(e)}")
    
    return data

def save_to_excel(data, root_dir):
    """Excel"""
    if not data:
        print("No data found to save.")
        return

    output_path = os.path.join(root_dir, 'results.xlsx')
    df = pd.DataFrame(data)
    
    columns = [
        # 'runner',
        'model', 'ex_id','best_val_epoch','total_time'] + METRICS_ORDER
    df = df[columns].astype(str)

    df.to_excel(output_path, index=False, float_format="%.4f")
    print(f"Results saved to: {output_path}")


    

In [2]:
dataset = "PersonalWAB"
base_dir = f'output/{dataset}'

save_to_excel(collect_results(base_dir), base_dir)

output/PersonalWAB/TEM_SrcRunner_0-qwen2.5Emb-lr_scheduler
output/PersonalWAB/TEM_SrcRunner_0-qwen2.5Emb-lr_scheduler/checkpoints
output/PersonalWAB/TEM_SrcRunner_0-qwen2.5Emb-lr_scheduler/checkpoints/20250202-173622
output/PersonalWAB/SrcRunner
output/PersonalWAB/SrcRunner/TEM
output/PersonalWAB/SrcRunner/TEM/0-qwen2.5Emb-lr_scheduler
output/PersonalWAB/SrcRunner/TEM/0-qwen2.5Emb-lr_scheduler/result.json
output/PersonalWAB/SrcRunner/TEM/1-noLlmEmb-lr_scheduler
output/PersonalWAB/SrcRunner/TEM/1-noLlmEmb-lr_scheduler/result.json
output/PersonalWAB/SrcRunner/AEM
output/PersonalWAB/SrcRunner/AEM/0-qwen2.5Emb-lr_scheduler
output/PersonalWAB/SrcRunner/AEM/0-qwen2.5Emb-lr_scheduler/result.json
output/PersonalWAB/SrcRunner/AEM/1-noLlmEmb-lr_scheduler
output/PersonalWAB/SrcRunner/AEM/1-noLlmEmb-lr_scheduler/result.json
output/PersonalWAB/SrcRunner/HEM
output/PersonalWAB/SrcRunner/HEM/0-qwen2.5Emb-lr_scheduler
output/PersonalWAB/SrcRunner/HEM/0-qwen2.5Emb-lr_scheduler/result.json
output/Person