In [4]:
import os
import json
import pandas as pd

In [53]:
alg_meta = {
"JMVFG" : dict(publisher="IEEE Transactions on Emerging Topics in Computational Intelligence", year=2023),
"MCFS" : dict(publisher="ACM SIGKDD Conference on Knowledge Discovery and Data Mining", year=2010),
"UFS2" : dict(publisher="IEEE Transactions on Big Data", year=2022),
"UDFS" : dict(publisher="IJCAI'11", year=2011),
"CNAFS" : dict(publisher="IEEE Transactions on Cybernetics", year=2020),
"SOCFS" : dict(publisher="CVPR", year=2015),
"EGCFS" : dict(publisher="IEEE transactions on neural networks and learning systems", year=2020),
"SLNMF" : dict(publisher="Expert Systems with Applications", year=2023),
"VCSDFS" : dict(publisher="Neural Networks", year=2023),
"FSDK" : dict(publisher="IEEE Transactions on Neural Networks and Learning Systems", year=2023),
"RAFG" : dict(publisher="Applied Intelligence", year=2024),
"RSOBC" : dict(publisher="Expert Systems with Applications", year=2025),
"RUSLP" : dict(publisher="Information Sciences", year=2022),
"NDFS" : dict(publisher="AAAI", year=2012),
"U2FS" : dict(publisher="PeerJ Comput Sci", year=2021),
"MGAGR" : dict(publisher="IEEE Transactions on Knowledge and Data Engineering", year=2021),
"SSDS" : dict(publisher="IEEE Transactions on Knowledge and Data Engineering", year=2024),
"FMIUFS" : dict(publisher="IEEE Transactions on Fuzzy Systems", year=2021),
"GRSSLSF" : dict(publisher="Transactions on Machine Learning Research", year=2024),}

In [54]:

# Top-level results folder; its sub-folders are scanned as datasets below
root_dir = "perf_results"


In [58]:

# Collect one record per JSON result file found under
# <root_dir>/<dataset>/<algorithm>/*.json
data = []

# os.listdir returns entries in arbitrary order, so every level is sorted to
# keep the resulting row order reproducible between runs and machines.
for dataset_name in sorted(os.listdir(root_dir)):
    dataset_path = os.path.join(root_dir, dataset_name)
    if not os.path.isdir(dataset_path):
        continue  # stray files next to the dataset folders are ignored

    for algorithm_name in sorted(os.listdir(dataset_path)):
        algorithm_path = os.path.join(dataset_path, algorithm_name)
        if not os.path.isdir(algorithm_path):
            continue

        # Look the metadata up once per algorithm; algorithms missing from
        # alg_meta fall back to the placeholders "" and -1 below.
        meta = alg_meta.get(algorithm_name, {})

        for file_name in sorted(os.listdir(algorithm_path)):
            if not file_name.endswith('.json'):
                continue
            file_path = os.path.join(algorithm_path, file_name)

            # Explicit UTF-8 so parsing does not depend on the platform's
            # locale encoding; the file is closed before the row is built.
            with open(file_path, 'r', encoding='utf-8') as f:
                json_data = json.load(f)

            # Metrics absent from the JSON object are stored as None.
            row = {
                'algorithm': algorithm_name,
                'dataset': dataset_name,
                'time': json_data.get('time'),
                'uniqueness': json_data.get('uniqueness'),
                'ent_ratio': json_data.get('ent_ratio'),
                'publisher': meta.get('publisher', ""),
                'year': meta.get('year', -1),
            }
            data.append(row)


In [65]:

# Build the results table with an explicit column list (same order as the keys
# of each collected row). Without it, an empty `data` list produces a frame
# with no columns and later lookups of 'algorithm' / 'dataset' raise KeyError.
df = pd.DataFrame(
    data,
    columns=['algorithm', 'dataset', 'time', 'uniqueness', 'ent_ratio', 'publisher', 'year'],
)


In [71]:
# Order the rows by algorithm name (ascending)
df = df.sort_values("algorithm")

In [72]:
# Number of distinct datasets each algorithm has results for
dataset_counts = df.groupby('algorithm')['dataset'].nunique()

# Names of the algorithms that were run on exactly two datasets
algorithms_with_both = dataset_counts.loc[dataset_counts.eq(2)].index

# Restrict the table to the rows belonging to those algorithms
filtered_df = df.loc[df['algorithm'].isin(algorithms_with_both)]


In [81]:
# Persist the filtered table (the row index is written as the first column)
filtered_df.to_csv(path_or_buf="filtered_df.csv")

In [None]:
# latex_code = filtered_df.to_latex(index=False, float_format="%.4f", escape=False, 
#                          caption="Your caption here", label="tab:your_label", 
#                          column_format="lccccc",  # adjust for column alignment
#                          bold_rows=False)
# with open("table_output.tex", "w", encoding="utf-8") as f:
#     f.write(latex_code)

In [86]:
# Per-dataset views of the full (unfiltered) table, ordered by ascending time
algs_warp = df.loc[df['dataset'].eq('warpPIE10P.mat')].sort_values('time')
algs_mush = df.loc[df['dataset'].eq('mushrooms.mat')].sort_values('time')

In [88]:
# Export the mushrooms.mat results (the row index is written as the first column)
algs_mush.to_csv(path_or_buf="mushroom.csv")

In [89]:
# Export the warpPIE10P.mat results (the row index is written as the first column)
algs_warp.to_csv(path_or_buf="warp.csv")