In [1]:
import os

# Datasets under evaluation and the experiment configurations run on each one.
# Every (dataset, experiment) pair is expected to have produced exactly one CSV.
datasets = ["realworld_mobiact"]
experiment_names = [
    "Df", "Df_aug", "Syn", "Syn_all", "Df_Syn", "Df_Syn_all",
    "FS_Dpfs", "FS_Df_Dpfs", "FS_Syn", "FS_Df_Syn", "FS_Syn_Dpfs", "FS_Df_Syn_Dpfs",
    "FS_Df_plus_Dpfs", "FS_Df_plus_Syn",
    "Dp",
]
required_files = [
    f"{dataset}_{experiment}.csv"
    for dataset in datasets
    for experiment in experiment_names
]

# Folder the result CSVs are looked up in
results_dir = "results"

# Keep only the expected names that have no regular file behind them
missing_files = list(
    filter(
        lambda name: not os.path.isfile(os.path.join(results_dir, name)),
        required_files,
    )
)

# Report the outcome, one missing filename per line
if not missing_files:
    print("All required files are present.")
else:
    print("The following files are missing:")
    print("\n".join(missing_files))


All required files are present.


In [2]:
import os
import pandas as pd

def create_summary_csv(dataset, results_dir="results", experiments=None):
    """Print per-source mean accuracy and mean loss for each experiment of a dataset.

    For every experiment name, the file ``<results_dir>/<dataset>_<experiment>.csv``
    is read, its rows are grouped by the ``source`` column, and the ``accuracy``
    and ``loss`` columns are averaged per group and rounded to 4 decimals.
    Two tables are then printed (accuracy first, loss second), each with one row
    per source and one column per experiment that had a result file.

    Despite the name, nothing is written to disk and nothing is returned.

    Args:
        dataset: Dataset name used as the filename prefix.
        results_dir: Directory containing the per-experiment result CSVs.
        experiments: Iterable of experiment names to summarize. When ``None``,
            the notebook-level ``experiment_names`` list is used.

    Raises:
        KeyError: If a result file lacks the ``source``, ``accuracy`` or
            ``loss`` column.
    """
    if experiments is None:
        # Fall back to the list defined at notebook level (first cell).
        experiments = experiment_names

    # Per-experiment Series of per-source means, keyed by experiment name.
    acc_by_experiment = {}
    loss_by_experiment = {}

    for experiment in experiments:
        file_path = os.path.join(results_dir, f"{dataset}_{experiment}.csv")
        if not os.path.isfile(file_path):
            print(f"Warning: {file_path} does not exist and will be skipped.")
            continue

        results = pd.read_csv(file_path)
        # One groupby pass yields both per-source averages.
        per_source = results.groupby("source")[["accuracy", "loss"]].mean().round(4)
        acc_by_experiment[experiment] = per_source["accuracy"]
        loss_by_experiment[experiment] = per_source["loss"]

    if not acc_by_experiment:
        # pd.concat raises ValueError on an empty collection; report instead.
        print(f"No result files found for dataset '{dataset}' in '{results_dir}'; nothing to summarize.")
        return

    # Side-by-side tables: rows are sources, columns are experiments.
    summary_acc = pd.concat(
        list(acc_by_experiment.values()), axis=1, keys=list(acc_by_experiment.keys())
    )
    summary_loss = pd.concat(
        list(loss_by_experiment.values()), axis=1, keys=list(loss_by_experiment.keys())
    )

    # NOTE: saving the summaries to "<results_dir>/<dataset>_results.csv" is
    # currently disabled; the tables are only printed.
    print(summary_acc)
    print()
    print(summary_loss)

# Example run: print the accuracy/loss summary tables for the "realworld_mobiact" results
create_summary_csv(dataset="realworld_mobiact")


            Df  Df_aug     Syn  Syn_all  Df_Syn  Df_Syn_all  FS_Dpfs  \
source                                                                 
WAL     0.5141  0.7358  0.7911   0.8209  0.7942       0.828   0.8852   

        FS_Df_Dpfs  FS_Syn  FS_Df_Syn  FS_Syn_Dpfs  FS_Df_Syn_Dpfs  \
source                                                               
WAL         0.7147  0.8198     0.7703       0.8864          0.7835   

        FS_Df_plus_Dpfs  FS_Df_plus_Syn     Dp  
source                                          
WAL              0.9284          0.5263  0.904  

            Df  Df_aug     Syn  Syn_all  Df_Syn  Df_Syn_all  FS_Dpfs  \
source                                                                 
WAL     7.8065   2.299  1.1437   1.4932  2.1412       0.617   0.3384   

        FS_Df_Dpfs  FS_Syn  FS_Df_Syn  FS_Syn_Dpfs  FS_Df_Syn_Dpfs  \
source                                                               
WAL         1.5768  0.9659     2.2165       0.3727          1.0742 