In [2]:
import pandas as pd
import glob
import os
import shutil

In [3]:
def remove_id_from_header(folder_path):
    """Clean every CSV in a folder in place and drop the leading ``ID`` header field.

    For each file directly inside ``folder_path`` whose name ends with ``.csv``:

    1. The file is loaded with pandas, every cell equal to ``"Time limit"``
       is replaced by an empty string, and the frame is written back without
       the pandas index.
    2. If the header line starts with ``ID,``, that prefix is removed. The
       data rows are left untouched, so afterwards the header has one field
       fewer than the rows.

    Only a *leading* ``ID,`` is stripped. A plain substring replacement would
    also corrupt unrelated columns whose name merely ends in ``ID`` (for
    example ``PatientID,Result`` would become ``PatientResult``).

    NOTE(review): running this twice on the same folder is presumably not
    safe -- once the header is one field short, pandas is documented to use
    the first column as the index, and step 1 writes with ``index=False``.
    Confirm before re-running on already processed files.

    Args:
        folder_path: Directory whose ``.csv`` files are rewritten in place.

    Returns:
        None.
    """
    id_prefix = 'ID,'

    # Iterate over all files in the specified folder
    for filename in os.listdir(folder_path):
        if not filename.endswith('.csv'):
            continue

        file_path = os.path.join(folder_path, filename)

        # If there was no result for a certain semantics, change value to empty
        df = pd.read_csv(file_path, na_filter=True, skip_blank_lines=False)
        df = df.replace("Time limit", "")
        df.to_csv(file_path, index=False)

        # Read the lines of the CSV file
        with open(file_path, 'r') as file:
            lines = file.readlines()

        # Drop the ID field only when it is the first header field, so that
        # column names that merely contain "ID," are left intact.
        if lines and lines[0].startswith(id_prefix):
            lines[0] = lines[0][len(id_prefix):]

        # Write the (possibly modified) lines back to the file
        with open(file_path, 'w') as file:
            file.writelines(lines)

In [4]:

def add_column_to_csvs(source_csv, target_folder, column_name, dataset):
    """Append a column from one CSV to every matching CSV in a folder as ``GroundTruth``.

    The column ``column_name`` is read from ``source_csv`` and written, by
    position, into a ``GroundTruth`` column of each ``*.csv`` file in
    ``target_folder`` whose *file name* contains ``dataset``. The files are
    overwritten in place without the pandas index.

    The dataset name is matched against the file name only, not the full
    path; otherwise a dataset name that happens to appear in a directory name
    would select every file in the folder.

    Side effect worth knowing: when a target file's header has one field
    fewer than its data rows, pandas uses the first column as the index, and
    because the file is saved with ``index=False`` that column is dropped.

    Args:
        source_csv: Path of the CSV that holds the column to copy.
        target_folder: Folder whose ``*.csv`` files are updated.
        column_name: Name of the column in ``source_csv`` to copy.
        dataset: Substring that a target file name must contain to be updated.

    Returns:
        None. A message is printed (and nothing is modified) when
        ``column_name`` is missing from ``source_csv``. Errors raised while
        processing an individual target file (e.g. a row-count mismatch) are
        printed and the remaining files are still processed.
    """
    # Read the specified column from the source CSV file
    source_df = pd.read_csv(source_csv)

    # Check if the specified column exists in the source DataFrame
    if column_name not in source_df.columns:
        print(f"Column '{column_name}' not found in '{source_csv}'.")
        return

    # Extract the specified column
    column_data = source_df[column_name]

    # Find all CSV files in the target folder
    target_files = glob.glob(os.path.join(target_folder, '*.csv'))

    # Iterate over each target CSV file and append the column
    for target_file in target_files:
        # Match on the file name only so directory names cannot cause
        # files of other datasets to be selected.
        if dataset not in os.path.basename(target_file):
            continue

        try:
            # Read the target CSV file.
            # Since we are not reading with an explicit index, an ID column
            # without a header field becomes the index and disappears on save.
            target_df = pd.read_csv(target_file, na_filter=True, skip_blank_lines=False)

            # Assign by position (.values) rather than by index label.
            target_df["GroundTruth"] = column_data.values

            # Save the updated DataFrame back to the CSV file
            target_df.to_csv(target_file, index=False)
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"An error occurred while processing '{target_file}': {e}")


In [29]:
# Post-process the framework result CSVs:
#   1. clean every CSV in ./framework_data and drop the leading ID header field,
#   2. append the ground-truth label column of each selected dataset,
#   3. move each CSV into <Dataset><suffix>/results_framework/<budget>_budget/.

# Remove ID from header
folder_path = './framework_data'  # Change this to your folder path
remove_id_from_header(folder_path)

#datasets = ["cars", "cirrhosis", "genetic", "myocardial"]
# datasets = ["personality"]
datasets = ["cirrhosis"]

target_folder = './framework_data/'  # Replace with your target folder path

# Per-dataset settings: the CSV holding the ground truth, the label column in
# that CSV, and the suffix of the dataset's output folder. Keeping the suffix
# per dataset (instead of one shared variable overwritten by whichever dataset
# was handled last) lets "limited" and regular datasets be processed together.
dataset_config = {
    "cars": {"source_csv": './Cars/cars_numeric.csv', "column_name": 'class', "limited": ""},
    "cirrhosis": {"source_csv": './Cirrhosis/cirrhosis_numeric.csv', "column_name": 'Stage', "limited": ""},
    "myocardial": {"source_csv": './Myocardial_limited/myocardial_numeric.csv', "column_name": 'LET_IS', "limited": "_limited"},
    "personality": {"source_csv": './Personality_limited/16P_numerical2.csv', "column_name": 'Personality', "limited": "_limited"},
    "soccer": {"source_csv": './Soccer_limited/soccer_numeric.csv', "column_name": 'RESULT', "limited": "_limited"},
}

# Append the ground truth for every selected dataset that has settings.
# Names in `datasets` without an entry (e.g. "genetic") are ignored here.
for dataset_name, config in dataset_config.items():
    if dataset_name in datasets:
        add_column_to_csvs(config["source_csv"], target_folder, config["column_name"], dataset_name)

# Move files
# Find all CSV files in the target folder
source_folder = ".\\framework_data\\"
target_files = glob.glob(os.path.join("./framework_data/", '*.csv'))

# File-name ending -> budget sub-folder
endings_folder = {"25.csv":"25_budget", "50.csv":"50_budget", "90.csv":"90_budget", "100.csv":"100_budget"}

# Iterate over each target CSV file and move it to its dataset/budget folder
for target_file in target_files:
    # basename works with both "/" and "\\" separators on Windows and with "/"
    # elsewhere; splitting on "\\" only worked for Windows-style glob results.
    file_name = os.path.basename(target_file)

    # First selected dataset whose name occurs in the file name
    matched = next((data for data in datasets if data in file_name), None)
    if matched is None:
        # The file belongs to no selected dataset: leave it where it is
        # instead of moving it to a path with an empty dataset folder.
        continue

    data_folder = matched.capitalize()
    limited = dataset_config.get(matched, {}).get("limited", "")

    for end, folder in endings_folder.items():
        if file_name.endswith(end):
            source_file = target_file
            destination_dir = "./" + data_folder + limited + "/results_framework/" + folder + "/"
            destination_file = os.path.join(destination_dir, file_name)
            print(source_file)
            print(destination_file)
            # Create the budget folder on first use so the move cannot fail
            # just because the directory does not exist yet.
            os.makedirs(destination_dir, exist_ok=True)
            shutil.move(source_file, destination_file)
            break  # found folder
        


./framework_data\cirrhosis_ErrorMax0010Fold1_rules_error_0010_100.csv
./Cirrhosis/results_framework/100_budget/cirrhosis_ErrorMax0010Fold1_rules_error_0010_100.csv
./framework_data\cirrhosis_ErrorMax0010Fold1_rules_error_0010_25.csv
./Cirrhosis/results_framework/25_budget/cirrhosis_ErrorMax0010Fold1_rules_error_0010_25.csv
./framework_data\cirrhosis_ErrorMax0010Fold1_rules_error_0010_50.csv
./Cirrhosis/results_framework/50_budget/cirrhosis_ErrorMax0010Fold1_rules_error_0010_50.csv
./framework_data\cirrhosis_ErrorMax0010Fold1_rules_error_0010_90.csv
./Cirrhosis/results_framework/90_budget/cirrhosis_ErrorMax0010Fold1_rules_error_0010_90.csv
