In [1]:
import os
import csv

In [27]:
def replace_column_name(input_file, old_column_name, new_column_name):
    """Copy a CSV file, renaming one column in its header.

    The result is written next to the input as ``<stem>_updated.csv``; the
    input file itself is not modified.

    Args:
        input_file: Path of the CSV file to read.
        old_column_name: Header name to replace. If no column has this name
            the data is copied unchanged.
        new_column_name: Header name to use instead.

    Returns:
        The path of the file that was written.
    """
    # splitext strips only the final extension. Splitting on the first "."
    # would truncate relative paths such as "./data.csv" (to an empty stem)
    # and any name or directory that contains an extra dot.
    output_file = os.path.splitext(input_file)[0] + "_updated.csv"

    def rename(name):
        return new_column_name if name == old_column_name else name

    # Read everything into memory before the output file is opened.
    with open(input_file, "r", newline='') as f:
        reader = csv.DictReader(f)
        # reader.fieldnames is None when the file has no header row; treat
        # that as "no columns" instead of failing on iteration.
        fieldnames = [rename(name) for name in (reader.fieldnames or [])]
        rows = [{rename(key): value for key, value in row.items()} for row in reader]

    with open(output_file, "w", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    return output_file


In [2]:
# Absolute, machine-specific locations of the per-task result folders
# (ld, pdm, prpc). These must be edited before running on another machine.
# NOTE: despite the `_clean` suffix in all three variable names, the pdm and
# prpc paths point at `revised` sub-folders; only the ld path ends in `clean`.
folder_path_ld_clean = 'C:\\Users\\stefa\\OneDrive\\Documents\\UNI\\YEAR 3\\II\\Thesis\\experiments\\results\\results\\results_ld\\clean'
folder_path_pdm_clean = 'C:\\Users\\stefa\\OneDrive\\Documents\\UNI\\YEAR 3\\II\\Thesis\\experiments\\results\\results\\results_pdm\\revised'
folder_path_prpc_clean = 'C:\\Users\\stefa\\OneDrive\\Documents\\UNI\\YEAR 3\\II\\Thesis\\experiments\\results\\results\\results_prpc\\revised'



In [25]:
# Make the prpc folder the working directory: the loop in the next cell passes
# bare file names (from os.listdir) to a helper that opens them as given, so
# they only resolve when this is the current directory.
os.chdir(folder_path_prpc_clean)

In [28]:
# Rename the 'participant' column to 'subject_nr' in every CSV found in
# folder_path_prpc_clean. The names are passed without a directory, so they are
# resolved against the current working directory.
for file_name in os.listdir(folder_path_prpc_clean):
    if not file_name.endswith('.csv'):
        continue
    replace_column_name(file_name, 'participant', 'subject_nr')

In [15]:
def add_column_with_value(input_file, column_name, value):
    """Copy a CSV file, adding a column that holds the same value in every row.

    The output is written in the same directory as the input. Its name is
    built from the first two ``_``-separated parts of the input's base name
    (extension removed) followed by ``_updated.csv``; a name with fewer than
    two parts is used whole. When that name equals the input's name (e.g. the
    input is ``a_b_updated.csv``) the input file is overwritten.

    Args:
        input_file: Path of the CSV file to read.
        column_name: Name of the column to add. If a column with this name
            already exists, its values are overwritten instead of a second
            column with the same name being appended.
        value: Value stored in ``column_name`` for every row.

    Returns:
        The path of the file that was written.
    """
    # Work on the base name only, so underscores in directory names cannot
    # leak into the split, and strip the extension first so a two-part name
    # does not end up as "a_b.csv_updated.csv".
    folder, base_name = os.path.split(input_file)
    stem = os.path.splitext(base_name)[0]
    # Slicing (rather than indexing [0] and [1]) tolerates names that contain
    # no underscore at all.
    prefix = "_".join(stem.split("_")[:2])
    output_file = os.path.join(folder, prefix + "_updated.csv")

    # Read everything into memory first; the output may be the same file.
    with open(input_file, "r", newline='') as f:
        reader = csv.DictReader(f)
        # Copy the header (None for a file without a header row) instead of
        # mutating the reader's own list.
        fieldnames = list(reader.fieldnames or [])
        if column_name not in fieldnames:
            fieldnames.append(column_name)
        rows = [{**row, column_name: value} for row in reader]

    with open(output_file, "w", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    return output_file


In [4]:
def fill_empty_cells(input_file):
    """Copy a CSV file, replacing empty cells with the string ``'none'``.

    A cell counts as empty when it is blank or whitespace-only, or when it is
    missing because its row has fewer fields than the header. The result is
    written next to the input as ``<stem>_filled.csv``.

    Args:
        input_file: Path of the CSV file to read.

    Returns:
        The path of the file that was written.
    """
    # splitext strips only the final extension; splitting on the first "."
    # would truncate paths like "./data.csv" or names with extra dots.
    output_file = os.path.splitext(input_file)[0] + "_filled.csv"

    def fill(value):
        # DictReader stores None for fields missing from a short row, and
        # None has no .strip(), so it is checked before the string test.
        if value is None:
            return 'none'
        if isinstance(value, str) and value.strip() == '':
            return 'none'
        # Anything else is passed through unchanged. That includes the list
        # DictReader stores for surplus fields of an over-long row, which the
        # writer below rejects with ValueError.
        return value

    with open(input_file, "r", newline='') as f:
        reader = csv.DictReader(f)
        # reader.fieldnames is None when the file has no header row.
        fieldnames = list(reader.fieldnames or [])
        rows = [{key: fill(value) for key, value in row.items()} for row in reader]

    with open(output_file, "w", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    return output_file


In [3]:
# Make the prpc folder the working directory: the loops in the following cells
# pass bare file names (from os.listdir) to helpers that open them as given, so
# they only resolve when this is the current directory.
os.chdir(folder_path_prpc_clean)

In [16]:
# Add a 'task' column holding 'prpc' to every CSV found in
# folder_path_prpc_clean. The names are passed without a directory, so they are
# resolved against the current working directory.
for file_name in os.listdir(folder_path_prpc_clean):
    if not file_name.endswith('.csv'):
        continue
    add_column_with_value(file_name, 'task', 'prpc')

In [5]:
# Replace empty cells in every CSV found in folder_path_prpc_clean. The names
# are passed without a directory, so they are resolved against the current
# working directory.
for file_name in os.listdir(folder_path_prpc_clean):
    if not file_name.endswith('.csv'):
        continue
    fill_empty_cells(file_name)

In [6]:
def merge_csv_by_task(folder_path):
    """Concatenate every CSV file in a folder into ``<folder>/merged.csv``.

    All input files must have exactly the same header (same names, same
    order). Files are read in sorted name order, and a ``merged.csv`` left in
    the folder by an earlier run is skipped rather than merged into itself.

    Args:
        folder_path: Directory containing the CSV files to merge.

    Returns:
        The path of the merged file.

    Raises:
        ValueError: If a file's header differs from the files read before it,
            or if the folder holds no CSV file with a header row.
    """
    output_name = "merged.csv"
    output_file = os.path.join(folder_path, output_name)

    fieldnames = None
    all_rows = []

    # os.listdir returns names in arbitrary order; sorting makes the row order
    # of the merged file reproducible.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.csv'):
            continue
        # The output lives in the folder being scanned. Without this guard a
        # second run would read the previous result and duplicate every row.
        if os.path.normcase(file_name) == os.path.normcase(output_name):
            continue

        file_path = os.path.join(folder_path, file_name)
        with open(file_path, "r", newline='') as f:
            reader = csv.DictReader(f)
            current_fieldnames = reader.fieldnames

            # The first header seen becomes the reference for all later files.
            if fieldnames is None:
                fieldnames = current_fieldnames
            elif fieldnames != current_fieldnames:
                raise ValueError(f"Column names in {file_name} do not match the other files")

            all_rows.extend(reader)

    # Fail before touching the output instead of crashing inside writeheader()
    # with an empty merged.csv already created.
    if fieldnames is None:
        raise ValueError(f"No CSV files with a header row found in {folder_path}")

    with open(output_file, "w", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_rows)

    return output_file



In [8]:
def remove_rows_by_given_column(input_file, column_name, value):
    """Copy a CSV file, dropping every row where a column equals a value.

    The result is written next to the input as ``<stem>_filtered.csv``.

    Args:
        input_file: Path of the CSV file to read.
        column_name: Column to test.
        value: Rows whose ``column_name`` cell equals this value are removed.
            Cells are read as strings, so pass a string to match them.

    Returns:
        The path of the file that was written.

    Raises:
        KeyError: If the file has data rows but no column ``column_name``.
    """
    # splitext strips only the final extension; splitting on the first "."
    # would truncate paths like "./data.csv" or names with extra dots.
    output_file = os.path.splitext(input_file)[0] + "_filtered.csv"

    with open(input_file, "r", newline='') as f:
        reader = csv.DictReader(f)
        # reader.fieldnames is None when the file has no header row.
        fieldnames = list(reader.fieldnames or [])
        kept_rows = [row for row in reader if row[column_name] != value]

    with open(output_file, "w", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(kept_rows)

    return output_file

In [11]:
def merge_tasks(folder_path):
    """Merge CSV files with differing columns into ``<folder>/merged.csv``.

    The output header is the union of all input headers, in the order the
    columns are first encountered (files are read in sorted name order). A
    row from a file that lacks a column gets the string ``'none'`` there.
    A ``merged.csv`` left in the folder by an earlier run is skipped rather
    than merged into itself.

    Args:
        folder_path: Directory containing the CSV files to merge.

    Returns:
        The path of the merged file.
    """
    output_name = "merged.csv"
    output_file = os.path.join(folder_path, output_name)

    all_rows = []
    # A dict is used as an insertion-ordered set. A plain set would make the
    # column order of the output differ from one run to the next.
    all_fieldnames = {}

    # os.listdir returns names in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.csv'):
            continue
        # The output lives in the folder being scanned. Without this guard a
        # second run would read the previous result and duplicate every row.
        if os.path.normcase(file_name) == os.path.normcase(output_name):
            continue

        file_path = os.path.join(folder_path, file_name)
        with open(file_path, "r", newline='') as f:
            reader = csv.DictReader(f)
            # reader.fieldnames is None when the file has no header row.
            all_fieldnames.update(dict.fromkeys(reader.fieldnames or []))
            all_rows.extend(reader)

    columns = list(all_fieldnames)

    # Rows only carry the keys of their own file; fill the rest with 'none'.
    filled_rows = [{key: row.get(key, 'none') for key in columns} for row in all_rows]

    with open(output_file, "w", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=columns)
        writer.writeheader()
        writer.writerows(filled_rows)

    return output_file


In [10]:
# Drop every row of merged_ld.csv whose 'category' is 'nonword'. The filtered
# copy is written next to the input; its path is the value displayed below.
remove_rows_by_given_column('C:\\Users\\stefa\\OneDrive\\Documents\\UNI\\YEAR 3\\II\\Thesis\\experiments\\results\\results\\merged\\merged_ld.csv', 'category', 'nonword')

'C:\\Users\\stefa\\OneDrive\\Documents\\UNI\\YEAR 3\\II\\Thesis\\experiments\\results\\results\\merged\\merged_ld_filtered.csv'

In [7]:
# Merge the CSV files of each task folder into a merged.csv inside that folder.
# Only the last call's return value (the prpc path) is displayed by the cell.
merge_csv_by_task(folder_path_ld_clean)
merge_csv_by_task(folder_path_pdm_clean)
merge_csv_by_task(folder_path_prpc_clean)

'C:\\Users\\stefa\\OneDrive\\Documents\\UNI\\YEAR 3\\II\\Thesis\\experiments\\results\\results\\results_prpc\\revised\\merged.csv'

In [12]:
# Combine all CSV files in the `merged` folder into one merged.csv, filling
# columns a file does not have with 'none'.
merge_tasks('C:\\Users\\stefa\\OneDrive\\Documents\\UNI\\YEAR 3\\II\\Thesis\\experiments\\results\\results\\merged')

'C:\\Users\\stefa\\OneDrive\\Documents\\UNI\\YEAR 3\\II\\Thesis\\experiments\\results\\results\\merged\\merged.csv'

In [None]:
# Maps a participant id (as a string) to an ordered list of the three task
# codes. Ids '7'-'12' repeat the lists of ids '1'-'6'.
# Presumably the order in which each participant did the tasks - TODO confirm.
participants = {
    '1': ['pdm', 'ld', 'prpc'],
    '2': ['pdm', 'prpc', 'ld'],
    '3': ['ld', 'pdm', 'prpc'],
    '4': ['ld', 'prpc', 'pdm'],
    '5': ['prpc', 'pdm', 'ld'],
    '6': ['prpc', 'ld', 'pdm'],
    '7': ['pdm', 'ld', 'prpc'],
    '8': ['pdm', 'prpc', 'ld'],
    '9': ['ld', 'pdm', 'prpc'],
    '10': ['ld', 'prpc', 'pdm'],
    '11': ['prpc', 'pdm', 'ld'],
    '12': ['prpc', 'ld', 'pdm'],
}