# Import Required Modules

In [1]:
import os
import re

# Count CSV Files per Subject

In [2]:
# Root directory that holds one "OpenBCISession_<subject>-<activity>" folder per session.
base_dir = '/data0/HAR-datasets/PLHI-HAR_EEG-2025/'

# Subject id -> number of session folders expected for that subject.
# Insertion order is the order in which subjects are reported below.
expected_folders = dict(
    s1=10,
    s2=10,
    s3=10,
    s4=10,
    s5=4,
    s6=10,
)

def count_csv_files_per_subject(base_dir, subject):
    """Count the ``.csv`` entries across all session folders of one subject.

    A session folder is any directory directly under ``base_dir`` whose name
    starts with ``OpenBCISession_<subject>-``. Matching entries that are not
    directories are ignored.

    Args:
        base_dir: Directory containing the session folders.
        subject: Subject identifier used in the folder prefix (e.g. ``'s1'``).

    Returns:
        Total number of entries ending in ``.csv`` found directly inside the
        subject's session folders (0 when there are none).
    """
    prefix = f"OpenBCISession_{subject}-"
    candidate_paths = (
        os.path.join(base_dir, entry)
        for entry in os.listdir(base_dir)
        if entry.startswith(prefix)
    )
    # Only real directories contribute; each one adds its own CSV count.
    return sum(
        sum(1 for name in os.listdir(path) if name.endswith('.csv'))
        for path in candidate_paths
        if os.path.isdir(path)
    )

print("Total CSV files per subject:\n")

# Iterating the dict directly yields the subject ids in insertion order.
for subject in expected_folders:
    print(f"{subject.upper()}: {count_csv_files_per_subject(base_dir, subject)} CSV files")

Total CSV files per subject:

S1: 58 CSV files
S2: 50 CSV files
S3: 51 CSV files
S4: 50 CSV files
S5: 12 CSV files
S6: 46 CSV files


# Verify Folder and File Counts per Subject

In [3]:
# Dataset root; session folders are looked up directly inside it.
base_dir = '/data0/HAR-datasets/PLHI-HAR_EEG-2025/'

# Expected number of session folders for each subject id.
expected_folders = dict(
    s1=10,
    s2=10,
    s3=10,
    s4=10,
    s5=4,
    s6=10,
)

def verify_subject_sessions(base_dir, subject, expected_count):
    """Print a report of one subject's session folders and their CSV files.

    Entries under ``base_dir`` whose names start with
    ``OpenBCISession_<subject>-`` are listed in sorted order. The number of
    such entries is compared against ``expected_count``, and for each entry
    that is a directory its ``.csv`` files are listed (also sorted).

    Args:
        base_dir: Directory containing the session folders.
        subject: Subject identifier used in the folder prefix (e.g. ``'s1'``).
        expected_count: Number of matching entries the subject should have.

    Returns:
        None. All results are written to standard output.
    """
    print(f"\nChecking subject {subject}...")

    prefix = f"OpenBCISession_{subject}-"
    session_names = [name for name in os.listdir(base_dir) if name.startswith(prefix)]
    session_names.sort()

    # The count covers every matching entry, including non-directories.
    found = len(session_names)
    if found == expected_count:
        print(f"Found {expected_count} folders.")
    else:
        print(f"Warning: Expected {expected_count} folders, found {found}")

    for session in session_names:
        session_path = os.path.join(base_dir, session)
        if os.path.isdir(session_path):
            csv_names = sorted(
                entry for entry in os.listdir(session_path) if entry.endswith('.csv')
            )
            print(f"{session} contains {len(csv_names)} CSV files.")
            for csv_name in csv_names:
                print(f"  - {csv_name}")
        else:
            print(f"{session} is not a directory.")

# Run the check for every subject, in the order the subjects were declared.
for subject in expected_folders:
    verify_subject_sessions(base_dir, subject, expected_folders[subject])


Checking subject s1...
Found 10 folders.
OpenBCISession_s1-chair squats contains 5 CSV files.
  - BrainFlow-RAW_s1-chair squats_0.csv
  - BrainFlow-RAW_s1-chair squats_1.csv
  - BrainFlow-RAW_s1-chair squats_2.csv
  - BrainFlow-RAW_s1-chair squats_3.csv
  - BrainFlow-RAW_s1-chair squats_4.csv
OpenBCISession_s1-light stationary cycling contains 5 CSV files.
  - BrainFlow-RAW_s1-light stationary cycling_0.csv
  - BrainFlow-RAW_s1-light stationary cycling_1.csv
  - BrainFlow-RAW_s1-light stationary cycling_2.csv
  - BrainFlow-RAW_s1-light stationary cycling_3.csv
  - BrainFlow-RAW_s1-light stationary cycling_4.csv
OpenBCISession_s1-marching in place contains 5 CSV files.
  - BrainFlow-RAW_s1-marching in place_0.csv
  - BrainFlow-RAW_s1-marching in place_1.csv
  - BrainFlow-RAW_s1-marching in place_2.csv
  - BrainFlow-RAW_s1-marching in place_3.csv
  - BrainFlow-RAW_s1-marching in place_4.csv
OpenBCISession_s1-seated boxing hooks contains 5 CSV files.
  - BrainFlow-RAW_s1-seated boxing ho

# Rename Files to Correct Naming Convention

In [4]:
base_dir = '/data0/HAR-datasets/PLHI-HAR_EEG-2025/'
# Target naming scheme: BrainFlow-RAW_<subject>-<activity>_<index>.csv
# NOTE(review): this check is loose — it accepts any subject/activity text and
# does not verify that they match the enclosing folder. Confirm whether a
# stricter per-folder check is wanted.
correct_name_pattern = re.compile(r'^BrainFlow-RAW_(.+)-(.+)_\d+\.csv$')
subjects = ['s1', 's2', 's3', 's4', 's5', 's6']

print(f"Found {len(subjects)} subjects: {subjects}")

for subject in subjects:
    print(f"\nChecking subject {subject}...")
    subject_prefix = f"OpenBCISession_{subject}-"
    folders = [
        f for f in os.listdir(base_dir)
        if f.startswith(subject_prefix) and os.path.isdir(os.path.join(base_dir, f))
    ]
    print(f"Found {len(folders)} folders for subject {subject}.")

    for folder in folders:
        folder_path = os.path.join(base_dir, folder)
        # Everything after the leading "OpenBCISession_<subject>-" is the activity name.
        activity = folder[len(subject_prefix):]
        csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
        print(f"{folder} contains {len(csv_files)} CSV files.")

        for filename in csv_files:
            if correct_name_pattern.match(filename):
                print(f"  - {filename} already correctly named.")
                continue

            # Use the last run of digits in the file stem as the index; fall
            # back to '0' when the stem contains no digits at all.
            name_wo_ext = os.path.splitext(filename)[0]
            suffix_parts = re.findall(r'(\d+)', name_wo_ext)
            suffix = suffix_parts[-1] if suffix_parts else '0'

            new_filename = f"BrainFlow-RAW_{subject}-{activity}_{suffix}.csv"
            old_file_path = os.path.join(folder_path, filename)
            new_file_path = os.path.join(folder_path, new_filename)

            # os.rename silently replaces an existing destination file on POSIX,
            # which would destroy a recording when two files resolve to the same
            # target name. Leave the file untouched and report it instead.
            if os.path.exists(new_file_path):
                print(f"  Skipped {filename}: target {new_filename} already exists.")
                continue

            os.rename(old_file_path, new_file_path)
            print(f"  Renamed file: {filename} → {new_filename}")

Found 6 subjects: ['s1', 's2', 's3', 's4', 's5', 's6']

Checking subject s1...
Found 10 folders for subject s1.
OpenBCISession_s1-chair squats contains 5 CSV files.
  - BrainFlow-RAW_s1-chair squats_1.csv already correctly named.
  - BrainFlow-RAW_s1-chair squats_4.csv already correctly named.
  - BrainFlow-RAW_s1-chair squats_0.csv already correctly named.
  - BrainFlow-RAW_s1-chair squats_2.csv already correctly named.
  - BrainFlow-RAW_s1-chair squats_3.csv already correctly named.
OpenBCISession_s1-light stationary cycling contains 5 CSV files.
  - BrainFlow-RAW_s1-light stationary cycling_4.csv already correctly named.
  - BrainFlow-RAW_s1-light stationary cycling_3.csv already correctly named.
  - BrainFlow-RAW_s1-light stationary cycling_0.csv already correctly named.
  - BrainFlow-RAW_s1-light stationary cycling_2.csv already correctly named.
  - BrainFlow-RAW_s1-light stationary cycling_1.csv already correctly named.
OpenBCISession_s1-wall push-ups contains 6 CSV files.
  - Br