In [8]:
# Import the necessary libraries
import os
import pandas as pd
from tqdm import tqdm

def delete_DS_Store(path: str) -> None:
    '''Delete the .DS_Store file from a directory, if one is present.

    This is a best-effort clean-up: filesystem errors (missing directory,
    path that is not a directory, permission problems, ...) are reported on
    stdout and otherwise ignored so that the caller can keep going.

    Args:
        path: Directory to clean.

    Raises:
        TypeError: If `path` is not a valid path type. Only filesystem errors
            (OSError) are treated as best-effort failures; programming
            errors are deliberately not swallowed.
    '''
    try:
        if '.DS_Store' in os.listdir(path):
            os.remove(os.path.join(path, '.DS_Store'))
    except OSError as e:
        # Covers FileNotFoundError, NotADirectoryError, PermissionError, etc.
        print(f'Error occurred while deleting .DS_Store: {e}')

# The dataset is expected in a 'data' directory that sits next to the
# current working directory (i.e. inside the parent of the cwd).
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
base_path = os.path.join(parent_dir, 'data')

# One column per (unit, sensor, axis) combination, named '<unit>_<axis><sensor>'.
# The nesting order below fixes the column order: all axes of one sensor,
# then the next sensor, then the next unit.
unit_prefixes = ['T', 'RA', 'LA', 'RL', 'LL']
sensor_names = ['acc', 'gyro', 'mag']
axis_names = ['x', 'y', 'z']
columns = [
    f'{unit}_{axis}{sensor}'
    for unit in unit_prefixes
    for sensor in sensor_names
    for axis in axis_names
]

# Start from an empty DataFrame; the loaded segments are added to it later.
df = pd.DataFrame()

# Upper bounds on how much of the dataset is loaded (lower them for faster runs).
n_a_max = 19  # Activities to load (at most 19)
n_p_max = 8   # Subjects per activity directory (at most 8)
n_s_max = 60  # Segments per subject directory (at most 60)

delete_DS_Store(base_path) # Remove .DS_Store file in the base_path

# os.listdir() returns entries in arbitrary order, so sort them to make the
# load order (and the subset selected by the limits) reproducible. Filtering
# to directories *before* slicing ensures stray files do not use up a slot,
# and slicing with [:n] loads exactly n entries (the previous `index <= n`
# test on a zero-based index allowed n + 1).
activity_dirs = sorted(
    name for name in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, name))
)[:n_a_max]

# Collect the per-segment frames and concatenate once at the end; calling
# pd.concat inside the loop re-copies all previously loaded rows each time.
segment_frames = []

# Loop through each activity directory (a01, a02, ..., a19)
for activity_dir in tqdm(activity_dirs, desc='Loading Activities: ', ascii=False):
    activity_path = os.path.join(base_path, activity_dir)

    # Remove .DS_Store file in the activity_path
    delete_DS_Store(activity_path)

    # Subject directories (p1, p2, ..., p8) within the activity directory
    subject_dirs = sorted(
        name for name in os.listdir(activity_path)
        if os.path.isdir(os.path.join(activity_path, name))
    )[:n_p_max]

    for subject_dir in subject_dirs:
        subject_path = os.path.join(activity_path, subject_dir)

        # Remove .DS_Store file in the subject_path so it is not read as a segment
        delete_DS_Store(subject_path)

        # Segment files (s01.txt, s02.txt, ..., s60.txt) within the subject directory
        for segment_file in sorted(os.listdir(subject_path))[:n_s_max]:
            file_path = os.path.join(subject_path, segment_file)

            # Keep the try body limited to the call that can actually fail this way
            try:
                segment_data = pd.read_csv(file_path, header=None, names=columns)
            except FileNotFoundError:
                print(f'File not found: {file_path}')
                continue

            # Add the directory names as new columns
            segment_data['subject'] = subject_dir
            segment_data['activity'] = activity_dir
            segment_frames.append(segment_data)

# Build the main DataFrame in a single pass; if nothing was loaded, df keeps
# the value it had before this loop.
if segment_frames:
    df = pd.concat(segment_frames, ignore_index=True)
# Order the rows by subject first, then by activity within each subject.
df = df.sort_values(by=['subject', 'activity'])

# One timestamp per row, 0.04 per index step.
# NOTE(review): sorting keeps the original index labels, so the timestamps
# follow each row's pre-sort position rather than its position in the sorted
# frame -- confirm this is intended.
df['timestamp'] = df.index * 0.04

# Move the three trailing columns (subject, activity and timestamp, as added
# by this script) to the front, keeping the remaining columns in order.
cols = [*df.columns[-3:], *df.columns[:-3]]
df = df[cols]

# Write the result to a CSV file without the index column.
df.to_csv('data_all.csv', index=False)

Loading Activities: 100%|██████████| 19/19 [09:20<00:00, 29.51s/it]
