In [13]:
import os
import pandas as pd

def _load_sensor_file(file_path, prefix):
    """Read one tab-separated sensor log and return it cleaned and time-sorted.

    The file is read without a header and must have exactly four columns,
    which are named ``<prefix>_Timestamp``, ``<prefix>_X``, ``<prefix>_Y``
    and ``<prefix>_Z``.

    Args:
        file_path: Path of the sensor text file.
        prefix: Column-name prefix, e.g. ``'acc'``, ``'mag'`` or ``'gyro'``.

    Returns:
        pandas.DataFrame with all four columns converted to numbers
        (unparseable cells become NaN), rows without a usable timestamp
        removed, sorted by the timestamp column.
    """
    frame = pd.read_csv(file_path, header=None, delimiter='\t')
    ts_col = f'{prefix}_Timestamp'
    axis_cols = [f'{prefix}_X', f'{prefix}_Y', f'{prefix}_Z']
    frame.columns = [ts_col] + axis_cols

    raw_ts = frame[ts_col]
    # Only text columns can carry the 'Time ' prefix.  Testing the dtype
    # (rather than `'Time ' in first_cell`) avoids a TypeError when pandas has
    # already parsed the column as numbers, and strips the prefix from every
    # row instead of relying on what the first row looks like.
    if not pd.api.types.is_numeric_dtype(raw_ts):
        raw_ts = raw_ts.astype(str).str.replace('Time ', '', regex=False)
    frame[ts_col] = pd.to_numeric(raw_ts, errors='coerce')
    frame[axis_cols] = frame[axis_cols].apply(pd.to_numeric, errors='coerce')

    # merge_asof needs non-null, sorted keys.
    return frame.dropna(subset=[ts_col]).sort_values(ts_col)


def process_sensor_data(base_directory):
    """Merge accelerometer, magnetometer and gyroscope logs into one table.

    Expected layout: ``base_directory/<activity>/<recording>/`` containing
    ``accelerometer.txt``, ``magnetometer.txt`` and ``gyroscope.txt``.
    Recordings missing any of the three files are skipped.  For each
    remaining recording, every accelerometer row is paired with the
    magnetometer and gyroscope rows whose timestamps are nearest to it, and
    the rows are labelled with the activity folder name.

    Folders are visited in sorted order so the row order of the result is
    reproducible from run to run.

    Args:
        base_directory: Directory whose sub-directories are activity folders.

    Returns:
        pandas.DataFrame with columns ``Timestamp``, ``acc_X/Y/Z``,
        ``mag_X/Y/Z``, ``gyro_X/Y/Z`` and ``activity``, concatenated over all
        recordings with a fresh integer index.

    Raises:
        ValueError: If no recording could be processed.
    """
    final_columns = ['Timestamp', 'acc_X', 'acc_Y', 'acc_Z',
                     'mag_X', 'mag_Y', 'mag_Z',
                     'gyro_X', 'gyro_Y', 'gyro_Z', 'activity']
    all_data = []

    for adl_folder in sorted(os.listdir(base_directory)):
        adl_path = os.path.join(base_directory, adl_folder)
        if not os.path.isdir(adl_path):
            continue

        for subfolder in sorted(os.listdir(adl_path)):
            subfolder_path = os.path.join(adl_path, subfolder)

            acc_file = os.path.join(subfolder_path, 'accelerometer.txt')
            mag_file = os.path.join(subfolder_path, 'magnetometer.txt')
            gyro_file = os.path.join(subfolder_path, 'gyroscope.txt')

            if not all(os.path.exists(path) for path in (acc_file, mag_file, gyro_file)):
                continue

            try:
                acc_data = _load_sensor_file(acc_file, 'acc')
                mag_data = _load_sensor_file(mag_file, 'mag')
                gyro_data = _load_sensor_file(gyro_file, 'gyro')

                # The accelerometer drives the output: one row per
                # accelerometer sample, with the nearest-in-time reading
                # from each of the other two sensors attached.
                merged_data = pd.merge_asof(
                    acc_data,
                    mag_data,
                    left_on='acc_Timestamp', right_on='mag_Timestamp',
                    direction='nearest'
                )
                merged_data = pd.merge_asof(
                    merged_data,
                    gyro_data,
                    left_on='acc_Timestamp', right_on='gyro_Timestamp',
                    direction='nearest'
                )

                merged_data = merged_data.rename(columns={'acc_Timestamp': 'Timestamp'})
                merged_data['activity'] = adl_folder

                all_data.append(merged_data[final_columns])

            except Exception as e:
                # Best effort: a bad recording is reported and skipped so the
                # remaining recordings are still processed.
                print(f"Error processing data in {subfolder_path}: {e}")

    if not all_data:
        raise ValueError("No valid data found in the directory")

    return pd.concat(all_data, ignore_index=True)

def save_to_csv(data, output_path='consolidated_sensor_data.csv'):
    """Write ``data`` to ``output_path`` as CSV and print where it went.

    Args:
        data: Object exposing ``to_csv`` (a pandas DataFrame in this notebook).
        output_path: Destination file; defaults to
            ``'consolidated_sensor_data.csv'``.
    """
    # index=False keeps the row index out of the file.
    data.to_csv(path_or_buf=output_path, index=False)
    confirmation = f"Data saved to {output_path}"
    print(confirmation)

if __name__ == "__main__":
    # Root folder to scan; process_sensor_data uses the name of each
    # sub-directory found here as the activity label.
    base_directory = "./data for activities with motion"
    try:
        # Build the combined table, then write it to save_to_csv's default
        # output path.
        consolidated_data = process_sensor_data(base_directory)
        save_to_csv(consolidated_data)
    except Exception as e:
        # Any failure, including the ValueError raised when no recording
        # could be processed, is reported as a one-line message rather than
        # a traceback.
        print(f"Error: {e}")


Data saved to consolidated_sensor_data.csv


In [15]:
import pandas as pd

# Path to the saved CSV file
file_path = 'consolidated_sensor_data.csv'

# Read the data from the CSV file
data = pd.read_csv(file_path)

# Display the frame. The bare expression renders the whole DataFrame, which
# the notebook shows as a truncated preview (first and last rows), not just
# the first few rows; use data.head() for that.
data


Unnamed: 0,Timestamp,acc_X,acc_Y,acc_Z,mag_X,mag_Y,mag_Z,gyro_X,gyro_Y,gyro_Z,activity
0,1.493997e+12,-2.145,-9.174,3.802,-14.22,39.54,1.50,0.716627,0.077405,-0.804956,downstairs
1,1.493997e+12,-0.612,-9.625,3.984,-14.22,39.54,1.50,0.527844,0.212630,-0.811484,downstairs
2,1.493997e+12,-0.641,-10.678,3.840,-14.22,39.54,1.50,0.338130,0.280309,-0.893285,downstairs
3,1.493997e+12,-0.201,-12.517,3.390,-14.22,39.54,1.50,-0.169065,0.236611,-1.147482,downstairs
4,1.493997e+12,-1.522,-12.536,3.323,-14.22,39.54,1.50,-0.561684,0.267120,-1.295763,downstairs
...,...,...,...,...,...,...,...,...,...,...,...
4737304,1.495486e+12,-0.747,3.217,-1.484,-11.10,-23.52,23.94,0.523581,-2.016787,-0.328271,walking
4737305,1.495486e+12,0.354,4.280,-0.632,-11.10,-23.52,23.94,0.523581,-2.016787,-0.328271,walking
4737306,1.495486e+12,0.459,5.650,-1.331,-11.10,-23.52,23.94,0.651079,-2.146683,-0.773248,walking
4737307,1.495486e+12,0.220,8.006,-2.729,-11.10,-23.52,23.94,0.853983,-2.254596,-1.033973,walking
