## Importing Necessary Libraries

In [1]:
import pandas as pd
import os
import zipfile

## Verifying Root Folder Structure

In [5]:
def verify_root_structure(root_folder):
    print(f"\nDEBUG: Verifying root folder structure")
    print(f"DEBUG: Root path: {root_folder}")
    print(f"DEBUG: Root path exists: {os.path.exists(root_folder)}")
    
    if os.path.exists(root_folder):
        print("\nDEBUG: Contents of root directory:")
        try:
            contents = os.listdir(root_folder)
            print(contents)
            
            subject_folders = [f for f in contents if f.startswith('Subject')]
            print(f"\nDEBUG: Found subject folders: {subject_folders}")
            
        except PermissionError:
            print("DEBUG: Permission denied when trying to list directory contents")
        except Exception as e:
            print(f"DEBUG: Error when accessing directory: {str(e)}")
    else:
        parent_dir = os.path.dirname(root_folder)
        print(f"\nDEBUG: Root folder not found. Checking parent directory: {parent_dir}")
        if os.path.exists(parent_dir):
            print("DEBUG: Contents of parent directory:")
            try:
                print(os.listdir(parent_dir))
            except Exception as e:
                print(f"DEBUG: Error when accessing parent directory: {str(e)}")

verify_root_structure(root_folder)


DEBUG: Verifying root folder structure
DEBUG: Root path: /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects
DEBUG: Root path exists: True

DEBUG: Contents of root directory:
['Subject 5', 'Subject 3', 'Subject 2', 'Subject 1', 'Subject 6', 'Subject 4']

DEBUG: Found subject folders: ['Subject 5', 'Subject 3', 'Subject 2', 'Subject 1', 'Subject 6', 'Subject 4']


## Renaming Subjects Folders to Remove Leading and Trailing Spaces

In [2]:
root_folder = '/data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects'

for folder_name in os.listdir(root_folder):
    folder_path = os.path.join(root_folder, folder_name)
    
    if os.path.isdir(folder_path):
        new_folder_name = folder_name.strip()
        
        if folder_name != new_folder_name:
            new_folder_path = os.path.join(root_folder, new_folder_name)
            os.rename(folder_path, new_folder_path)
            print(f"Renamed {folder_name} to {new_folder_name}")

## Loading and Processing Subject 1's Data

In [3]:
root_folder = '/data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects'

subject_activities = {
    'Subject 1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
}

def load_subject_data(subject_folder, activities):
    gyro_data = []
    acg_data = []

    for activity in activities:
        activity_folder = os.path.join(subject_folder, f"Activity {activity}")
        
        if os.path.isdir(activity_folder):
            gyro_file = os.path.join(activity_folder, 'WEAR_GYRO.csv')
            acg_file = os.path.join(activity_folder, 'WEAR_ACG.csv')
            
            if os.path.exists(gyro_file):
                gyro_df = pd.read_csv(gyro_file, delimiter=';')
                gyro_data.append(gyro_df)
                print(f"Loaded {gyro_file}")
            else:
                print(f"Missing {gyro_file}")
            
            if os.path.exists(acg_file):
                acg_df = pd.read_csv(acg_file, delimiter=';')
                acg_data.append(acg_df)
                print(f"Loaded {acg_file}")
            else:
                print(f"Missing {acg_file}")

    return gyro_data, acg_data

def process_subject_data(root_folder, subject, activities):
    subject_path = os.path.join(root_folder, subject)
    gyro_data, acg_data = [], []

    if os.path.isdir(subject_path):
        gyro_data, acg_data = load_subject_data(subject_path, activities)

    print(f"Number of gyro data frames loaded for {subject}: {len(gyro_data)}")
    print(f"Number of acg data frames loaded for {subject}: {len(acg_data)}")

    if gyro_data:
        gyro_df = pd.concat(gyro_data, ignore_index=True)
        print(f"Gyro data concatenated for {subject}")
    else:
        gyro_df = None
        print(f"No gyro data to concatenate for {subject}")

    if acg_data:
        acg_df = pd.concat(acg_data, ignore_index=True)
        print(f"ACG data concatenated for {subject}")
    else:
        acg_df = None
        print(f"No ACG data to concatenate for {subject}")
    
    return gyro_df, acg_df

for subject, activities in subject_activities.items():
    print(f"\nProcessing data for {subject}...")
    gyro_df, acg_df = process_subject_data(root_folder, subject, activities)

    if gyro_df is not None:
        print(f"Gyro data for {subject}:")
        print(gyro_df.head())
    
    if acg_df is not None:
        print(f"ACG data for {subject}:")
        print(acg_df.head())


Processing data for Subject 1...
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 2/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 2/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 3/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 3/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126

## Loading and Concatenating Gyro and ACG Data for All Subjects

In [4]:
gyro_data = []
acg_data = []

root_folder = '/data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects'

subject_activities = {
    'Subject 1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 2': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 3': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 4': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 5': ['2', '3', '4', '5'],
    'Subject 6': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
}

for subject_folder, activities in subject_activities.items():
    subject_path = os.path.join(root_folder, subject_folder)
    
    if os.path.isdir(subject_path):
        for activity in activities:
            activity_folder = os.path.join(subject_path, f"Activity {activity}")
            
            if os.path.isdir(activity_folder):
                gyro_file = os.path.join(activity_folder, 'WEAR_GYRO.csv')
                acg_file = os.path.join(activity_folder, 'WEAR_ACG.csv')
                
                if os.path.exists(gyro_file):
                    gyro_df = pd.read_csv(gyro_file, delimiter=';')
                    gyro_data.append(gyro_df)
                    print(f"Loaded {gyro_file}")
                else:
                    print(f"Missing {gyro_file}")
                
                if os.path.exists(acg_file):
                    acg_df = pd.read_csv(acg_file, delimiter=';')
                    acg_data.append(acg_df)
                    print(f"Loaded {acg_file}")
                else:
                    print(f"Missing {acg_file}")

print(f"Number of gyro data frames loaded: {len(gyro_data)}")
print(f"Number of acg data frames loaded: {len(acg_data)}")

if gyro_data:
    gyro_df = pd.concat(gyro_data, ignore_index=True)
    print("Gyro data concatenated")
else:
    print("No gyro data to concatenate")

if acg_data:
    acg_df = pd.concat(acg_data, ignore_index=True)
    print("ACG data concatenated")
else:
    print("No ACG data to concatenate")

if 'gyro_df' in locals():
    print(gyro_df.head())
if 'acg_df' in locals():
    print(acg_df.head())

Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 2/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 2/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 3/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 3/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subj

## Processing and Loading Sensor Data for All Subjects

In [6]:
root_folder = '/data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects'

subject_activities = {
    'Subject 1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 2': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 3': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 4': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 5': ['2', '3', '4', '5'],
    'Subject 6': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
}

def load_subject_data(subject_folder, activities):
    gyro_data = []
    acg_data = []

    for activity in activities:
        activity_folder = os.path.join(subject_folder, f"Activity {activity}")
        
        if os.path.isdir(activity_folder):
            gyro_file = os.path.join(activity_folder, 'WEAR_GYRO.csv')
            acg_file = os.path.join(activity_folder, 'WEAR_ACG.csv')
            
            if os.path.exists(gyro_file):
                gyro_df = pd.read_csv(gyro_file, delimiter=';')
                gyro_data.append(gyro_df)
                print(f"Loaded {gyro_file}")
            else:
                print(f"Missing {gyro_file}")
            
            if os.path.exists(acg_file):
                acg_df = pd.read_csv(acg_file, delimiter=';')
                acg_data.append(acg_df)
                print(f"Loaded {acg_file}")
            else:
                print(f"Missing {acg_file}")

    return gyro_data, acg_data

def process_subject_data(root_folder, subject, activities):
    subject_path = os.path.join(root_folder, subject)
    gyro_data, acg_data = [], []

    if os.path.isdir(subject_path):
        gyro_data, acg_data = load_subject_data(subject_path, activities)

    print(f"Number of gyro data frames loaded for {subject}: {len(gyro_data)}")
    print(f"Number of acg data frames loaded for {subject}: {len(acg_data)}")

    if gyro_data:
        gyro_df = pd.concat(gyro_data, ignore_index=True)
        print(f"Gyro data concatenated for {subject}")
    else:
        gyro_df = None
        print(f"No gyro data to concatenate for {subject}")

    if acg_data:
        acg_df = pd.concat(acg_data, ignore_index=True)
        print(f"ACG data concatenated for {subject}")
    else:
        acg_df = None
        print(f"No ACG data to concatenate for {subject}")
    
    return gyro_df, acg_df

for subject, activities in subject_activities.items():
    print(f"\nProcessing data for {subject}...")
    gyro_df, acg_df = process_subject_data(root_folder, subject, activities)

    if gyro_df is not None:
        print(f"Gyro data for {subject}:")
        print(gyro_df.head())
    
    if acg_df is not None:
        print(f"ACG data for {subject}:")
        print(acg_df.head())


Processing data for Subject 1...
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 2/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 2/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 3/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 3/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126

## Summary and Processing Data for All Subjects

In [7]:
def load_subject_data(subject_folder, activities):
    gyro_data = []
    acg_data = []
    
    for activity in activities:
        activity_folder = os.path.join(subject_folder, f"Activity {activity}")
        
        if os.path.isdir(activity_folder):
            gyro_file = os.path.join(activity_folder, 'WEAR_GYRO.csv')
            acg_file = os.path.join(activity_folder, 'WEAR_ACG.csv')
            
            if os.path.exists(gyro_file):
                gyro_df = pd.read_csv(gyro_file, delimiter=';')
                gyro_data.append(gyro_df)
                print(f"Loaded {gyro_file}")
            else:
                print(f"Missing {gyro_file}")
            
            if os.path.exists(acg_file):
                acg_df = pd.read_csv(acg_file, delimiter=';')
                acg_data.append(acg_df)
                print(f"Loaded {acg_file}")
            else:
                print(f"Missing {acg_file}")
                
    return gyro_data, acg_data

def process_all_subjects(root_folder, subject_activities):
    actual_dirs = os.listdir(root_folder)
    
    dir_mapping = {d.strip(): d for d in actual_dirs if d.startswith('Subject')}
    
    all_data = {}
    
    for subject, activities in subject_activities.items():
        print(f"\nProcessing data for {subject}...")
        
        actual_dir_name = dir_mapping.get(subject)
        if actual_dir_name is None:
            print(f"Could not find directory for {subject}")
            continue
            
        subject_path = os.path.join(root_folder, actual_dir_name)
        
        if not os.path.isdir(subject_path):
            print(f"Directory not found: {subject_path}")
            continue
            
        gyro_data, acg_data = load_subject_data(subject_path, activities)
        
        if gyro_data:
            gyro_df = pd.concat(gyro_data, ignore_index=True)
            print(f"Gyro data concatenated for {subject}: {len(gyro_df)} rows")
        else:
            gyro_df = None
            print(f"No gyro data to concatenate for {subject}")
            
        if acg_data:
            acg_df = pd.concat(acg_data, ignore_index=True)
            print(f"ACG data concatenated for {subject}: {len(acg_df)} rows")
        else:
            acg_df = None
            print(f"No ACG data to concatenate for {subject}")
            
        all_data[subject] = {
            'gyro': gyro_df,
            'acg': acg_df
        }
    
    return all_data

all_subject_data = process_all_subjects(root_folder, subject_activities)

print("\nSummary of loaded data:")
for subject, data in all_subject_data.items():
    print(f"\n{subject}:")
    if data['gyro'] is not None:
        print(f"  Gyro data shape: {data['gyro'].shape}")
        print(f"  Gyro columns: {data['gyro'].columns.tolist()}")
    if data['acg'] is not None:
        print(f"  ACG data shape: {data['acg'].shape}")
        print(f"  ACG columns: {data['acg'].columns.tolist()}")


Processing data for Subject 1...
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 2/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 2/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 3/WEAR_GYRO.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 3/WEAR_ACG.csv
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126

## Loading and Previewing Gyro and ACG Data for All Subjects After Dropping Column "a"

In [8]:
gyro_data = []
acg_data = []

root_folder = '/data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects'

subject_activities = {
    'Subject 1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 2': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 3': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 4': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 5': ['2', '3', '4', '5'],
    'Subject 6': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
}

for subject_folder, activities in subject_activities.items():
    subject_path = os.path.join(root_folder, subject_folder)
    
    if os.path.isdir(subject_path):
        for activity in activities:
            activity_folder = os.path.join(subject_path, f"Activity {activity}")
            
            if os.path.isdir(activity_folder):
                gyro_file = os.path.join(activity_folder, 'WEAR_GYRO.csv')
                acg_file = os.path.join(activity_folder, 'WEAR_ACG.csv')
                
                if os.path.exists(gyro_file):
                    gyro_df = pd.read_csv(gyro_file, delimiter=';')
                    gyro_df = gyro_df.drop(columns=['a'], errors='ignore')  
                    gyro_data.append(gyro_df)
                    print(f"Loaded {gyro_file}")
                    print(f"Head of {gyro_file}:")
                    print(gyro_df.head()) 
                else:
                    print(f"Missing {gyro_file}")
                
                if os.path.exists(acg_file):
                    acg_df = pd.read_csv(acg_file, delimiter=';')
                    acg_df = acg_df.drop(columns=['a'], errors='ignore')  
                    acg_data.append(acg_df)
                    print(f"Loaded {acg_file}")
                    print(f"Head of {acg_file}:")
                    print(acg_df.head())  
                else:
                    print(f"Missing {acg_file}")

print(f"Number of gyro data frames loaded: {len(gyro_data)}")
print(f"Number of acg data frames loaded: {len(acg_data)}")

Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv
Head of /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv:
       t_Android         t_unix         x         y         z
0  7862086973596  1739025164003  0.009774  0.017104 -0.003665
1  7862096883362  1739025164003  0.009774  0.017104 -0.003665
2  7862106823645  1739025164003  0.009774  0.017104 -0.003665
3  7862116763928  1739025164059  0.009774  0.017104 -0.003665
4  7862126773694  1739025164059  0.009774  0.017104 -0.003665
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_ACG.csv
Head of /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_ACG.csv:
      t_Android         t_unix

## Loading, Labeling, and Previewing Sensor Data with 10 Activity Labels

In [9]:
gyro_data = []
acg_data = []

root_folder = '/data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects'

subject_activities = {
    'Subject 1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 2': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 3': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 4': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 5': ['2', '3', '4', '5'],
    'Subject 6': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
}

activity_list = [
    "Seated Leg Extensions",
    "Marching in Place",
    "Wall Push-ups",
    "Seated Boxing Hooks",
    "Standing Heel-to-Toe Walk",
    "Side-Stepping",
    "Seated Side Bends",
    "Seated Medicine Ball Twists",
    "Chair Squats",
    "Light Stationary Cycling"
]

for subject_folder, activities in subject_activities.items():
    subject_path = os.path.join(root_folder, subject_folder)
    
    if os.path.isdir(subject_path):
        for activity in activities:
            activity_folder = os.path.join(subject_path, f"Activity {activity}")
            
            if os.path.isdir(activity_folder):
                gyro_file = os.path.join(activity_folder, 'WEAR_GYRO.csv')
                acg_file = os.path.join(activity_folder, 'WEAR_ACG.csv')
                
                activity_name = activity_list[int(activity) - 1]
                
                if os.path.exists(gyro_file):
                    gyro_df = pd.read_csv(gyro_file, delimiter=';')
                    gyro_df = gyro_df.drop(columns=['a'], errors='ignore')
                    gyro_df['Activity_Label'] = activity_name
                    gyro_data.append(gyro_df)
                    print(f"Loaded {gyro_file}")
                    print(f"Head of {gyro_file}:")
                    print(gyro_df.head()) 
                else:
                    print(f"Missing {gyro_file}")
                
                if os.path.exists(acg_file):
                    acg_df = pd.read_csv(acg_file, delimiter=';')
                    acg_df = acg_df.drop(columns=['a'], errors='ignore')
                    acg_df['Activity_Label'] = activity_name
                    acg_data.append(acg_df)
                    print(f"Loaded {acg_file}")
                    print(f"Head of {acg_file}:")
                    print(acg_df.head())  
                else:
                    print(f"Missing {acg_file}")

print(f"Number of gyro data frames loaded: {len(gyro_data)}")
print(f"Number of acg data frames loaded: {len(acg_data)}")

Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv
Head of /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv:
       t_Android         t_unix         x         y         z  \
0  7862086973596  1739025164003  0.009774  0.017104 -0.003665   
1  7862096883362  1739025164003  0.009774  0.017104 -0.003665   
2  7862106823645  1739025164003  0.009774  0.017104 -0.003665   
3  7862116763928  1739025164059  0.009774  0.017104 -0.003665   
4  7862126773694  1739025164059  0.009774  0.017104 -0.003665   

          Activity_Label  
0  Seated Leg Extensions  
1  Seated Leg Extensions  
2  Seated Leg Extensions  
3  Seated Leg Extensions  
4  Seated Leg Extensions  
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_ACG.csv

## Loading, Labeling, and Previewing Sensor Data with Renamed Columns And Dropping "t_Android" Column

In [10]:
gyro_data = []
acg_data = []

root_folder = '/data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects'

subject_activities = {
    'Subject 1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 2': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 3': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 4': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 5': ['2', '3', '4', '5'],
    'Subject 6': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
}

activity_list = [
    "Seated Leg Extensions",
    "Marching in Place",
    "Wall Push-ups",
    "Seated Boxing Hooks",
    "Standing Heel-to-Toe Walk",
    "Side-Stepping",
    "Seated Side Bends",
    "Seated Medicine Ball Twists",
    "Chair Squats",
    "Light Stationary Cycling"
]

for subject_folder, activities in subject_activities.items():
    subject_path = os.path.join(root_folder, subject_folder)
    
    if os.path.isdir(subject_path):
        for activity in activities:
            activity_folder = os.path.join(subject_path, f"Activity {activity}")
            
            if os.path.isdir(activity_folder):
                gyro_file = os.path.join(activity_folder, 'WEAR_GYRO.csv')
                acg_file = os.path.join(activity_folder, 'WEAR_ACG.csv')
                
                activity_name = activity_list[int(activity) - 1]
                
                if os.path.exists(gyro_file):
                    gyro_df = pd.read_csv(gyro_file, delimiter=';')
                    gyro_df = gyro_df.drop(columns=['a', 't_Android'], errors='ignore')
                    gyro_df['Activity_Label'] = activity_name
                    gyro_df = gyro_df.rename(columns={
                        'x': 'Gyro X (°/s)',
                        'y': 'Gyro Y (°/s)',
                        'z': 'Gyro Z (°/s)',
                        't_unix': 'Timestamp (microseconds)'
                    })
                    gyro_data.append(gyro_df)
                    print(f"Loaded {gyro_file}")
                    print(f"Head of {gyro_file}:")
                    print(gyro_df.head()) 
                else:
                    print(f"Missing {gyro_file}")
                
                if os.path.exists(acg_file):
                    acg_df = pd.read_csv(acg_file, delimiter=';')
                    acg_df = acg_df.drop(columns=['a', 't_Android'], errors='ignore')
                    acg_df['Activity_Label'] = activity_name
                    acg_df = acg_df.rename(columns={
                        'x': 'Accel X (g)',
                        'y': 'Accel Y (g)',
                        'z': 'Accel Z (g)',
                        't_unix': 'Timestamp (microseconds)'
                    })
                    acg_data.append(acg_df)
                    print(f"Loaded {acg_file}")
                    print(f"Head of {acg_file}:")
                    print(acg_df.head())  
                else:
                    print(f"Missing {acg_file}")

print(f"Number of gyro data frames loaded: {len(gyro_data)}")
print(f"Number of acg data frames loaded: {len(acg_data)}")

Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv
Head of /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv:
   Timestamp (microseconds)  Gyro X (°/s)  Gyro Y (°/s)  Gyro Z (°/s)  \
0             1739025164003      0.009774      0.017104     -0.003665   
1             1739025164003      0.009774      0.017104     -0.003665   
2             1739025164003      0.009774      0.017104     -0.003665   
3             1739025164059      0.009774      0.017104     -0.003665   
4             1739025164059      0.009774      0.017104     -0.003665   

          Activity_Label  
0  Seated Leg Extensions  
1  Seated Leg Extensions  
2  Seated Leg Extensions  
3  Seated Leg Extensions  
4  Seated Leg Extensions  
Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All 

## Loading, Labeling, and Renaming Columns in Sensor Data with Subject and Activity Labels

In [11]:
gyro_data = []
acg_data = []

root_folder = '/data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects'

subject_activities = {
    'Subject 1': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 2': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 3': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 4': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    'Subject 5': ['2', '3', '4', '5'],
    'Subject 6': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
}

activity_list = [
    "Seated Leg Extensions",
    "Marching in Place",
    "Wall Push-ups",
    "Seated Boxing Hooks",
    "Standing Heel-to-Toe Walk",
    "Side-Stepping",
    "Seated Side Bends",
    "Seated Medicine Ball Twists",
    "Chair Squats",
    "Light Stationary Cycling"
]

for subject_folder, activities in subject_activities.items():
    subject_path = os.path.join(root_folder, subject_folder)
    
    if os.path.isdir(subject_path):
        for activity in activities:
            activity_folder = os.path.join(subject_path, f"Activity {activity}")
            
            if os.path.isdir(activity_folder):
                gyro_file = os.path.join(activity_folder, 'WEAR_GYRO.csv')
                acg_file = os.path.join(activity_folder, 'WEAR_ACG.csv')
                
                activity_name = activity_list[int(activity) - 1]
                
                subject_id = subject_folder
                
                if os.path.exists(gyro_file):
                    gyro_df = pd.read_csv(gyro_file, delimiter=';')
                    gyro_df = gyro_df.drop(columns=['a', 't_Android'], errors='ignore')
                    gyro_df['Activity_Label'] = activity_name
                    gyro_df['Subject_ID'] = subject_id
                    gyro_df = gyro_df.rename(columns={
                        'x': 'Gyro X (°/s)',
                        'y': 'Gyro Y (°/s)',
                        'z': 'Gyro Z (°/s)',
                        't_unix': 'Timestamp (microseconds)'
                    })
                    gyro_data.append(gyro_df)
                    print(f"Loaded {gyro_file}")
                    print(f"Head of {gyro_file}:")
                    print(gyro_df.head()) 
                else:
                    print(f"Missing {gyro_file}")
                
                if os.path.exists(acg_file):
                    acg_df = pd.read_csv(acg_file, delimiter=';')
                    acg_df = acg_df.drop(columns=['a', 't_Android'], errors='ignore')
                    acg_df['Activity_Label'] = activity_name
                    acg_df['Subject_ID'] = subject_id
                    acg_df = acg_df.rename(columns={
                        'x': 'Accel X (g)',
                        'y': 'Accel Y (g)',
                        'z': 'Accel Z (g)',
                        't_unix': 'Timestamp (microseconds)'
                    })
                    acg_data.append(acg_df)
                    print(f"Loaded {acg_file}")
                    print(f"Head of {acg_file}:")
                    print(acg_df.head())  
                else:
                    print(f"Missing {acg_file}")

print(f"Number of gyro data frames loaded: {len(gyro_data)}")
print(f"Number of acg data frames loaded: {len(acg_data)}")

Loaded /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv
Head of /data0/HAR-datasets/PLHI Data for All Trial Subjects-20250212T053126Z-001/PLHI Data for All Trial Subjects/Subject 1/Activity 1/WEAR_GYRO.csv:
   Timestamp (microseconds)  Gyro X (°/s)  Gyro Y (°/s)  Gyro Z (°/s)  \
0             1739025164003      0.009774      0.017104     -0.003665   
1             1739025164003      0.009774      0.017104     -0.003665   
2             1739025164003      0.009774      0.017104     -0.003665   
3             1739025164059      0.009774      0.017104     -0.003665   
4             1739025164059      0.009774      0.017104     -0.003665   

          Activity_Label Subject_ID  
0  Seated Leg Extensions  Subject 1  
1  Seated Leg Extensions  Subject 1  
2  Seated Leg Extensions  Subject 1  
3  Seated Leg Extensions  Subject 1  
4  Seated Leg Extensions  Subject 1  
Loaded /data0/HAR-datasets/PLHI D

## Combining and Saving Gyro and ACG Data for All Subjects

In [12]:
combined_data_all_subjects = []

for i in range(len(gyro_data)):
    gyro_df = gyro_data[i]
    acg_df = acg_data[i]
    
    gyro_df = gyro_df.rename(columns={'Timestamp (microseconds)': 'Timestamp_Gyro'})
    acg_df = acg_df.rename(columns={'Timestamp (microseconds)': 'Timestamp_Accel'})
    
    gyro_df['Timestamp_Gyro'] = gyro_df['Timestamp_Gyro'].astype(float)
    acg_df['Timestamp_Accel'] = acg_df['Timestamp_Accel'].astype(float)
    
    merged_df = pd.merge(gyro_df, acg_df, left_on='Timestamp_Gyro', right_on='Timestamp_Accel', how='inner')
    
    combined_data_all_subjects.append(merged_df)

final_combined_df_all_subjects = pd.concat(combined_data_all_subjects, ignore_index=True)

final_combined_df_all_subjects['Timestamp_Gyro'] = final_combined_df_all_subjects['Timestamp_Gyro'].apply(lambda x: f'{x:.0f}')
final_combined_df_all_subjects['Timestamp_Accel'] = final_combined_df_all_subjects['Timestamp_Accel'].apply(lambda x: f'{x:.0f}')

combined_csv_path = '/data0/HAR-datasets/Combined_Gyro_Acg_Data_Subjects_1_to_6.csv'
final_combined_df_all_subjects.to_csv(combined_csv_path, index=False)

total_rows, total_columns = final_combined_df_all_subjects.shape

print(f"Combined data for subjects 1-6 saved to {combined_csv_path}")
print(f"Total rows: {total_rows}, Total columns: {total_columns}")
print(final_combined_df_all_subjects.head())

Combined data for subjects 1-6 saved to /data0/HAR-datasets/Combined_Gyro_Acg_Data_Subjects_1_to_6.csv
Total rows: 944460, Total columns: 12
  Timestamp_Gyro Gyro X (°/s)  Gyro Y (°/s)  Gyro Z (°/s)  \
0  1739025164003     0.009774      0.017104     -0.003665   
1  1739025164003     0.009774      0.017104     -0.003665   
2  1739025164003     0.009774      0.017104     -0.003665   
3  1739025164003     0.009774      0.017104     -0.003665   
4  1739025164003     0.009774      0.017104     -0.003665   

        Activity_Label_x Subject_ID_x Timestamp_Accel  Accel X (g)  \
0  Seated Leg Extensions    Subject 1   1739025164003    -2.533065   
1  Seated Leg Extensions    Subject 1   1739025164003    -2.533065   
2  Seated Leg Extensions    Subject 1   1739025164003    -2.533065   
3  Seated Leg Extensions    Subject 1   1739025164003    -2.533065   
4  Seated Leg Extensions    Subject 1   1739025164003    -2.533065   

  Accel Y (g)  Accel Z (g)       Activity_Label_y Subject_ID_y  
0   -4

## Compressing and Saving Combined Data as ZIP File

In [13]:
csv_file_path = '/data0/HAR-datasets/Combined_Gyro_Acg_Data_Subjects_1_to_6.csv'
zip_file_path = '/data0/HAR-datasets/Combined_Gyro_Acg_Data_Subjects_1_to_6.zip'

with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_STORED) as zipf:
    zipf.write(csv_file_path, arcname='Combined_Gyro_Acg_Data_Subjects_1_to_6.csv')

print(f"ZIP file saved to {zip_file_path}")

ZIP file saved to /data0/HAR-datasets/Combined_Gyro_Acg_Data_Subjects_1_to_6.zip
