# Merge Accelerometer and Gyroscope datasets
This notebook scans all subfolders, reads `Accelerometer.csv` and `Gyroscope.csv` files, tags each row with `Activity` and `Sensor`, concatenates them into a single DataFrame, and saves both a full merged file and per-activity merged files.

Notes: It assumes folders start with activity names like `Jumping`, `Walking`, `Standing`, `Still`.

In [1]:
# Imports and workspace base path
import os
from pathlib import Path
import pandas as pd
import glob

BASE_DIR = Path(r'c:\Users\HP\Hidden-Markov-Models')
print('Base dir:', BASE_DIR)
# Find the Accelerometer and Gyroscope CSVs
acc_files = sorted(BASE_DIR.glob('**/Accelerometer.csv'))
gyro_files = sorted(BASE_DIR.glob('**/Gyroscope.csv'))
print(f'Found {len(acc_files)} Accelerometer files and {len(gyro_files)} Gyroscope files')
# show some example paths
for p in acc_files[:5]:
    print('ACC sample:', p)
for p in gyro_files[:5]:
    print('GYRO sample:', p)

Base dir: c:\Users\HP\Hidden-Markov-Models
Found 75 Accelerometer files and 75 Gyroscope files
ACC sample: c:\Users\HP\Hidden-Markov-Models\samples\Kanisa\Jumping_1-2025-10-24_13-03-55\Accelerometer.csv
ACC sample: c:\Users\HP\Hidden-Markov-Models\samples\Kanisa\Jumping_1-2025-10-25_05-20-38\Accelerometer.csv
ACC sample: c:\Users\HP\Hidden-Markov-Models\samples\Kanisa\Jumping_1-2025-10-25_05-28-36\Accelerometer.csv
ACC sample: c:\Users\HP\Hidden-Markov-Models\samples\Kanisa\Jumping_1-2025-10-25_05-36-41\Accelerometer.csv
ACC sample: c:\Users\HP\Hidden-Markov-Models\samples\Kanisa\Jumping_1-2025-10-25_05-43-47\Accelerometer.csv
GYRO sample: c:\Users\HP\Hidden-Markov-Models\samples\Kanisa\Jumping_1-2025-10-24_13-03-55\Gyroscope.csv
GYRO sample: c:\Users\HP\Hidden-Markov-Models\samples\Kanisa\Jumping_1-2025-10-25_05-20-38\Gyroscope.csv
GYRO sample: c:\Users\HP\Hidden-Markov-Models\samples\Kanisa\Jumping_1-2025-10-25_05-28-36\Gyroscope.csv
GYRO sample: c:\Users\HP\Hidden-Markov-Models\samp

In [2]:
def activity_from_path(p: Path) -> str:
    # Infer activity by the folder name prefix (e.g., 'Jumping_1-...')
    name = p.parent.name
    for act in ('Jumping', 'Walking', 'Standing', 'Still'):
        if name.startswith(act):
            return act
    # fallback: try splitting on underscore or dash
    for sep in ['_', '-']:
        if sep in name:
            return name.split(sep)[0]
    return name

def read_and_tag(file_path: Path, sensor_name: str) -> pd.DataFrame:
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f'Failed to read {file_path}:', e)
        return pd.DataFrame()
    df = df.copy()
    df['Sensor'] = sensor_name
    df['SourceFile'] = str(file_path)
    df['Activity'] = activity_from_path(file_path)
    return df

# Read all files into a list of DataFrames
dfs = []
for p in acc_files:
    dfs.append(read_and_tag(p, 'Accelerometer'))
for p in gyro_files:
    dfs.append(read_and_tag(p, 'Gyroscope'))

# Concatenate all (ignore empty frames)
if len(dfs) == 0:
    print('No dataframes to concatenate')
else:
    merged = pd.concat([d for d in dfs if not d.empty], ignore_index=True, sort=False)
    print('Merged shape:', merged.shape)
    display(merged.head())

Merged shape: (69158, 8)


Unnamed: 0,time,seconds_elapsed,z,y,x,Sensor,SourceFile,Activity
0,1761311035749738500,0.209739,-0.073487,0.131348,0.204234,Accelerometer,c:\Users\HP\Hidden-Markov-Models\samples\Kanis...,Jumping
1,1761311035768349700,0.22835,-0.067876,0.06642,0.14014,Accelerometer,c:\Users\HP\Hidden-Markov-Models\samples\Kanis...,Jumping
2,1761311035786962400,0.246962,-0.153368,-0.093135,0.078951,Accelerometer,c:\Users\HP\Hidden-Markov-Models\samples\Kanis...,Jumping
3,1761311035805573600,0.265574,-0.217052,-0.173825,0.062118,Accelerometer,c:\Users\HP\Hidden-Markov-Models\samples\Kanis...,Jumping
4,1761311035824186400,0.284186,-0.261741,-0.193203,0.045196,Accelerometer,c:\Users\HP\Hidden-Markov-Models\samples\Kanis...,Jumping


In [3]:
# Save merged outputs into a data/ directory
OUT_DIR = BASE_DIR / 'data'
OUT_DIR.mkdir(exist_ok=True)
if 'merged' in globals() and not merged.empty:
    full_path = OUT_DIR / 'merged_all_sensors.csv'
    merged.to_csv(full_path, index=False)
    print('Saved full merged to', full_path)
    # save per-activity files
    for act, sub in merged.groupby('Activity'):
        fn = OUT_DIR / f'merged_{act}.csv'
        sub.to_csv(fn, index=False)
        print('Saved', fn, 'shape=', sub.shape)
else:
    print('No merged dataframe available to save')

Saved full merged to c:\Users\HP\Hidden-Markov-Models\data\merged_all_sensors.csv
Saved c:\Users\HP\Hidden-Markov-Models\data\merged_Jumping.csv shape= (12207, 8)
Saved c:\Users\HP\Hidden-Markov-Models\data\merged_Rene-jumping-2025-10-29.csv shape= (2701, 8)
Saved c:\Users\HP\Hidden-Markov-Models\data\merged_Rene-running-2025-10-29.csv shape= (3118, 8)
Saved c:\Users\HP\Hidden-Markov-Models\data\merged_Rene-standing-2025-10-29.csv shape= (2732, 8)
Saved c:\Users\HP\Hidden-Markov-Models\data\merged_Rene-still-2025-10-29.csv shape= (2737, 8)
Saved c:\Users\HP\Hidden-Markov-Models\data\merged_Standing.csv shape= (15164, 8)
Saved c:\Users\HP\Hidden-Markov-Models\data\merged_Still.csv shape= (12915, 8)
Saved c:\Users\HP\Hidden-Markov-Models\data\merged_Walking.csv shape= (17584, 8)
