In [1]:
import pandas as pd
from glob import glob
import os

In [2]:
# Directory holding the raw MetaMotion CSV exports.
# The DATA_PATH environment variable, when set, takes precedence over the default below.
data_path = os.environ.get(
    "DATA_PATH",
    "C:/Users/prajualr/PycharmProjects/Strength_training/Data/raw/MetaMotion",
)

In [5]:
# Smoke test: load one known accelerometer export before processing the whole directory
single_file_name = "A-bench-heavy_MetaWear_2019-01-14T14.22.49.165_C42732BE255C_Accelerometer_12.500Hz_1.4.4.csv"
single_file_path = os.path.join(data_path, single_file_name)
try:
    single_file_acc = pd.read_csv(single_file_path)
except FileNotFoundError:
    # Keep the notebook runnable without the sample file: report it and continue with an empty frame
    print(f"File not found: {single_file_path}")
    single_file_acc = pd.DataFrame()
else:
    print("Successfully read file:", single_file_path)

Successfully read file: C:/Users/prajualr/PycharmProjects/Strength_training/Data/raw/MetaMotion\A-bench-heavy_MetaWear_2019-01-14T14.22.49.165_C42732BE255C_Accelerometer_12.500Hz_1.4.4.csv


In [7]:
# Read all CSV files in the directory
# glob returns matches in arbitrary, OS-dependent order. The per-file `set`
# numbers assigned in read_data_from_files follow the order of this list, so
# sort it to make those numbers reproducible across machines and runs.
files = sorted(glob(os.path.join(data_path, "*.csv")))
print(f"Found {len(files)} files in directory: {data_path}")

Found 187 files in directory: C:/Users/prajualr/PycharmProjects/Strength_training/Data/raw/MetaMotion


In [9]:
def _index_by_epoch(frames):
    """Concatenate per-file frames, index them by epoch timestamp and drop the raw time columns."""
    if not frames:
        # Nothing was read for this sensor, so there is no "epoch (ms)" column to index by.
        return pd.DataFrame()
    # A single concat avoids re-copying the accumulated data once per file.
    combined = pd.concat(frames)
    combined.index = pd.to_datetime(combined["epoch (ms)"], unit="ms")
    return combined.drop(columns=["epoch (ms)", "time (01:00)", "elapsed (s)"])


def read_data_from_files(files):
    """
    Reads data from all CSV files and processes it into accelerometer and gyroscope DataFrames.

    Metadata is parsed from each filename, which is split on "-":
    the first field is the participant, the second the label, and the third
    (up to its first "_", with trailing digits 1/2/3 removed) the category.
    Only the filename is inspected, so the result does not depend on the
    directory the files live in or on the path separator in use.

    Parameters
    ----------
    files : iterable of str
        Paths of the CSV files to load. Files whose name contains
        "Accelerometer" go to the accelerometer frame, files whose name
        contains "Gyroscope" go to the gyroscope frame.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(acc_df, gyr_df)``. Each frame is indexed by the timestamp derived
        from "epoch (ms)" and carries the added columns "participant",
        "label", "category" and "set". "set" is a 1-based counter per sensor
        type that follows the order of ``files``. A frame is empty when no
        file of that sensor type could be read.

    Notes
    -----
    A file that cannot be parsed or read is reported with ``print`` and
    skipped; it does not consume a "set" number.
    """
    acc_frames = []
    gyr_frames = []

    for f in files:
        try:
            # Reduce the path to its filename, accepting both "/" and "\\" separators,
            # so that dashes or sensor names in directory names cannot affect parsing.
            filename = f.replace("\\", "/").rsplit("/", 1)[-1]

            # Extract metadata from filename
            fields = filename.split("-")
            participant = fields[0]
            label = fields[1]
            # Cut at the first "_" instead of rstrip("_MetaWear_2019"): rstrip removes a
            # character set, not a suffix, and could eat into the category name itself.
            category = fields[2].split("_", 1)[0].rstrip("123")

            # Read CSV file
            df = pd.read_csv(f)
            df["participant"] = participant
            df["label"] = label
            df["category"] = category

            # Separate accelerometer and gyroscope data
            if "Accelerometer" in filename:
                acc_frames.append(df.assign(set=len(acc_frames) + 1))
            if "Gyroscope" in filename:
                gyr_frames.append(df.assign(set=len(gyr_frames) + 1))
        except Exception as e:
            # Deliberate best effort: one bad file must not abort the whole load.
            print(f"Error processing file {f}: {e}")

    return _index_by_epoch(acc_frames), _index_by_epoch(gyr_frames)

In [11]:
# Load every discovered file into one accelerometer frame and one gyroscope frame
acc_df, gyr_df = read_data_from_files(files=files)

In [12]:
# Combine both sensors side by side, aligned on the timestamp index.
# Only the first three accelerometer columns are taken; the remaining
# columns (participant, label, category, set) come from the gyroscope frame.
acc_axes = acc_df.iloc[:, :3]
data_merged = pd.concat([acc_axes, gyr_df], axis=1)
data_merged.columns = [
    "acc_x",
    "acc_y",
    "acc_z",
    "gyr_x",
    "gyr_y",
    "gyr_z",
    "participant",
    "label",
    "category",
    "set",
]

In [13]:
# Per-column aggregation used when resampling to 200ms intervals:
# sensor readings are averaged, metadata keeps the last value seen in each bin.
sensor_columns = ["acc_x", "acc_y", "acc_z", "gyr_x", "gyr_y", "gyr_z"]
metadata_columns = ["participant", "label", "category", "set"]
sampling = {
    **dict.fromkeys(sensor_columns, "mean"),
    **dict.fromkeys(metadata_columns, "last"),
}

In [17]:
# Resample one calendar day at a time and stitch the days back together.
# NOTE(review): presumably done per day so that the gaps between recording
# days do not produce long runs of empty 200ms bins - confirm.
days = []
for _, day_frame in data_merged.groupby(pd.Grouper(freq="D")):
    days.append(day_frame)

resampled_days = [
    day_frame.resample(rule="200ms").apply(sampling).dropna()
    for day_frame in days
]
data_resampled = pd.concat(resampled_days)
# Restore an integer dtype for the set identifier
data_resampled = data_resampled.astype({"set": "int"})

In [19]:
# Preview the resampled frame (the notebook renders it as the cell output)
data_resampled

Unnamed: 0_level_0,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,participant,label,category,set
epoch (ms),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-11 15:08:05.200,0.013500,0.977000,-0.071000,-1.8904,2.4392,0.9388,B,bench,heavy,30
2019-01-11 15:08:05.400,-0.001500,0.970500,-0.079500,-1.6826,-0.8904,2.1708,B,bench,heavy,30
2019-01-11 15:08:05.600,0.001333,0.971667,-0.064333,2.5608,-0.2560,-1.4146,B,bench,heavy,30
2019-01-11 15:08:05.800,-0.024000,0.957000,-0.073500,8.0610,-4.5244,-2.0730,B,bench,heavy,30
2019-01-11 15:08:06.000,-0.028000,0.957667,-0.115000,2.4390,-1.5486,-3.6098,B,bench,heavy,30
...,...,...,...,...,...,...,...,...,...,...
2019-01-20 17:33:27.000,-0.048000,-1.041500,-0.076500,1.4146,-5.6218,0.2926,E,row,medium,90
2019-01-20 17:33:27.200,-0.037000,-1.030333,-0.053333,-2.7684,-0.5854,2.2440,E,row,medium,90
2019-01-20 17:33:27.400,-0.060000,-1.031000,-0.082000,2.8416,-5.1342,-0.1220,E,row,medium,90
2019-01-20 17:33:27.600,-0.038667,-1.025667,-0.044667,-0.2318,0.2562,1.1220,E,row,medium,90


In [21]:
# Export the resampled dataset as CSV.
# NOTE(review): index=False means the timestamp index is not written to the
# file - confirm that consumers of this CSV do not need the timestamps.
csv_output_path = "../../Data/motion_dataset.csv"
data_resampled.to_csv(csv_output_path, index=False)

In [23]:
# Persist the processed frame as a pickle for later use
processed_pickle_path = "../../Data/interim/01_data_processed.pkl"
data_resampled.to_pickle(processed_pickle_path)
print("Data processing complete. Saved to 01_data_processed.pkl.")

Data processing complete. Saved to 01_data_processed.pkl.
