In [1]:
from glob import glob
import pandas as pd

def _index_by_epoch(frames):
    """Concatenate per-file frames, index them by timestamp and drop the raw time columns."""
    if not frames:
        # No file of this sensor type was supplied: return an empty frame
        # instead of failing on the missing "epoch (ms)" column.
        return pd.DataFrame(index=pd.DatetimeIndex([], name="epoch (ms)"))

    combined = pd.concat(frames)
    combined.index = pd.to_datetime(combined["epoch (ms)"], unit="ms")
    return combined.drop(columns=["epoch (ms)", "time (01:00)", "elapsed (s)"])


def read_data(files):
    """Read raw sensor CSV files into one accelerometer and one gyroscope frame.

    Metadata is parsed from each file *name* (never from its directories),
    which is split on "-": the first part is stored as ``participant``, the
    second as ``label`` and the third, with any trailing "1"/"2"/"3"
    characters stripped, as ``category``. Both "/" and "\\" are accepted as
    path separators, so paths produced on Windows and on POSIX systems parse
    identically.

    Files whose name contains "Accelerometer" go to the first returned frame,
    files whose name contains "Gyroscope" to the second. Within each sensor
    type the files are numbered 1, 2, 3, ... in the order given and that
    number is stored in the ``set`` column.

    Args:
        files: Iterable of CSV file paths.

    Returns:
        Tuple ``(acc_df, gyr_df)``. Each frame is indexed by the
        "epoch (ms)" column converted to datetimes, and the columns
        "epoch (ms)", "time (01:00)" and "elapsed (s)" are removed. A frame
        is empty when no file of that sensor type was supplied.

    Raises:
        IndexError: A file name has fewer than three "-"-separated parts.
        KeyError: A loaded file lacks one of the three time columns.
    """
    acc_frames = []
    gyr_frames = []

    for f in files:
        # Normalise separators before taking the last path component so the
        # parsing does not depend on the OS that produced the path.
        filename = f.replace("\\", "/").rsplit("/", 1)[-1]
        parts = filename.split("-")
        participant = parts[0]
        label = parts[1]
        category = parts[2].rstrip("123")

        df = pd.read_csv(f)
        df["participant"] = participant
        df["label"] = label
        df["category"] = category

        # Collect frames in lists and concatenate once at the end; growing a
        # DataFrame with concat inside the loop copies all rows every time.
        # assign() returns a new frame, so the two lists never share an object.
        if "Accelerometer" in filename:
            acc_frames.append(df.assign(set=len(acc_frames) + 1))

        if "Gyroscope" in filename:
            gyr_frames.append(df.assign(set=len(gyr_frames) + 1))

    return _index_by_epoch(acc_frames), _index_by_epoch(gyr_frames)

# Sort the matches: glob gives no ordering guarantee (it depends on the file
# system), and read_data numbers the "set" column by the order in which the
# files are visited. Sorting keeps the set ids reproducible across machines.
files = sorted(glob("../../data/raw/*.csv"))

In [2]:
def process_data(files = files):
    """Merge accelerometer and gyroscope readings and resample them to 150 ms bins.

    The first three columns of the accelerometer frame are joined
    column-wise (on the shared datetime index) with every column of the
    gyroscope frame, so the metadata columns come from the gyroscope side.
    The ten resulting columns are renamed by position.

    Args:
        files: CSV paths handed to ``read_data``. The default is the
            module-level ``files`` list as it was when this function was
            defined.

    Returns:
        DataFrame resampled to 150 ms bins: sensor axes are averaged,
        metadata columns keep the last value in each bin, bins containing
        any missing value are dropped, and ``set`` is cast to int.
    """
    sensor_columns = ["acc_x", "acc_y", "acc_z", "gyr_x", "gyr_y", "gyr_z"]
    meta_columns = ["participant", "label", "category", "set"]

    accelerometer, gyroscope = read_data(files)

    merged = pd.concat([accelerometer.iloc[:, :3], gyroscope], axis=1)
    merged.columns = sensor_columns + meta_columns

    # Key order matters here: it is the column order of the aggregated frame.
    rules = {
        **dict.fromkeys(sensor_columns, "mean"),
        **dict.fromkeys(meta_columns, "last"),
    }

    # Resample one calendar day at a time -- presumably to avoid building
    # empty 150 ms bins across the gaps between recording days (TODO confirm).
    daily_frames = [frame for _, frame in merged.groupby(pd.Grouper(freq="D"))]
    resampled_parts = []
    for day_frame in daily_frames:
        resampled_parts.append(day_frame.resample("150ms").apply(rules).dropna())

    result = pd.concat(resampled_parts)
    result["set"] = result["set"].astype("int")
    return result

In [3]:
def export_data(output_path):
    """Run the processing pipeline and write the resampled frame to a CSV file.

    Args:
        output_path: Destination passed unchanged to ``DataFrame.to_csv``.
    """
    # process_data() is called without arguments, so it uses its default file list.
    # NOTE(review): index=False leaves the datetime index out of the file --
    # confirm that downstream consumers do not need the timestamps.
    process_data().to_csv(output_path, index=False)

In [5]:
# Destination of the resampled CSV; the relative path is resolved against the
# kernel's current working directory.
output_path = "../../data/partially processed/resampled_data.csv"
# Runs the whole read -> merge -> resample pipeline and writes the file.
export_data(output_path)