# In [1]:
import pandas as pd
from glob import glob

# --------------------------------------------------------------
# Load one accelerometer / gyroscope recording for inspection
# --------------------------------------------------------------
single_file_acc = pd.read_csv(
    "data/raw/MetaMotion/A-bench-heavy_MetaWear_2019-01-14T14.22.49.165_C42732BE255C_Accelerometer_12.500Hz_1.4.4.csv"
)
single_file_gyr = pd.read_csv(
    "data/raw/MetaMotion/A-bench-heavy_MetaWear_2019-01-14T14.22.49.165_C42732BE255C_Gyroscope_25.000Hz_1.4.4.csv"
)
single_file_acc  # bare expression: only displays when run interactively

# --------------------------------------------------------------
# Collect every CSV recording under data/raw/MetaMotion
# --------------------------------------------------------------
files = glob("data/raw/MetaMotion/*.csv")
len(files)  # bare expression: number of recordings found

# --------------------------------------------------------------
# Sample path and directory prefix used for filename parsing
# --------------------------------------------------------------
f = files[0]
# NOTE: the prefix ends with a backslash, i.e. it matches the paths glob
# returns on Windows for the pattern above.
datapath = "data/raw/MetaMotion\\"
# --------------------------------------------------------------
# Turn into function
# --------------------------------------------------------------
def _parse_recording_name(name):
    """Return ``(participant, label, category)`` parsed from a bare file name.

    The name is expected to look like
    ``<participant>-<label>-<category>[digit][_MetaWear_...]``; an
    ``IndexError`` is raised when fewer than three ``-`` separated fields
    are present.
    """
    fields = name.split("-")
    participant = fields[0]
    label = fields[1]
    # Cut the "_MetaWear..." tail as a real suffix first, then drop a trailing
    # set digit. str.rstrip takes a *character set*, so chaining two rstrip
    # calls left e.g. "heavy3_MetaWear_2019" as "heavy3".
    category = fields[2].partition("_MetaWear")[0].rstrip("123")
    return participant, label, category


def _combine_recordings(frames):
    """Concatenate per-file frames and index them by the epoch timestamp.

    Returns an empty DataFrame when ``frames`` is empty. Otherwise the
    ``"epoch (ms)"`` column becomes a datetime index and the three raw time
    columns are dropped (``KeyError`` if any of them is missing).
    """
    if not frames:
        return pd.DataFrame()
    combined = pd.concat(frames)
    combined.index = pd.to_datetime(combined["epoch (ms)"], unit="ms")
    return combined.drop(columns=["epoch (ms)", "time (01:00)", "elapsed (s)"])


def read_data_from_files(files):
    """Read MetaMotion CSV recordings into accelerometer and gyroscope frames.

    Args:
        files: Iterable of CSV paths. Each file name must follow
            ``<participant>-<label>-<category>...`` and contain either
            ``Accelerometer`` or ``Gyroscope``.

    Returns:
        A tuple ``(acc_df, gyr_df)``. Each frame is indexed by the timestamp
        derived from ``"epoch (ms)"`` and carries the sensor axes plus
        ``participant``, ``label``, ``category`` and ``Set`` columns. ``Set``
        is a per-sensor counter starting at 1 that increases with every file
        of that sensor, in the order given. A sensor with no matching files
        yields an empty DataFrame.

    Raises:
        IndexError: If a file name has fewer than three ``-`` separated fields.
        KeyError: If a recording lacks one of the expected time columns.
    """
    acc_frames = []
    gyr_frames = []

    for path in files:
        # Work on the bare file name so neither the directory prefix nor the
        # platform's path separator can leak into the parsed fields.
        name = path.replace("\\", "/").rsplit("/", 1)[-1]
        participant, label, category = _parse_recording_name(name)

        df = pd.read_csv(path)
        df["participant"] = participant
        df["label"] = label
        df["category"] = category

        # assign() returns a copy, so each list entry keeps its own Set value.
        if "Accelerometer" in name:
            acc_frames.append(df.assign(Set=len(acc_frames) + 1))

        if "Gyroscope" in name:
            gyr_frames.append(df.assign(Set=len(gyr_frames) + 1))

    # One concat per sensor instead of growing a DataFrame inside the loop.
    return _combine_recordings(acc_frames), _combine_recordings(gyr_frames)

acc_df, gyr_df = read_data_from_files(files)

gyr_df  # bare expression: only displays when run interactively

# --------------------------------------------------------------
# Merge accelerometer and gyroscope readings
# --------------------------------------------------------------

# Only the first three accelerometer columns are taken; the remaining
# columns come from the gyroscope frame. Rows are aligned on the index.
data_merged = pd.concat(
    [acc_df.iloc[:, :3], gyr_df],
    axis=1,
)
data_merged.dropna()  # result is not stored; data_merged keeps its NaN rows


# --------------------------------------------------------------
# Resample to a common 200 ms grid
# --------------------------------------------------------------

# Sensor rates taken from the file names:
#   Accelerometer: 12.500 Hz
#   Gyroscope:     25.000 Hz
data_merged = data_merged.rename(
    columns={
        "x-axis (g)": "acceleration_x",
        "y-axis (g)": "acceleration_y",
        "z-axis (g)": "acceleration_z",
        "x-axis (deg/s)": "gyroscope_x",
        "y-axis (deg/s)": "gyroscope_y",
        "z-axis (deg/s)": "gyroscope_z",
    }
)

# Per-column aggregation: sensor axes are averaged within each bin, while
# the remaining columns take the last value seen in the bin.
sampling = dict.fromkeys(
    [
        "acceleration_x",
        "acceleration_y",
        "acceleration_z",
        "gyroscope_x",
        "gyroscope_y",
        "gyroscope_z",
    ],
    "mean",
)
sampling.update(
    dict.fromkeys(["participant", "label", "category", "Set"], "last")
)

# Trial run on the first 1000 rows; the result is not stored.
data_merged.iloc[:1000].resample(rule="200ms").agg(sampling)


# Resample one calendar day at a time, dropping incomplete bins per day,
# then stitch the daily results back together.
days = [
    day_frame
    for _, day_frame in data_merged.groupby(pd.Grouper(freq="D"))
]
data_resampled = pd.concat(
    [
        day_frame.resample(rule="200ms").agg(sampling).dropna()
        for day_frame in days
    ]
)
data_resampled.info()

# Cast Set to an integer dtype (done after info() so the printed summary
# still reflects the pre-cast frame).
data_resampled = data_resampled.astype({"Set": int})

# --------------------------------------------------------------
# Export dataset
# --------------------------------------------------------------

data_resampled.to_pickle("data/processed/data_processed.pkl")

# Output of data_resampled.info():
#
# <class 'pandas.core.frame.DataFrame'>
# DatetimeIndex: 9009 entries, 2019-01-11 15:08:05.200000 to 2019-01-20 17:33:27.800000
# Data columns (total 10 columns):
#  #   Column          Non-Null Count  Dtype
# ---  ------          --------------  -----
#  0   acceleration_x  9009 non-null   float64
#  1   acceleration_y  9009 non-null   float64
#  2   acceleration_z  9009 non-null   float64
#  3   gyroscope_x     9009 non-null   float64
#  4   gyroscope_y     9009 non-null   float64
#  5   gyroscope_z     9009 non-null   float64
#  6   participant     9009 non-null   object
#  7   label           9009 non-null   object
#  8   category        9009 non-null   object
#  9   Set             9009 non-null   float64
# dtypes: float64(7), object(3)
# memory usage: 774.2+ KB
