In [66]:
import pandas as pd
import matplotlib.pyplot as plt

path = "../weee/dataset/"

# Two subjects are neglected because of study issues.
EXCLUDED_SUBJECTS = (6, 14)

# Column order of the combined frame and of the exported CSV.
COMBINED_COLUMNS = ['user_id', 'x', 'y', 'z', 'timestamp']


def load_subject_acc(subject_id, labels, root=None):
    """Load one subject's E4 accelerometer recording as a 50 Hz frame.

    Reads ``{root}/P{subject_id:02d}/E4/ACC.csv``, rebuilds a time axis
    from the start timestamp stored in the file, resamples onto a 20 ms
    grid, keeps only the span covered by the subject's rows in ``labels``
    and scales the axes by 1/64.

    Args:
        subject_id: Integer subject number (used for the ``Pxx`` folder name
            and to select rows of ``labels``).
        labels: DataFrame with ``user_id``, ``start_timestamp`` and
            ``end_timestamp`` columns.
        root: Dataset directory; defaults to the module-level ``path``.

    Returns:
        DataFrame with columns ``timestamp, x, y, z, user_id`` whose length
        is a multiple of 32.

    Raises:
        IndexError: If ``labels`` has no row for ``subject_id``.
    """
    if root is None:
        root = path

    raw = pd.read_csv(f'{root}/P{subject_id:02d}/E4/ACC.csv', header=None, names=['x', 'y', 'z'])

    # The first row holds the recording start as epoch seconds.
    start = pd.to_datetime(raw.iloc[0]['x'] * 1000, unit='ms')

    # Drop the two leading non-sample rows. Copy so the frame owns its data
    # and later mutation does not act on a view of `raw`.
    samples = raw.iloc[2:].copy()

    # Rebuild the time axis from the start timestamp at 32 Hz (31.25 ms step).
    samples.index = pd.date_range(
        start=start, periods=len(samples), freq='31.25ms', name='timestamp'
    )

    # Resample onto a 50 Hz grid. Each grid point takes the most recent
    # sample (forward fill / sample-and-hold); no interpolation is done.
    samples = samples.resample('20ms').ffill()

    # Keep only the span between the subject's first start_timestamp and
    # last end_timestamp in the label table.
    subject_labels = labels[labels['user_id'] == subject_id]
    window_start = pd.to_datetime(subject_labels['start_timestamp'].iloc[0])
    window_end = pd.to_datetime(subject_labels['end_timestamp'].iloc[-1])
    samples = samples.loc[window_start:window_end]

    # Scale all three axes by 1/64.
    # NOTE(review): presumably converts raw counts to g - confirm the unit.
    samples = samples / 64

    samples['user_id'] = subject_id

    # Turn the timestamp index back into a regular (first) column.
    samples = samples.reset_index()

    # Drop the trailing rows so the length is a multiple of 32.
    # NOTE(review): the data is at 50 Hz here while 32 matches the original
    # sampling rate - confirm that 32 is the intended block size.
    remainder = len(samples) % 32
    if remainder:
        samples = samples.iloc[:-remainder]

    return samples


if __name__ == "__main__":
    df_labels = pd.read_csv(f'{path}/VO2.csv')

    # Collect the per-subject frames and concatenate once; growing a frame
    # with pd.concat inside the loop re-copies all earlier rows every time.
    subject_frames = [
        load_subject_acc(subject_id, df_labels)
        for subject_id in range(1, 18)
        if subject_id not in EXCLUDED_SUBJECTS
    ]
    df_all = pd.concat(subject_frames)[COMBINED_COLUMNS]

    # Save to CSV file
    df_all.to_csv(f'{path}/combined_e4_acc.csv', index=False)
    # Print the shape of df_all
    print(df_all.shape)

(1443968, 5)
