# Merge sensor data with MET labels

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np


# Run the notebook from the project root so the relative dataset paths used
# below resolve. The root can be overridden with the DATAMINING_DIR environment
# variable; when it is unset the original hard-coded location is used.
os.chdir(os.environ.get("DATAMINING_DIR", "/home/cernerrood246/University/DataMining"))
# Show floats with two decimals in DataFrame output.
pd.set_option("display.precision", 2)

In [None]:
# Participant identifiers P1 .. P17; they double as the per-participant folder names.
participants_list = [f"P{number}" for number in range(1, 18)]

## Load Datasets

### Load MET

In [None]:
# One preprocessed MET table per participant, kept in participants_list order.
# NOTE: read_pickle executes pickled code, so only load files you trust.
met_pickle_paths = [f"preprocessed_dataset/{participant}/MET.pkl" for participant in participants_list]
MET_all_list = [pd.read_pickle(pickle_path) for pickle_path in met_pickle_paths]
# Preview the second participant's table.
MET_all_list[1]


### Load Fitbit

In [None]:
# Load every Fitbit CSV in fitbit_path, expand each to a 1-second grid and
# stack them into a single time-indexed frame (fitbit_df_all).
fitbit_path = "Fitbit_data/average_data_5S_new/average_data_5S_new"
fitbit_df_list = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the concatenated frame reproducible between runs and machines.
for file_name in sorted(os.listdir(fitbit_path)):
    temp_df = pd.read_csv(f"{fitbit_path}/{file_name}")
    # The timestamps are stored in the unnamed first CSV column: name it,
    # parse it and use it as the index.
    temp_df = temp_df.rename(columns={"Unnamed: 0": "Time"})
    temp_df["Time"] = pd.to_datetime(temp_df["Time"])
    temp_df = temp_df.set_index("Time")

    # Upsample to 1 second without filling. The lowercase "s" alias is used
    # because the uppercase "S" alias is deprecated in recent pandas releases.
    temp_df = temp_df.resample("1s").asfreq()

    # Carry each reading forward over at most the next 4 seconds
    # (presumably the samples are 5 s apart, per the folder name -- TODO confirm),
    # then drop the seconds that still have no heart-rate value.
    temp_df = temp_df.ffill(limit=4)
    temp_df = temp_df[temp_df["value__bpm"].notna()]

    fitbit_df_list.append(temp_df)

# NOTE(review): all files end up in one frame keyed only by timestamp; if two
# recordings overlap in time the later index-based merge would duplicate rows.
fitbit_df_all = pd.concat(fitbit_df_list, axis=0)
fitbit_df_all


### Load E4

In [None]:
# One E4 table per participant, appended in participants_list order.
E4_all_list = []
for participant in participants_list:
    E4_all_list.append(pd.read_pickle(f"merged_dataset/{participant}/E4.pkl"))


In [None]:
# Inspect the Delta_Time column of the second participant's E4 table.
second_e4_df = E4_all_list[1]
second_e4_df["Delta_Time"]

## Merge Datasets

In [19]:
# Build one merged frame per participant: the MET rows are the left side,
# Fitbit columns are matched on the timestamp index and E4 columns on "Time[s]".
merged_df_list = []
for position, met_frame in enumerate(MET_all_list):
    # Delta_Time and MET are removed from the E4 table before joining, so the
    # copies coming from the MET table are the ones that remain.
    e4_features = E4_all_list[position].drop(columns=["Delta_Time", "MET"])

    # Left joins: every MET row is kept even when a sensor has no match.
    with_fitbit = met_frame.merge(fitbit_df_all, how="left", left_index=True, right_index=True)
    with_e4 = with_fitbit.merge(e4_features, how="left", on="Time[s]")

    # A column-on-column merge discards the index, so restore the MET index.
    # This assignment requires the merged frame to have exactly as many rows
    # as the MET frame.
    with_e4.index = met_frame.index
    merged_df_list.append(with_e4)

merged_df_list[0]

Unnamed: 0_level_0,Time[s],Delta_Time,MET,value__bpm,value__confidence,X,Y,Z,Magnitude
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-12-03 16:58:51,1.0,0 days 00:00:00,1.31,91.0,1.0,30.0,0.0,55.67,63.24
2021-12-03 16:58:52,2.0,0 days 00:00:01,1.31,91.0,1.0,30.0,0.0,55.75,63.31
2021-12-03 16:58:53,3.0,0 days 00:00:02,1.52,91.0,1.0,30.0,0.0,55.80,63.35
2021-12-03 16:58:54,4.0,0 days 00:00:03,1.63,91.0,1.0,30.0,0.0,55.60,63.18
2021-12-03 16:58:55,5.0,0 days 00:00:04,1.56,88.0,1.0,30.0,0.0,55.80,63.35
...,...,...,...,...,...,...,...,...,...
2021-12-03 17:28:48,1798.0,0 days 00:29:57,5.32,139.0,1.0,20.2,-49.4,36.00,64.38
2021-12-03 17:28:49,1799.0,0 days 00:29:58,5.11,139.0,1.0,18.0,-50.2,33.80,63.14
2021-12-03 17:28:50,1800.0,0 days 00:29:59,4.46,142.0,2.0,17.8,-50.2,34.20,63.30
2021-12-03 17:28:51,1801.0,0 days 00:30:00,4.12,142.0,2.0,15.2,-49.4,36.20,63.10


In [17]:
# Display the first participant's merged frame.
merged_df_list[0]

Unnamed: 0,Time[s],Delta_Time,MET,value__bpm,value__confidence,X,Y,Z,Magnitude
0,1.0,0 days 00:00:00,1.31,91.0,1.0,30.0,0.0,55.67,63.24
1,2.0,0 days 00:00:01,1.31,91.0,1.0,30.0,0.0,55.75,63.31
2,3.0,0 days 00:00:02,1.52,91.0,1.0,30.0,0.0,55.80,63.35
3,4.0,0 days 00:00:03,1.63,91.0,1.0,30.0,0.0,55.60,63.18
4,5.0,0 days 00:00:04,1.56,88.0,1.0,30.0,0.0,55.80,63.35
...,...,...,...,...,...,...,...,...,...
1797,1798.0,0 days 00:29:57,5.32,139.0,1.0,20.2,-49.4,36.00,64.38
1798,1799.0,0 days 00:29:58,5.11,139.0,1.0,18.0,-50.2,33.80,63.14
1799,1800.0,0 days 00:29:59,4.46,142.0,2.0,17.8,-50.2,34.20,63.30
1800,1801.0,0 days 00:30:00,4.12,142.0,2.0,15.2,-49.4,36.20,63.10


In [21]:
# Write each participant's merged frame to merged_dataset/<participant>/ALL.pkl.
# The folder name is taken from participants_list (the list every other loader
# uses) instead of being rebuilt from the loop position, so a reordered or
# partial participant list cannot send a frame to the wrong folder.
for participant, participant_df in zip(participants_list, merged_df_list):
    output_dir = f"merged_dataset/{participant}"
    # Create the folder if it is missing so to_pickle cannot fail on the path.
    os.makedirs(output_dir, exist_ok=True)
    participant_df.to_pickle(f"{output_dir}/ALL.pkl")

In [None]:
# Histogram of "Time[s]" for the second participant, followed by a preview of
# that participant's merged frame.
second_merged_df = merged_df_list[1]
second_merged_df.hist(column="Time[s]", bins=100)
second_merged_df

In [None]:
# Scratch check: join the second participant's MET table with its E4 table on
# "Time[s]" alone (Delta_Time is kept on the E4 side here, only MET is dropped).
e4_without_met = E4_all_list[1].drop(columns=["MET"])
test = MET_all_list[1].merge(e4_without_met, how="left", on="Time[s]")
test

In [None]:
# For every participant draw MET, heart rate and acceleration magnitude against
# the frame index in one figure, each series on its own y-axis.
for df in merged_df_list:
    fig, ax_met = plt.subplots(figsize=(20, 10))
    ax_bpm = ax_met.twinx()
    ax_mag = ax_met.twinx()

    # MET on the first (left) y-axis
    ax_met.plot(df.index, df["MET"], 'g-')
    ax_met.set_ylabel('MET', color='g')
    ax_met.set_xlabel('Time')

    # value__bpm on the second y-axis
    ax_bpm.plot(df.index, df["value__bpm"], 'b-')
    ax_bpm.set_ylabel('BPM', color='b')

    # Magnitude on the third y-axis
    ax_mag.plot(df.index, df["Magnitude"], 'r-')
    ax_mag.set_ylabel('Magnitude', color='r')

    # Both twin axes share the right edge; move the third spine outward so its
    # ticks and label do not sit on top of the BPM axis.
    ax_mag.spines['right'].set_position(('outward', 60))

    # Leave 10% headroom above the largest magnitude. Skip the limit when the
    # column has no usable values (the left join can leave it all-NaN), because
    # set_ylim rejects NaN limits.
    magnitude_peak = df["Magnitude"].max()
    if pd.notna(magnitude_peak) and magnitude_peak > 0:
        ax_mag.set_ylim(0, magnitude_peak * 1.1)

    plt.show()