# Merge sensor data with MET labels

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np


# All relative paths in this notebook are resolved against the project root.
# The root can be overridden with the DATAMINING_DIR environment variable so the
# notebook is not tied to a single machine; when the variable is unset the
# original location is used.
os.chdir(os.environ.get("DATAMINING_DIR", "/home/cernerrood246/University/DataMining"))
# Show floats with two decimals in rendered DataFrames.
pd.set_option("display.precision", 2)

In [2]:
# Participant identifiers "P1" .. "P17"; they double as per-participant folder names.
participants_list = [f"P{number}" for number in range(1, 18)]

## Load Datasets

### Load MET

In [3]:
# One MET frame per participant, in the same order as participants_list.
met_paths = [f"preprocessed_dataset/{pid}/MET.pkl" for pid in participants_list]
MET_all_list = [pd.read_pickle(path) for path in met_paths]
# Show the second participant's frame as a sanity check.
MET_all_list[1]


Unnamed: 0_level_0,Time[s],Delta_Time,MET,Activity
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-11-25 17:17:43,1.0,0 days 00:00:00,0.72,1.0
2021-11-25 17:17:44,2.0,0 days 00:00:01,0.72,1.0
2021-11-25 17:17:45,3.0,0 days 00:00:02,0.72,1.0
2021-11-25 17:17:46,4.0,0 days 00:00:03,0.72,1.0
2021-11-25 17:17:47,5.0,0 days 00:00:04,0.71,1.0
...,...,...,...,...
2021-11-25 17:56:28,2326.0,0 days 00:38:45,0.00,5.0
2021-11-25 17:56:29,2327.0,0 days 00:38:46,0.00,5.0
2021-11-25 17:56:30,2328.0,0 days 00:38:47,0.00,5.0
2021-11-25 17:56:31,2329.0,0 days 00:38:48,0.00,5.0


### Load Demographics


In [3]:
# Read the raw demographics table, one-hot encode Gender into integer columns,
# drop the Participant and Comments columns, then save the result as a pickle.
demographics_df = (
    pd.read_csv("original_dataset/Demographics.csv")
    .pipe(pd.get_dummies, columns=["Gender"], dtype=int)
    .drop(columns=["Participant", "Comments"])
)
demographics_df.to_pickle("preprocessed_dataset/demographics.pkl")

### Load Fitbit

In [6]:
fitbit_path = "Fitbit_data/average_data_5S_new/average_data_5S_new"


def _load_fitbit_file(csv_path):
    """Read one Fitbit CSV and return it on a 1-second time grid.

    The first (unnamed) CSV column is parsed as the timestamp index. The frame
    is upsampled to one row per second, each original reading is carried
    forward over at most the next 4 seconds, and rows that still have no
    ``value__bpm`` are dropped so larger gaps in the recording are not
    filled with stale values.
    """
    readings = pd.read_csv(csv_path).rename(columns={"Unnamed: 0": "Time"})
    readings["Time"] = pd.to_datetime(readings["Time"])
    readings = readings.set_index("Time")

    # Lower-case "s" is the non-deprecated alias for seconds (upper-case "S"
    # emits a FutureWarning from pandas 2.2 onwards).
    per_second = readings.resample("1s").asfreq()

    # Carry each reading forward over the 4 following empty seconds only.
    per_second = per_second.ffill(limit=4)
    return per_second[per_second["value__bpm"].notna()]


# os.listdir returns entries in arbitrary order and also lists hidden files and
# sub-directories; sort for a reproducible result and keep regular,
# non-hidden files only.
fitbit_file_names = sorted(
    name
    for name in os.listdir(fitbit_path)
    if not name.startswith(".") and os.path.isfile(os.path.join(fitbit_path, name))
)
fitbit_df_list = [_load_fitbit_file(os.path.join(fitbit_path, name)) for name in fitbit_file_names]

# Stack all files into a single frame indexed by timestamp.
fitbit_df_all = pd.concat(fitbit_df_list, axis=0)
fitbit_df_all


Unnamed: 0_level_0,value__bpm,value__confidence
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-11-20 09:11:40,104.0,0.0
2021-11-20 09:11:41,104.0,0.0
2021-11-20 09:11:42,104.0,0.0
2021-11-20 09:11:43,104.0,0.0
2021-11-20 09:11:44,104.0,0.0
...,...,...
2021-11-19 22:49:20,111.0,2.0
2021-11-19 22:49:21,111.0,2.0
2021-11-19 22:49:22,111.0,2.0
2021-11-19 22:49:23,111.0,2.0


### Load E4

In [7]:
# One E4 frame per participant, in the same order as participants_list.
e4_paths = [f"merged_dataset/{pid}/E4.pkl" for pid in participants_list]
E4_all_list = [pd.read_pickle(path) for path in e4_paths]


In [None]:
# Inspect the Delta_Time column of the second participant's E4 frame.
E4_all_list[1].loc[:, "Delta_Time"]

## Merge Datasets

In [8]:
# Build one merged frame per participant: MET rows are the base, and Fitbit,
# E4 and demographics columns are attached to them.
# NOTE(review): MET, E4 and demographics are paired purely by position; the
# Participant column was dropped from demographics_df, so this relies on the
# CSV rows being in the same order as participants_list - confirm.
merged_df_list = []
for participant_number, met_df in enumerate(MET_all_list):
    # Left join on the timestamp index: every MET row is kept, Fitbit values
    # are NaN where no reading exists for that second.
    temp_df = met_df.merge(fitbit_df_all, how="left", left_index=True, right_index=True)

    # Left join on "Time[s]"; Delta_Time and MET are removed from the E4 side
    # first because the MET frame already provides columns with those names.
    e4_features = E4_all_list[participant_number].drop(columns=["Delta_Time", "MET"])
    temp_df = temp_df.merge(e4_features, how="left", on="Time[s]")

    # Cross join with the participant's single demographics row so its values
    # are repeated on every row.
    demographics_row = demographics_df.iloc[participant_number].to_frame().T
    temp_df = temp_df.merge(demographics_row, how="cross")

    # The column-based merges reset the index, so the MET timestamps are put
    # back below. That is only valid if no join duplicated rows (duplicate
    # timestamps in the Fitbit data or duplicate "Time[s]" values in E4 would).
    if len(temp_df) != len(met_df):
        raise ValueError(
            f"Merge for participant at position {participant_number} produced "
            f"{len(temp_df)} rows from {len(met_df)} MET rows; "
            "check the Fitbit/E4 data for duplicate join keys."
        )
    temp_df.index = met_df.index
    merged_df_list.append(temp_df)

merged_df_list[0]

Unnamed: 0_level_0,Time[s],Delta_Time,MET,Activity,value__bpm,value__confidence,X,Y,Z,Magnitude,BMI,Fat,Muscle,Water,Bone,Weight,Height,Age,Gender_F,Gender_M
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-12-03 16:58:51,1.0,0 days 00:00:00,1.31,1.0,91.0,1.0,30.0,0.0,55.67,63.24,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0
2021-12-03 16:58:52,2.0,0 days 00:00:01,1.31,1.0,91.0,1.0,30.0,0.0,55.75,63.31,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0
2021-12-03 16:58:53,3.0,0 days 00:00:02,1.52,1.0,91.0,1.0,30.0,0.0,55.80,63.35,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0
2021-12-03 16:58:54,4.0,0 days 00:00:03,1.63,1.0,91.0,1.0,30.0,0.0,55.60,63.18,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0
2021-12-03 16:58:55,5.0,0 days 00:00:04,1.56,1.0,88.0,1.0,30.0,0.0,55.80,63.35,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-03 17:28:48,1798.0,0 days 00:29:57,5.32,5.0,139.0,1.0,20.2,-49.4,36.00,64.38,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0
2021-12-03 17:28:49,1799.0,0 days 00:29:58,5.11,5.0,139.0,1.0,18.0,-50.2,33.80,63.14,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0
2021-12-03 17:28:50,1800.0,0 days 00:29:59,4.46,5.0,142.0,2.0,17.8,-50.2,34.20,63.30,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0
2021-12-03 17:28:51,1801.0,0 days 00:30:00,4.12,5.0,142.0,2.0,15.2,-49.4,36.20,63.10,24.7,33.0,14.0,49.0,4.0,70.6,166.0,29.0,1.0,0.0


In [10]:
merged_df_list[16]

Unnamed: 0_level_0,Time[s],Delta_Time,MET,Activity,value__bpm,value__confidence,X,Y,Z,Magnitude,BMI,Fat,Muscle,Water,Bone,Weight,Height,Age,Gender_F,Gender_M
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-12-02 17:04:07,1.0,0 days 00:00:00,0.60,1.0,76.0,1.0,31.67,-23.0,49.0,62.71,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0
2021-12-02 17:04:08,2.0,0 days 00:00:01,0.60,1.0,76.0,1.0,31.50,-23.0,49.0,62.63,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0
2021-12-02 17:04:09,3.0,0 days 00:00:02,0.55,1.0,76.0,1.0,31.40,-23.0,49.0,62.58,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0
2021-12-02 17:04:10,4.0,0 days 00:00:03,0.52,1.0,78.0,1.0,31.40,-23.0,48.8,62.42,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0
2021-12-02 17:04:11,5.0,0 days 00:00:04,0.50,1.0,78.0,1.0,31.40,-23.0,48.8,62.42,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-02 17:34:03,1797.0,0 days 00:29:56,8.61,5.0,161.5,2.0,71.80,0.6,13.2,73.01,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0
2021-12-02 17:34:04,1798.0,0 days 00:29:57,8.50,5.0,161.5,2.0,44.00,16.6,12.6,48.69,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0
2021-12-02 17:34:05,1799.0,0 days 00:29:58,8.39,5.0,163.5,2.0,59.00,-1.8,7.6,59.51,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0
2021-12-02 17:34:06,1800.0,0 days 00:29:59,8.26,5.0,163.5,2.0,32.00,-15.6,3.8,35.80,25.2,21.0,16.0,56.0,5.0,77.2,175.0,33.0,0.0,1.0


In [11]:
# Write each merged frame to merged_dataset/<participant>/ALL.pkl.
# Folder names come from participants_list (the list the frames were loaded
# from) instead of being rebuilt from the loop counter, so the two cannot
# drift apart if the participant list changes.
for participant, participant_df in zip(participants_list, merged_df_list):
    output_dir = f"merged_dataset/{participant}"
    # No-op when the folder already exists.
    os.makedirs(output_dir, exist_ok=True)
    participant_df.to_pickle(f"{output_dir}/ALL.pkl")

In [12]:
# Column names of the last merged participant frame.
merged_df_list[-1].columns

Index(['Time[s]', 'Delta_Time', 'MET', 'Activity', 'value__bpm',
       'value__confidence', 'X', 'Y', 'Z', 'Magnitude', 'BMI', 'Fat', 'Muscle',
       'Water', 'Bone', 'Weight', 'Height', 'Age', 'Gender_F', 'Gender_M'],
      dtype='object')

In [None]:
# Histogram of the "Time[s]" column for the second participant, followed by
# the merged frame itself.
second_participant_df = merged_df_list[1]
second_participant_df.hist(column="Time[s]", bins=100)
second_participant_df

In [None]:
# Trial join of the second participant's MET and E4 frames on "Time[s]";
# only MET is dropped from the E4 side here.
e4_without_met = E4_all_list[1].drop(columns=["MET"])
test = MET_all_list[1].merge(e4_without_met, how="left", on="Time[s]")
test

In [None]:
# One figure per participant with MET, BPM and Magnitude over time, each on
# its own y-axis sharing the same x-axis.
for participant, df in zip(participants_list, merged_df_list):
    fig, ax1 = plt.subplots(figsize=(20, 10))
    ax2 = ax1.twinx()
    ax3 = ax1.twinx()

    # MET on the first (left) y-axis
    ax1.plot(df.index, df["MET"], 'g-')
    ax1.set_ylabel('MET', color='g')

    # value__bpm on the second y-axis
    ax2.plot(df.index, df["value__bpm"], 'b-')
    ax2.set_ylabel('BPM', color='b')

    # Magnitude on the third y-axis, whose spine is pushed outward so it does
    # not sit on top of the BPM axis
    ax3.plot(df.index, df["Magnitude"], 'r-')
    ax3.spines['right'].set_position(('outward', 60))
    ax3.set_ylim(0, df["Magnitude"].max() * 1.1)
    ax3.set_ylabel('Magnitude', color='r')

    ax1.set_xlabel('Time')
    # Identify the participant so each figure can be read on its own.
    ax1.set_title(f"{participant}: MET, BPM and Magnitude")

    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)