## How to use this code

### EEG

#### To see each experiment in EEG
eeg_process = EEGProcessor(r"C:\Users\ballj\OneDrive\EEG_jm.csv", time_interval = 20, remove_time_in_group = 15) <br>
#### To get total experiment for each person
eeg_process = EEGProcessor(r"C:\Users\ballj\OneDrive\EEG_jm.csv", time_interval = 20, remove_time_in_group = 15) <br>
data = pd.read_csv(r"C:\Users\ballj\OneDrive\EEG_jm.csv") <br>
You can reuse this code by changing the hyperparameters (`time_interval`, `remove_time_in_group`) and the file path.<br>
*Reference* <br>
time_interval : length of each group in seconds (e.g. 20 s)<br>
remove_time_in_group : threshold in seconds used when handling error values and partial groups inside each group (e.g. 15 s). <br>
For example, a partial group that covers 16 seconds is extended to a full 20-second group, and a representative (mean) value is exported for that group.


In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

class EEGProcessor:
    #time_interval : Unified seconds with Fitbit data (ex. 20secs) 
    #remove_time_in_group : the criteria of processing error values in each time_interval group (ex. 15secs)
    def __init__(self, file_path, time_interval, remove_time_in_group):
        self.time_interval = time_interval
        self.remove_time_in_group = remove_time_in_group
        self.time_interval_str = f'{time_interval}S'
        self.EEG_report = pd.read_csv(file_path)

    # list to dataframe (ex. brain waves)
    def parse_raw_data(self, dataframe, col_name):
        col_str = dataframe.iloc[0][col_name]
        col_str = col_str.strip('[]')
        col_list = [float(val) for val in col_str.split(',')]  # cause list is divided by comma
        col_data = pd.DataFrame({col_name: col_list})
        return col_data

    # experiment time calculating function
    def time_difference(self, dataframe, start_time_col, finish_time_col):
        start_time = datetime.strptime(dataframe.iloc[0][start_time_col], '%Y-%m-%d %H:%M:%S')
        finish_time = datetime.strptime(dataframe.iloc[0][finish_time_col], '%Y-%m-%d %H:%M:%S')

        # time difference between two datatime objects
        time_difference = (finish_time - start_time).total_seconds()
        return time_difference
    
    # Comparing the experimental initial recognition error period and delete the part to be deleted
    def count_initial_same_values(self, series):
        initial_value = series.iloc[0]
        count = 0
        for value in series:
            if value == initial_value:
                count += 1
            else:
                break
        return count
    
    # Processing of values that are not exactly divided into front and back
    def process_start_time_trash_sec(self, dt):
        # Plus 1 min and delete second in input time
        rounded_time = dt + timedelta(minutes=1) - timedelta(seconds=dt.second)
        time_difference = (rounded_time - dt).total_seconds()

        remainder = time_difference % float(self.time_interval)

        # use the seconds over remove_time_in_group seconds
        if self.remove_time_in_group <= remainder:
            return False
        else:
        # ex. 2023-10-15 00:21:04 -> remainder : 16 (False)
        # ex. 2023-10-15 00:21:15 -> remainder : 5
            return remainder

    def process_finish_time_trash_sec(self, dt):
        # delete second in input time
        rounded_time = dt - timedelta(seconds=dt.second)
        time_difference = (dt - rounded_time).total_seconds()

        remainder = time_difference % float(self.time_interval)

        if self.remove_time_in_group <= remainder:
            return False

        else:
        #ex. 2023-10-15 00:40:46 -> remainder : 6
        #ex. 2023-10-15 00:40:56 -> remainder : 16 (False)
            return remainder

    def nearest_time_rounding(self, dt):
        seconds = dt.second
        # find nearest value in 0, 20, 40sec
        time_points = [i for i in range(0,60, self.time_interval)]
        nearest = min(time_points, key=lambda x: abs(x - seconds))
        # 찾은 초 값으로 시간 변경
        # 만약 nearest가 40이고 seconds가 55보다 크다면, 분을 1 더해주고 초를 0으로 설정
        if nearest == time_points[-1] and seconds >= (time_points[-1] + self.remove_time_in_group):
            rounded_time = dt.replace(second=0, microsecond=0) + timedelta(minutes=1)
        else:
            rounded_time = dt.replace(second=nearest, microsecond=0)

        return rounded_time

    # Align the experiment end times.
    # The dataframe that ends later is trimmed to match the one that ends earlier.
    def align_end_time(self, df1, df2):
        if df1.index[-1] > df2.index[-1]:
            df1 = df1[df1.index <= df2.index[-1]]

        elif df1.index[-1] < df2.index[-1]:
            df2 = df2[df2.index <= df1.index[-1]]

        else: 
            pass # 두 데이터프레임의 종료 시간이 동일한 경우

        return df1, df2

    # Start-time adjustment -> i = 0; finish-time adjustment -> i = -1.
    # Start-time adjustment uses process_start_time_trash_sec; finish-time adjustment uses process_finish_time_trash_sec.
    def adjust_time_index(self, i, df, func):
        remainder = func(df.index[i])
        #finish 부분에서 00초, 20초, 40초로 끝나면 가공된 마지막 row는 한 개의 데이터로 이루어지기 때문에 1초를 빼서 59, 19, 39초에서 마무리하고자
        one_sec = timedelta(seconds=1)

        if remainder == False:
            # time을 가장 가까운 0, 20, 40초 중 하나로 변경
            if i == 0 :
                time = self.nearest_time_rounding(df.index[i])
                new_index = df.index.tolist()
                new_index[i] = time
                df.index = new_index
            else:
                time = self.nearest_time_rounding(df.index[i]) - one_sec
                new_index = df.index.tolist()
                new_index[i] = time
                df.index = new_index

        else:
            cutting_time = timedelta(seconds=remainder)        
            # remainder가 15초 미만이면 버리고 그 이후부터 시작
            if i == 0:
                df = df[df.index >= df.index[i] + cutting_time]
            # remainder가 15초 미만이면 버리고 그 이전 0,20,40초 중으로 마무리
            else:
                df = df[df.index <= df.index[-1] - cutting_time - one_sec]

        return df
    
    def check_invalid_values(self, group):
        # brain wave에서 연속 0 차이값을 가지는 시간 구간 계산
        alpha_invalid_series = group['α_wave_raw_data'].diff().eq(0)
        alpha_invalid_timestamps = group.index[alpha_invalid_series].tolist()

        # attention_raw_data에서 0 값을 가지는 시간 구간 계산
        attention_invalid_series = group['attention_raw_data'] == 0
        attention_invalid_timestamps = group.index[attention_invalid_series].tolist()

        # 연속 0 차이값 또는 0 값을 가지는 시간 구간의 길이가 15초 이상인지 확인
        def has_long_invalid_duration(invalid_timestamps):
            if not invalid_timestamps:
                return False
            for i in range(1, len(invalid_timestamps)):
                if (invalid_timestamps[i] - invalid_timestamps[i-1]).seconds > self.remove_time_in_group:
                    return True
            return False

        alpha_invalid = has_long_invalid_duration(alpha_invalid_timestamps)
        attention_invalid = has_long_invalid_duration(attention_invalid_timestamps)

        if alpha_invalid or attention_invalid:
            return pd.Series([np.nan] * group.shape[1], index=group.columns)

        else:
            # 오류 값을 제외하고 평균 계산
            valid_conditions = (
                (group['α_wave_raw_data'].diff() != 0) & 
                (group['β_wave_raw_data'].diff() != 0) & 
                (group['θ_wave_raw_data'].diff() != 0) & 
                (group['δ_wave_raw_data'].diff() != 0) & 
                (group['γ_wave_raw_data'].diff() != 0) & 
                (group['attention_raw_data'] != 0)
            )
            return group[valid_conditions].mean()
        
    def check_invalid_values_other(self, group):
        # hr에서 0 값을 가지는 시간 구간 계산
        hr_invalid_series = group['hr_raw_data'] == 0
        hr_invalid_timestamps = group.index[hr_invalid_series].tolist()

        # 연속 0 차이값 또는 0 값을 가지는 시간 구간의 길이가 15초 이상인지 확인
        def has_long_invalid_duration(invalid_timestamps):
            if not invalid_timestamps:
                return False
            for i in range(1, len(invalid_timestamps)):
                if (invalid_timestamps[i] - invalid_timestamps[i-1]).seconds > self.remove_time_in_group:
                    return True
            return False

        hr_invalid = has_long_invalid_duration(hr_invalid_timestamps)

        if hr_invalid:
            return pd.Series([np.nan] * group.shape[1], index=group.columns)

        else:
            # 오류 값을 제외하고 평균 계산
            group = group[(group['hr_raw_data'] != 0)]
            return group.mean()
    
    def process_eeg_data(self, experiment_id):
        if experiment_id not in self.EEG_report.index:
            return None

        # 모든 실험들 for문으로 돌리고 하나의 데이터프레임으로 저장되어야 함.
        EEG_report_sample = self.EEG_report.loc[[experiment_id],:]

        #한 column당 하나의 dataframe
        cols = ['α_wave_raw_data', 'β_wave_raw_data', 'θ_wave_raw_data', 'δ_wave_raw_data', 'γ_wave_raw_data', 'attention_raw_data', 'hrv_raw_data', 'hr_raw_data', 'coherence_flag_raw_data']
        parsed_dfs = [self.parse_raw_data(EEG_report_sample, col) for col in cols]

        #interval second 계산
        interval_sec = self.time_difference(EEG_report_sample, 'meditation_start_time', 'meditation_finish_time') / len(parsed_dfs[0])
        interval_sec_other = self.time_difference(EEG_report_sample, 'meditation_start_time', 'meditation_finish_time') / len(parsed_dfs[6])

        #병합된 dataframe 생성
        merged_df = parsed_dfs[0].join(parsed_dfs[1:6])
        merged_df_other = parsed_dfs[6].join(parsed_dfs[7:])

        #실험 시작 시간
        start_time = datetime.strptime(EEG_report_sample.iloc[0]['meditation_start_time'], '%Y-%m-%d %H:%M:%S')

        #실험 feature 별 interval sec 이용하여 실험 시간 time index로 데이터프레임 변환 (2가지)
        interval_sec, interval_sec_other = timedelta(seconds=round(interval_sec,2)), timedelta(seconds=round(interval_sec_other,2))
        merged_df['time'] = [start_time + i * interval_sec for i in range(len(merged_df))]
        merged_df_other['time'] = [start_time + i * interval_sec_other for i in range(len(merged_df_other))]
        merged_df, merged_df_other = merged_df.set_index('time'), merged_df_other.set_index('time')
        
        #실험 초기 인식 오류 기간 비교용
        counts = [self.count_initial_same_values(merged_df[col]) for col in cols[:6]] + [self.count_initial_same_values(merged_df_other['hr_raw_data'])]
        
        #interval_sec 단위 float로 변경하여 비교
        initial_error_times = [counts[i] * interval_sec.total_seconds() if i != 6 else counts[i] * interval_sec_other.total_seconds() for i in range(7)]
        
        # feature 별 초기 오류 시간 비교 후, 가장 오류 시간이 긴 것에 맞춰 initial error time 설정
        initial_error_time = timedelta(seconds=max(initial_error_times))

        # 초기 인식 오류 제거한 실험 data 시작 시간
        real_start_time = start_time + initial_error_time
        merged_df, merged_df_other = merged_df[merged_df.index > real_start_time], merged_df_other[merged_df_other.index > real_start_time]

        # 데이터프레임 인덱스를 초 단위로 반올림
        merged_df.index, merged_df_other.index = merged_df.index.round('S'), merged_df_other.index.round('S')

        # 실험 종료 시간 맞춰주기
        merged_df, merged_df_other = self.align_end_time(merged_df, merged_df_other)

#         # 더 늦게 끝나는 데이터프레임을 일찍 끝나는 데이터프레임에 맞춰야 함.
#         if merged_df.index[-1] > merged_df_other.index[-1]:
#             merged_df = merged_df[merged_df.index <= merged_df_other.index[-1]]

#         elif merged_df.index[-1] < merged_df_other.index[-1] :
#             merged_df_other = merged_df_other[merged_df_other.index <= merged_df.index[-1]]
            
#         else:
#             True

        # start time 가공 -> i : 0 , finish time 가공 -> i : -1
        # start time 가공 -> process_start_time_trash_sec 함수 , finish time 가공 -> process_finish_time_trash_sec 함수
        merged_df = self.adjust_time_index(0, merged_df, self.process_start_time_trash_sec)
        merged_df_other = self.adjust_time_index(0, merged_df_other, self.process_start_time_trash_sec)
        merged_df = self.adjust_time_index(-1, merged_df, self.process_finish_time_trash_sec)
        merged_df_other = self.adjust_time_index(-1, merged_df_other, self.process_finish_time_trash_sec)

        # 20초 단위로 그룹화 (인덱스가 datetime 형태이므로 floor 사용)
        grouped = merged_df.groupby(merged_df.index.floor(self.time_interval_str))
        grouped_other = merged_df_other.groupby(merged_df_other.index.floor(self.time_interval_str))

        result = grouped.apply(self.check_invalid_values)
        result_other = grouped_other.apply(self.check_invalid_values_other)

        # β/θ SP ratio를 포함한 최종 EEG 데이터셋
        EEG_data_per_time_interval = result.merge(result_other, left_index=True, right_index=True)
        EEG_data_per_time_interval['β/θ SP'] = EEG_data_per_time_interval['β_wave_raw_data'] / EEG_data_per_time_interval['θ_wave_raw_data']
        
        EEG_data_per_time_interval = EEG_data_per_time_interval.rename(columns={
            'α_wave_raw_data':'alpha_wave',
            'β_wave_raw_data':'beta_wave',
            'θ_wave_raw_data':'theta_wave',
            'δ_wave_raw_data':'delta_wave',
            'γ_wave_raw_data':'gamma_wave',
            'attention_raw_data' : 'attention',
            'hrv_raw_data' : 'hrv',
            'hr_raw_data' : 'hr',
            'coherence_flag_raw_data' : 'coherence',
            'β/θ SP' : 'SP ratio'
        })

        return EEG_data_per_time_interval

## Fitbit

In [2]:
import datetime as dt
import matplotlib.pyplot as plt
import glob

class FitbitProcessor:
    # time_interval: number of seconds per group, kept the same as for the EEG data (e.g. 20 s)
    def __init__(self, folder_path, time_interval):
        self.folder_path = folder_path
        self.time_interval = time_interval
        self.time_interval_str = f'{time_interval}S'
        folder_patterns = [
            "Active Zone Minutes (AZM)/*",
            "Sleep Score/*",
            "Stress Journal/CEDA*",
            "Temperature/Wrist Temperature - *"
        ]
        self.things_path = [glob.glob(f"{folder_path}/{pattern}") for pattern in folder_patterns]
        self.things_path = [item for sublist in self.things_path for item in sublist]
        self.wt_count = len(glob.glob(f"{folder_path}/Temperature/Wrist Temperature - *"))
        self.azm_count = len(glob.glob(f"{folder_path}/Active Zone Minutes (AZM)/*"))
        self.sleep_count = len(glob.glob(f"{folder_path}/Sleep Score/*"))
        self.eda_count = len(glob.glob(f"{folder_path}/Stress Journal/CEDA*"))
        # 원본 데이터의 시간 간격은 1분
        self.original_interval = 60
        self.num_timestamps = self.original_interval // self.time_interval # 20초로 하면 3, 15초로 하면 4
        self.half_point = self.num_timestamps // 2 # 20초로 하면 1, 15초로 하면 2

    def read_filtered_csv(self, path, columns):
        name = pd.read_csv(path)
        name = name[columns]
        name[columns[0]] = pd.to_datetime(name[columns[0]])
        return name
    
    def round_seconds(self, obj):
        if obj.second % self.time_interval == 0:
            return obj
        else:
            return obj - timedelta(seconds=obj.second % self.time_interval)
    
    def round_zero(self, datetime_obj):
        datetime_obj = datetime_obj.replace(second=0)
        return datetime_obj

    def process_fitbit_data(self):
        AZM_col = ['date_time', 'total_minutes']
        sleep_col = ['timestamp', 'deep_sleep_in_minutes']
        stress_col = ['timestamp', 'eda_level_real']
        temp_col = ['recorded_time', 'temperature']

        things_col = [AZM_col] * self.azm_count + [sleep_col, stress_col] + [temp_col] * self.wt_count
        things = [self.read_filtered_csv(path, col) for path, col in zip(self.things_path, things_col)]
        
        if self.wt_count == 0:
            if self.eda_count == 0:
                azm = self.process_azm(things[:self.azm_count])
                sleep = self.process_sleep(things[self.azm_count])
                Min_Time, Max_Time = self.find_time_bounds([azm, sleep])
                df = self.create_final_df([azm, sleep], Min_Time, Max_Time)
                df['timestamp'] = pd.to_datetime(df['timestamp'])
                df = df.set_index('timestamp')
                df = df.assign(temperature=np.nan)
                df = df.assign(eda=np.nan)
                return df
            else:
                azm = self.process_azm(things[:self.azm_count])
                sleep = self.process_sleep(things[self.azm_count])
                eda = self.process_eda(things[self.azm_count+1])
                Min_Time, Max_Time = self.find_time_bounds([azm, sleep, eda])

                df = self.create_final_df([azm, sleep, eda], Min_Time, Max_Time)
                df['timestamp'] = pd.to_datetime(df['timestamp'])
                df = df.set_index('timestamp')
                df = df.assign(temperature=np.nan)

                return df
        
        elif self.azm_count == 0:
            sleep = self.process_sleep(things[self.azm_count])
            eda = self.process_eda(things[self.azm_count+1])
            temp = self.process_temperature(things[self.azm_count+2:])
            Min_Time, Max_Time = self.find_time_bounds([sleep, eda, temp])

            df = self.create_final_df([sleep, eda, temp], Min_Time, Max_Time)
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df = df.set_index('timestamp')
            df = df.assign(BM=np.nan)

            return df
        
        elif self.sleep_count == 0:
            azm = self.process_azm(things[:self.azm_count])
            eda = self.process_eda(things[self.azm_count])
            temp = self.process_temperature(things[self.azm_count+1:])
            Min_Time, Max_Time = self.find_time_bounds([azm, eda, temp])

            df = self.create_final_df([azm, eda, temp], Min_Time, Max_Time)
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df = df.set_index('timestamp')
            df = df.assign(sleep=np.nan)

            return df            
        
        elif self.eda_count == 0:
            azm = self.process_azm(things[:self.azm_count])
            sleep = self.process_sleep(things[self.azm_count])
            temp = self.process_temperature(things[self.azm_count+1:])
            Min_Time, Max_Time = self.find_time_bounds([azm, sleep, temp])

            df = self.create_final_df([azm, sleep, temp], Min_Time, Max_Time)
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df = df.set_index('timestamp')
            df = df.assign(eda=np.nan)

            return df
        
        else:
            azm = self.process_azm(things[:self.azm_count])
            sleep = self.process_sleep(things[self.azm_count])
            eda = self.process_eda(things[self.azm_count+1])
            temp = self.process_temperature(things[self.azm_count+2:])
            Min_Time, Max_Time = self.find_time_bounds([azm, sleep, eda, temp])

            df = self.create_final_df([azm, sleep, eda, temp], Min_Time, Max_Time)
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df = df.set_index('timestamp')

            return df
    
    def process_azm(self, azm):

        azm = pd.concat(azm, axis=0)
        azm = azm.rename(columns={'date_time':'timestamp', 'total_minutes':'BM'})
        seq1 = []
        seq2 = []

        for x1 in azm['timestamp']:
            if self.half_point % 2 != 0:
                start_timestamp = x1 - timedelta(seconds=(self.half_point * self.time_interval))
                # 20초로 하면, 15:57:00 - 1*20 : 15:56:40
                # 12초로 하면, 15:57:00 - 2*12 : 15:56:36
                for n in range(self.num_timestamps):
                    # 20초 : 0,1,2 -> 15:56:40, 15:57:00, 15:57:20
                    # 12초 : 0,1,2,3,4 -> 15:56:36, 15:56:48, 15:57:00, 15:57:12, 15:57:24
                    new_timestamp = start_timestamp + timedelta(seconds = (n*self.time_interval))
                    seq1.append(new_timestamp)

            else:
                start_timestamp = x1 - timedelta(seconds=((self.half_point-1) * self.time_interval))
                # 15초로 하면, 15:57:00 - (2-1)*15 : 15:56:45
                # 10초로 하면, 15:57:00 - (3-1)*10 : 15:56:40
                for n in range(self.num_timestamps):
                    # 15초 : 0,1,2,3 -> 15:56:45, 15:57:00, 15:57:15, 15:57:30
                    # 10초 : 0,1,2,3,4,5 -> 15:56:40, 15:56:50, 15:57:00, 15:57:10, 15:57:20, 15:57:30
                    new_timestamp = start_timestamp + timedelta(seconds = (n*self.time_interval))
                    seq1.append(new_timestamp)

        for x1 in azm['BM']:
            for _ in range(self.num_timestamps):
                seq2.append(x1)

        azm_list = {'timestamp': seq1, 'BM': seq2}
        azm_final = pd.DataFrame(azm_list)

        return azm_final
    
    # At this stage there is still only one sleep record per day.
    def process_sleep(self, sleep):
        
        sleep['timestamp'] = [
            self.round_zero(datetime.strptime(str(a)[0:19], '%Y-%m-%d %H:%M:%S')) for a in sleep['timestamp']
        ]
        sleep = sleep.rename(columns={'deep_sleep_in_minutes':'sleep'})
        return sleep
    
    def process_eda(self, eda):
        
        eda['timestamp'] = [
            self.round_seconds(
                datetime.strptime(str(a)[0:19], '%Y-%m-%d %H:%M:%S') - dt.timedelta(hours=4)
            ) for a in eda['timestamp']]

        # EDA 양선형
        seq = []
        seq0 = []

        for i in range(1, len(eda['timestamp']) - 1):
            #self.half_point 홀수인 경우 (ex. 20초면 1, 12초면 2)
            if self.half_point % 2 != 0:
                # 20초로 하면, 15:57:00 - 1*20 : 15:56:40
                # 12초로 하면, 15:57:00 - 2*12 : 15:56:36
                start_timestamp = eda.iloc[i,0] - timedelta(seconds = self.half_point * self.time_interval)
                for n in range(self.num_timestamps):
                    # 20초 : 0,1,2 -> 15:56:40, 15:57:00, 15:57:20
                    # 12초 : 0,1,2,3,4 -> 15:56:36, 15:56:48, 15:57:00, 15:57:12, 15:57:24
                    new_timestamp = start_timestamp + timedelta(seconds = (n * self.time_interval))
                    seq.append(new_timestamp)

                    # 이전 값으로부터 보간
                    # 20초면 n 0,1,2 / half_point 1
                    # 12초면 n 0,1,2,3,4 / half_point 2
                    if n < self.half_point: # 12초면 n=0,1일때
                        weight = (self.half_point - n) / self.num_timestamps # 12초면 n=0일 때 2/3, n=1일 때 1/3
                        value = eda.iloc[i, 1] - ((eda.iloc[i, 1] - eda.iloc[i - 1, 1]) * weight)

                    elif n == self.half_point:
                        value = eda.iloc[i,1]

                    # 다음 값으로부터 보간
                    else: # 12초면 n=2,3,4일 때
                        weight = (n - self.half_point) / self.num_timestamps # 12초면 n=2일 때 0, n=3일 때 2/3, n=4일 때 1/3
                        value = eda.iloc[i, 1] + ((eda.iloc[i + 1, 1] - eda.iloc[i, 1]) * weight)

                    seq0.append(round(value, 2))

        eda_list = {'timestamp': seq, 'eda': seq0}
        eda_final = pd.DataFrame(eda_list)

        return eda_final
    
    def process_temperature(self, temperature):
        #Temperature
        #모든 온도 데이터 합치기
        temp = pd.concat(temperature, axis=0)
        temp = temp.rename(columns={'recorded_time':'timestamp'})

        # Temperature 양선형
        seq = []
        seq0 = []

        for i in range(1, len(temp['timestamp']) - 1):
            #self.half_point 홀수인 경우 (ex. 20초면 1, 12초면 2)
            if self.half_point % 2 != 0:
                # 20초로 하면, 15:57:00 - 1*20 : 15:56:40
                # 12초로 하면, 15:57:00 - 2*12 : 15:56:36
                start_timestamp = temp.iloc[i,0] - timedelta(seconds = self.half_point * self.time_interval)
                for n in range(self.num_timestamps):
                    # 20초 : 0,1,2 -> 15:56:40, 15:57:00, 15:57:20
                    # 12초 : 0,1,2,3,4 -> 15:56:36, 15:56:48, 15:57:00, 15:57:12, 15:57:24
                    new_timestamp = start_timestamp + timedelta(seconds = (n * self.time_interval))
                    seq.append(new_timestamp)

                    # 이전 값으로부터 보간
                    # 20초면 n 0,1,2 / half_point 1
                    # 12초면 n 0,1,2,3,4 / half_point 2
                    if n < self.half_point: # 12초면 n=0,1일때
                        weight = (self.half_point - n) / self.num_timestamps # 12초면 n=0일 때 1/3, n=1일 때 2/3
                        value = temp.iloc[i, 1] - ((temp.iloc[i, 1] - temp.iloc[i - 1, 1]) * weight)

                    elif n == self.half_point:
                        value = temp.iloc[i,1]

                    # 다음 값으로부터 보간
                    else: # 12초면 n=2,3,4일 때
                        weight = (n - self.half_point) / self.num_timestamps # 12초면 n=2일 때 0, n=3일 때 2/3, n=4일 때 1/3
                        value = temp.iloc[i, 1] + ((temp.iloc[i + 1, 1] - temp.iloc[i, 1]) * weight)

                    seq0.append(round(value, 6))

        temp_list = {'timestamp': seq, 'temperature': seq0}
        temp_final = pd.DataFrame(temp_list)

        return temp_final

    def find_time_bounds(self, dataframes):
        min_times = []
        max_times = []
        
        for df in dataframes:
            if not df.empty:
                min_times.append(df['timestamp'].min())
                max_times.append(df['timestamp'].max())
                
        if not min_times or not max_times:
            Min_Time = pd.Timestamp.now()
            Max_Time = pd.Timestamp.now()
        else:
            Min_Time = min(min_times)
            Max_Time = max(max_times)

        return Min_Time, Max_Time

    def create_final_df(self, datasets, Min_Time, Max_Time):
        fitbit = pd.date_range(start=Min_Time, end=Max_Time, freq=self.time_interval_str, name='timestamp')
        fitbit = pd.DataFrame(fitbit)

        for dataset in datasets:
            fitbit = pd.merge(fitbit, dataset, how='outer', on='timestamp')
            
        fitbit['BM'] = fitbit['BM'].fillna(0)
        fitbit['sleep'] = fitbit['sleep'].fillna(method='ffill')

        return fitbit

### Merging EEG and Fitbit dataset

In [3]:
class DataMerger(EEGProcessor, FitbitProcessor):
    """Combine processed EEG experiments with processed Fitbit data.

    Inherits from both processors so that ``process_eeg_data`` and
    ``process_fitbit_data`` are available on a single object.
    NOTE(review): those two methods are not defined in this class; they are
    presumably supplied by EEGProcessor and FitbitProcessor - confirm.
    """

    def __init__(self, eeg_filepath, fitbit_folderpath, time_interval, eeg_remove_time_in_group):
        """Initialise both parent processors.

        Args:
            eeg_filepath: path of the EEG CSV file.
            fitbit_folderpath: folder passed to ``FitbitProcessor``.
            time_interval: group length shared by both processors.
            eeg_remove_time_in_group: EEG-only threshold forwarded to
                ``EEGProcessor``.
        """
        # The parents take different constructor arguments, so each one is
        # initialised explicitly rather than through a super() chain.
        EEGProcessor.__init__(self, eeg_filepath, time_interval, eeg_remove_time_in_group)
        FitbitProcessor.__init__(self, fitbit_folderpath, time_interval)

        self.eeg_filepath = eeg_filepath

    def merge_data(self, first_exp_id=3):
        """Left-join the Fitbit table onto the concatenated EEG experiments.

        Args:
            first_exp_id: first row position of the EEG CSV to process.
                Defaults to 3, the value that used to be hard-coded.
                NOTE(review): the reason the first three rows are skipped is
                not visible here - confirm before changing it.

        Returns:
            The EEG rows with the Fitbit columns attached by matching index,
            or ``None`` when no experiment produced data or the Fitbit table
            is empty.
        """
        # Only the row count of the CSV is needed to enumerate experiments.
        eeg_data = pd.read_csv(self.eeg_filepath)

        processed = (
            self.process_eeg_data(exp_id)
            for exp_id in range(first_exp_id, len(eeg_data))
        )
        result_dfs = [frame for frame in processed if frame is not None]

        # Explicit sentinel instead of probing locals() for the variable.
        combined_eeg = None
        if result_dfs:
            combined_eeg = pd.concat(result_dfs)
            combined_eeg.index = pd.to_datetime(combined_eeg.index)

        fitbit_data = self.process_fitbit_data()
        fitbit_data.index = pd.to_datetime(fitbit_data.index)

        if combined_eeg is None or fitbit_data.empty:
            return None

        # how='left' keeps every EEG row even without a Fitbit match.
        return combined_eeg.merge(fitbit_data, left_index=True, right_index=True, how='left')

### YH

In [4]:
# Participant YH: merge the EEG file with the Fitbit folder using
# 20-second groups and a 15-second EEG error threshold per group.
merger = DataMerger(
    eeg_filepath=r"C:\Users\ballj\OneDrive\바탕 화면\EEG_yh.csv",
    fitbit_folderpath=r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_YH",
    time_interval=20,
    eeg_remove_time_in_group=15,
)

final_yh = merger.merge_data()

In [5]:
final_yh

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-11-09 02:19:20,96.210497,103.716261,94.812084,87.047226,93.146974,69.354839,0.000000,72.275862,0.0,1.093914,0.0,75.0,,
2023-11-09 02:19:40,95.832997,103.303197,94.158325,85.892075,92.851525,71.218750,0.000000,72.800000,0.0,1.097122,0.0,75.0,,
2023-11-09 02:20:00,95.644678,102.975200,93.845706,86.647913,92.898769,78.593750,0.000000,70.517241,0.0,1.097282,0.0,75.0,,
2023-11-09 02:20:20,96.771384,103.787469,94.646316,87.921091,92.992506,69.218750,60.233333,68.500000,0.0,1.096582,0.0,75.0,,
2023-11-09 02:20:40,94.863333,104.085285,93.064221,85.865318,93.042048,76.424242,56.551724,67.620690,0.0,1.118424,0.0,75.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-06 15:53:20,95.031556,102.017756,95.235144,88.798825,92.684828,84.875000,29.935484,93.096774,0.0,1.071220,0.0,,1.76,
2023-11-06 15:53:40,93.840372,102.013903,93.328512,85.785213,92.688775,81.031250,29.833333,84.633333,0.0,1.093063,0.0,,1.84,
2023-11-06 15:54:00,93.815109,102.620456,93.808000,86.974250,92.912225,86.906250,55.533333,82.133333,0.0,1.093941,0.0,,1.91,
2023-11-06 15:54:20,95.061944,102.804778,94.470884,88.624281,92.468916,84.000000,54.677419,89.193548,0.0,1.088217,0.0,,2.03,


### JM

In [6]:
# Participant JM: merge the EEG file with the Fitbit folder using
# 20-second groups and a 15-second EEG error threshold per group.
merger = DataMerger(
    eeg_filepath=r"C:\Users\ballj\OneDrive\바탕 화면\EEG_jm.csv",
    fitbit_folderpath=r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_JM",
    time_interval=20,
    eeg_remove_time_in_group=15,
)

final_jm = merger.merge_data()

In [7]:
final_jm

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-10-17 16:13:00,94.063528,99.213975,93.593869,84.868628,90.974541,69.156250,0.000000,61.275862,0.0,1.060048,0.0,44.0,,-1.034706
2023-10-17 16:13:20,94.835756,99.067247,95.416897,88.063403,91.330237,64.750000,0.000000,63.275862,0.0,1.038257,0.0,44.0,,-1.111373
2023-10-17 16:13:40,93.567144,99.964944,94.096141,86.123425,92.908906,66.906250,0.000000,64.928571,0.0,1.062370,0.0,44.0,,-1.188039
2023-10-17 16:14:00,92.150706,101.219152,90.821216,83.494384,93.286394,75.032258,19.103448,60.793103,0.0,1.114488,0.0,44.0,,-1.264706
2023-10-17 16:14:20,90.833428,100.648262,88.791894,80.487381,92.247284,86.406250,33.785714,59.500000,0.0,1.133530,0.0,44.0,,-1.311373
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-13 14:55:40,97.990398,99.141016,89.786491,80.571006,88.462028,67.328125,39.107143,91.714286,0.0,1.104186,0.0,,8.43,
2023-10-13 14:56:00,98.738189,99.106112,91.051269,81.310068,92.582797,62.784615,38.928571,91.000000,0.0,1.088465,0.0,,8.39,
2023-10-13 14:56:20,97.644569,97.781563,89.892325,79.747017,91.389403,66.734375,18.035714,87.821429,0.0,1.087763,0.0,,8.34,
2023-10-13 14:56:40,98.487597,96.751565,89.677660,79.865295,92.926508,60.646154,18.428571,83.928571,0.0,1.078881,0.0,,8.29,


### SJ

In [8]:
# Participant SJ: merge the EEG file with the Fitbit folder using
# 20-second groups and a 15-second EEG error threshold per group.
merger = DataMerger(
    eeg_filepath=r"C:\Users\ballj\OneDrive\바탕 화면\EEG_sj.csv",
    fitbit_folderpath=r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_SJ",
    time_interval=20,
    eeg_remove_time_in_group=15,
)

final_sj = merger.merge_data()

In [9]:
final_sj

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-10-30 11:40:40,100.040588,104.365850,100.496463,93.183416,96.576003,70.093750,0.000000,85.896552,0.0,1.038503,0.0,46.0,16.90,-5.908288
2023-10-30 11:41:00,99.179203,104.105269,100.919528,92.323397,96.200328,70.406250,0.000000,89.233333,0.0,1.031567,0.0,46.0,16.94,-5.904955
2023-10-30 11:41:20,99.047885,104.210858,100.878803,93.141561,96.221573,69.575758,0.000000,90.833333,0.0,1.033030,0.0,46.0,16.94,-5.914955
2023-10-30 11:41:40,99.219181,104.358381,100.012197,93.220061,96.977645,72.032258,27.533333,92.466667,0.0,1.043457,0.0,46.0,16.95,-5.924955
2023-10-30 11:42:00,98.045081,104.198594,100.002187,92.632481,96.308416,71.812500,42.793103,90.689655,0.0,1.041963,0.0,46.0,16.95,-5.934955
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-26 15:48:00,89.916322,93.499059,91.138678,85.640272,83.714056,62.687500,21.275862,83.413793,0.0,1.025899,0.0,,,
2023-10-26 15:48:20,88.859097,92.876816,88.413250,83.042022,83.736244,70.343750,20.033333,85.300000,0.0,1.050485,0.0,,,
2023-10-26 15:48:40,88.613081,91.166116,90.330913,85.286738,84.124391,62.687500,20.793103,80.344828,0.0,1.009246,0.0,,,
2023-10-26 15:49:00,88.402791,91.745194,91.507712,86.265309,82.786509,61.636364,23.724138,83.689655,0.0,1.002595,0.0,,,


### SA

This participant has no CEDA data, which causes an error; for now, that data will be extracted later.

In [10]:
# Participant SA: merge the EEG file with the Fitbit folder using
# 20-second groups and a 15-second EEG error threshold per group.
merger = DataMerger(
    eeg_filepath=r"C:\Users\ballj\OneDrive\바탕 화면\EEG_sa.csv",
    fitbit_folderpath=r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_SA",
    time_interval=20,
    eeg_remove_time_in_group=15,
)

final_sa = merger.merge_data()

In [11]:
final_sa

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,temperature,eda
2023-10-25 20:46:00,92.761538,96.515385,90.023077,83.261538,86.792308,69.192308,0.000000,93.888889,0.00000,1.072118,,,,
2023-10-25 20:46:20,91.180000,93.940000,88.820000,80.480000,84.000000,58.660000,0.000000,100.343750,0.46875,1.057645,,,,
2023-10-25 20:46:40,90.675000,93.700000,88.350000,80.725000,83.500000,56.125000,0.000000,97.687500,0.87500,1.060555,,,,
2023-10-25 20:47:00,90.287500,93.312500,87.912500,78.300000,82.512500,53.225000,14.250000,98.875000,0.00000,1.061425,,,,
2023-10-25 20:47:20,89.741667,93.500000,87.991667,79.066667,83.500000,57.250000,15.963636,98.878788,0.00000,1.062601,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-22 23:12:20,92.450000,100.600000,93.500000,85.900000,93.300000,71.750000,29.575000,82.750000,0.00000,1.075936,,,,
2023-10-22 23:12:40,91.066667,101.033333,93.033333,84.666667,93.633333,80.566667,26.484375,85.375000,0.00000,1.085991,,,,
2023-10-22 23:13:00,92.700000,101.350000,92.850000,84.250000,93.350000,67.350000,24.565625,89.125000,0.00000,1.091546,,,,
2023-10-22 23:13:20,93.566667,101.433333,93.633333,85.066667,93.066667,68.866667,27.857576,85.333333,0.00000,1.083304,,,,


### BS

In [12]:
# Participant BS: merge the EEG file with the Fitbit folder using
# 20-second groups and a 15-second EEG error threshold per group.
merger = DataMerger(
    eeg_filepath=r"C:\Users\ballj\OneDrive\바탕 화면\EEG_bs.csv",
    fitbit_folderpath=r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_BS",
    time_interval=20,
    eeg_remove_time_in_group=15,
)

final_bs = merger.merge_data()

In [13]:
final_bs

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-11-05 19:12:20,98.403525,97.570572,100.023416,93.076138,86.065972,59.812500,0.000000,86.774194,0.0,0.975477,0.0,78.0,,-3.508269
2023-11-05 19:12:40,96.172782,96.519497,97.824861,91.124079,87.826124,56.333333,0.000000,87.766667,0.0,0.986656,0.0,78.0,,-3.494936
2023-11-05 19:13:00,99.947252,100.705061,99.793084,93.265626,89.085035,68.387097,0.000000,97.366667,0.0,1.009139,0.0,78.0,,-3.481603
2023-11-05 19:13:20,99.844556,100.413072,100.208875,94.780553,87.913625,70.906250,39.774194,103.129032,1.0,1.002038,0.0,78.0,,-3.484936
2023-11-05 19:13:40,95.632281,95.799209,96.882419,91.329791,85.756756,37.968750,41.400000,98.800000,0.7,0.988819,0.0,78.0,,-3.488269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-01 09:20:20,86.362100,96.295173,85.630791,77.073261,87.761961,86.545455,30.133333,81.400000,0.0,1.124539,0.0,93.0,15.44,
2023-11-01 09:20:40,92.109831,102.120925,92.182928,85.330806,93.258994,79.531250,28.103448,86.482759,0.0,1.107807,0.0,93.0,15.52,
2023-11-01 09:21:00,94.654803,103.002353,95.062234,88.435281,95.215422,70.031250,39.833333,78.900000,0.0,1.083525,0.0,93.0,15.59,
2023-11-01 09:21:20,95.199706,101.807709,94.857472,87.361981,93.969819,69.125000,41.758621,80.344828,0.0,1.073270,0.0,93.0,15.79,


### MJ

In [14]:
# Participant MJ: merge the EEG file with the Fitbit folder using
# 20-second groups and a 15-second EEG error threshold per group.
merger = DataMerger(
    eeg_filepath=r"C:\Users\ballj\OneDrive\바탕 화면\EEG_mj.csv",
    fitbit_folderpath=r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_MJ",
    time_interval=20,
    eeg_remove_time_in_group=15,
)

final_mj = merger.merge_data()

In [15]:
final_mj

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,temperature,eda
2023-10-18 01:22:40,96.931497,106.140059,95.845341,89.615462,98.625209,64.281250,0.000000,82.689655,0.0,1.107410,,,,
2023-10-18 01:23:00,94.982166,106.002066,92.772362,86.751200,97.812469,68.125000,0.000000,85.200000,0.0,1.142604,,,,
2023-10-18 01:23:20,96.668591,106.020828,93.693484,87.472191,98.049512,70.187500,7.379310,85.517241,0.0,1.131571,,,,
2023-10-18 01:23:40,94.959200,106.174959,93.030984,85.931419,97.868356,65.031250,32.200000,89.366667,0.0,1.141286,,,,
2023-10-18 01:24:00,93.613647,106.228428,90.766597,83.235103,97.075537,75.000000,33.172414,86.689655,0.0,1.170347,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-17 21:42:40,91.511567,100.610650,90.017020,81.141860,93.058360,78.733333,67.724138,96.275862,0.0,1.117685,,,,
2023-10-17 21:43:00,93.092659,100.463806,91.055906,83.153009,93.647281,70.937500,32.357143,110.250000,0.0,1.103320,,,,
2023-10-17 21:43:20,95.131594,103.101097,93.605047,86.577569,94.072862,71.125000,41.413793,117.413793,0.0,1.101448,,,,
2023-10-17 21:43:40,94.149709,103.447822,92.192878,85.414209,94.190503,80.281250,45.931034,117.310345,0.0,1.122080,,,,


### Concat EEG

In [16]:
# Stack every participant's merged table and order the rows by their index.
dataset = pd.concat(
    [final_jm, final_sj, final_bs, final_yh, final_mj, final_sa]
).sort_index()
dataset

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-10-13 14:38:00,98.868186,99.725558,89.895972,81.630888,93.404548,74.953846,0.000000,86.000000,0.0,1.109344,0.0,,15.38,
2023-10-13 14:38:20,100.059108,100.893505,91.251317,82.447349,93.760684,76.777778,0.000000,83.321429,0.0,1.105666,0.0,,15.28,
2023-10-13 14:38:40,101.115632,104.505088,92.826755,84.826905,90.295372,67.476923,0.000000,85.000000,0.0,1.125808,0.0,,15.18,
2023-10-13 14:39:00,102.382959,105.087347,92.123622,83.651337,90.303792,74.093750,12.035714,82.464286,0.0,1.140721,0.0,,15.08,
2023-10-13 14:39:20,101.075003,102.583895,93.021812,84.468237,88.214489,60.000000,14.714286,82.357143,0.0,1.102794,0.0,,15.01,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-09 02:22:00,96.207987,104.336777,94.194058,86.626061,94.166265,77.935484,33.133333,67.300000,0.0,1.107679,0.0,75.0,,
2023-11-09 02:22:20,96.598006,104.144987,93.884403,87.954487,94.578369,73.437500,28.137931,69.137931,0.0,1.109290,0.0,75.0,,
2023-11-09 02:22:40,96.831525,104.121844,95.060881,87.875966,94.568975,81.531250,33.433333,75.666667,0.0,1.095317,0.0,75.0,,
2023-11-09 02:23:00,96.056381,104.295156,96.281588,88.869034,94.667859,69.562500,44.793103,70.034483,0.0,1.083231,0.0,75.0,,


In [17]:
# Filename-safe stamp taken from the LAST index entry of the sorted dataset;
# underscores stand in for the colons of the time part.
start_time = dataset.index[-1].strftime('%Y-%m-%d %H_%M_%S')

In [18]:
# Write the combined dataset to a CSV file whose name embeds the
# `start_time` stamp (inserted at the `{}` placeholder).
dataset.to_csv(r'C:\Users\ballj\OneDrive\바탕 화면\dataset_{}.csv'.format(start_time))

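**To do: a column for exercise time in the 1 hour before the experiment measurement time / a column for exercise time in the 24 hours before the experiment measurement time / GitHub**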