In [None]:
# Install the third-party packages imported below as `neurokit2`, `heartpy` and `dotenv`.
!pip install neurokit2 heartpy python-dotenv

In [None]:
# NOTE(review): tensorflow is not imported in any visible cell of this notebook —
# confirm this upgrade is still needed before keeping the cell.
!pip install --upgrade tensorflow

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import random
import seaborn as sns
import heartpy as hp
import neurokit2 as nk
import matplotlib.pyplot as plt
import time
import dotenv
import gspread
import copy

import sys
from google.auth import default
from google.colab import auth, drive
from scipy import stats
from collections import Counter


import warnings
warnings.simplefilter('ignore')

from tqdm import tqdm
from tqdm.notebook import tqdm

# Register tqdm's progress_apply helpers on pandas objects.
tqdm.pandas()

# The .env file read below lives on Google Drive, so the drive is mounted first.
drive.mount('/content/drive', force_remount=True)

# Loads the environment variables read in this notebook (MAIN_EXP_PATH, UTIL_SCRIPT_PATH).
dotenv.load_dotenv('/content/drive/MyDrive/.env')

# Passed straight through to write_features_in_csv in the extraction loop.
DELETE_FEATURE_SAMPLE = True
# Suffixes of the processed dataset folders: "<MAIN_PATH>(Processed) <procedure>".
DATASET_PROCEDURES = ["DP_1", "DP_2"]
# Number of augmented files expected per augmentation procedure
# (file names follow "Aug-<procedure>-<n>_BVP.csv", n starting at 1).
AUGMENTATION_SAMPLE_AMOUNT_BY_PROCEDURE = {"AP_2": 1, "AP_3": 1, "AP_5" : 9, "AP_6": 11, "AP_7": 11, "AP_8": 22, "AP_9": 22}
AUGMENTATION_PROCEDURES = AUGMENTATION_SAMPLE_AMOUNT_BY_PROCEDURE.keys()

MAIN_PATH = os.environ.get('MAIN_EXP_PATH')
if MAIN_PATH is None:
  # Without this check the processed path would silently become "None(Processed)"
  # and the failure would only surface later as missing files.
  raise RuntimeError(
    "MAIN_EXP_PATH environment variable is not set; "
    "check that '/content/drive/MyDrive/.env' exists and defines it."
  )
MAIN_PROCESSED_PATH = f"{MAIN_PATH}(Processed)"




In [None]:
# Make the project's helper scripts importable from the folder named by UTIL_SCRIPT_PATH.
util_script_path = os.environ.get('UTIL_SCRIPT_PATH')
if util_script_path is None:
  print("Warning: UTIL_SCRIPT_PATH environment variable is not set.")
elif util_script_path not in sys.path:
  # The membership check keeps the cell idempotent: re-running it no longer
  # stacks duplicate entries at the front of sys.path.
  sys.path.insert(0, util_script_path)

In [None]:
from analyze_and_transform_datasets import list_files_scandir, \
                                          get_formatted_values, \
                                          format_time_str, \
                                          get_sample_path, \
                                          read_datas_from_csv, \
                                          get_labels_df, \
                                          write_features_in_csv, \
                                          get_signal_metrics

In [None]:

# Build the whitelist of BVP file names: the plain 'BVP.csv' only when 'AP_1' is
# among the augmentation procedures, plus one "Aug-<procedure>-<n>_BVP.csv"
# entry per configured augmented copy.
all_data_infos = {}
all_data_list = []
allowed_files = ['BVP.csv'] if 'AP_1' in AUGMENTATION_PROCEDURES else []
for PROCEDURE_NAME in AUGMENTATION_PROCEDURES:
  sample_amount = AUGMENTATION_SAMPLE_AMOUNT_BY_PROCEDURE[PROCEDURE_NAME]
  allowed_files.extend(
    f'Aug-{PROCEDURE_NAME}-{copy_number}_BVP.csv'
    for copy_number in range(1, sample_amount + 1)
  )

# Collect the matching files separately for every dataset procedure;
# list_files_scandir fills the list object it is handed.
for DATASET_PROCEDURE in DATASET_PROCEDURES:
  all_data_list = []
  processed_dir = f'{MAIN_PROCESSED_PATH} {DATASET_PROCEDURE}'
  list_files_scandir(allowed_files, all_data_list, processed_dir, MAIN_PATH, True)
  all_data_infos[DATASET_PROCEDURE] = all_data_list
  print(f'Number of files will be used in procedure {DATASET_PROCEDURE}: {len(all_data_list)}')


### Extracting Physiological Metrics

#### - Blood Volume Pulse (BVP)

In [None]:
# Pick one dataset procedure at random, then three of its file entries;
# the first of them (random_samples[0]) is read in the next cell.
# NOTE(review): no random seed is set in the visible cells, so the picked
# sample changes from run to run.
random_procedure = random.sample(DATASET_PROCEDURES, 1)[0]
random_samples = random.sample(all_data_infos[random_procedure], 3)


In [None]:

# Path layout of a processed sample:
# "<processed root> <procedure>/<diagnose_result>/<sample_name>/<game_name>/<file_name>"
first_sample = random_samples[0]
sample_path = '/'.join([
  f'{MAIN_PROCESSED_PATH} {random_procedure}',
  first_sample['diagnose_result'],
  first_sample['sample_name'],
  first_sample['game_name'],
  first_sample['file_name'],
])

print(f'Read from : {sample_path}')
df = pd.read_csv(sample_path)
# Normalise the raw 'time' column with format_time_str, then parse it as seconds.
df['time'] = pd.to_datetime(df['time'].apply(format_time_str), unit='s')
df = df.sort_values(by=['time'])
# Index the frame by timestamp (the 'time' column is kept as well) so the
# following cells can slice by time. Assigning the index is what does this;
# setting a plain attribute such as `df.index_col` has no effect on the frame.
df.index = df['time']
df.head()

In [None]:

# Spot check on the first 10 seconds of the sample: min-max scale the window,
# run HeartPy on it (64 is passed as the sample rate) and print what comes back.
window_start = df.index.values[0]
df_values = df.loc[window_start:(window_start + np.timedelta64(10, 's')), 'values']
if len(df_values) > 0:
  lowest, highest = np.min(df_values), np.max(df_values)
  df_scaled_values = (df_values - lowest) / (highest - lowest)
  working_data, measures = hp.process(df_scaled_values.values, 64, report_time=True)
  working_data, measures = hp.analysis.calc_fd_measures(measures = measures, working_data = working_data)

  # These working_data entries are left out of the dump
  # (presumably because they are long arrays — confirm).
  skipped_keys = ("hr", "rolling_mean", "breathing_signal", "breathing_psd", "breathing_freq")
  for working_dt, working_value in working_data.items():
    if working_dt in skipped_keys:
      continue
    print(f'{working_dt} : {working_value}')

  print("=====================================")
  for measure, measure_value in measures.items():
    print(f'{measure} : {measure_value}')


In [None]:

# Work on a copy in the extraction checks below so `df` itself stays untouched.
df_copy = df.copy()

In [None]:
# HeartPy measure names collected by get_hrv_metrics; each one is pre-filled
# with NaN there and overwritten when HeartPy reports a value. The order is
# significant for anything that builds columns from this list.
hrv_measures = [
    # time-domain measures
    'bpm', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'hr_mad',
    # Poincare-plot measures
    'sd1', 'sd2', 's', 'sd1/sd2',
    # breathing
    'breathingrate',
    # frequency-domain measures
    'vlf', 'lf', 'hf', 'lf/hf', 'p_total',
    'vlf_perc', 'lf_perc', 'hf_perc', 'lf_nu', 'hf_nu',
]

# Count-based features filled in by get_bvp_metrics.
statistic_measures = ['BVP_zero_cross', 'BVP_positive_peak', 'BVP_negative_peak']

In [None]:
def get_hrv_metrics(sample_bvp_df, sampling_rate):
  """Collect HeartPy measures for one BVP signal, retrying on a scaled copy.

  HeartPy (`hp.process` followed by `hp.analysis.calc_fd_measures`) is first
  run on the values as given. If that raises, the error is printed and one
  more attempt is made on a min-max scaled copy of the signal.

  Args:
    sample_bvp_df: signal object exposing `.values` and supporting arithmetic
      with `np.min` / `np.max` (the notebook passes a DataFrame column).
    sampling_rate: forwarded to `hp.process` as the sample rate.

  Returns:
    dict holding every name in `hrv_measures` (NaN when nothing could be
    computed) plus any other key HeartPy puts in its measures.
  """
  # Start from NaN so every expected measure exists even when HeartPy fails.
  hrv_infos = dict.fromkeys(hrv_measures, np.nan)

  # Each attempt is (signal builder, message printed when the attempt fails).
  # The builders are lazy so the scaling only happens if the raw attempt failed.
  attempts = (
    (
      lambda: sample_bvp_df,
      "hp cannot calculted...",
    ),
    (
      lambda: (sample_bvp_df - np.min(sample_bvp_df)) / (np.max(sample_bvp_df) - np.min(sample_bvp_df)),
      "hp cannot calculted in scaled...",
    ),
  )

  for build_signal, failure_note in attempts:
    try:
      candidate = build_signal()
      working_data, measures = hp.process(candidate.values, sampling_rate)
      working_data, measures = hp.analysis.calc_fd_measures(measures = measures, working_data = working_data)
      hrv_infos.update(measures)
    except Exception as error:
      print(error)
      print(failure_note)
    else:
      # First successful attempt wins; the remaining ones are skipped.
      break

  return hrv_infos



In [None]:
# Smoke test: HRV extraction on the whole sample, with 64 passed as sampling_rate.
get_hrv_metrics(df_copy['values'], 64)

In [None]:
def get_bvp_metrics(df_values, sampling_rate):
  """Compute the BVP feature dict for one signal window.

  Combines the HeartPy measures from `get_hrv_metrics` with three counts
  obtained from NeuroKit2: zero crossings, peaks of the signal and peaks of
  the negated signal.

  Args:
    df_values: signal values; must support unary minus (the notebook passes a
      DataFrame column).
    sampling_rate: forwarded to `get_hrv_metrics`.

  Returns:
    dict keyed by every name in `statistic_measures` followed by every name in
    `hrv_measures`. A feature that could not be computed stays NaN; the
    failure is printed instead of raised.
  """
  statistic_values = {statistic_measure: np.nan for statistic_measure in statistic_measures}

  hrv_metrics = get_hrv_metrics(df_values, sampling_rate)

  # NOTE(review): the purpose of this pause is not visible in this notebook;
  # it is kept as-is — confirm whether it is still needed.
  time.sleep(0.25)
  for hrv_measure in hrv_measures:
    statistic_values[hrv_measure] = hrv_metrics[hrv_measure]

  # (feature name, how to count it, message printed when counting fails)
  count_features = (
    (
      'BVP_zero_cross',
      lambda: len(nk.signal_zerocrossings(df_values)),
      "BVP zero crossings cannot calculated",
    ),
    (
      'BVP_positive_peak',
      lambda: len(nk.signal_findpeaks(df_values)["Peaks"]),
      "BVP positive peaks cannot calculated",
    ),
    (
      # Peaks of the negated signal, i.e. the troughs of the original one.
      'BVP_negative_peak',
      lambda: len(nk.signal_findpeaks(-df_values)["Peaks"]),
      "BVP negative peaks cannot calculated",
    ),
  )

  # Best effort: one failing count must not prevent the others.
  for feature_name, count_events, failure_note in count_features:
    try:
      statistic_values[feature_name] = count_events()
    except Exception as e:
      print(e)
      print(failure_note)

  return statistic_values


In [None]:
# Smoke test: full BVP feature dict for the whole sample, with 64 passed as sampling_rate.
get_bvp_metrics(df_copy['values'], 64)

In [None]:
# Feature names handed to the shared helpers as `special_features`: the count
# features as-is, followed by the HRV names upper-cased with "/" removed
# (e.g. 'sd1/sd2' -> 'SD1SD2').
# NOTE(review): get_bvp_metrics returns the HRV values under the original
# lower-case names; presumably the helpers map between the two spellings — confirm.
hrv_measures_copy = [
  hrv_name.upper().replace("/", "").replace("İ", "I")
  for hrv_name in hrv_measures
]
all_bvp_measures = [*statistic_measures, *hrv_measures_copy]

In [None]:
# Smoke test of the shared helper on the sample, with get_bvp_metrics plugged in
# as the BVP-specific callback (10 is presumably the interval length, matching
# `interval_value` in the extraction loop — confirm against the helper).
get_signal_metrics(df_copy, 10, 'BVP', [], special_features=all_bvp_measures, get_special_metrics_callback=get_bvp_metrics, sampling_rate=64)

In [None]:

# Run write_features_in_csv for every listed BVP file of every dataset procedure.
for DATASET_PROCEDURE in DATASET_PROCEDURES:
  progress_title = f'Feature extraction for {DATASET_PROCEDURE} dataset procedure'
  for BVP_file in tqdm(all_data_infos[DATASET_PROCEDURE], desc=progress_title):
    raw_dataframe = read_datas_from_csv(BVP_file, DATASET_PROCEDURE, MAIN_PROCESSED_PATH)
    for interval_value in [10, 30, 50, 70]:
      # NOTE(review): four intervals are listed but only 10 is processed;
      # confirm whether the others are meant to be enabled.
      if interval_value != 10:
        continue
      write_features_in_csv(
        BVP_file,
        raw_dataframe,
        interval_value,
        DATASET_PROCEDURE,
        'BVP',
        MAIN_PROCESSED_PATH,
        DELETE_FEATURE_SAMPLE,
        special_features=all_bvp_measures,
        get_special_metrics_callback=get_bvp_metrics,
        sampling_rate=64,
      )
