In [None]:
!pip install tsgm python-dotenv

In [None]:
!pip install --upgrade tensorflow

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import random
import seaborn as sns
from scipy import stats
from collections import Counter
import matplotlib.pyplot as plt
import dotenv
import gspread


import sys
from google.auth import default
from google.colab import auth, drive


from tqdm import tqdm
from tqdm.notebook import tqdm

# Enable tqdm's pandas integration (progress bars for pandas operations).
tqdm.pandas()

# Google Drive has to be mounted before the .env file is read, because the
# .env file is stored on the mounted drive.
DRIVE_MOUNT_POINT = '/content/drive'
ENV_FILE_PATH = '/content/drive/MyDrive/.env'

drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

# Load the environment variables read by the configuration below.
dotenv.load_dotenv(ENV_FILE_PATH)

# Flag passed through to write_features_in_csv during feature extraction.
DELETE_FEATURE_SAMPLE = False

# Dataset procedures whose processed folders are scanned for ST files.
DATASET_PROCEDURES = ["DP_1", "DP_2"]

# Number of augmented samples expected for each augmentation procedure; used
# to build the list of augmented file names (Aug-<procedure>-<n>_ST.csv).
AUGMENTATION_SAMPLE_AMOUNT_BY_PROCEDURE = {
    "AP_2": 1,
    "AP_3": 1,
    "AP_4": 2,
    "AP_5": 9,
    # NOTE(review): this key previously appeared twice in the literal (10, then
    # 11). Python keeps the last duplicate, so the effective value was 11 and
    # is preserved here. Confirm that 11 is the intended count (or whether one
    # of the entries was meant for a different procedure).
    "AP_6": 11,
    "AP_7": 11,
    "AP_8": 22,
    "AP_9": 22,
}
AUGMENTATION_PROCEDURES = AUGMENTATION_SAMPLE_AMOUNT_BY_PROCEDURE.keys()

# Root experiment path, read from the environment (.env file loaded earlier).
MAIN_PATH = os.environ.get('MAIN_EXP_PATH')
if MAIN_PATH is None:
  # Without this variable the processed path below becomes "None(Processed)",
  # so surface the problem here instead of failing later on missing files.
  print("Warning: MAIN_EXP_PATH environment variable is not set.")
MAIN_PROCESSED_PATH = f"{MAIN_PATH}(Processed)"


In [None]:
# Make the utility scripts importable: when UTIL_SCRIPT_PATH is set, its value
# is placed at the front of sys.path; otherwise only a warning is printed.
util_script_path = os.environ.get('UTIL_SCRIPT_PATH')
if util_script_path is None:
  print("Warning: UTIL_SCRIPT_PATH environment variable is not set.")
  # Consider providing a default path or handling the error differently
else:
  sys.path.insert(0, util_script_path)

In [None]:
from analyze_and_transform_datasets import list_files_scandir, \
                                          get_formatted_values, \
                                          format_time_str, \
                                          get_sample_path, \
                                          read_datas_from_csv, \
                                          write_features_in_csv, \
                                          get_signal_metrics

In [None]:
# File names accepted by the scan: the base ST file plus one augmented file
# name per (augmentation procedure, sample number), numbered from 1.
allowed_files = ['ST.csv']
for PROCEDURE_NAME in AUGMENTATION_PROCEDURES:
  sample_amount = AUGMENTATION_SAMPLE_AMOUNT_BY_PROCEDURE[PROCEDURE_NAME]
  allowed_files.extend(
      f'Aug-{PROCEDURE_NAME}-{sample_number}_ST.csv'
      for sample_number in range(1, sample_amount + 1)
  )

# Maps each dataset procedure to the list of files found for it.
all_data_infos = {}
for DATASET_PROCEDURE in DATASET_PROCEDURES:
  # list_files_scandir is expected to fill the list passed to it in place.
  all_data_list = []
  procedure_root = f'{MAIN_PROCESSED_PATH} {DATASET_PROCEDURE}'
  list_files_scandir(allowed_files, all_data_list, procedure_root, MAIN_PATH, True)
  print(f'Number of files will be used in procedure {DATASET_PROCEDURE} : {len(all_data_list)}')
  all_data_infos[DATASET_PROCEDURE] = all_data_list


### Extracting Physiological Metrics

#### - Skin Temperature (SKT)

In [None]:
# Draw one dataset procedure at random, then three of its files; the first
# drawn file is loaded as a preview dataframe.
(random_procedure,) = random.sample(DATASET_PROCEDURES, 1)
random_samples = random.sample(all_data_infos[random_procedure], 3)

preview_sample = random_samples[0]
preview_path = get_sample_path(preview_sample, random_procedure, MAIN_PROCESSED_PATH)
print(f'Read from : {preview_path}')

sample_df = read_datas_from_csv(preview_sample, random_procedure, MAIN_PROCESSED_PATH)
sample_df.head()

In [None]:
# Show the signal metrics computed for the previewed ST dataframe.
# NOTE(review): the second argument (10) presumably is the interval value also
# used during feature extraction — confirm against get_signal_metrics.
get_signal_metrics(sample_df, 10, 'ST')

In [None]:

# Write ST features for every file of every dataset procedure.
#
# all_data_infos is iterated once per dataset procedure. An additional outer
# loop over AUGMENTATION_PROCEDURES is not needed: its loop variable was never
# used in the body, so it only repeated the exact same reads and writes once
# per augmentation procedure.
for DATASET_PROCEDURE in DATASET_PROCEDURES:
  for ST_file in tqdm(all_data_infos[DATASET_PROCEDURE]):
    raw_dataframe = read_datas_from_csv(ST_file, DATASET_PROCEDURE, MAIN_PROCESSED_PATH)
    for interval_value in [10, 30, 50, 70]:
      # Only the interval value 10 is currently written; the remaining
      # candidate values are skipped.
      if interval_value != 10:
        continue
      write_features_in_csv(ST_file, raw_dataframe, interval_value, DATASET_PROCEDURE, 'ST', MAIN_PROCESSED_PATH, DELETE_FEATURE_SAMPLE, special_features=[], sampling_rate=4)
