In [None]:
!pip install neurokit2 python-dotenv

In [None]:
import os
import random
import shutil
import zipfile

import dotenv
import neurokit2 as nk
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from google.colab import drive
from scipy import stats, signal

# Mount Google Drive so the dataset and the .env file are reachable from Colab.
drive.mount('/content/drive', force_remount=True)

# Load environment variables (MAIN_EXP_PATH is read below) from the .env file on Drive.
dotenv.load_dotenv('/content/drive/MyDrive/.env')

MAIN_PATH = os.environ.get('MAIN_EXP_PATH')
if not MAIN_PATH:
  # Fail early: with MAIN_PATH = None, os.listdir(None) would silently list the
  # current directory and the processed path would become "None(Processed)".
  raise RuntimeError(
      "MAIN_EXP_PATH is not set; define it in /content/drive/MyDrive/.env "
      "or in the environment before running this notebook."
  )

# Drop any trailing slash so string-built paths such as f"{MAIN_PATH}(Processed)"
# name a sibling folder instead of a folder nested inside the raw dataset.
MAIN_PATH = os.path.normpath(MAIN_PATH)
MAIN_PROCESSED_PATH = f"{MAIN_PATH}(Processed)"
DATASET_PROCEDURES = ['DP_1', 'DP_2']


In [None]:
# Names directly under MAIN_PATH that contain no dot (entries that do not look
# like files with an extension).
diagnosis_results = [entry_name for entry_name in os.listdir(MAIN_PATH) if "." not in entry_name]
diagnosis_results

In [None]:
# Recursive directory walk adapted from
# https://www.geeksforgeeks.org/python-list-all-files-in-directory-and-subdirectories/
all_data_infos = []

def list_files_scandir(path='.', root=None):
    """Recursively record every file ending in "EDA.csv" below ``path``.

    For each match, a dict with the keys 'diagnose_result', 'sample_name' and
    'game_name' is appended to the module-level list ``all_data_infos``. The three
    values are the first three folder names of the file's location relative to
    ``root``.

    Args:
        path: Directory to scan (sub-directories are scanned recursively).
        root: Directory the folder names are taken relative to. Defaults to
            ``MAIN_PATH`` when omitted.

    Files that sit fewer than three folders below ``root`` (or outside it) cannot
    be mapped to the three keys; they are reported and skipped.
    """
    if root is None:
        root = MAIN_PATH
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                if entry.path.endswith("EDA.csv"):
                    # relpath is insensitive to a trailing slash on `root`, unlike
                    # splitting the path string on the root prefix.
                    relative_folder = os.path.relpath(os.path.dirname(entry.path), root)
                    sample_components = relative_folder.split(os.sep)
                    if len(sample_components) < 3 or sample_components[0] == os.pardir:
                        print(f'Skipping {entry.path}: not located three folders below {root}')
                        continue
                    all_data_infos.append({
                        'diagnose_result': sample_components[0],
                        'sample_name': sample_components[1],
                        'game_name': sample_components[2],
                    })
            elif entry.is_dir():
                list_files_scandir(entry.path, root)

# Scan the whole dataset folder, then report how many recordings were registered.
list_files_scandir(path=MAIN_PATH)

print(f'Number of files will be used : {len(all_data_infos)}')

### Physiological Metric Collection

#### Electrodermal Activity (EDA)

In [None]:
# Pick one registered recording at random and load its EDA.csv for inspection.
random_samples = random.sample(all_data_infos, 1)
picked_sample = random_samples[0]

sample_path = '/'.join([
    MAIN_PATH,
    picked_sample['diagnose_result'],
    picked_sample['sample_name'],
    picked_sample['game_name'],
    'EDA.csv',
])
print(sample_path)
df = pd.read_csv(sample_path)

In [None]:

## Apply Savitzky-Golay filter
#From this site: https://github.com/hiddenslate-dev/aktives-dataset-2022/blob/main/preprocess.ipynb


Apply the processing to all dataset values and show summary statistics.

In [None]:
# Lowest accepted value of (rows - labels * 40) / rows for one EDA file;
# get_eda_dataframe() returns None for any file whose rate falls below it.
min_treashold = -0.05

In [None]:

def get_number_of_labels(sample_df_path):
  """Count the labelled rows of ``<sample_df_path>/ExpertLabels.csv``.

  A row is counted when at least one of its Expert1 / Expert2 / Expert3 cells
  holds a string. The first data row of the file is not considered.

  Args:
    sample_df_path: Folder that contains ExpertLabels.csv.

  Returns:
    Number of counted rows as an int.
  """
  expert_columns = ['Expert1', 'Expert2', 'Expert3']
  labels_table = pd.read_csv(f'{sample_df_path}/ExpertLabels.csv')
  rows_after_first = labels_table[expert_columns].values[1:]
  return sum(
      1
      for row in rows_after_first
      if any(type(cell).__name__ == 'str' for cell in row)
  )

def get_formatted_values(bvp_value):
  """Return ``bvp_value`` with every comma removed when it is a string.

  Any non-string input is handed back unchanged.
  """
  if type(bvp_value).__name__ != 'str':
    return bvp_value
  return bvp_value.replace(',', '')

def get_eda_dataframe(data_infos):
  """Load one EDA recording and trim it to whole 40-row label windows.

  Reads ``<MAIN_PATH>/<diagnose_result>/<sample_name>/<game_name>/EDA.csv``,
  tags every row with a 'source_file' column, drops an 'EDA' column when present
  and converts 'values' to float64 (commas are stripped from string values first).

  The row count is then compared with ``number_of_labels * 40``
  (presumably 10 s of signal per label at 4 Hz - TODO confirm):
    * rate = (rows - labels * 40) / rows below ``min_treashold`` -> file rejected;
    * rate > 0 (more rows than labelled windows) -> keep the first labels * 40 rows;
    * otherwise -> keep the largest multiple of 40 rows.

  Args:
    data_infos: dict with the keys 'diagnose_result', 'sample_name', 'game_name'.

  Returns:
    The trimmed DataFrame, or None when the file is empty, rejected by the
    threshold, or left without any row after trimming.
  """
  rows_per_label = 40
  sample_path = '/'.join([
      data_infos['diagnose_result'],
      data_infos['sample_name'],
      data_infos['game_name'],
  ])
  print(f'Reading values in file {sample_path} ...')
  df = pd.read_csv(f'{MAIN_PATH}/{sample_path}/EDA.csv')
  if df.empty:
    # Avoids a ZeroDivisionError in the rate computation below.
    print(f'No rows found in {sample_path}/EDA.csv, file skipped')
    return None

  df['source_file'] = sample_path
  if 'EDA' in df.columns:
    df = df.drop(columns=['EDA'])
  df['values'] = df['values'].apply(get_formatted_values).astype(np.float64)

  number_of_labels = get_number_of_labels(f'{MAIN_PATH}/{sample_path}')
  total_rows = df.shape[0]
  labelled_rows = number_of_labels * rows_per_label
  chnaged_rate = (total_rows - labelled_rows) / total_rows
  if chnaged_rate < min_treashold :
    print(f"Rate of losed values is higher than {min_treashold} as {chnaged_rate}")
    return None

  if chnaged_rate > 0:
    rows_to_keep = labelled_rows
  else:
    rows_to_keep = total_rows - (total_rows % rows_per_label)

  if rows_to_keep == 0:
    print(f'No complete {rows_per_label}-row window in {sample_path}, file skipped')
    return None

  # Slice ROWS (the previous `iloc[:, :n]` sliced columns) and return the
  # trimmed frame (previously the untrimmed `df` was returned).
  return df.iloc[:rows_to_keep].copy()


In [None]:

# Load every registered recording; get_eda_dataframe() returns None for rejected files.
loaded_eda_frames = []
for EDA_file in all_data_infos:
  bvp_dataframe = get_eda_dataframe(EDA_file)
  if bvp_dataframe is not None:
    loaded_eda_frames.append(bvp_dataframe)

if not loaded_eda_frames:
  raise ValueError('No EDA recording could be loaded; check MAIN_PATH and min_treashold.')

# A single concat avoids re-copying the growing frame on every iteration;
# ignore_index=True renumbers the rows 0..n-1.
all_eda_dataframe = pd.concat(loaded_eda_frames, ignore_index=True)

print("Gathering copmleted!")
# Only the last expression of a cell is rendered, so the summary is printed explicitly.
print(all_eda_dataframe['values'].describe())
all_eda_dataframe.head()

In [None]:
# Smooth the raw signal with a Savitzky-Golay filter (window length 11, polynomial order 5).
# `savgol_filter` was never imported by name; it is reached through the imported `scipy.signal`.
# NOTE(review): the filter runs over the concatenation of all recordings, so windows
# next to a file boundary mix samples of two recordings - confirm this is intended.
all_eda_dataframe['EDA_Filtered'] = signal.savgol_filter(all_eda_dataframe['values'], 11, 5)
all_eda_dataframe['EDA_Filtered'].describe()

In [None]:
# Min-max rescale of the filtered signal to the 0-100 range, using the extremes
# of the whole column.
filtered_signal = all_eda_dataframe["EDA_Filtered"]
lowest_value = filtered_signal.min()
highest_value = filtered_signal.max()
all_eda_dataframe["EDA_Filtered"] = (filtered_signal - lowest_value) / (highest_value - lowest_value) * 100
all_eda_dataframe['EDA_Filtered'].describe()

In [None]:
# Decompose the rescaled signal with NeuroKit2 and copy the tonic and phasic
# columns of the result back onto the main frame.
df_scr_scl = nk.eda_phasic(all_eda_dataframe["EDA_Filtered"], sampling_rate=4)
for component_name in ("EDA_Tonic", "EDA_Phasic"):
  all_eda_dataframe[component_name] = df_scr_scl[component_name]
df_scr_scl.describe()

In [None]:
# Export for the first dataset procedure: for every source recording, write three
# CSV files (filtered, tonic, phasic) under "<MAIN_PROCESSED_PATH> <DP_1>/<source_file>/".
# Each file holds the remaining columns plus one signal column renamed to "values".
if 'values' in all_eda_dataframe.columns:
  all_eda_dataframe = all_eda_dataframe.drop(columns=['values'])

main_path = f'{MAIN_PROCESSED_PATH} {DATASET_PROCEDURES[0]}'
os.makedirs(main_path, exist_ok=True)

# Output file name -> signal column stored in that file.
EDA_OUTPUT_FILES = {
    'EDA.csv': 'EDA_Filtered',
    'EDA_Tonic.csv': 'EDA_Tonic',
    'EDA_Phasic.csv': 'EDA_Phasic',
}
component_columns = list(EDA_OUTPUT_FILES.values())

for source_file_name in all_eda_dataframe['source_file'].unique():
  subset_all_eda_dataframe = (
      all_eda_dataframe[all_eda_dataframe['source_file'] == source_file_name]
      .drop(columns=['source_file'])
  )

  # makedirs creates all missing intermediate folders in one call.
  EDA_file_path = f'{main_path}/{source_file_name}'
  os.makedirs(EDA_file_path, exist_ok=True)

  for output_name, kept_column in EDA_OUTPUT_FILES.items():
    dropped_columns = [column for column in component_columns if column != kept_column]
    output_file = f'{EDA_file_path}/{output_name}'
    # to_csv overwrites an existing file, so no prior removal is needed.
    (
        subset_all_eda_dataframe
        .drop(columns=dropped_columns)
        .rename(columns={kept_column: 'values'})
        .to_csv(output_file, index=True)
    )
    print(f'{output_file} file created')


In [None]:
# Second dataset procedure: mirror the three exported CSV files of every source
# recording from the DP_1 folder tree into the DP_2 folder tree.
src_main_path = f'{MAIN_PROCESSED_PATH} {DATASET_PROCEDURES[0]}'
main_path = f'{MAIN_PROCESSED_PATH} {DATASET_PROCEDURES[1]}'
os.makedirs(main_path, exist_ok=True)

for source_file_name in all_eda_dataframe['source_file'].unique():
  src_EDA_file_path = f'{src_main_path}/{source_file_name}'
  EDA_file_path = f'{main_path}/{source_file_name}'
  # makedirs creates all missing intermediate folders in one call.
  os.makedirs(EDA_file_path, exist_ok=True)

  for output_name in ('EDA.csv', 'EDA_Tonic.csv', 'EDA_Phasic.csv'):
    src_file = f'{src_EDA_file_path}/{output_name}'
    dst_file = f'{EDA_file_path}/{output_name}'

    # A destination left over from an earlier run is removed even when the
    # source is missing, so no stale file survives.
    if os.path.exists(dst_file):
      os.remove(dst_file)

    if os.path.exists(src_file):
      shutil.copy(src_file, dst_file)
      print(f'{dst_file} file created')
    else:
      print(f'{src_file} not found, nothing copied')

