In [None]:
!pip install python-dotenv

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import zipfile
import os
import random
import seaborn as sns
from scipy import stats, signal
import shutil


import dotenv
from google.colab import drive

# Mount Google Drive so the .env file and the dataset folders are reachable.
drive.mount('/content/drive', force_remount=True)

# Load environment variables (MAIN_EXP_PATH is read below) from the .env file on Drive.
dotenv.load_dotenv('/content/drive/MyDrive/.env')

MAIN_PATH = os.environ.get('MAIN_EXP_PATH')
if not MAIN_PATH:
  # Fail fast: with MAIN_PATH unset every later path would be built from the
  # literal text "None" and the directory scan would fall back to the
  # current working directory.
  raise RuntimeError(
      "Environment variable MAIN_EXP_PATH is not set; "
      "check /content/drive/MyDrive/.env"
  )
# Root of the processed output trees; the procedure name is appended later.
MAIN_PROCESSED_PATH = f"{MAIN_PATH}(Processed)"
DATASET_PROCEDURES = ['DP_1', 'DP_2']


In [None]:
# One dict per discovered ST.csv, filled in by list_files_scandir().
all_data_infos = []

def list_files_scandir(path='.'):
    """Recursively collect every ``ST.csv`` found under ``path``.

    For each match a dict with the keys ``diagnose_result``, ``sample_name``
    and ``game_name`` is appended to the module-level ``all_data_infos``
    list. The three values are the first three directory names of the file's
    path relative to ``MAIN_PATH``.

    Files that do not sit at least three directories below ``MAIN_PATH`` are
    reported and skipped, because no complete record can be built for them.

    Args:
        path: Directory to scan; sub-directories are visited recursively.

    Returns:
        None. Results accumulate in ``all_data_infos``.
    """
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir():
                list_files_scandir(entry.path)
                continue
            # Compare the exact file name: a suffix test on the full path
            # would also accept e.g. "BEST.csv", while every later cell
            # reads ".../ST.csv".
            if not entry.is_file() or entry.name != "ST.csv":
                continue

            # relpath is unaffected by a trailing slash on MAIN_PATH and by
            # MAIN_PATH's text reappearing further down the path.
            sample_components = os.path.relpath(entry.path, MAIN_PATH).split(os.sep)
            if len(sample_components) < 4 or sample_components[0] == os.pardir:
                print(f'Skipping {entry.path}: not in a <diagnose>/<sample>/<game>/ folder')
                continue

            all_data_infos.append({
                'diagnose_result': sample_components[0],
                'sample_name': sample_components[1],
                'game_name': sample_components[2]
            })

# Walk the whole dataset tree once; every match is appended to all_data_infos.
list_files_scandir(path=MAIN_PATH)

# Report how many ST files the following cells will work with.
print(f'Number of files will be used : {len(all_data_infos)}')

### Physiological Metric Collecting

#### - Skin Temperature

In [None]:
# Draw a single recording at random and load its ST.csv for inspection.
random_samples = random.sample(all_data_infos, 1)
picked_sample = random_samples[0]

sample_path = '/'.join([
    MAIN_PATH,
    picked_sample['diagnose_result'],
    picked_sample['sample_name'],
    picked_sample['game_name'],
    'ST.csv',
])
print(sample_path)
df = pd.read_csv(sample_path)

In [None]:
# Drop the 'ST' and 'time' columns when the file carries both of them.
if 'ST' in df.columns and 'time' in df.columns:
  df = df.drop(columns=['ST', 'time'])

# Convert decimal commas to decimal points. Values that were already parsed
# as numbers pass through untouched, so the cell also works for files that
# use decimal points and can be re-run without raising AttributeError.
df['values'] = df['values'].apply(
    lambda x: x.replace(',', '.') if isinstance(x, str) else x
).astype(np.float64)
df['values'].describe()

In [None]:
# Min-max rescale the sample's values to the 0-100 range.
df["values"] = (
    df["values"]
    .sub(df["values"].min())
    .div(df["values"].max() - df["values"].min())
    .mul(100)
)
df

Apply the same processing to every ST file in the dataset and show summary statistics.

In [None]:
# Lowest accepted value of (rows - 40 * labels) / rows for a single ST file;
# get_st_dataframe prints a notice and returns None for any file whose ratio
# falls below this bound.
min_treashold = -0.05

In [None]:
def get_number_of_labels(sample_df_path):
  """Count the labelled rows in ``<sample_df_path>/ExpertLabels.csv``.

  A row counts as labelled when at least one of its ``Expert1``, ``Expert2``
  or ``Expert3`` cells was parsed as a string. The first data row of the
  file is never counted: the scan starts at the second row.

  Args:
    sample_df_path: Directory that contains ``ExpertLabels.csv``.

  Returns:
    The number of labelled rows as an ``int``.
  """
  expert_columns = ['Expert1', 'Expert2', 'Expert3']
  label_rows = pd.read_csv(f'{sample_df_path}/ExpertLabels.csv')[expert_columns].values[1:]
  return sum(
      1
      for row in label_rows
      if any(type(cell).__name__ == 'str' for cell in row)
  )

def get_formatted_values(bvp_value):
  """Return ``bvp_value`` with decimal commas turned into decimal points.

  Only ``str`` inputs are rewritten; any other value is handed back as is.
  """
  if type(bvp_value).__name__ != 'str':
    return bvp_value
  return bvp_value.replace(',', '.')

def get_st_dataframe(data_infos, rows_per_label=40):
  """Load one sample's ``ST.csv`` and trim it to whole label windows.

  The file is read from
  ``MAIN_PATH/<diagnose_result>/<sample_name>/<game_name>/ST.csv``. The 'ST'
  column is dropped when present, 'values' is converted to float64 (decimal
  commas are accepted) and a 'source_file' column holding the relative
  sample path is added.

  The row count is then compared with ``labels * rows_per_label``, where
  ``labels`` comes from ``get_number_of_labels``:

  * more rows than that: the frame is cut down to that many rows;
  * fewer (or equal) rows: the frame is cut down to the largest multiple of
    ``rows_per_label`` that fits;
  * if ``(rows - labels * rows_per_label) / rows`` is below
    ``min_treashold`` the file is rejected.

  Args:
    data_infos: Dict with the keys 'diagnose_result', 'sample_name' and
      'game_name'.
    rows_per_label: Number of rows that belong to one label (presumably the
      ST samples recorded per expert label -- confirm against the dataset
      description). Defaults to 40.

  Returns:
    The trimmed DataFrame, or None when the file is empty or rejected.
  """
  sample_path = '/'.join([
      data_infos['diagnose_result'],
      data_infos['sample_name'],
      data_infos['game_name'],
  ])
  print(f'Reading values in file {sample_path} ...')
  df = pd.read_csv(f'{MAIN_PATH}/{sample_path}/ST.csv')
  df.insert(df.shape[1], 'source_file', sample_path)
  if 'ST' in df.columns:
    df = df.drop(columns=['ST'])
  df['values'] = df['values'].apply(get_formatted_values).astype(np.float64)

  number_of_rows = df.shape[0]
  if number_of_rows == 0:
    # Nothing to trim, and the ratio below would divide by zero.
    print(f'File {sample_path} contains no rows, skipping')
    return None

  number_of_labels = get_number_of_labels(f'{MAIN_PATH}/{sample_path}')
  labelled_rows = number_of_labels * rows_per_label
  changed_rate = (number_of_rows - labelled_rows) / number_of_rows
  if changed_rate < min_treashold:
    print(f"Rate of losed values is higher than {min_treashold} as {changed_rate}")
    return None

  if changed_rate > 0:
    rows_to_keep = labelled_rows
  else:
    rows_to_keep = number_of_rows - (number_of_rows % rows_per_label)
  # Slice ROWS (not columns) and return the trimmed frame itself; returning
  # the untrimmed frame would discard the whole trimming step.
  return df.iloc[:rows_to_keep].copy()

In [None]:

# Load every listed ST file. get_st_dataframe returns None for files it
# rejects, so those are left out here.
st_dataframes = []
for ST_file in all_data_infos:
  st_dataframe = get_st_dataframe(ST_file)
  if st_dataframe is not None:
    st_dataframes.append(st_dataframe)

# Concatenate once (growing a frame inside the loop copies all rows on every
# iteration) and renumber the rows 0..n-1.
if st_dataframes:
  all_st_dataframe = pd.concat(st_dataframes, ignore_index=True)
else:
  # Keep the expected columns so the following cells still run on an empty dataset.
  all_st_dataframe = pd.DataFrame({
      'values': pd.Series(dtype=np.float64),
      'source_file': pd.Series(dtype=object),
  })

print("Gathering copmleted!")
all_st_dataframe.head()

In [None]:
# Summary statistics of the 'values' column across all gathered files.
all_st_dataframe['values'].describe()

In [None]:

# First procedure: mirror each sample's original ST.csv into the processed tree.
main_process_path = f'{MAIN_PROCESSED_PATH} {DATASET_PROCEDURES[0]}'
os.makedirs(main_process_path, exist_ok=True)

for source_file_name in all_st_dataframe['source_file'].unique():
  source_dir = f'{MAIN_PATH}/{source_file_name}'
  target_dir = f'{main_process_path}/{source_file_name}'
  # Creates every missing directory level of the sample path in one call.
  os.makedirs(target_dir, exist_ok=True)

  source_csv = f'{source_dir}/ST.csv'
  target_csv = f'{target_dir}/ST.csv'

  # Remove any copy left over from an earlier run before writing the new one.
  if os.path.exists(target_csv):
    os.remove(target_csv)

  if os.path.exists(source_csv):
    shutil.copy(source_csv, target_csv)
    print(f'{target_csv} file created')

In [None]:
# Min-max rescale the values of the whole dataset to the 0-100 range.
all_st_dataframe["values"] = (
    all_st_dataframe["values"]
    .sub(all_st_dataframe["values"].min())
    .div(all_st_dataframe["values"].max() - all_st_dataframe["values"].min())
    .mul(100)
)
all_st_dataframe['values'].describe()

In [None]:

# Second procedure: write each sample's rows of all_st_dataframe to its own ST.csv.
main_process_path = f'{MAIN_PROCESSED_PATH} {DATASET_PROCEDURES[1]}'
os.makedirs(main_process_path, exist_ok=True)

for source_file_name in all_st_dataframe['source_file'].unique():
  belongs_to_file = all_st_dataframe['source_file'] == source_file_name
  # The rows keep their index from all_st_dataframe; it is written to the CSV.
  ST_dataset = all_st_dataframe.loc[belongs_to_file].drop(columns=['source_file'])

  target_dir = f'{main_process_path}/{source_file_name}'
  os.makedirs(target_dir, exist_ok=True)

  ST_file_name = f'{target_dir}/ST.csv'
  # Remove any file left over from an earlier run before writing the new one.
  if os.path.exists(ST_file_name):
    os.remove(ST_file_name)
  ST_dataset.to_csv(ST_file_name, index=True)
  print(f'{ST_file_name} file created')
