In [None]:
import torch
import torch.nn as nn

import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Mount Google Drive at /content/drive (Colab only); the file paths used in
# this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder-style patient names, each of the form "MSEL_<id>".
patients = [
    'MSEL_01575', 'MSEL_01828', 'MSEL_01709', 'MSEL_01870', 'MSEL_01842',
    'MSEL_01844', 'MSEL_01097', 'MSEL_00182', 'MSEL_01676', 'MSEL_01849',
    'MSEL_01808', 'MSEL_00501', 'MSEL_00172', 'MSEL_01763', 'MSEL_00502',
]

# Keep only what follows the five-character "MSEL_" prefix.
prefix_length = len("MSEL_")
patients_id = [full_name[prefix_length:] for full_name in patients]
print(patients_id)

['01575', '01828', '01709', '01870', '01842', '01844', '01097', '00182', '01676', '01849', '01808', '00501', '00172', '01763', '00502']


In [None]:
def getPatientData(patient):
  """Read all eight concatenated signal files of one patient.

  Args:
    patient: patient id string (e.g. "01575"); it is inserted into the folder
      name and also prefixed to every file name.

  Returns:
    dict mapping "BVP", "HR", "TEMP", "EDA", "Mag", "x", "y" and "z" (in that
    order) to whatever pd.read_parquet returns for the matching file.
  """
  folder = '/content/drive/My Drive/epi_data_folders/MSEL_{}/concat_data/'.format(patient)

  # File-name suffix per signal. Mag/x/y/z are read from files whose names end
  # in ".to_parquet" rather than ".parquet".
  suffix_by_signal = {
      "BVP": "_BVP.parquet",
      "HR": "_HR.parquet",
      "TEMP": "_TEMP.parquet",
      "EDA": "_EDA.parquet",
      "Mag": "_Mag.to_parquet",
      "x": "_x.to_parquet",
      "y": "_y.to_parquet",
      "z": "_z.to_parquet",
  }

  return {
      signal: pd.read_parquet(folder + patient + suffix, engine="auto")
      for signal, suffix in suffix_by_signal.items()
  }

In [None]:
from sklearn.preprocessing import MinMaxScaler

def normalizeData(field, data):
  """Add a min-max scaled copy of one column to a DataFrame.

  Args:
    field: name of the column to scale, e.g. "BVP" or "EDA".
    data: DataFrame containing that column; it is modified in place.

  Returns:
    The same DataFrame, now with an extra "<field>_normalized" column whose
    values were scaled with MinMaxScaler(feature_range=(-1, 1)) fitted on
    this column only.
  """
  # MinMaxScaler works on 2-D input, so present the column as (n_samples, 1).
  column_2d = data[field].to_numpy().reshape(-1, 1)

  min_max = MinMaxScaler(feature_range=(-1, 1))
  normalized_name = field + "_normalized"
  data[normalized_name] = min_max.fit_transform(column_2d)

  return data

In [None]:
def normalizeFieldsByPatient(field, patients):
  """Normalize one signal for every patient and save the result to Drive.

  For each patient id this reads the concatenated file of `field`, adds the
  "<field>_normalized" column through normalizeData, drops the raw `field`
  column, writes the frame under the patient's normalized_data/ folder and
  prints the mean of the normalized column.

  Args:
    field: signal / column name, e.g. "BVP", "EDA" or "Mag".
    patients: iterable of patient id strings, e.g. "01575".

  Returns:
    None. The results are the files written and the lines printed.
  """
  # BVP/HR/TEMP/EDA are read from ".parquet" files, every other field
  # (Mag, x, y, z) from ".to_parquet" files -- the same rule the loader
  # functions in this notebook apply to the concat_data folder.
  if field in ("BVP", "HR", "TEMP", "EDA"):
    extension = ".parquet"
  else:
    extension = ".to_parquet"

  source_path = '/content/drive/My Drive/epi_data_folders/MSEL_{}/concat_data/{}_{}' + extension
  saved_path = '/content/drive/My Drive/epi_data_folders/MSEL_{}/normalized_data/{}_{}.parquet'

  for patient_id in patients:
    data = pd.read_parquet(source_path.format(patient_id, patient_id, field), engine="auto")

    data = normalizeData(field, data)

    # Optional visual check (disabled). It must stay before the raw column is
    # dropped, because the first plot reads data[field].
    # plt.plot(data["time"], data[field], '-')
    # plt.title("{} - {}".format(field, patient_id))
    # plt.xlabel('time')
    # plt.ylabel(field)
    # plt.show()

    # plt.plot(data["time"], data[field+"_normalized"], '-')
    # plt.title("{} - {}".format(field, patient_id))
    # plt.xlabel('time')
    # plt.ylabel(field)
    # plt.show()

    # DataFrame.drop returns a new frame; without rebinding the call has no
    # effect and the raw column would be written to the normalized file too.
    data = data.drop(columns=[field])

    data.to_parquet(saved_path.format(patient_id, patient_id, field))

    print("{}-{} \t mean:{}".format(patient_id, field, data[field+"_normalized"].mean()))

# Normalize every signal in turn for the full patient list.
for field_name in ("BVP", "HR", "TEMP", "EDA", "Mag", "x", "y", "z"):
  normalizeFieldsByPatient(field_name, patients_id)

01575-BVP 	 mean:0.17135506868362427
01828-BVP 	 mean:0.018229976296424866
01709-BVP 	 mean:-0.08456387370824814
01870-BVP 	 mean:-0.04310292750597
01842-BVP 	 mean:0.027042578905820847
01844-BVP 	 mean:-0.05349082499742508
01097-BVP 	 mean:-0.0469181127846241
00182-BVP 	 mean:0.07039019465446472
01676-BVP 	 mean:0.05453825369477272
01849-BVP 	 mean:0.014756969176232815
01808-BVP 	 mean:-0.033828455954790115
00501-BVP 	 mean:0.06687148660421371
00172-BVP 	 mean:-0.0703543946146965
01763-BVP 	 mean:0.005379282403737307
00502-BVP 	 mean:-0.06483820825815201
01575-HR 	 mean:-0.16621266305446625
01828-HR 	 mean:0.2586877644062042
01709-HR 	 mean:-0.21519848704338074
01870-HR 	 mean:-0.16640201210975647
01842-HR 	 mean:-0.11899341642856598
01844-HR 	 mean:-0.1992628574371338
01097-HR 	 mean:-0.011628374457359314
00182-HR 	 mean:-0.013976383022964
01676-HR 	 mean:-0.09664731472730637
01849-HR 	 mean:0.25323373079299927
01808-HR 	 mean:-0.017076129093766212
00501-HR 	 mean:-0.0446935072541236

In [None]:
def getPatientFields(field, patients):
  """Read one signal's concatenated file for each patient.

  Args:
    field: signal name. "BVP", "HR", "TEMP" and "EDA" are read from
      ".parquet" files; any other value is read from a ".to_parquet" file.
    patients: iterable of patient id strings.

  Returns:
    dict mapping each patient id to whatever pd.read_parquet returns for
    that patient's file.
  """
  suffix = ".parquet" if field in ("BVP", "HR", "TEMP", "EDA") else ".to_parquet"
  template = '/content/drive/My Drive/epi_data_folders/MSEL_{}/concat_data/{}_{}'

  return {
      pid: pd.read_parquet(template.format(pid, pid, field) + suffix, engine="auto")
      for pid in patients
  }



In [None]:
from sklearn.preprocessing import MinMaxScaler

def normalizeAllData(field, data):
  """Min-max scale one column to [-1, 1] in every patient's DataFrame, in place.

  A separate MinMaxScaler is fitted per patient, so each patient's values are
  scaled against that patient's own minimum and maximum.

  Args:
    field: name of the column to scale, e.g. "BVP".
    data: dictionary {patient_id: dataframe}. Every frame gains a
      "<field>_normalized" column and loses the raw `field` column.

  Returns:
    None. The frames held in `data` are modified in place.
  """
  for patient_id in data:
    patient = data[patient_id]

    train_data = patient[field].to_numpy()
    scaler = MinMaxScaler(feature_range=(-1, 1))

    # fit_transform returns an (n, 1) array. Flatten it so the new column holds
    # plain numbers; calling .tolist() on the 2-D result would store a
    # one-element list in every row instead.
    scaled = scaler.fit_transform(train_data.reshape(-1, 1)).ravel()
    patient[field+"_normalized"] = scaled

    # drop() returns a new frame unless inplace=True; the result used to be
    # discarded, which left the raw column in place.
    patient.drop(columns=[field], inplace=True)

In [None]:
# Path of patient 00501's normalized BVP file; it is read back in the next cell.
bob = "/content/drive/MyDrive/epi_data_folders/MSEL_00501/normalized_data/00501_BVP.parquet"

In [None]:
# Load the saved normalized file and display it to inspect its columns.
pd.read_parquet(bob)

Unnamed: 0,time,BVP,BVP_normalized
0,1.556830e+12,-22338868.0,0.118534
1,1.556830e+12,-22338868.0,0.118534
2,1.556830e+12,-22338868.0,0.118534
3,1.556830e+12,-22338868.0,0.118534
4,1.556830e+12,-22338868.0,0.118534
...,...,...,...
35990467,1.557111e+12,-69946288.0,0.118503
35990468,1.557111e+12,-69946288.0,0.118503
35990469,1.557111e+12,-69946288.0,0.118503
35990470,1.557111e+12,-69946288.0,0.118503
