# Feature Extraction

## pyEDA

pyEDA is an open-source toolkit for pre-processing and feature extraction of electrodermal activity (EDA) data: https://github.com/spdubey/pyEDA

In [None]:
# Fetch the pyEDA sources into ./pyEDA so the `pyEDA.*` imports in the next cell resolve.
!git clone "https://github.com/HealthSciTech/pyEDA.git"

Cloning into 'pyEDA'...
remote: Enumerating objects: 290, done.[K
remote: Counting objects: 100% (290/290), done.[K
remote: Compressing objects: 100% (198/198), done.[K
remote: Total 290 (delta 151), reused 203 (delta 87), pack-reused 0[K
Receiving objects: 100% (290/290), 11.30 MiB | 26.43 MiB/s, done.
Resolving deltas: 100% (151/151), done.


In [None]:
# Importing necessary functions for feature extraction
from pyEDA.main import *
from pyEDA.pyEDA.openShimmerFile import *
from pyEDA.pyEDA.preprocessing import *
from pyEDA.pyEDA.filtering import *
from pyEDA.pyEDA.pyEDA import *
from pyEDA.pyEDA.autoencoder import *

Working Data (wd) and Measures (m)
`process_statistical` returns two dictionaries and a NumPy array: Working Data (wd), Measures (m), and the clean EDA signal (eda_clean).



Working Data(wd): This dictionary includes:


- filtered_phasic_gsr: phasic component of the GSR signal after low-pass filtering, for each window
- phasic_gsr: phasic component of the GSR signal for each window
- tonic_gsr: tonic component of the GSR signal for each window
- peaklist: list of peaks for each window


Measures(m): This dictionary includes:


- number_of_peaks: number of peaks collected for each window
- mean_gsr: mean of the normalized GSR for each window
- max_of_peaks: maximum of the normalized GSR for each window


Clean EDA signal(eda_clean): This is a clean EDA signal after preprocessing

## Statistical FX

In [None]:
# Run pyEDA's statistical pipeline on every EDA recording and collect the
# cleaned signal returned for each one.
eda_recordings = data_All['data']['EDA']
eda_clean_all = []
for eda_recording in eda_recordings:
    m, wd, eda_clean = process_statistical(
        eda_recording,
        use_scipy=True,
        sample_rate=4,
        new_sample_rate=4,
        segment_width=160,
        segment_overlap=0,
    )
    # Store each cleaned signal as an ndarray so the results can be stacked later.
    eda_clean_all.append(np.array(eda_clean))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
10: -2.6894e+02 -2.6894e+02  1e-07  1e-11  3e-13
Optimal solution found.
If you are using this tool for your research please cite this paper: "pyEDA: An Open-Source Python Toolkit for Pre-processing and Feature Extraction of Electrodermal Activity"
     pcost       dcost       gap    pres   dres
 0: -2.6936e+02 -2.6913e+02  2e+02  2e+01  4e-01
 1: -2.6903e+02 -2.7599e+02  7e+00  4e-01  1e-02
 2: -2.6916e+02 -2.6951e+02  3e-01  9e-03  2e-04
 3: -2.6933e+02 -2.6937e+02  4e-02  9e-05  2e-06
 4: -2.6936e+02 -2.6936e+02  5e-03  8e-06  2e-07
 5: -2.6936e+02 -2.6936e+02  2e-03  1e-06  3e-08
 6: -2.6936e+02 -2.6936e+02  4e-04  2e-07  5e-09
 7: -2.6936e+02 -2.6936e+02  6e-05  2e-08  6e-10
 8: -2.6936e+02 -2.6936e+02  8e-06  2e-09  4e-11
 9: -2.6936e+02 -2.6936e+02  9e-07  1e-10  3e-12
10: -2.6936e+02 -2.6936e+02  1e-07  1e-11  3e-13
Optimal solution found.
If you are using this tool for your research please cite this paper: "pyEDA

In [None]:
# Stack the per-recording cleaned signals into one array, then drop any
# length-1 axes.
stacked_clean = np.array(eda_clean_all)
eda_clean_all = np.squeeze(stacked_clean)

In [None]:
import numpy as np

# Random placeholder matrices (rows x samples) used to exercise the
# segmentation steps below.
eda_clean = np.random.rand(1313, 240)
gsr = np.random.rand(1313, 240)

# Number of GSR-width segments needed to cover one EDA row, rounded up.
n_segments = int(np.ceil(eda_clean.shape[1] / gsr.shape[1]))

# Cut the EDA matrix column-wise into equal pieces and force every piece
# to the GSR row width.
eda_clean_segmented = [
    piece.reshape(1313, gsr.shape[1])
    for piece in np.split(eda_clean, n_segments, axis=1)
]

# Join the pieces back together along the sample axis.
eda_clean_ = np.concatenate(eda_clean_segmented, axis=1)

print(eda_clean_.shape)


(1313, 240)


## Automatic FX

In [None]:
# =================================== Preprocessing Automatic ===================================
def prepare_automatic(gsr_signal, sample_rate=128, new_sample_rate=40, k=32, epochs=100, batch_size=10,
                      checkpoint_path='/content/pyEDA/pyEDA/checkpoint.t7'):
    """Preprocess GSR recordings, train the ``AE`` autoencoder on them and save it.

    Every recording in ``gsr_signal`` is passed through ``resample_data`` and
    then ``rolling_mean``; the results are stacked into one array. An ``AE``
    model sized to that array is trained to reconstruct it (Adam, lr=1e-3,
    MSE loss), and the whole model object is written to ``checkpoint_path``
    with ``torch.save``.

    Args:
        gsr_signal: Iterable of recordings. The preprocessed recordings are
            stacked with ``np.array`` and indexed as ``shape[1]``, so they
            presumably must all end up the same length -- TODO confirm.
        sample_rate: Passed to ``resample_data`` as the rate of the raw data.
        new_sample_rate: Passed to ``resample_data`` as the target rate and
            used for both window arguments of ``rolling_mean``.
        k: ``latent_size`` handed to ``AE``.
        epochs: Number of passes over the batches.
        batch_size: Passed to ``create_train_loader``.
        checkpoint_path: Where the trained model is saved. The default is the
            path ``process_automatic`` reads from.

    Returns:
        The stacked, preprocessed recordings as a NumPy array.

    Raises:
        ValueError: If training was requested (``epochs > 0``) but
            ``create_train_loader`` produced no batch at all.
    """
    print("If you are using this tool for your research please cite this paper: \"pyEDA: An Open-Source Python Toolkit for Pre-processing and Feature Extraction of Electrodermal Activity\"")

    # =============================== Preprocessing ===============================
    # Resample each recording from the device rate, then apply the rolling mean.
    preprocessed_gsr = []
    for recording in gsr_signal:
        data = resample_data(recording, sample_rate, new_sample_rate)
        preprocessed_gsr.append(rolling_mean(data, 1. / new_sample_rate, new_sample_rate))
    preprocessed_gsr = np.array(preprocessed_gsr)

    # ========================== Training the Autoencoder ==========================
    # The network input width is the length of one preprocessed recording.
    input_shape = preprocessed_gsr.shape[1]

    # Use the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AE(input_shape=input_shape, latent_size=k).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    # Batches of exactly `batch_size` recordings, as one tensor.
    train_loader = create_train_loader(preprocessed_gsr, batch_size)
    n_batches = len(train_loader)
    if epochs > 0 and n_batches == 0:
        # Without this guard the per-epoch average below divides by zero.
        raise ValueError(
            "No complete training batch: got {} recording(s) for batch_size={}".format(
                len(preprocessed_gsr), batch_size
            )
        )

    for epoch in range(epochs):
        loss = 0
        for batch_features in train_loader:
            # Move the batch to the training device once and reuse it as
            # both the input and the reconstruction target.
            batch_features = batch_features.to(device)

            # PyTorch accumulates gradients across backward passes, so reset them.
            optimizer.zero_grad()

            outputs, _ = model(batch_features)
            train_loss = criterion(outputs, batch_features)
            train_loss.backward()
            optimizer.step()

            loss += train_loss.item()

        # Average the mini-batch losses to get the epoch loss.
        loss = loss / n_batches
        print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))

    # Save on CPU so the checkpoint can be loaded and run on CPU tensors
    # even when training happened on a GPU.
    torch.save(model.cpu(), checkpoint_path)
    return preprocessed_gsr


# =================================== Feature Extraction ===================================

def process_automatic(gsr_signal, checkpoint_path='/content/pyEDA/pyEDA/checkpoint.t7'):
    """Encode one GSR recording with the saved autoencoder.

    The model object stored at ``checkpoint_path`` is loaded onto the CPU,
    the recording is reshaped to a single-row batch, and the second value
    returned by the model (the latent code) is returned for that row.

    Args:
        gsr_signal: One recording (array or sequence of numbers). It is
            flattened into a ``(1, n)`` float32 batch.
        checkpoint_path: File written by ``torch.save(model, ...)``. The
            default is the path ``prepare_automatic`` writes to.

    Returns:
        NumPy array holding the first row of the model's latent output.
    """
    # NOTE(security): the checkpoint is a fully pickled model object, and
    # unpickling can execute arbitrary code -- only load trusted checkpoints.
    # `map_location="cpu"` keeps a GPU-trained model usable with the CPU
    # tensor built below. `weights_only=False` is required on PyTorch
    # releases whose default refuses to unpickle whole model objects.
    try:
        model = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
    except TypeError:
        # Older PyTorch releases do not accept the `weights_only` argument.
        model = torch.load(checkpoint_path, map_location="cpu")

    # np.array copies, so the tensor never aliases the caller's buffer.
    batch = np.array(gsr_signal, dtype=np.float32).reshape(1, -1)

    # Inference only: no gradient bookkeeping is needed.
    with torch.no_grad():
        _, latent_variable = model(torch.from_numpy(batch))
    return latent_variable.detach().numpy()[0]


# =================================== Autoencoder of 14 Layers ===================================
import torch
import numpy as np
from torch import nn
import math


class AE(nn.Module):
    """Fully connected autoencoder with seven encoder and seven decoder layers.

    Keyword arguments:
        input_shape: number of features in one input row.
        latent_size: width of the code produced by the encoder.

    The hidden widths start at the largest power of two not exceeding
    ``input_shape`` and halve down to one thirty-second of it. The decoder
    mirrors the encoder. Encoder hidden layers use LeakyReLU, decoder hidden
    layers use Sigmoid; the code layer and the final reconstruction layer
    are plain linear outputs.
    """

    def __init__(self, **kwargs):
        super().__init__()
        n_inputs = kwargs["input_shape"]
        n_latent = kwargs["latent_size"]

        # Largest power of two that does not exceed the input width.
        base = 2 ** int(math.floor(math.log(n_inputs, 2)))

        # base, base//2, base//4, ..., base//32
        hidden = [base >> shift for shift in range(6)]

        # Encoder: linear1..linear7, with leaky_relu1..leaky_relu6 in between.
        encoder_widths = [n_inputs] + hidden + [n_latent]
        for idx in range(1, 8):
            setattr(
                self,
                "linear{}".format(idx),
                nn.Linear(in_features=encoder_widths[idx - 1], out_features=encoder_widths[idx]),
            )
            if idx < 7:
                setattr(self, "leaky_relu{}".format(idx), nn.LeakyReLU())

        # Decoder: linear8..linear14, with sigmoid7..sigmoid12 in between.
        decoder_widths = encoder_widths[::-1]
        for step in range(7):
            setattr(
                self,
                "linear{}".format(8 + step),
                nn.Linear(in_features=decoder_widths[step], out_features=decoder_widths[step + 1]),
            )
            if step < 6:
                setattr(self, "sigmoid{}".format(7 + step), nn.Sigmoid())

    def forward(self, features):
        """Return ``(reconstruction, code)`` for a batch of input rows."""
        # Encoder
        hidden = self.leaky_relu1(self.linear1(features))
        hidden = self.leaky_relu2(self.linear2(hidden))
        hidden = self.leaky_relu3(self.linear3(hidden))
        hidden = self.leaky_relu4(self.linear4(hidden))
        hidden = self.leaky_relu5(self.linear5(hidden))
        hidden = self.leaky_relu6(self.linear6(hidden))
        code = self.linear7(hidden)

        # Decoder
        hidden = self.sigmoid7(self.linear8(code))
        hidden = self.sigmoid8(self.linear9(hidden))
        hidden = self.sigmoid9(self.linear10(hidden))
        hidden = self.sigmoid10(self.linear11(hidden))
        hidden = self.sigmoid11(self.linear12(hidden))
        hidden = self.sigmoid12(self.linear13(hidden))
        reconstruction = self.linear14(hidden)

        return reconstruction, code

def create_train_loader(gsrData, batch_size=10):
    """Group samples into fixed-size batches and return them as one float tensor.

    Each sample is flattened to 1-D and consecutive samples are grouped into
    batches of exactly ``batch_size``. Trailing samples that do not fill a
    complete batch are dropped, and a message is printed when that happens.

    Args:
        gsrData: Iterable of samples; every sample must flatten to the same
            length.
        batch_size: Number of samples per batch (positive integer).

    Returns:
        A float32 tensor of shape ``(n_batches, batch_size, n_features)``,
        or an empty 1-D float32 tensor when there is no complete batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    samples = [np.asarray(sample, dtype=np.float32).ravel() for sample in gsrData]
    n_batches, n_leftover = divmod(len(samples), batch_size)

    if n_leftover:
        # The incomplete final batch cannot be stored in a rectangular tensor.
        print("Train data truncated due to incompatible batch_size!")

    if n_batches == 0:
        return torch.empty(0, dtype=torch.float32)

    # Build one contiguous NumPy array first: creating a tensor from nested
    # lists of ndarrays is slow and makes PyTorch emit a warning.
    batched = np.stack(samples[:n_batches * batch_size])
    batched = batched.reshape(n_batches, batch_size, samples[0].size)
    return torch.from_numpy(batched)


In [None]:
# Train the autoencoder on all EDA recordings (data already at 4 Hz, so no
# rate change) and keep the preprocessed signals it returns.
preprocessed_data = prepare_automatic(
    data_All['data']['EDA'],
    sample_rate=4,
    new_sample_rate=4,
    k=64,
    epochs=100,
    batch_size=32,
)
print("Output shape", preprocessed_data.shape)

If you are using this tool for your research please cite this paper: "pyEDA: An Open-Source Python Toolkit for Pre-processing and Feature Extraction of Electrodermal Activity"
Train data concatenated due to incompatible batch_size!


  return torch.FloatTensor(train_loader)


epoch : 1/100, loss = 0.664054
epoch : 2/100, loss = 0.005606
epoch : 3/100, loss = 0.001198
epoch : 4/100, loss = 0.001288
epoch : 5/100, loss = 0.001373
epoch : 6/100, loss = 0.001445
epoch : 7/100, loss = 0.001518
epoch : 8/100, loss = 0.001594
epoch : 9/100, loss = 0.001674
epoch : 10/100, loss = 0.001756
epoch : 11/100, loss = 0.001838
epoch : 12/100, loss = 0.001919
epoch : 13/100, loss = 0.001998
epoch : 14/100, loss = 0.002073
epoch : 15/100, loss = 0.002145
epoch : 16/100, loss = 0.002212
epoch : 17/100, loss = 0.002275
epoch : 18/100, loss = 0.002333
epoch : 19/100, loss = 0.002386
epoch : 20/100, loss = 0.002435
epoch : 21/100, loss = 0.002479
epoch : 22/100, loss = 0.002518
epoch : 23/100, loss = 0.002553
epoch : 24/100, loss = 0.002585
epoch : 25/100, loss = 0.002613
epoch : 26/100, loss = 0.002638
epoch : 27/100, loss = 0.002660
epoch : 28/100, loss = 0.002679
epoch : 29/100, loss = 0.002697
epoch : 30/100, loss = 0.002712
epoch : 31/100, loss = 0.002725
epoch : 32/100, l