In [2]:
from scipy import stats
import numpy.fft as fft

# Feature functions used for feature extraction

def k_complex_ratio(signal, sampling_freq=100, min_complex_amplitude=75, complex_duration=0.5):
    """Fraction of sliding windows whose peak-to-peak amplitude exceeds a threshold.

    A window of ``complex_duration`` seconds is slid over the signal one
    sample at a time. Every window whose (max - min) is greater than
    ``min_complex_amplitude`` is counted, and the count is divided by the
    number of samples in the signal.

    Parameters
    ----------
    signal : array-like, 1-D
        Input samples. They are multiplied by 1e6 before thresholding
        (assumes the input is in volts, so the threshold is in microvolts).
    sampling_freq : float, optional
        Sampling rate in Hz used to turn ``complex_duration`` into samples.
    min_complex_amplitude : float, optional
        Peak-to-peak threshold applied to the scaled signal.
    complex_duration : float, optional
        Window length in seconds.

    Returns
    -------
    float
        Qualifying window count divided by ``len(signal)``; 0.0 when the
        signal is shorter than one window (including an empty signal).

    Raises
    ------
    ValueError
        If the window would be shorter than one sample.
    """
    signal = np.asarray(signal, dtype=float) * 1000000    # Convert to microvolts

    duration_samples = int(complex_duration * sampling_freq)
    if duration_samples < 1:
        raise ValueError("complex_duration * sampling_freq must be at least one sample")

    total_samples = len(signal)
    if total_samples < duration_samples:
        # No complete window fits; also avoids dividing by zero on empty input.
        return 0.0

    # One row per window start, INCLUDING the final window that begins at
    # len(signal) - duration_samples (a plain range(len - duration) skips it).
    windows = np.lib.stride_tricks.sliding_window_view(signal, duration_samples)
    peak_to_peak = windows.max(axis=-1) - windows.min(axis=-1)
    k_complex_count = np.count_nonzero(peak_to_peak > min_complex_amplitude)

    return k_complex_count / total_samples

def feature_mean(signal):
    """Return the arithmetic mean of all samples in ``signal``."""
    return np.mean(signal)

def feature_variance(signal):
    """Return the variance of ``signal`` as computed by ``np.var``."""
    return np.var(signal)

def feature_std_deviation(signal):
    """Return the standard deviation of ``signal`` as computed by ``np.std``."""
    return np.std(signal)

def feature_max(signal):
    """Return the largest sample value in ``signal``."""
    return np.max(signal)

def feature_min(signal):
    """Return the smallest sample value in ``signal``."""
    return np.min(signal)

def feature_pkp(signal):
    """Return the peak-to-peak amplitude (maximum minus minimum) of ``signal``."""
    highest = np.max(signal)
    lowest = np.min(signal)
    return highest - lowest

def feature_discrete_diff(signal):
    """Return the sum of absolute differences between consecutive samples.

    Each difference is the following value minus the previous one
    (``np.diff``); the absolute values of those differences are summed.
    """
    steps = np.diff(signal)
    return np.sum(np.abs(steps))

def feature_skewness(signal):
    """Return the skewness of ``signal`` along its last axis.

    Skewness measures how asymmetric the value distribution is around its
    mean; it is computed by ``scipy.stats.skew`` with default settings.
    """
    return stats.skew(signal, axis=-1)
    
def feature_kurtosis(signal):
    """Return the kurtosis of ``signal`` along its last axis.

    Kurtosis describes how heavy the tails of the value distribution are;
    it is computed by ``scipy.stats.kurtosis`` with default settings.
    """
    return stats.kurtosis(signal, axis=-1)



def feature_freq_info(signal, sampling_freq=100):
    """Summed spectral magnitude of ``signal`` in four frequency bands.

    The magnitude of the real FFT is summed over the bins that fall into
    each band (frequencies in Hz):

    * delta: 1 <= f < 4
    * theta: 4 <= f <= 8
    * alpha: 8 < f <= 13
    * beta: 13 < f <= 30

    Parameters
    ----------
    signal : array-like, 1-D
        Input samples.
    sampling_freq : float, optional
        Sampling rate in Hz used to map FFT bins to frequencies.
        Defaults to 100, the value that was previously hard-coded.

    Returns
    -------
    list
        ``[delta, theta, alpha, beta]`` summed magnitudes.
    """
    fft_output = np.abs(np.fft.rfft(signal))
    fft_freq = np.fft.rfftfreq(len(signal), 1 / sampling_freq)

    # Boolean masks per band; the edges are chosen so no bin is counted twice.
    band_masks = (
        (fft_freq >= 1) & (fft_freq < 4),     # delta
        (fft_freq >= 4) & (fft_freq <= 8),    # theta
        (fft_freq > 8) & (fft_freq <= 13),    # alpha
        (fft_freq > 13) & (fft_freq <= 30),   # beta
    )

    return [np.sum(fft_output[mask]) for mask in band_masks]

def feature_eye_blink(signal, sampling_freq=100, blink_ampl=380, blink_duration=0.11):
    """Estimate the number of blink-like events per sample.

    Samples whose scaled value exceeds ``blink_ampl`` are located. Runs of
    such samples separated by more than ``blink_duration`` seconds are
    counted as distinct events, and the count is divided by ``len(signal)``.

    Parameters
    ----------
    signal : array-like, 1-D
        Input samples. They are multiplied by 1e6 before thresholding
        (assumes the input is in volts, so the threshold is in microvolts).
    sampling_freq : float, optional
        Sampling rate in Hz.
    blink_ampl : float, optional
        Amplitude threshold applied to the scaled signal.
    blink_duration : float, optional
        Minimum gap, in seconds, between supra-threshold samples for them
        to count as separate events.

    Returns
    -------
    float
        Events per sample; 0.0 when the signal is empty or never exceeds
        the threshold.
    """
    signal = np.asarray(signal, dtype=float) * 1000000    # Convert to microvolts

    total_samples = len(signal)
    if total_samples == 0:
        return 0.0

    blinks = np.where(signal > blink_ampl)[0]
    if blinks.size == 0:
        # Without this guard the "+ 1" below would report one blink
        # for a signal that never crosses the threshold.
        return 0.0

    min_gap_samples = blink_duration * sampling_freq
    # A gap larger than the minimum marks the start of a new event; the
    # first supra-threshold sample opens the first event, hence the + 1.
    event_boundaries = np.diff(blinks) > min_gap_samples
    correct_blinks = np.sum(event_boundaries) + 1

    return correct_blinks / total_samples

def feature_median_of_freq(signal, sampling_freq=100):
    """Median of the dominant frequencies of ``signal``.

    The magnitude of the real FFT is computed, and every bin (the DC bin
    included) whose magnitude is at least half of the largest magnitude is
    kept. The median of the kept bins' frequencies is returned.

    Parameters
    ----------
    signal : array-like, 1-D
        Input samples.
    sampling_freq : float, optional
        Sampling rate in Hz used to map FFT bins to frequencies.
        Defaults to 100, the value that was previously hard-coded.

    Returns
    -------
    float
        Median frequency, in Hz, of the bins at or above half the peak
        magnitude.
    """
    fft_output = np.abs(np.fft.rfft(signal))
    fft_freq = np.fft.rfftfreq(len(signal), 1 / sampling_freq)

    # Bins at or above 50% of the spectral peak count as "important".
    important_freqs_start_from = 0.5 * np.max(fft_output)
    important_freqs = fft_freq[fft_output >= important_freqs_start_from]

    return np.median(important_freqs)

def feature_rms(signal):
    """Return the root mean square of ``signal``.

    The squared samples are summed, divided by ``len(signal)``, and the
    square root of that quotient is returned.
    """
    squared = np.square(signal)
    mean_square = np.sum(squared) / len(signal)
    return np.sqrt(mean_square)


def features_from_eeg(eeg):
    """Build the feature list for one EEG channel.

    The list contains, in order: max, min, mean, variance, standard
    deviation, peak-to-peak, summed absolute difference, skewness and
    kurtosis, followed by every value returned by ``feature_freq_info``
    and finally the ``k_complex_ratio`` value.
    """
    # Single-value extractors, applied in the order the features are emitted.
    scalar_extractors = (
        feature_max,
        feature_min,
        feature_mean,
        feature_variance,
        feature_std_deviation,
        feature_pkp,
        feature_discrete_diff,
        feature_skewness,
        feature_kurtosis,
    )

    eeg_features = [extract(eeg) for extract in scalar_extractors]
    eeg_features.extend(feature_freq_info(eeg))
    eeg_features.append(k_complex_ratio(eeg))

    return eeg_features

def features_from_eog(eog):
    """Build the feature list for one EOG channel.

    The list contains, in order: max, min, mean, variance, standard
    deviation, peak-to-peak and the ``feature_eye_blink`` value.
    """
    # Extractors applied in the order the features are emitted.
    extractors = (
        feature_max,
        feature_min,
        feature_mean,
        feature_variance,
        feature_std_deviation,
        feature_pkp,
        feature_eye_blink,
    )

    return [extract(eog) for extract in extractors]

def features_from_emg(emg):
    """Build the feature list for one EMG channel.

    The list contains, in order: max, min, mean, median of the dominant
    frequencies, variance, standard deviation, RMS and peak-to-peak.
    """
    # Extractors applied in the order the features are emitted.
    extractors = (
        feature_max,
        feature_min,
        feature_mean,
        feature_median_of_freq,
        feature_variance,
        feature_std_deviation,
        feature_rms,
        feature_pkp,
    )

    return [extract(emg) for extract in extractors]


# x holds four channels: x[0] - eeg, x[1] - eeg, x[2] - eog, x[3] - emg

def all_features(x):
    """Concatenate the features of all four channels into one flat array.

    ``x[0]`` and ``x[1]`` are passed to ``features_from_eeg``, ``x[2]`` to
    ``features_from_eog`` and ``x[3]`` to ``features_from_emg``; the
    resulting lists are joined in that order.
    """
    per_channel_features = (
        features_from_eeg(x[0]),
        features_from_eeg(x[1]),
        features_from_eog(x[2]),
        features_from_emg(x[3]),
    )
    return np.concatenate(per_channel_features, axis=-1)

    


In [3]:
def feature_extract(epochs_array_per_file):
    """Extract a feature matrix and zero-based labels from an epochs object.

    Parameters
    ----------
    epochs_array_per_file : object
        Must expose ``events`` (a 2-D array whose column 2 holds the class
        id of each epoch) and ``get_data(picks=...)`` returning one array of
        channel signals per epoch. Presumably an ``mne.Epochs``-like object;
        verify against the caller.

    Returns
    -------
    features : numpy.ndarray
        One row per epoch, produced by ``all_features``.
    labels : numpy.ndarray
        Class ids from ``events[:, 2]`` shifted down by one.

    Notes
    -----
    The labels are computed into a new array. The input's ``events`` are
    left untouched, so calling this function repeatedly on the same object
    gives the same labels every time.
    """
    print("\nFeatures extraction process has been started.\n")

    # Channel picks: 0 - eeg, 1 - eeg, 2 - eog, 4 - emg
    signals_per_epoch = epochs_array_per_file.get_data(picks=[0,1,2,4])
    features = np.array([all_features(all_signals) for all_signals in signals_per_epoch])

    # Shift class ids so they start at 0 (the original comment gives the
    # target range as [0, 5]). The subtraction allocates a fresh array;
    # decrementing the events[:, 2] slice in place would modify the
    # caller's epochs object.
    labels = np.asarray(epochs_array_per_file.events[:, 2]) - 1

    print("\nFeatures extraction process has been done.\n")

    return features, labels
    