In [1]:
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import scipy.io
import scipy
from scipy import signal
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA

import pywt
from scipy import stats

from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.manifold import Isomap
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.mixture import GaussianMixture as GMM
from sklearn import preprocessing

In [2]:
import math

In [3]:
""

DATABASE = {
    'Database 1': ['female_1',
                  'female_2',
                  'female_3',
                  'male_1',
                  'male_2'],
    'Database 2': ['male_day_1',
                  'male_day_2',
                  'male_day_3']
}

COLUMNS = ['cyl_ch1', 
            'cyl_ch2', 
            'hook_ch1', 
            'hook_ch2', 
            'tip_ch1', 
            'tip_ch2', 
            'palm_ch1', 
            'palm_ch2', 
            'spher_ch1', 
            'spher_ch2', 
            'lat_ch1', 
            'lat_ch2']

LABELS = [
    'Spherical',
    'Tip',
    'Palmar',
    'Lateral',
    'Cylindrical',
    'Hook'
]

COL_MAPPINGS = {
            'cyl_ch1': 'Cylindrical', 
            'cyl_ch2': 'Cylindrical', 
            'hook_ch1': 'Hook', 
            'hook_ch2': 'Hook', 
            'tip_ch1': 'Tip', 
            'tip_ch2': 'Tip', 
            'palm_ch1': 'Palmar', 
            'palm_ch2': 'Palmar', 
            'spher_ch1': 'Spherical', 
            'spher_ch2': 'Spherical', 
            'lat_ch1': 'Lateral', 
            'lat_ch2': 'Lateral'
}

In [4]:
""" Preprocess and standardize dataset into a single dataframe table
"""

DB_NAME = 'Database 1'
dfs = []
for fname in DATABASE[DB_NAME]: 
    tmp_data = scipy.io.loadmat(f'./data/{DB_NAME}/{fname}')
    tmp_data = {k:v for k,v in tmp_data.items() if k in COLUMNS}
    for c in COLUMNS:
        tmp_dfx = pd.DataFrame(tmp_data[c])
        tmp_dfx['identifier'] = fname
        tmp_dfx['label'] = COL_MAPPINGS[c]
        dfs.append(pd.DataFrame(tmp_dfx))

dataset = pd.concat(dfs)
print("Dimensions", dataset.shape)
dataset.head()

Dimensions (1800, 3002)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2992,2993,2994,2995,2996,2997,2998,2999,identifier,label
0,0.072198,0.276211,0.429221,0.327214,0.123201,0.0977,0.072198,0.021195,-0.004307,0.174205,...,0.276211,-0.259323,0.072198,1.882814,0.480224,-2.528968,0.0977,0.837247,female_1,Cylindrical
1,0.25071,0.301713,0.199706,0.378218,0.021195,-0.080812,-0.106313,0.021195,0.276211,0.072198,...,-1.30489,0.786244,0.786244,0.939254,1.270775,-0.616346,0.454723,0.021195,female_1,Cylindrical
2,0.123201,0.148703,0.148703,0.123201,-0.004307,-0.157317,-0.029808,0.174205,0.199706,0.25071,...,-0.233821,0.403719,0.046696,-0.080812,0.378218,-0.36133,0.505726,0.607732,female_1,Cylindrical
3,0.531228,-0.106313,-0.284825,-0.335828,-0.182818,0.123201,0.301713,0.352716,0.327214,-0.029808,...,0.709739,0.276211,-0.080812,0.123201,0.735241,1.270775,-0.769356,-1.687415,female_1,Cylindrical
4,-0.310326,-0.182818,0.276211,0.480224,0.352716,0.123201,0.123201,0.0977,-0.029808,-0.080812,...,-0.20832,0.429221,0.378218,0.633234,0.811746,0.403719,-0.182818,-0.412333,female_1,Cylindrical


In [5]:
#combine coupled readouts
new_dfs = []
for lab in LABELS:
    new_temp = dataset[dataset['label'] == lab].values
    new_temp_comb = pd.DataFrame(np.concatenate((new_temp[:150,:3000],new_temp[150:,:3002]),axis=1))
    new_dfs.append(new_temp_comb)


new_dataset = pd.concat(new_dfs)
new_dataset.rename(columns={6001:'label', 6000:'identifier'}, inplace=True)
print("Dimension", new_dataset.shape)
new_dataset.head()

Dimension (900, 6002)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5992,5993,5994,5995,5996,5997,5998,5999,identifier,label
0,0.505726,0.531228,0.505726,0.021195,0.046696,-0.080812,0.021195,0.021195,0.123201,0.199706,...,0.227693,0.049139,-0.30797,-0.103908,0.100154,0.15117,-0.614063,-0.358985,female_3,Spherical
1,0.454723,0.174205,-0.182818,-0.386831,0.709739,0.633234,-0.029808,-0.20832,0.0977,0.582231,...,-0.333478,0.100154,0.71234,0.610309,0.559294,-0.410001,0.253201,-0.052892,female_3,Spherical
2,1.245273,0.123201,-2.273952,-2.936995,-0.335828,1.50029,1.398283,2.163332,0.403719,-0.131815,...,0.253201,-0.486524,-0.231447,-0.30797,0.100154,0.865386,0.635817,0.15117,female_3,Spherical
3,-0.080812,-0.182818,0.0977,0.123201,0.276211,0.352716,0.25071,0.199706,0.0977,-0.080812,...,0.227693,-0.818125,-0.052892,0.431755,0.457263,0.304216,0.125662,0.304216,female_3,Spherical
4,0.480224,0.021195,0.429221,0.021195,-0.004307,-0.106313,0.199706,0.199706,0.301713,0.276211,...,-0.103908,-1.124218,0.278708,0.94191,0.686832,0.023631,-1.124218,-0.333478,female_3,Spherical


In [6]:
from cgitb import Hook


def abs_val_filter(data):
    """ Apply an absolute value filter to a DataFrame
    """
    return abs(data.copy())

def butterworth_low_pass_filter(data,     
                                frequency=500,         # sampling frequency
                                lp_filter=5,           # cutoff frequency
                                order=4):
    """
    Create a low pass filter to eliminate noise and smooth EMG data 
    
    The data were collected at a sampling rate of 500 Hz, 
    using as a programming kernel the National Instruments (NI) Labview. 
    The signals were band-pass filtered using a Butterworth Band Pass filter 
    with low and high cutoff at 15Hz and 500Hz respectively and a notch filter at 50Hz 
    to eliminate line interference artifacts.
    """
    lp_filter = lp_filter/(frequency/2)

    # Create a lowpass butterworth signal 
    B, A = scipy.signal.butter(order, 
                               lp_filter, 
                               btype='lowpass')


    # Apply the lowpass signal filter to EMG data
    smooth_emg = scipy.signal.filtfilt(B, 
                                       A, 
                                       data)
    return smooth_emg


def holt_smoothing(data,
                  s_level = 0.5,
                  s_slope = 0.1):
    smoothed = Hook(data[0]).fit(smoothing_level=s_level, smoothing_slope=s_slope).fittedvalues[:]
    print(data[0])
    print(smoothed)
    return smoothed

In [7]:
df_features = new_dataset.iloc[:,:6000].copy()
df_labels = new_dataset.iloc[:, 6001]
df_features = abs_val_filter(df_features)

In [28]:
smoothed_emg_df = butterworth_low_pass_filter(df_features.to_numpy(),
                                             frequency=5000,
                                             lp_filter=25)

In [29]:
frame = smoothed_emg_df.shape[1]
print(frame)

6000


In [30]:
def next_power_of_2(x):
    return 1 if x == 0 else 2 ** (x - 1).bit_length()

In [31]:
fs = 2000

In [32]:
def spectrum(signal, fs):
    m = len(signal)
    n = next_power_of_2(m)
    y = np.round(np.fft.fft(signal, n), 5)
    yh = y[0:int(n / 2 - 1)]
    fh = (fs / n) * np.arange(0, n / 2 - 1, 1)
    power = np.round(np.real(yh * np.conj(yh) / n),5)

    return fh, power

In [47]:
smoothed_emg_df=np.array(smoothed_emg_df.astype(float))

In [48]:
def wavelet_energy(x, mother, nivel):
    coeffs = pywt.wavedecn(x, wavelet=mother, level=nivel)
    arr, _ = pywt.coeffs_to_array(coeffs)
    et = np.sum(arr ** 2)
    ca = coeffs[0]
    ea = 100 * np.sum(ca ** 2) / et
    ed = []

    for k in range(1, len(coeffs)):
        cd = list(coeffs[k].values())
        cd = np.asarray(cd)
        ed.append(100 * np.sum(cd ** 2) / et)

    return ea, ed


In [59]:

"""
    Compute time-frequency features from signal using sliding window method.
    :param signal: numpy array signal.
    :param frame: sliding window size
    :param step: sliding window step size
    :return: h_wavelet: list
    """
L = smoothed_emg_df.shape[0]
i=0
h_wavelet=[]
for i in range(L):
    h_wavelet.append([])

    E_a, E = wavelet_energy(smoothed_emg_df[i,:], 'db2', 4)
    E.insert(0, E_a)
    E = np.asarray(E) / 100
    h_wavelet[i].append(np.sum(E * np.log2(E)))
h_wavelet=np.array(h_wavelet)
h_wavelet.shape


(900, 1)

In [60]:
print(h_wavelet)

[[-7.35022196e-05]
 [-6.25445600e-05]
 [-6.64370704e-05]
 [-8.34727041e-05]
 [-7.54952965e-05]
 [-5.83725926e-05]
 [-9.65496382e-05]
 [-5.86571143e-05]
 [-7.50645614e-05]
 [-6.59069189e-05]
 [-4.99594786e-05]
 [-5.73836291e-05]
 [-8.06555127e-05]
 [-4.37290472e-05]
 [-1.03870085e-04]
 [-6.15552961e-05]
 [-5.77261255e-05]
 [-4.69484694e-05]
 [-5.87800456e-05]
 [-3.81895504e-05]
 [-1.49771239e-04]
 [-5.70649593e-05]
 [-1.12329492e-04]
 [-7.32412087e-05]
 [-1.89868220e-04]
 [-6.06014361e-05]
 [-7.33924252e-05]
 [-6.33529195e-05]
 [-7.92203966e-05]
 [-4.90502361e-05]
 [-7.91633435e-05]
 [-1.91374264e-05]
 [-3.07627636e-05]
 [-2.25420053e-05]
 [-4.40595923e-05]
 [-2.82723104e-05]
 [-3.24460146e-05]
 [-3.47019950e-05]
 [-3.88364191e-05]
 [-3.24877769e-05]
 [-2.07779849e-05]
 [-3.45646172e-05]
 [-2.25834175e-05]
 [-9.28094804e-05]
 [-1.28275092e-04]
 [-3.35560530e-05]
 [-3.39601048e-05]
 [-3.08646991e-05]
 [-2.56663499e-05]
 [-3.09454800e-05]
 [-2.77029054e-05]
 [-3.94735422e-05]
 [-4.3053210