# Data extraction

## Libraries

In [14]:
import os
os.environ['TF_GPU_THREAD_COUNT'] = '5'
os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
import time
import numpy as np
from joblib import Parallel, delayed
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import math
import setuptools.dist
from collections import Counter
from skimage.transform import resize
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import Lasso
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
import scipy
from scipy.signal import butter, filtfilt
import pywt
import tensorflow as tf
import itertools
import seaborn as sns
import tensorflow.python.keras.backend as K
import tsaug as ts
from scipy.ndimage import gaussian_filter
from finta import TA

## Magical formulas to use .mat files

In [15]:
# Load the three exercise recordings of every subject and merge them into one
# entry per subject: data["S<k>"] = {'emg', 'label', 'info', 'description'}.
n_subjects = 10
files = [f"S{x}" for x in range(1, n_subjects + 1)]
data = dict()
for file in files:
    E1 = scipy.io.loadmat(f"../data/{file}/{file}_E1_A1.mat")  # Exercise A
    E2 = scipy.io.loadmat(f"../data/{file}/{file}_E2_A1.mat")  # Exercise B
    E3 = scipy.io.loadmat(f"../data/{file}/{file}_E3_A1.mat")  # Exercise C

    # Label 0 is the rest movement and must stay 0 in every exercise set.
    # Only the non-rest labels of E2 and E3 are shifted (by 12 and 29) so they
    # do not collide with the labels of the preceding sets. The offsets are
    # applied directly here instead of subtracting and re-adding them, so the
    # loaded arrays are not modified and no intermediate negative (or wrapped)
    # values are produced.
    labels_E2 = np.where(E2["stimulus"] != 0, E2["stimulus"] + 12, E2["stimulus"])
    labels_E3 = np.where(E3["stimulus"] != 0, E3["stimulus"] + 29, E3["stimulus"])

    data[file] = {
        # Recordings are stacked along the first axis, in the order E1, E2, E3.
        'emg': np.concatenate([E1["emg"], E2['emg'], E3['emg']]),
        'label': np.concatenate([E1["stimulus"], labels_E2, labels_E3]),
        # Subject metadata is taken from the E1 file only.
        'info': {
            'age': E1['age'],
            'gender': E1['gender'],
            'n_subjects': n_subjects,
            'weight': E1['weight'],
            'height': E1['height'],
            'fs': E1["frequency"][0][0]
        },
        'description': "Contiene datos de EMG de 10 sujetos, 3 experimentos por cada sujeto (E1, E2 y E3) (A, B y C de la figura), cada experimento contiene diferentes ejercicios (A:12, B:17, C:23)."
    }

In [16]:
data["S1"]["info"]

{'age': array([[23]], dtype=uint8),
 'gender': array(['m'], dtype='<U1'),
 'n_subjects': 10,
 'weight': array([[67]], dtype=uint8),
 'height': array([[187]], dtype=uint8),
 'fs': np.uint8(200)}

## Feature-extraction functions

In [3]:
def _rms(data):
    """
    Root Mean Square of a window of samples.

    Arguments:
            @var data: sequence of signal samples.

    Returns:
            @return: square root of (sum of squared samples / len(data)).
    """
    squared_total = np.power(data, 2).sum()
    mean_square = squared_total / len(data)
    return math.sqrt(mean_square)

def _zc(data):
    """
    Zero Crossing count.

    Arguments:
            @var data: sequence of signal samples.

    Returns:
            @return: number of positions where the sign of consecutive
                     samples differs (a sample equal to zero has its own
                     sign, so passing through an exact zero counts twice).
    """
    sign_steps = np.diff(np.sign(data))
    # Every non-zero step marks a change of sign between two neighbours.
    return np.count_nonzero(sign_steps)

def _var(data):
    """
    Variance of a window of samples.

    Arguments:
            @var data: sequence of signal samples.

    Returns:
            @return: result of np.var(data) with its default arguments.
    """
    variance = np.var(data)
    return variance

def _mav(data):
    """
    Mean Absolute Value of a window of samples.

    Arguments:
            @var data: sequence of signal samples.

    Returns:
            @return: mean of the absolute values of the samples.
    """
    rectified = np.abs(data)
    return np.mean(rectified)

def _sscl(data):
    """
    Slope Sign Change count.

    Arguments:
            @var data: sequence of signal samples.

    Returns:
            @return: number of positions where the sign of the slope
                     (first difference) differs from the sign of the
                     next slope.
    """
    slope_signs = np.sign(np.diff(data))
    # Compare every slope sign with its successor along the last axis.
    changed = slope_signs[..., 1:] != slope_signs[..., :-1]
    return np.sum(changed)

def _wl(data):
    """
    Waveform Length of a window of samples.

    Arguments:
            @var data: sequence of signal samples.

    Returns:
            @return: sum of the absolute differences between consecutive
                     samples.
    """
    steps = np.diff(data)
    step_sizes = np.abs(steps)
    return np.sum(step_sizes)
def _hrm(data):
    """
    Sum of the samples that exceed the absolute value of the mean.

    Arguments:
            @var data: iterable of signal samples.

    Returns:
            @return: sum of every sample strictly greater than
                     abs(mean(data)); 0 when no sample qualifies.
    """
    threshold = abs(np.mean(data))
    return sum(sample for sample in data if sample > threshold)


#### Tests of ren with the ideal parameters are still missing
def _ren(data, m=100, tolerance=50):
    """
    Rough entropy estimate based on matching sliding segments.

    Every length-`m` segment of the signal is compared with every other
    segment; two segments match when all of their element-wise absolute
    differences are <= `tolerance`. The result is -log of the fraction of
    segment pairs that match.

    Arguments:
            @var data: one-dimensional sequence of signal samples.
            @var m: segment length.
            @var tolerance: largest absolute difference allowed between
                            corresponding samples of two matching segments.

    Returns:
            @return: -log(matching pairs / total pairs), which is >= 0;
                     0 when the signal is shorter than `m` or when no pair
                     of segments matches.
    """
    # Work in floating point so differences of unsigned samples cannot wrap.
    signal = np.asarray(data, dtype=float)
    n = len(signal)

    # The signal must hold at least one full segment.
    if n < m:
        return 0

    # Read-only view with one row per segment of length `m` (no copy).
    segments = np.lib.stride_tricks.sliding_window_view(signal, m)
    n_segments = n - m + 1

    # Count each unordered pair (i, j) with i < j exactly once, so the count
    # is on the same scale as `total_combinations`. Comparing one segment at a
    # time against the later ones keeps memory proportional to
    # n_segments * m instead of n_segments**2 * m.
    count = 0
    for i in range(n_segments - 1):
        diffs = np.abs(segments[i + 1:] - segments[i])
        count += int(np.count_nonzero(np.all(diffs <= tolerance, axis=1)))

    # Number of unordered segment pairs.
    total_combinations = n_segments * (n_segments - 1) / 2

    # With no matching pair the logarithm is undefined; report 0 instead.
    rough_entropy_value = -np.log(count / total_combinations) if count > 0 else 0
    return rough_entropy_value

### Signal filters

#### Butterworth bandpass

In [5]:
def butterworth_bandpass(lowcut, highcut, fs, order=5):
    """
    Design a Butterworth band-pass filter.

    Arguments:
            @var lowcut: lower cut-off frequency, in the same unit as `fs`.
            @var highcut: upper cut-off frequency, in the same unit as `fs`.
            @var fs: sampling frequency.
            @var order: order passed to scipy.signal.butter.

    Returns:
            @return: the (b, a) coefficient pair returned by butter.
    """
    nyquist = 0.5 * fs
    # butter expects the band edges as fractions of the Nyquist frequency.
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band')

def bandpass_filter(data, lowcut, highcut, fs, order=5):
    """
    Band-pass filter a signal with a Butterworth filter.

    Arguments:
            @var data: signal to filter (filtfilt works along its default
                       axis).
            @var lowcut: lower cut-off frequency, in the same unit as `fs`.
            @var highcut: upper cut-off frequency, in the same unit as `fs`.
            @var fs: sampling frequency.
            @var order: order of the Butterworth design.

    Returns:
            @return: the output of scipy.signal.filtfilt for `data`.
    """
    numerator, denominator = butterworth_bandpass(lowcut, highcut, fs, order=order)
    # filtfilt applies the filter forwards and then backwards over the data.
    return filtfilt(numerator, denominator, data)

#### Wavelet transform

In [6]:
def wavelet_transform(data, wavelet='db4', level=4):
    """
    Multilevel discrete wavelet decomposition of a signal.

    Arguments:
            @var data: signal to decompose.
            @var wavelet: wavelet name passed to pywt.wavedec.
            @var level: decomposition level passed to pywt.wavedec.

    Returns:
            @return: the coefficients returned by pywt.wavedec.
    """
    return pywt.wavedec(data, wavelet, level=level)

## Feature Extraction (use with the magical formulas)

FileNotFoundError: [Errno 2] No such file or directory: '../data/S1/S1_E1_A1.csv'