# Clasificador de datos que permita reconocer la marcha - Validación Cruzada

## 0. Initial Setup (carga de librerías)

In [1]:
!git clone https://github.com/domingomery/balu3
!pip install ./balu3

fatal: destination path 'balu3' already exists and is not an empty directory.
Processing ./balu3
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
Building wheels for collected packages: balu3
  Building wheel for balu3 (setup.py) ... [?25ldone
[?25h  Created wheel for balu3: filename=balu3-1.0-py3-none-any.whl size=43718 sha256=6b20c1491e4c8475a02b250339695735a773c63a90acf4e6bfe6e89b288caf36
  Stored in directory: /tmp/pip-ephem-wheel-cache-1j0pgtsx/wheels/2e/72/90/0b6128ea07a8a26c59ffe83a05ebdf4c86dcf5b9aeefc3d561
Successfully built balu3
Installing collected packages: balu3
  Attempting uninstall: balu3
    Found existing installati

In [2]:
from ciervo.plots import emg_plot
from balu3.fs.sel  import clean        
from sklearn.preprocessing import MinMaxScaler


In [3]:
import import_ipynb
from functions import *

from ciervo.io import load_data
from ciervo.models import label_data, train_test_split
from tqdm import tqdm

importing Jupyter notebook from functions.ipynb
fatal: destination path 'balu3' already exists and is not an empty directory.
Processing ./balu3
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
Building wheels for collected packages: balu3
  Building wheel for balu3 (setup.py) ... [?25ldone
[?25h  Created wheel for balu3: filename=balu3-1.0-py3-none-any.whl size=43718 sha256=fc34588949f1083aa0a92ecbeef408c467f78bfb1b332d8c6be6b8d84d1023b6
  Stored in directory: /tmp/pip-ephem-wheel-cache-wc7a1nbb/wheels/2e/72/90/0b6128ea07a8a26c59ffe83a05ebdf4c86dcf5b9aeefc3d561
Successfully built balu3
Installing collected packages: balu3
  Attemptin

## 1. Data Loading

Se toman los archivos de `ciervo/tests/data/marcha` (17 archivos en total).

In [4]:
data_files = load_data('data/marcha_larga')

Total time: 2.29 hours


## 2. Feature Extraction 

In [5]:
#Funcion extraccion caracteristicas
def extract_feature(data, divide=3, fs=250, nperseg=32):
    """Compute per-channel features for one multi-channel signal window.

    For every column of ``data`` the following values are emitted, in order:
    RMS, variance, kurtosis and skewness of the rectified signal; the number
    of sign changes of the raw signal; the median, mean and peak frequency of
    its Welch power spectrum; and finally mean/std/max/min of the Hilbert
    envelope over each of ``divide`` consecutive, near-equal segments.
    With a positive ``divide`` that is ``8 + 4 * divide`` values per channel.

    Parameters
    ----------
    data : array-like, shape (T, C)
        T samples by C channels.
    divide : int, default 3
        Number of consecutive segments the envelope is split into.
    fs : float, default 250
        Sampling frequency handed to ``scipy.signal.welch``; the frequency
        features are expressed in the matching unit.
    nperseg : int, default 32
        Segment length handed to ``scipy.signal.welch``.

    Returns
    -------
    result : numpy.ndarray
        1-D array with the features of channel 0, then channel 1, and so on.
    feature_names : list of str
        One name per entry of ``result``, in the same order.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D, or if it has fewer samples than ``divide``
        (at least one envelope segment would be empty).
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            f"data must be 2-D (samples, channels); got an array with {data.ndim} dimension(s)"
        )
    n_samples, n_channels = data.shape
    if divide > 0 and n_samples < divide:
        raise ValueError(
            f"cannot split {n_samples} sample(s) into {divide} non-empty segments"
        )

    # The segment boundaries depend only on the window length, so compute them
    # once instead of once per channel.
    segment_bounds = [
        (i * n_samples // divide, (i + 1) * n_samples // divide)
        for i in range(divide)
    ]

    result = []
    feature_names = []

    for c in range(n_channels):
        raw = data[:, c]

        # Full-wave rectification
        rectified = np.abs(raw)

        # Envelope: magnitude of the analytic signal
        envelope = np.abs(scipy.signal.hilbert(raw))

        # Time-domain statistics of the rectified signal plus the number of
        # sign changes between consecutive raw samples.
        result.extend([
            np.sqrt(np.mean(rectified ** 2)),
            np.var(rectified),
            scipy.stats.kurtosis(rectified),
            scipy.stats.skew(rectified),
            ((raw[:-1] * raw[1:]) < 0).sum(),
        ])
        feature_names.extend([
            f"rms_channel_{c}",
            f"var_channel_{c}",
            f"kurt_channel_{c}",
            f"skew_channel_{c}",
            f"zc_channel_{c}",
        ])

        # Spectral features from the Welch power spectrum
        freqs, power_spectrum = scipy.signal.welch(raw, fs=fs, nperseg=nperseg)
        total_power = np.sum(power_spectrum)
        # Median frequency: first bin where the cumulative power reaches half the total
        half_power_bins = np.flatnonzero(np.cumsum(power_spectrum) >= total_power / 2)
        median_freq = freqs[half_power_bins[0]]
        mean_freq = np.sum(freqs * power_spectrum) / total_power
        peak_freq = freqs[np.argmax(power_spectrum)]

        result.extend([median_freq, mean_freq, peak_freq])
        feature_names.extend([
            f"median_freq_channel_{c}",
            f"mean_freq_channel_{c}",
            f"peak_freq_channel_{c}",
        ])

        # Envelope statistics per segment.  The names are kept exactly as they
        # were (no separator around "channel") so existing consumers of
        # feature_names keep matching.
        for i, (start, end) in enumerate(segment_bounds):
            segment_env = envelope[start:end]
            result.extend([
                segment_env.mean(),
                segment_env.std(),
                segment_env.max(),
                segment_env.min(),
            ])
            feature_names.extend([
                f"mean_env_segment_{i}channel{c}",
                f"std_env_segment_{i}channel{c}",
                f"max_env_segment_{i}channel{c}",
                f"min_env_segment_{i}channel{c}",
            ])

    result = np.array(result)
    return result, feature_names

def label_data_and_features(data, divide=3):
    """Build the feature matrix for a collection of signal windows.

    Each element of ``data`` is passed to ``extract_feature`` together with
    ``divide``; only the feature vector is kept (the feature names are
    dropped).  Progress is reported through ``tqdm``.

    Returns a NumPy array with one row per element of ``data``.
    """
    rows = [extract_feature(window, divide)[0] for window in tqdm(data)]
    return np.array(rows)

## 3. Training and testing subsets

## 4. Feature selection/transformation

## 5. Classification

## 6. Evaluation

In [6]:
# Classifier collection to evaluate. create_classifiers is not defined in this
# notebook; presumably it comes from `from functions import *` — TODO confirm.
classifiers = create_classifiers()
fases_to_test = [4, 8, 16]  # Phase counts to try; each one is passed to label_data as num_fases

# Result stores, keyed by number of phases: one for the runs without SFS and
# one for the runs with SFS.
results_without_sfs = {}
results_with_sfs = {}

In [7]:
# Iterate over the phase counts in fases_to_test; for each one, relabel the
# data, rebuild the feature matrix and evaluate every classifier with and
# without SFS.
# NOTE(review): label_data, train_test_split and evaluate_classifiers are
# defined outside this cell; the comments below describe only how they are
# called here.
for num_fases in fases_to_test:
    print(f'TESTING WITH {num_fases} PHASES')
    labeled_data=label_data(data_files,num_fases=num_fases)
    
    # 3. Training and testing subsets
    # test_size=0 and the last two return values are discarded: presumably all
    # windows end up in (features, labels) — TODO confirm against
    # ciervo.models.train_test_split.
    features, labels, _, _ = train_test_split(labeled_data, 
                                              columna=["EMG_Isquio","EMG_Cuadriceps","EMG_AductorLargo"],
                                              window_size=125,
                                              test_size=0,
                                              overlap=0,
                                              random_state=42)
    # `features` is rebound here: the windows go in, the array of per-window
    # feature vectors comes out.
    features = label_data_and_features(features, divide=3)
    
    print(f'Features: {features.shape[0]} samples with {features.shape[1]} features')
    print("------------------------------------------------------------------------")
    # 4. Feature selection/transformation
    # The result of clean() (balu3.fs.sel) is used as a column index, so only
    # the selected feature columns are kept.
    sclean = clean(features)
    features = features[:, sclean]
    
    # Rescale the kept columns with a MinMaxScaler using its default settings.
    # NOTE(review): clean() and the scaler are fit on the full matrix before
    # evaluate_classifiers runs. If that helper cross-validates internally
    # (nfolds=10 suggests it does), the held-out folds influence the
    # selection/scaling — confirm whether this is acceptable.
    scaler = MinMaxScaler()
    features = scaler.fit_transform(features)
    
    # 5. Classification
    # The classifiers were already created in `classifiers`
    
    # 6. Evaluation
    # Both runs receive the same features/labels and differ only in use_sfs;
    # each result is stored under the current phase count.
    print('Evaluando sin SFS...')
    accuracies_without_sfs = evaluate_classifiers(classifiers, features, labels, nfolds=10, use_sfs=False, n_features=20)
    results_without_sfs[num_fases] = accuracies_without_sfs
    
    print('Evaluando con SFS...')
    accuracies_with_sfs = evaluate_classifiers(classifiers, features, labels, nfolds=10, use_sfs=True, n_features=20)
    results_with_sfs[num_fases] = accuracies_with_sfs

TESTING WITH 4 PHASES


100%|██████████| 12200/12200 [00:52<00:00, 230.37it/s]


Features: 12200 samples with 60 features
------------------------------------------------------------------------
Evaluando sin SFS...

Evaluando: knn
Accuracy = 51.29

Evaluando: knn-3
Accuracy = 53.60

Evaluando: knn-5
Accuracy = 55.66

Evaluando: knn-8
Accuracy = 56.79

Evaluando: knn-9
Accuracy = 57.12

Evaluando: knn-10
Accuracy = 57.22

Evaluando: knn-15
Accuracy = 57.85

Evaluando: knn-20
Accuracy = 57.85

Evaluando: mlp
Accuracy = 58.44

Evaluando: mlp layers 2
Accuracy = 62.79

Evaluando: svm lineal 1
Accuracy = 53.60

Evaluando: svm lineal 2
Accuracy = 55.14

Evaluando: svm polinomial
Accuracy = 48.74

Evaluando: svm rbf 1
Accuracy = 50.02

Evaluando: svm rbf 2
Accuracy = 60.93

Evaluando: svm rbf 3
Accuracy = 62.75

Evaluando: svm rbf gamma auto
Accuracy = 46.37

Evaluando: svm sigmoidal
Accuracy = 38.27

Evaluando: dmin
Accuracy = 32.23

Evaluando: bayes kde
Accuracy = 39.86

Evaluando: naive bayes
Accuracy = 32.22

Evaluando: lda
Accuracy = 57.36

Evaluando: qda
Accuracy =

100%|██████████| 12200/12200 [00:25<00:00, 484.29it/s]


Features: 12200 samples with 60 features
------------------------------------------------------------------------
Evaluando sin SFS...

Evaluando: knn
Accuracy = 35.41

Evaluando: knn-3
Accuracy = 36.01

Evaluando: knn-5
Accuracy = 38.66

Evaluando: knn-8
Accuracy = 39.79

Evaluando: knn-9
Accuracy = 40.17

Evaluando: knn-10
Accuracy = 40.85

Evaluando: knn-15
Accuracy = 41.49

Evaluando: knn-20
Accuracy = 41.49

Evaluando: mlp
Accuracy = 39.63

Evaluando: mlp layers 2
Accuracy = 46.94

Evaluando: svm lineal 1
Accuracy = 36.20

Evaluando: svm lineal 2
Accuracy = 38.25

Evaluando: svm polinomial
Accuracy = 30.75

Evaluando: svm rbf 1
Accuracy = 28.84

Evaluando: svm rbf 2
Accuracy = 44.83

Evaluando: svm rbf 3
Accuracy = 47.96

Evaluando: svm rbf gamma auto
Accuracy = 23.06

Evaluando: svm sigmoidal
Accuracy = 15.02

Evaluando: dmin
Accuracy = 15.31

Evaluando: bayes kde
Accuracy = 16.05

Evaluando: naive bayes
Accuracy = 16.11

Evaluando: lda
Accuracy = 42.25

Evaluando: qda
Accuracy =

 29%|██▉       | 3571/12200 [00:16<00:35, 241.50it/s]

Accuracy = 21.21

Evaluando: mlp layers 2
Accuracy = 28.39

Evaluando: svm lineal 1
Accuracy = 15.44

Evaluando: svm lineal 2
Accuracy = 20.14

Evaluando: svm polinomial
Accuracy = 12.57

Evaluando: svm rbf 1
Accuracy = 10.09

Evaluando: svm rbf 2
Accuracy = 28.07

Evaluando: svm rbf 3
Accuracy = 32.73

Evaluando: svm rbf gamma auto
Accuracy = 8.59

Evaluando: svm sigmoidal
Accuracy = 7.90

Evaluando: dmin
Accuracy = 8.58

Evaluando: bayes kde
Accuracy = 10.79

Evaluando: naive bayes
Accuracy = 8.92

Evaluando: lda
Accuracy = 29.89

Evaluando: qda
Accuracy = 24.47

Evaluando: random forest depth 3
Accuracy = 32.60

Evaluando: random forest depth 15
Accuracy = 52.84

Evaluando: random forest depth 30
Accuracy = 53.42

Evaluando: random forest depth 100
Accuracy = 53.23

Evaluando: random forest n_estimators 300
Accuracy = 53.38

Evaluando: decision tree
Accuracy = 13.55

Evaluando: decision tree depth 12
Accuracy = 38.63

Evaluando: decision tree depth 100
Accuracy = 37.34

Evaluando: l

In [8]:
# Print, for every tested phase count, the stored results without and with SFS.
for num_fases in fases_to_test:
    print(f'Número de Fases: {num_fases}')
    for sfs_label, sfs_results in (('Sin SFS', results_without_sfs),
                                   ('Con SFS', results_with_sfs)):
        print(f'{sfs_label}: {sfs_results[num_fases]}')

Número de Fases: 4
Sin SFS: {'knn                               ': 0.5128688524590164, 'knn-3                             ': 0.5359836065573772, 'knn-5                             ': 0.5566393442622951, 'knn-8                             ': 0.5678688524590163, 'knn-9                             ': 0.5712295081967212, 'knn-10                            ': 0.5722131147540983, 'knn-15                            ': 0.5785245901639344, 'knn-20                            ': 0.5785245901639344, 'mlp                               ': 0.5844262295081968, 'mlp layers 2                      ': 0.6278688524590165, 'svm lineal 1                      ': 0.535983606557377, 'svm lineal 2                      ': 0.5513934426229509, 'svm polinomial                    ': 0.4873770491803279, 'svm rbf 1                         ': 0.5002459016393443, 'svm rbf 2                         ': 0.6092622950819673, 'svm rbf 3                         ': 0.6275409836065574, 'svm rbf gamma auto                ': 0.4636