# Clasificador de datos que permita reconocer la marcha - Hold Out

## 0. Initial Setup (carga de librerías)

In [11]:
!pip install ciervo --upgrade

import import_ipynb
from functions import * 
from ciervo.plots import emg_plot
import torch.nn as nn
from ciervo.io import load_data
from ciervo.models import label_data, train_test_split
from tqdm import tqdm



## 1. Data Loading

In [12]:
# Load the gait recordings from the relative 'data/marcha_larga' directory.
# NOTE(review): the structure returned by load_data (from ciervo.io) is not
# visible in this notebook — confirm it before relying on its layout.
data_files = load_data('data/marcha_larga')

Total time: 2.29 hours


## 2. Feature Extraction 


In [13]:
# Feature extraction for a single multi-channel EMG window
def extract_feature(data, divide=3, fs=250, nperseg=32):
    """Compute a flat feature vector for one multi-channel signal window.

    For every channel (column of ``data``) the following values are appended,
    in this order:

    1. RMS of the signal.
    2. Variance, kurtosis and skewness of the full-wave rectified signal.
    3. Number of zero crossings of the raw signal.
    4. Median, mean and peak frequency of the Welch power spectrum.
    5. For each of ``divide`` consecutive segments of the window: mean,
       standard deviation, maximum and minimum of the Hilbert envelope.

    Parameters
    ----------
    data : array-like of shape (T, C)
        ``T`` samples by ``C`` channels.
    divide : int, default 3
        Number of consecutive, (almost) equal-length segments used for the
        envelope statistics.
    fs : float, default 250
        Sampling frequency in Hz passed to ``scipy.signal.welch``.
    nperseg : int, default 32
        Segment length passed to ``scipy.signal.welch``.

    Returns
    -------
    result : numpy.ndarray of shape (C * (8 + 4 * divide),)
        Feature values, channel after channel.
    feature_names : list of str
        Name of each entry of ``result``, in the same order.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D, or if the window has fewer samples than
        ``divide`` (at least one envelope segment would be empty).
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            f"data must be 2-D with shape (T, C), got array with shape {data.shape}"
        )
    n_samples, n_channels = data.shape
    if n_samples < divide:
        # Without this guard the failure surfaces later as an opaque
        # "zero-size array" reduction error on an empty envelope segment.
        raise ValueError(
            f"window has {n_samples} samples but divide={divide}; "
            "every segment needs at least one sample"
        )

    # Segment boundaries only depend on the window length, not on the channel.
    segment_bounds = [
        (int(i * n_samples / divide), int((i + 1) * n_samples / divide))
        for i in range(divide)
    ]

    result = []
    feature_names = []

    for c in range(n_channels):
        signal0 = data[:, c]

        # Full wave rectification
        rectified_signal = np.abs(signal0)

        # Envelope: magnitude of the analytic signal
        env = np.abs(signal.hilbert(signal0))

        # RMS (squaring makes the rectification irrelevant here)
        rms = np.sqrt(np.mean(rectified_signal**2))
        result.append(rms)
        feature_names.append(f"rms_channel_{c}")

        # Variance of the rectified signal
        var = np.var(rectified_signal)
        result.append(var)
        feature_names.append(f"var_channel_{c}")

        # Kurtosis of the rectified signal
        kurt = scipy.stats.kurtosis(rectified_signal)
        result.append(kurt)
        feature_names.append(f"kurt_channel_{c}")

        # Skewness of the rectified signal
        skew = scipy.stats.skew(rectified_signal)
        result.append(skew)
        feature_names.append(f"skew_channel_{c}")

        # Zero crossings: count sign changes between consecutive raw samples
        zc = ((signal0[:-1] * signal0[1:]) < 0).sum()
        result.append(zc)
        feature_names.append(f"zc_channel_{c}")

        # Spectral features from the Welch power spectrum
        freqs, power_spectrum = scipy.signal.welch(signal0, fs=fs, nperseg=nperseg)
        # Median frequency: first bin where the cumulative power reaches half the total
        median_freq = freqs[np.where(np.cumsum(power_spectrum) >= np.sum(power_spectrum) / 2)[0][0]]
        # Mean frequency: power-weighted average of the frequency bins
        mean_freq = np.sum(freqs * power_spectrum) / np.sum(power_spectrum)
        peak_freq = freqs[np.argmax(power_spectrum)]

        result.extend([median_freq, mean_freq, peak_freq])
        feature_names.extend([f"median_freq_channel_{c}", f"mean_freq_channel_{c}", f"peak_freq_channel_{c}"])

        # Envelope statistics per segment
        for i, (start, end) in enumerate(segment_bounds):
            segment_env = env[start:end]
            mean_env = segment_env.mean()
            std_env = segment_env.std()
            max_env = segment_env.max()
            min_env = segment_env.min()

            result.extend([mean_env, std_env, max_env, min_env])
            feature_names.extend([f"mean_env_segment_{i}channel{c}", f"std_env_segment_{i}channel{c}",
                                  f"max_env_segment_{i}channel{c}", f"min_env_segment_{i}channel{c}"])

    result = np.array(result)
    return result, feature_names

def label_data_and_features(data, divide=3):
    """Build the feature matrix for a collection of signal windows.

    Each element of ``data`` is passed to ``extract_feature`` together with
    ``divide``; only the feature values are kept (the feature names are
    discarded). A tqdm progress bar tracks the iteration.

    Returns a NumPy array with one row per window and one column per feature.
    """
    feature_rows = [extract_feature(window, divide)[0] for window in tqdm(data)]
    return np.array(feature_rows)  # (n_windows, n_features)

In [14]:
# Phase counts (number of gait-phase classes) to evaluate
fases_to_test = [4, 8, 16]

# One independent accumulator per model; the training loop appends
# (num_fases, accuracy) tuples to these lists
results_simple_nn, results_cnn, results_rnn = [], [], []

In [16]:
# Hold-out experiment: for every phase count in fases_to_test, relabel the
# recordings, split them into train/test windows, extract and select features,
# then train and evaluate three models (SimpleNN, GaitCNN, GaitRNN). Each
# model's test accuracy is appended to its results list as (num_fases, accuracy).
# NOTE(review): no torch seed is set in this cell, so unless the helpers seed
# internally the accuracies may differ between runs — confirm.
# NOTE(review): re-running this cell without re-running the cell that creates
# the results lists appends a second set of entries to them.
for num_fases in fases_to_test:
    print(f"TESTING WITH {num_fases} PHASES...")
    # Label the loaded recordings with num_fases phases (ciervo helper).
    labeled_data=label_data(data_files,num_fases=num_fases)
    
    # 80/20 hold-out split into non-overlapping windows of 125 samples over
    # three EMG columns; random_state fixes the split itself.
    train_window, train_labels, test_window, test_labels = train_test_split(labeled_data, 
                                                                      columna=["EMG_Isquio","EMG_Cuadriceps","EMG_AductorLargo"],
                                                                      window_size=125,
                                                                        test_size=0.2,
                                                                      overlap=0,
                                                                      random_state=42)
    # NOTE(review): these are still raw windows; shape[1] is presumably the
    # window length (window_size=125), so the word "features" in the message
    # may mislead — confirm.
    print(f"Train data: Extracted {train_window.shape[0]} samples with {train_window.shape[1]} features each.")
    print(f"Test data: Extracted {test_window.shape[0]} samples with {test_window.shape[1]} features each.")
    # Turn every window into a feature vector (see extract_feature).
    train_data = label_data_and_features(train_window, divide=3)
    test_data = label_data_and_features(test_window, divide=3)
    
     
    #train_data, test_data = clean_normalized_feature_selection(train_data, test_data)
    # Reduce both sets to 5 feature columns via sfs_selection, which only
    # receives the training labels (helper defined outside this notebook;
    # presumably sequential feature selection — confirm).
    train_data, test_data = sfs_selection(train_data,test_data,train_labels,n_indices=5)
    print(f"Train data: Extracted {train_data.shape[0]} samples with {train_data.shape[1]} features each.")
    print(f"Test data: Extracted {test_data.shape[0]} samples with {test_data.shape[1]} features each.")
    print("-----------------------------------------------------------------------")
    
    # Simple NN
    print("Simple NN")
    # Input width follows the selected features; output width equals the
    # number of phases being classified.
    input_size = train_data.shape[1]
    hidden_size = 100
    output_size = num_fases
    learning_rate = 0.001
    num_epochs = 100
    
    model = SimpleNN(input_size, hidden_size, output_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    train_loader, test_tensor, test_labels_tensor = prepare_data_SimpleNN(train_data, train_labels, test_data, test_labels)
    train_model_SimpleNN(model, criterion, optimizer, train_loader, num_epochs)
    accuracy_nn = evaluate_model_SimpleNN(model, test_tensor, test_labels_tensor)
    results_simple_nn.append((num_fases, accuracy_nn))
    
    # CNN
    print("-----------------------------------------------------------------------")
    print("CNN")
    # train_loader and test_labels_tensor are rebound here with the CNN versions.
    train_loader, test_loader, test_labels_tensor, train_data_tensor = prepare_data_CNN(train_data, train_labels, test_data, test_labels)
    # assumes prepare_data_CNN returns a 3-D tensor whose last axis is the
    # per-sample input length — TODO confirm
    input_length = train_data_tensor.shape[2]
    # Use the GPU when available; `device` is reused by the RNN section below.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cnn_model = GaitCNN(input_length, num_fases=num_fases).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.001)
    
    # The epoch count is passed as a literal here rather than via num_epochs;
    # the returned losses are not used further in this cell.
    train_loss, test_loss = train_model_CNN(cnn_model, device, criterion, optimizer, train_loader, test_loader, num_epochs=100)
    accuracy_cnn = evaluate_model_CNN(cnn_model, test_loader, test_labels_tensor)
    results_cnn.append((num_fases, accuracy_cnn))
    
    # RNN
    print("-----------------------------------------------------------------------")
    print("RNN")
    train_data_rnn, test_data_rnn = preprocess_data_RNN(train_data, test_data)
    # input_size, hidden_size and num_epochs are rebound for the RNN
    # (150 epochs instead of the 100 used by the two models above).
    input_size = train_data_rnn.shape[2]
    hidden_size = 64
    num_layers = 2
    batch_size = 32
    num_epochs = 150
    
    train_loader, test_loader = create_dataloaders_RNN(train_data_rnn, train_labels, test_data_rnn, test_labels, batch_size)
    rnn_model = GaitRNN(input_size, hidden_size, num_layers, num_fases).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(rnn_model.parameters(), lr=0.001)
    
    train_loss, test_loss = train_model_RNN(rnn_model, train_loader, test_loader, criterion, optimizer, num_epochs, device)
    # NOTE(review): the RNN evaluation receives the raw test_labels, whereas
    # the CNN evaluation receives test_labels_tensor — confirm this is intended.
    accuracy_rnn = evaluate_model_RNN(rnn_model, test_loader, test_labels, device)
    results_rnn.append((num_fases, accuracy_rnn))
    print("-----------------------------------------------------------------------")



TESTING WITH 4 PHASES...


  0%|          | 47/9763 [00:00<00:20, 466.47it/s]

Train data: Extracted 9763 samples with 125 features each.
Test data: Extracted 2437 samples with 125 features each.


100%|██████████| 9763/9763 [00:20<00:00, 483.80it/s]
100%|██████████| 2437/2437 [00:05<00:00, 486.71it/s]


[42 22  6 25 48]
Train data: Extracted 9763 samples with 5 features each.
Test data: Extracted 2437 samples with 5 features each.
-----------------------------------------------------------------------
Simple NN
Epoch [10/100], Loss: 1.131120204925537
Epoch [20/100], Loss: 0.8113129734992981
Epoch [30/100], Loss: 0.7712395787239075
Epoch [40/100], Loss: 0.959580659866333
Epoch [50/100], Loss: 0.7485082745552063
Epoch [60/100], Loss: 2.1234748363494873
Epoch [70/100], Loss: 0.7854281067848206
Epoch [80/100], Loss: 0.41474369168281555
Epoch [90/100], Loss: 1.501035213470459
Epoch [100/100], Loss: 0.6119937896728516
Accuracy: 0.5519080837094789%
-----------------------------------------------------------------------
CNN
Epoch [10/100], Train Loss: 1.160703642305985, Val Loss: 1.1372054296654541
Epoch [20/100], Train Loss: 1.1364580324662277, Val Loss: 1.1208786020031223
Epoch [30/100], Train Loss: 1.108999134863124, Val Loss: 1.0981876726274367
Epoch [40/100], Train Loss: 1.06142843061802

  0%|          | 48/9763 [00:00<00:20, 476.70it/s]

Train data: Extracted 9763 samples with 125 features each.
Test data: Extracted 2437 samples with 125 features each.


100%|██████████| 9763/9763 [00:20<00:00, 466.09it/s]
100%|██████████| 2437/2437 [00:05<00:00, 458.15it/s]


[42 22  6 21 25]
Train data: Extracted 9763 samples with 5 features each.
Test data: Extracted 2437 samples with 5 features each.
-----------------------------------------------------------------------
Simple NN
Epoch [10/100], Loss: 1.6097087860107422
Epoch [20/100], Loss: 1.5604664087295532
Epoch [30/100], Loss: 1.5008841753005981
Epoch [40/100], Loss: 1.9631963968276978
Epoch [50/100], Loss: 1.6238101720809937
Epoch [60/100], Loss: 1.6879764795303345
Epoch [70/100], Loss: 2.6736841201782227
Epoch [80/100], Loss: 1.6287087202072144
Epoch [90/100], Loss: 1.4048256874084473
Epoch [100/100], Loss: 1.6244373321533203
Accuracy: 0.2905211325400082%
-----------------------------------------------------------------------
CNN
Epoch [10/100], Train Loss: 1.841237379834543, Val Loss: 1.8449042029195017
Epoch [20/100], Train Loss: 1.8046355960415859, Val Loss: 1.805684095853335
Epoch [30/100], Train Loss: 1.7777430204784168, Val Loss: 1.7783521723437619
Epoch [40/100], Train Loss: 1.767434479364

  1%|          | 96/9763 [00:00<00:20, 475.50it/s]

Train data: Extracted 9763 samples with 125 features each.
Test data: Extracted 2437 samples with 125 features each.


100%|██████████| 9763/9763 [00:20<00:00, 472.49it/s]
100%|██████████| 2437/2437 [00:05<00:00, 471.69it/s]


[42 22 21  6 25]
Train data: Extracted 9763 samples with 5 features each.
Test data: Extracted 2437 samples with 5 features each.
-----------------------------------------------------------------------
Simple NN
Epoch [10/100], Loss: 2.462428331375122
Epoch [20/100], Loss: 2.305957794189453
Epoch [30/100], Loss: 2.398668050765991
Epoch [40/100], Loss: 2.078456401824951
Epoch [50/100], Loss: 2.7932615280151367
Epoch [60/100], Loss: 2.1344754695892334
Epoch [70/100], Loss: 2.144317388534546
Epoch [80/100], Loss: 2.2061526775360107
Epoch [90/100], Loss: 1.98923921585083
Epoch [100/100], Loss: 3.0754082202911377
Accuracy: 0.17439474764054164%
-----------------------------------------------------------------------
CNN
Epoch [10/100], Train Loss: 2.54482387405595, Val Loss: 2.548051524471927
Epoch [20/100], Train Loss: 2.45810919415717, Val Loss: 2.4759150263550995
Epoch [30/100], Train Loss: 2.4387731108010984, Val Loss: 2.4434000640720517
Epoch [40/100], Train Loss: 2.426768747809666, Val 

In [None]:
# Report the hold-out accuracy of every model for each phase count tested
for header, model_results in (
    ("Results for Simple NN:", results_simple_nn),
    ("Results for CNN:", results_cnn),
    ("Results for RNN:", results_rnn),
):
    print(header)
    for num_fases, acc in model_results:
        print(f"Phases: {num_fases}, Accuracy: {acc}")

Results for Simple NN:
Phases: 4, Accuracy: 0.5519080837094789
Phases: 8, Accuracy: 0.2905211325400082
Phases: 16, Accuracy: 0.17439474764054164
Results for CNN:
Phases: 4, Accuracy: 0.5625769388592532
Phases: 8, Accuracy: 0.29544521953221176
Phases: 16, Accuracy: 0.17152236356175626
Results for RNN:
Phases: 4, Accuracy: 0.5301600328272467
Phases: 8, Accuracy: 0.2769798933114485
Phases: 16, Accuracy: 0.16864997948297086
