# Database Exploration

This notebook allows for easy exploration of the contents of the /data folder.

It is mainly designed for use with the Ninapro databases and with databases that are set up in a similar fashion.

In [None]:
import src
from scipy.io import loadmat, whosmat
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt 
import os
import pandas as pd
import plotly.express as px
from src import config
import os
import re

Explore a dataset's general information. The cell below outputs a summary for each file found in a subject's folder.

In [None]:
# Choose the database to analyze
database = 'DB4'

data_path = f'data/{database}'

# Subject folders follow the convention "s<number>" / "S<number>" or
# "Subject<number>" (prefix match, as before).
subject_pattern = re.compile(r'[sS]\d+|Subject\d+')

# Pick the first matching entry that is really a directory. Entries are
# sorted so the chosen subject does not depend on the arbitrary order
# returned by os.listdir, and plain files whose names happen to match
# (e.g. an archive next to the folders) are skipped instead of crashing
# the listing below.
folder = None
if os.path.isdir(data_path):
    for entry in sorted(os.listdir(data_path)):
        if subject_pattern.match(entry) and os.path.isdir(os.path.join(data_path, entry)):
            folder = entry
            break
else:
    print(f"Data directory not found: {data_path}")

if folder:
    folder_path = os.path.join(data_path, folder)

    # whosmat describes each variable stored in a .mat file as a
    # (name, shape, data class) tuple without loading the data itself.
    results = [
        (file_name, whosmat(os.path.join(folder_path, file_name)))
        for file_name in sorted(os.listdir(folder_path))
        if file_name.endswith('.mat')
    ]

    # Nested mapping: variable name -> {file name -> (shape, data class)}
    data = {}
    for file_name, info in results:
        for variable_name, *details in info:
            data.setdefault(variable_name, {})[file_name] = tuple(details)

    # Rows are variables, columns are files
    df = pd.DataFrame(data).transpose()
    df.columns.name = 'File Name'

    print(df)
else:
    print("No folder found with the convention s + 'number'")

In [None]:
# Select one subject and one of its recordings, then load it with the
# project loader. `database` comes from the cell above.
subject = 's1'
filename = 'S1_E1_A1.mat'
mat_file = src.loadmatNina(
    database,
    filename,
    subject=subject,
)

###### Analyze the response time between "stimulus" and "restimulus"

In [None]:
import os
from src import loadmatNina  # project loader for Ninapro .mat recordings

# Name of the database (per the original note, it matches DATABASE_INFO)
database = 'DB4'

# Absolute path to the database folder
data_path = os.path.abspath(os.path.join('data', database))

# Subject folder names: s1 .. s10
subjects = [f's{i}' for i in range(1, 11)]

# Walk every subject and try to load its three exercise recordings.
# NOTE: `mat_file` is overwritten on every successful load, so after the
# loop it holds only the last recording that was found.
for subject in subjects:
    # Folder of the current subject (s1, s2, ...)
    subject_path = os.path.join(data_path, subject)

    # The specific recordings expected for this subject
    filenames = [f"{subject.upper()}_E1_A1.mat",
                 f"{subject.upper()}_E2_A1.mat",
                 f"{subject.upper()}_E3_A1.mat"]

    for filename in filenames:
        file_path = os.path.join(subject_path, filename)

        # Report missing recordings and move on to the next one
        if not os.path.isfile(file_path):
            print(f"File not found: {file_path}")
            continue

        print(f"Loading file: {file_path}")
        mat_file = loadmatNina(database, filename, subject=subject)


#### Split by channels

In [None]:
# Grasp number to plot from the most recently loaded recording.
grasp = 2
src.plot_emg_data(
    database,
    mat_file,
    grasp,
    interactive=False,
    include_rest=True,
    use_stimulus=False,
    addFourier=False,
    padding=100,
)

### Feature Calculation and Database Relabeling for Ninapro

#### Overview
This script demonstrates how to import EMG data and stimulus data, along with a sample calculation of features. The following parameters can be customized:
- **Window size**
- **Padding (zeroes)** 
- **Labeling thresholds**

#### Relabeling Ninapro Databases
To maintain consistency across databases, **Databases 1, 4, and 5** require relabeling to match the convention used in the other databases. The script reorganizes **Exercise A** to appear last in the database, resulting in the following grasp numbering:

- **1 - 17:** Exercise B  
- **18 - 40:** Exercise C  
- **41 - 49:** Exercise D  
- **50 - 61:** Exercise A  
- **0:** Rest  


In [None]:
# Build a dataframe from the loaded recording; the helper returns the
# dataframe together with a second value kept here as `grasps`.
test_df, grasps = src.build_dataframe(
    mat_file,
    database,
    filename,
    rectify=False,
    normalize=True,
)
# Preview the first rows
test_df.head()

In [None]:
# Filter the dataframe with the project helper.
# NOTE(review): the second argument (50) is presumably the grasp number to
# keep - confirm against src.db_utils.filter_data_pandas.
dataframe_new = src.db_utils.filter_data_pandas(
    test_df,
    50,
    include_rest=False,
    padding=0,
)
# Preview the first rows
dataframe_new.head()

In [None]:
# Grasp number to plot from the dataframe built above.
grasp = 58
src.plot_emg_dataframe(
    database,
    test_df,
    grasp,
    interactive=False,
    include_rest=True,
    use_stimulus=True,
    addFourier=True,
)

In [None]:

# Sample feature calculation for one movement.
# NOTE(review): this cell relies on names that are not defined in the cells
# shown above (re_stimulus, emg, filtered_start_index_re_stimulus, movement,
# window_length, overlapping, threshold, movements_label, electrodes, fc, fm,
# individual, features, signals_features) - they must exist before running it.

# Extract the re-stimulus signal of the selected movement and split it into
# overlapping windows.
segmented_re_stimulus = src.get_signal_by_movement_complete(
    re_stimulus, filtered_start_index_re_stimulus[str(movement)]
)
windows_re_stimulus = src.create_windows_with_overlap(segmented_re_stimulus, window_length, overlapping)

# One label per re-stimulus window
labels = [
    src.get_label(window, threshold, movements_label, database)
    for window in windows_re_stimulus
]

# For every electrode: extract the same movement segment, filter it and
# window it the same way as the re-stimulus signal.
# NOTE(review): fc / fm are presumably the cutoff and sampling frequencies -
# confirm against src.get_envelope_filtered.
emg_signals = []
for electrode in range(electrodes):
    segmented_emg_signal = src.get_signal_by_movement_complete(emg[:, electrode], filtered_start_index_re_stimulus[str(movement)])
    filtered_segmented_emg_signal = src.get_envelope_filtered(segmented_emg_signal, fc, fm)
    windows_emg_electrode = src.create_windows_with_overlap(filtered_segmented_emg_signal, window_length, overlapping)
    emg_signals.append(windows_emg_electrode)

# Labels and electrode windows must line up one-to-one
windows_quantity = len(emg_signals[0])
if windows_quantity != len(labels):
    print('Different longitude in labels with electrodes', movement, individual, database) #This is used to check the windows between labels and windows in electrodes
    raise ValueError('No es compatible')

# Every electrode must have the same number of windows. This does not depend
# on the label, so it is checked once up front instead of once per window.
for index_electrode, element in enumerate(emg_signals):
    if len(element) != windows_quantity:
        print('Different longitude in electrodes', index_electrode, movement, individual, database) #This is used to find any signal with different number of window
        raise ValueError('No es compatible')

# Build one single-row frame per window: the label plus every feature of
# every electrode (columns are named "<feature><electrode number>", 1-based).
feature_rows = []
for index_label, label in enumerate(labels):
    if not label:
        print(label)
    data_label = {
        'label': label,
    }
    for index_electrode, element in enumerate(emg_signals):
        for feature, function in features.items():
            feature_calculus = function(element[index_label])
            data_label[feature + str(index_electrode + 1)] = np.array([feature_calculus])
    feature_rows.append(pd.DataFrame(data_label))

# Append all rows in a single concat; concatenating inside the loop copies
# the growing frame on every iteration.
if feature_rows:
    signals_features = pd.concat([signals_features, *feature_rows], ignore_index=True)
