In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re

from pathlib import Path
from scipy.interpolate import interp1d

In [2]:
def cargar_senal(path_csv):
    """Read a semicolon-separated CSV and return it with trimmed column names.

    Parameters
    ----------
    path_csv : anything accepted by ``pandas.read_csv`` as its first argument.

    Returns
    -------
    pandas.DataFrame
        The file contents; leading/trailing whitespace is removed from every
        column name.
    """
    return pd.read_csv(path_csv, sep=';').rename(columns=str.strip)

In [3]:
def upsample_rom(df, rom_sampling_rate=20):
    """Fill the sparse ROM (angle) column by cubic interpolation.

    The column at position 1 holds the angle samples; it has one value every
    few rows and NaN elsewhere. The valid samples are interpolated so that
    every row gets an angle value.

    Note: the valid samples are treated as evenly spread over the whole
    recording (first sample on the first row, last sample on the last row);
    their actual row positions are not used.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording whose second column (position 1) is the ROM signal.
    rom_sampling_rate : float, default 20
        Sampling rate, in Hz, at which the ROM samples were acquired.

    Returns
    -------
    (pandas.DataFrame, float)
        A copy of ``df`` with the ROM column fully populated, and the
        sampling rate of the full table, i.e. ``rom_sampling_rate`` scaled by
        the ratio of total rows to valid ROM samples.

    Raises
    ------
    ValueError
        If the ROM column contains no valid values.
    """
    total_filas = len(df)

    # Valid (non-empty) ROM samples, in their original order.
    muestras_validas = df.iloc[:, 1].dropna()
    n_validas = len(muestras_validas)

    if n_validas == 0:
        raise ValueError("No hay valores válidos en la columna de ángulos.")

    # How many table rows correspond to one ROM sample.
    factor_upsampling = total_filas / n_validas
    sampling_rate = factor_upsampling * rom_sampling_rate

    # Normalised positions of the known samples and of every output row.
    x_original = np.linspace(0, 1, n_validas)
    x_new = np.linspace(0, 1, total_filas)

    interpolator = interp1d(
        x_original,
        muestras_validas.to_numpy(),
        kind='cubic',
        fill_value='extrapolate',
    )

    # Work on a copy so the caller's frame and the other columns stay untouched.
    upsampled_df = df.copy()
    upsampled_df.iloc[:, 1] = interpolator(x_new)

    return upsampled_df, sampling_rate

In [4]:
def calcular_aceleracion(df, sampling_rate):
    """Add finite-difference 'speed', 'acceleration' and 'jerk' columns to ``df``.

    The signal differentiated is the column at position 1. Each derivative is a
    first difference divided by the time step, padded at the front with its own
    first value so that it keeps the length of the table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose second column is the signal; it is modified in place.
    sampling_rate : float
        Samples per unit of time, used to build the time axis.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the three new columns.
    """
    def _derivar(valores, paso):
        # First difference over the time step, front-padded with its first value.
        pendiente = np.diff(valores) / paso
        return np.insert(pendiente, 0, pendiente[0])

    paso = np.diff(np.arange(len(df)) / sampling_rate)

    velocidad = _derivar(df.iloc[:, 1], paso)
    aceleracion = _derivar(velocidad, paso)
    # Jerk is taken from the acceleration *before* its leading values are patched.
    jerk = _derivar(aceleracion, paso)

    # The front padding duplicates a sample, so the next derivative starts with
    # artificial values: two for acceleration, three for jerk. Overwrite them
    # with the first value that comes from real differences.
    aceleracion[:2] = aceleracion[2]
    jerk[:3] = jerk[3]

    df['speed'] = velocidad
    df['acceleration'] = aceleracion
    df['jerk'] = jerk

    return df

In [5]:
def resolver_offsets_emg(df):
    """Remove the constant offset from every EMG channel of ``df``.

    Each column whose name contains 'emg_muscle' has its own mean subtracted.
    Other columns are left as they are. ``df`` is modified in place and
    returned.
    """
    for nombre in df.columns:
        if 'emg_muscle' not in nombre:
            continue
        canal = df[nombre]
        df[nombre] = canal - canal.mean()
    return df

In [6]:
def normalizar_tiempo(df):
    """Add a 'tiempo_normalizado' column running evenly from 0 to 1.

    The column has one value per row of ``df``. ``df`` is modified in place
    and returned.
    """
    df['tiempo_normalizado'] = np.linspace(0, 1, num=len(df))
    return df

In [7]:
def columnas_identificadoras(df, path):
    """Label every row of ``df`` with the class, BPM and kid number of its file.

    The labels are read from the three folders that directly contain the file,
    i.e. ``.../<class>/<N>BPM/<kid number>/<file>.csv``. Whatever precedes
    those three folders (a data root, an absolute prefix) is ignored, so the
    function works for relative and absolute paths alike.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to label; it is modified in place.
    path : str or os.PathLike
        Path of the CSV file the table was loaded from.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new columns 'class' (str),
        'BPM' (int) and 'kid number' (str).

    Raises
    ------
    ValueError
        If the path has fewer than three parent folders, or the BPM folder
        name contains no digits.
    """
    # Accept Path objects and Windows-style separators as well as 'a/b/c'.
    # (A literal backslash inside a POSIX file name would be read as a separator.)
    carpeta = os.path.dirname(os.fspath(path).replace('\\', '/'))
    carpetas = [parte for parte in carpeta.split('/') if parte]

    if len(carpetas) < 3:
        raise ValueError(
            f"La ruta '{path}' no tiene la estructura <clase>/<BPM>/<niño>/<archivo>."
        )
    clase, carpeta_bpm, numero_nino = carpetas[-3:]

    bpm = re.search(r'\d+', carpeta_bpm)
    if bpm is None:
        raise ValueError(
            f"No se encontró el valor de BPM en la carpeta '{carpeta_bpm}' de la ruta '{path}'."
        )

    # Scalars are broadcast to every row.
    df['class'] = clase
    df['BPM'] = int(bpm.group())
    df['kid number'] = numero_nino
    return df

In [8]:
def borrar_y_renombrar(df):
    """Drop the helper columns and give the remaining ones their final names.

    'angles_window' and 'tiempo_normalizado' are removed. The nine columns
    left are renamed by position, so they must already be in the order
    ROM, biceps EMG, triceps EMG, speed, acceleration, jerk, class, BPM,
    kid number. The input frame is not modified; a new frame is returned.
    """
    nombres_finales = [
        'ROM', 'Biceps EMG', 'Triceps EMG',
        'Speed', 'Acceleration', 'Jerk',
        'Class', 'BPM', 'Kid number',
    ]
    recortado = df.drop(columns=['angles_window', 'tiempo_normalizado'])
    recortado.columns = nombres_finales
    return recortado

In [9]:
def añadir_a_tabla_global(global_df, df):
    """Append one recording to ``global_df`` as a single row.

    The six signal columns of ``df`` are stored as whole Python lists, one
    list per cell. The three identifier columns are reduced to the value of
    their first row. The new row is written at label ``len(global_df)``;
    ``global_df`` is modified in place and returned.
    """
    columnas_senal = ('ROM', 'Biceps EMG', 'Triceps EMG', 'Speed', 'Acceleration', 'Jerk')
    columnas_etiqueta = ('Class', 'BPM', 'Kid number')

    fila = [df[nombre].tolist() for nombre in columnas_senal]
    fila.extend(df[nombre].iloc[0] for nombre in columnas_etiqueta)

    global_df.loc[len(global_df)] = fila
    return global_df

In [10]:
# Layout of the global table, one row per recording: six signal columns
# followed by three columns that identify the recording.
columnas = [
    'ROM', 'Biceps EMG', 'Triceps EMG',
    'Speed', 'Acceleration', 'Jerk',
    'Class', 'BPM', 'Kid number',
]
# Starts empty; rows are appended one recording at a time.
global_df = pd.DataFrame(columns=columnas)

In [11]:
# Process every recording under the data root and append it to global_df.
# Example of a file path:
#   MEDICIONES_COLEGIO/2ESO/42BPM/1/advanced_mdurance-test-240835 (5).csv
root = Path('MEDICIONES_COLEGIO')

# rglob yields files in whatever order the filesystem lists them; sorting makes
# the row order of global_df (and of the exported sheets) reproducible.
for csv_file in sorted(root.rglob('advanced_*.csv')):
    # Ignore the copies Jupyter keeps in its checkpoint folders.
    if '.ipynb_checkpoints' in csv_file.parts:
        continue
    path = csv_file.as_posix()

    # Pipeline: load -> fill ROM -> centre EMG -> derivatives -> time axis
    # -> identifier columns -> final column names -> one row in global_df.
    df = cargar_senal(path)
    df, sampling_rate = upsample_rom(df)
    df = resolver_offsets_emg(df)
    df = calcular_aceleracion(df, sampling_rate)
    df = normalizar_tiempo(df)
    df = columnas_identificadoras(df, path)
    df = borrar_y_renombrar(df)
    global_df = añadir_a_tabla_global(global_df, df)
    df = None  # drop the reference to the per-file table

In [12]:
# Display the accumulated table (the notebook output abbreviates it with "..." rows).
global_df

Unnamed: 0,ROM,Biceps EMG,Triceps EMG,Speed,Acceleration,Jerk,Class,BPM,Kid number
0,"[69.89, 69.88576561129825, 69.8814602272893, 6...","[-464.7238445339335, -437.02884453393335, -403...","[-18.156588653321705, -18.156588653321705, -15...","[-4.230880924123889, -4.230880924123889, -4.30...","[-70.87773057127735, -70.87773057127735, -70.8...","[956.7065392567317, 956.7065392567317, 956.706...",2ESO,42,01
1,"[67.01, 67.03148377759416, 67.05282754130333, ...","[510.354227958454, 510.354227958454, 467.27222...","[134.37219019240547, 125.1401901924055, 131.29...","[20.1878761296766, 20.1878761296766, 20.056307...","[-123.63205075909023, -123.63205075909023, -12...","[1058.0066065203505, 1058.0066065203505, 1058....",2ESO,42,02
2,"[52.37, 52.37604835073974, 52.381888735222255,...","[398.4546213914855, 407.68662139148546, 416.91...","[801.0383592938733, 813.3473592938735, 810.270...","[5.678542108159145, 5.678542108159145, 5.48329...","[-183.31273693688567, -183.31273693688567, -18...","[3089.597120482969, 3089.597120482969, 3089.59...",2ESO,42,03
3,"[81.97, 81.97035530238969, 81.97070264242615, ...","[-8300.718212293243, -8279.176212293249, -8263...","[-2065.4565243186153, -2080.845524318618, -209...","[0.35520636201983435, 0.35520636201983435, 0.3...","[-7.958049844085127, -7.958049844085127, -7.95...","[-407.14566922511426, -407.14566922511426, -40...",2ESO,42,04
4,"[80.81, 80.80904817937069, 80.80809542361894, ...","[1165.4492473422342, 1159.2952473422356, 1159....","[2584.2429703635225, 2575.0109703635226, 2568....","[-0.9512774707995818, -0.9512774707995818, -0....","[-0.9340554886859206, -0.9340554886859206, -0....","[-62.26900940524081, -62.26900940524081, -62.2...",2ESO,42,05
...,...,...,...,...,...,...,...,...,...
352,"[66.4, 66.39429114615749, 66.38851681383866, 6...","[-22.55624494990525, -41.02024494990519, -62.5...","[306.74428042242107, 319.0532804224208, 337.51...","[-5.364579450490648, -5.364579450490648, -5.42...","[-57.8192001204144, -57.8192001204144, -57.819...","[1016.2723773237045, 1016.2723773237045, 1016....",5PRIMARIA,78,34
353,"[74.2, 74.20054165878965, 74.2010527601181, 74...","[-1308.7539746416765, -1308.7539746416765, -13...","[-274.55129281514155, -274.55129281514155, -27...","[0.5082253677998999, 0.5082253677998999, 0.479...","[-26.901615219214964, -26.901615219214964, -26...","[386.12078921750134, 386.12078921750134, 386.1...",5PRIMARIA,78,35
354,"[64.39, 64.38850830946564, 64.38706728747476, ...","[86.2989461690886, 132.45794616908825, 153.998...","[-434.4298059775465, -434.4298059775465, -443....","[-1.4005702133514284, -1.4005702133514284, -1....","[44.667405169704786, 44.667405169704786, 44.66...","[-748.1967486665184, -748.1967486665184, -748....",5PRIMARIA,78,36
355,"[53.68, 53.680823650147246, 53.68162816038451,...","[-44.4988616642313, -26.03586166423156, -26.03...","[464.47278375045653, 470.6267837504565, 476.78...","[0.7740190396615565, 0.7740190396615565, 0.756...","[-16.90275971144903, -16.90275971144903, -16.9...","[100.99921461871661, 100.99921461871661, 100.9...",5PRIMARIA,78,37


In [13]:
#global_df.to_excel('global_table.xlsx', index=False)

In [14]:
# Test-group sizes per class:
# 2ESO: 7 tests of 4 people and 3 tests of 5 (43 in total)
# 2PRIMARIA: 8 tests of 4 people and 2 tests of 3 (38 in total)
# 5PRIMARIA: 8 tests of 4 people and 2 tests of 3 (38 in total)

In [15]:
def crear_bloques_ids(uid_list, sizes):
    """Cut ``uid_list`` into consecutive slices of the given sizes.

    The first slice takes ``sizes[0]`` items, the second the next
    ``sizes[1]`` items, and so on. Items beyond the sum of ``sizes`` are not
    returned; if the list runs out, the remaining slices are short or empty.

    Returns a list with one slice per entry of ``sizes``.
    """
    cortes = []
    inicio = 0
    for tam in sizes:
        fin = inicio + tam
        cortes.append((inicio, fin))
        inicio = fin
    return [uid_list[desde:hasta] for desde, hasta in cortes]

In [16]:
# Split the kids of each class into test groups and export everything to Excel,
# once with the per-kid identifier (UID) and once without it.

# The UID is built on a separate copy so that global_df never carries the
# helper column, even if the cell stops half-way or is run again.
uid = global_df['Class'] + '_' + global_df['Kid number'].astype(str).str.zfill(2)
tabla_con_uid = global_df.assign(UID=uid)

eso2 = tabla_con_uid[tabla_con_uid['Class'] == '2ESO']
prim2 = tabla_con_uid[tabla_con_uid['Class'] == '2PRIMARIA']
prim5 = tabla_con_uid[tabla_con_uid['Class'] == '5PRIMARIA']

eso2_ids = sorted(eso2['UID'].unique())
prim2_ids = sorted(prim2['UID'].unique())
prim5_ids = sorted(prim5['UID'].unique())

eso2_sizes = [5]*3 + [4]*7     # Total: 43
prim_sizes = [4]*8 + [3]*2     # Total: 38

# crear_bloques_ids silently drops surplus UIDs and returns short blocks when
# there are too few, so report any mismatch instead of letting it go unnoticed.
for etiqueta, ids, tamanos in (
    ('2ESO', eso2_ids, eso2_sizes),
    ('2PRIMARIA', prim2_ids, prim_sizes),
    ('5PRIMARIA', prim5_ids, prim_sizes),
):
    if len(ids) != sum(tamanos):
        print(
            f"AVISO: {etiqueta} tiene {len(ids)} UID pero los bloques suman "
            f"{sum(tamanos)}; los grupos de test no cubrirán exactamente a todos los niños."
        )

eso2_uid_blocks = crear_bloques_ids(eso2_ids, eso2_sizes)
prim2_uid_blocks = crear_bloques_ids(prim2_ids, prim_sizes)
prim5_uid_blocks = crear_bloques_ids(prim5_ids, prim_sizes)

# Test group k is made of the k-th block of every class. The number of groups
# follows from the size lists (10 with the sizes above).
test_groups_con_uid = []
for bloques in zip(eso2_uid_blocks, prim2_uid_blocks, prim5_uid_blocks):
    ids_test = [un_uid for bloque in bloques for un_uid in bloque]
    df_test = tabla_con_uid[tabla_con_uid['UID'].isin(ids_test)]
    test_groups_con_uid.append(df_test)

test_groups = [grupo.drop(columns=['UID']) for grupo in test_groups_con_uid]

# NOTE(review): the signal columns hold Python lists; an Excel cell is limited
# to 32,767 characters, so long signals may not survive the export intact.
# Confirm the exported files are complete.
exportaciones = (
    ('global_table_con_UID.xlsx', tabla_con_uid, test_groups_con_uid),
    ('global_table_sin_UID.xlsx', global_df, test_groups),
)
for archivo, tabla, grupos in exportaciones:
    with pd.ExcelWriter(archivo, engine='xlsxwriter') as writer:
        tabla.to_excel(writer, sheet_name='Table', index=False)
        for i, test_df in enumerate(grupos, start=1):
            test_df.to_excel(writer, sheet_name=f'Test{i:02d}', index=False)