# Data prep: Edge Impulse 


## 1. Cargar datos y unificar


In [1]:
import os, json
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

BASE_DIR = 'C:/Users/kevin/Github/Tarea_IA_2/data' 

def load_json_data(base_dir):
    X, y = [], []
    for label in os.listdir(base_dir):
        class_path = os.path.join(base_dir, label)
        if not os.path.isdir(class_path):
            continue

        for fname in os.listdir(class_path):
            if fname.endswith('.json'):
                with open(os.path.join(class_path, fname), 'r') as f:
                    data = json.load(f)

                values = data.get('payload', {}).get('values', [])
                if not values:
                    continue

                arr = np.array(values)
                if arr.shape[1] != 3:
                    continue 

                X.append(arr)
                y.append(label)
    return X, y

X_raw, y_raw = load_json_data(BASE_DIR)

print(f"Clases detectadas: {set(y_raw)}")
print(f"Total muestras: {len(X_raw)}")


Clases detectadas: {'quieto', 'correr', 'girando', 'saltar', 'caminando'}
Total muestras: 25


## 2. Ventaneo
Creamos ventanas de longitud `WINDOW_SIZE` (en muestras) con paso `STEP`. Cada ventana produce un tensor `[WINDOW_SIZE, 3]` y una etiqueta.

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

WINDOW_SIZE = 128
STEP = 32
scaler = StandardScaler()

def make_windows_json(X_raw, y_raw, window_size=WINDOW_SIZE, step=STEP):
    X_out, y_out = [], []
    for arr, label in zip(X_raw, y_raw):
        arr_scaled = scaler.fit_transform(arr)
        for start in range(0, len(arr_scaled)-window_size+1, step):
            X_out.append(arr_scaled[start:start+window_size])
            y_out.append(label)
    return np.array(X_out), np.array(y_out)

X, y = make_windows_json(X_raw, y_raw)
print('Dataset final:', X.shape, y.shape)

Dataset final: (396, 128, 3) (396,)


## 3. Guardar dataset procesado
Guardamos los arrays en `.npz` para cargarlos rápido desde el notebook de entrenamiento.

In [None]:
classes = sorted(np.unique(y))
cls2idx = {c: i for i, c in enumerate(classes)}
y_idx = np.array([cls2idx[c] for c in y])

print('Clases detectadas:', classes)
print('Mapa clase → índice:', cls2idx)

OUT_PATH = r"C:\Users\kevin\Github\Tarea_IA_2\data\data_processed.npz"

os.makedirs(os.path.dirname(OUT_PATH), exist_ok=True)

np.savez_compressed(OUT_PATH, X=X, y=y_idx, classes=np.array(classes))

print('Dataset guardado en:', OUT_PATH)


Clases detectadas: [np.str_('caminando'), np.str_('correr'), np.str_('girando'), np.str_('quieto'), np.str_('saltar')]
Mapa clase → índice: {np.str_('caminando'): 0, np.str_('correr'): 1, np.str_('girando'): 2, np.str_('quieto'): 3, np.str_('saltar'): 4}
Dataset guardado en: C:\Users\kevin\Github\Tarea_IA_2\data\data_processed.npz
