# Training Model: CNN + Wavelet Transform

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from obspy import read
from tqdm import tqdm
from sklearn.model_selection import train_test_split


2025-04-27 17:02:05.868912: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745791326.120781  287014 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745791326.195619  287014 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745791326.808155  287014 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745791326.808186  287014 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745791326.808187  287014 computation_placer.cc:177] computation placer alr

In [2]:
# Dataset split directories. Each one is handed to `load_data`, which reads
# `feature_files.csv`, `wavelet_features.npy`, `arrival_times.npy` and the
# waveform files under `augmented/` from inside it.
train_data_path, val_data_path, test_data_path = (
    '/ruta/a/train',
    '/ruta/a/val',
    '/ruta/a/test',
)

# Directory that receives the evaluation results CSV.
features_path = '/ruta/a/features'

In [None]:

# ------------------- Loading helpers -------------------
def load_data(data_path):
    """Load raw waveforms, wavelet features and arrival times for one split.

    Reads ``feature_files.csv`` inside ``data_path`` and, for every entry of
    its ``file`` column, opens ``<data_path>/augmented/<file>`` with
    ``obspy.read`` and keeps the samples of the first trace.

    Args:
        data_path: Directory containing ``feature_files.csv``,
            ``wavelet_features.npy``, ``arrival_times.npy`` and the
            ``augmented/`` sub-directory with the waveform files.

    Returns:
        Tuple ``(X_raw, X_wavelets, y, files_df)`` where ``X_raw`` has shape
        ``(n_files, n_samples, 1)``, ``X_wavelets`` and ``y`` are the arrays
        stored in the ``.npy`` files, and ``files_df`` is the file listing.

    Raises:
        ValueError: If the listing is empty, if the traces do not all have
            the same shape, or if the number of signals, wavelet feature
            rows and arrival times disagree.
    """
    files_df = pd.read_csv(os.path.join(data_path, 'feature_files.csv'))
    print(f'Cargando señales desde {data_path}...')

    # Load the raw signals (first trace of every stream).
    raw_signals = []
    for file in tqdm(files_df['file']):
        file_path = os.path.join(data_path, 'augmented', file)
        st = read(file_path)
        raw_signals.append(np.asarray(st[0].data))

    if not raw_signals:
        raise ValueError(
            f'No hay archivos listados en feature_files.csv dentro de {data_path}'
        )

    # Fail early with a clear message instead of building a ragged array.
    signal_shapes = {signal.shape for signal in raw_signals}
    if len(signal_shapes) > 1:
        raise ValueError(
            'Las señales no tienen todas la misma forma: '
            f'{sorted(signal_shapes)}'
        )

    X_raw = np.stack(raw_signals)
    X_raw = X_raw.reshape(X_raw.shape[0], -1, 1)  # trailing channel axis for Conv1D

    # Load the precomputed wavelet features.
    X_wavelets = np.load(os.path.join(data_path, 'wavelet_features.npy'))

    # Load the arrival times (regression targets).
    y = np.load(os.path.join(data_path, 'arrival_times.npy'))

    # All three arrays must describe the same samples, in the same order.
    if not (len(X_raw) == len(X_wavelets) == len(y)):
        raise ValueError(
            'Número de muestras inconsistente: '
            f'señales={len(X_raw)}, wavelets={len(X_wavelets)}, tiempos={len(y)}'
        )

    return X_raw, X_wavelets, y, files_df

# ------------------- Data loading -------------------
# Each split goes through the same loader and is then unpacked into the
# names used by the rest of the notebook.
train_split = load_data(train_data_path)
val_split = load_data(val_data_path)
test_split = load_data(test_data_path)

X_raw_train, X_wavelets_train, y_train, train_files = train_split
X_raw_val, X_wavelets_val, y_val, val_files = val_split
X_raw_test, X_wavelets_test, y_test, test_files = test_split

# Report the raw-signal tensor shape of every split.
print('Formas:')
for split_label, raw_array in (
    ('Entrenamiento', X_raw_train),
    ('Validación', X_raw_val),
    ('Prueba', X_raw_test),
):
    print(f'{split_label} señales crudas: {raw_array.shape}')

In [None]:
# ------------------- Model definition -------------------
def create_model(input_shape_raw, input_shape_wavelets):
    """Build the two-input Keras model (raw signal + wavelet features).

    The raw-signal branch applies batch normalisation followed by three
    Conv1D stages (32, 64 and 128 filters, kernel size 5); the first two are
    followed by max pooling and the last one by global average pooling. The
    wavelet branch is a single 128-unit dense layer with batch normalisation
    and dropout. Both branches are concatenated and passed through two dense
    layers (256 and 128 units) before a single linear output unit.

    Args:
        input_shape_raw: Per-sample shape of the raw-signal input.
        input_shape_wavelets: Per-sample shape of the wavelet-feature input.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[raw, wavelets]`` inputs.
    """
    layers = tf.keras.layers

    # Raw-signal branch: Conv -> BN -> Pool twice, then Conv -> BN -> global pool.
    raw_input = layers.Input(shape=input_shape_raw)
    signal_branch = layers.BatchNormalization()(raw_input)
    for n_filters in (32, 64):
        signal_branch = layers.Conv1D(n_filters, 5, padding='same', activation='relu')(signal_branch)
        signal_branch = layers.BatchNormalization()(signal_branch)
        signal_branch = layers.MaxPooling1D(2)(signal_branch)
    signal_branch = layers.Conv1D(128, 5, padding='same', activation='relu')(signal_branch)
    signal_branch = layers.BatchNormalization()(signal_branch)
    signal_branch = layers.GlobalAveragePooling1D()(signal_branch)

    # Wavelet-feature branch.
    wavelet_input = layers.Input(shape=input_shape_wavelets)
    wavelet_branch = layers.BatchNormalization()(wavelet_input)
    wavelet_branch = layers.Dense(128, activation='relu')(wavelet_branch)
    wavelet_branch = layers.BatchNormalization()(wavelet_branch)
    wavelet_branch = layers.Dropout(0.3)(wavelet_branch)

    # Merge both branches and run the dense head.
    head = layers.concatenate([signal_branch, wavelet_branch])
    for n_units, drop_rate in ((256, 0.3), (128, 0.2)):
        head = layers.Dense(n_units, activation='relu')(head)
        head = layers.BatchNormalization()(head)
        head = layers.Dropout(drop_rate)(head)

    output = layers.Dense(1)(head)

    return tf.keras.Model(inputs=[raw_input, wavelet_input], outputs=output)

# Per-sample input shapes (the leading batch axis is dropped).
n_timesteps = X_raw_train.shape[1]
n_wavelet_features = X_wavelets_train.shape[1]
input_shape_raw = (n_timesteps, 1)
input_shape_wavelets = (n_wavelet_features,)

model = create_model(input_shape_raw, input_shape_wavelets)

# Adam optimiser; MSE is the training loss, MAE and MSE are tracked as metrics.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae', 'mse'])

model.summary()


In [None]:
# ------------------- Callbacks -------------------
# All three callbacks watch the validation MAE (lower is better).
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_mae',
    patience=15,
    restore_best_weights=True,
    mode='min',
)
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_mae',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    mode='min',
)
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_mae',
    save_best_only=True,
    mode='min',
)
callbacks = [early_stopping, lr_on_plateau, best_checkpoint]

# ------------------- Training -------------------
train_inputs = [X_raw_train, X_wavelets_train]
val_inputs = [X_raw_val, X_wavelets_val]

history = model.fit(
    x=train_inputs,
    y=y_train,
    validation_data=(val_inputs, y_val),
    epochs=100,
    batch_size=32,
    callbacks=callbacks,
    verbose=1,
)


In [None]:
# ------------------- Evaluation -------------------
print('Realizando predicciones en conjunto de prueba...')
y_pred = model.predict([X_raw_test, X_wavelets_test])

# Flatten both vectors once so the subtraction below is strictly element-wise,
# even if the targets were stored as a column vector.
predicted_times = y_pred.flatten()
real_times = np.asarray(y_test).reshape(-1)

# Collect per-file results.
results_df = pd.DataFrame({
    'file': test_files['file'],
    'real_time': real_times,
    'predicted_time': predicted_times,
    'error': predicted_times - real_times
})

# Largest absolute errors first.
results_df['abs_error'] = np.abs(results_df['error'])
results_df = results_df.sort_values('abs_error', ascending=False)

# Make sure the output directory exists so the export cannot fail after the
# (expensive) training and prediction steps have already run.
os.makedirs(features_path, exist_ok=True)
results_df.to_csv(os.path.join(features_path, 'model_evaluation_results.csv'), index=False)

In [None]:
# ------------------- Final metrics -------------------
print('\nMétricas de rendimiento en conjunto de prueba:')
signed_error = results_df['error']

mae = np.mean(np.abs(signed_error))
rmse = np.sqrt(np.mean(signed_error**2))
max_error = np.max(results_df['abs_error'])

# Same formatting for every metric: four decimals, in seconds.
for metric_label, metric_value in (
    ('MAE', mae),
    ('RMSE', rmse),
    ('Máximo error', max_error),
):
    print(f'{metric_label}: {metric_value:.4f} segundos')


In [None]:
# ------------------- Visualization -------------------
errors = y_pred.flatten() - y_test

fig, (ax_hist, ax_scatter) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: histogram of signed errors with a zero-error reference line.
ax_hist.hist(errors, bins=50, edgecolor='black')
ax_hist.set_title('Distribución de errores')
ax_hist.set_xlabel('Error (segundos)')
ax_hist.set_ylabel('Frecuencia')
ax_hist.axvline(x=0, color='r', linestyle='--', label='Error cero')
ax_hist.legend()

# Right panel: predicted vs. real times with the identity line.
identity_line = [y_test.min(), y_test.max()]
ax_scatter.scatter(y_test, y_pred.flatten(), alpha=0.5)
ax_scatter.plot(identity_line, identity_line, 'r--')
ax_scatter.set_xlabel('Tiempo real (s)')
ax_scatter.set_ylabel('Tiempo predicho (s)')
ax_scatter.set_title('Predicción vs Real')

fig.tight_layout()
plt.show()

# Additional error statistics.
abs_errors = np.abs(errors)
print('\nEstadísticas de errores:')
print(f'Error medio: {np.mean(errors):.4f} segundos')
print(f'Desviación estándar: {np.std(errors):.4f} segundos')
print(f'Mediana del error: {np.median(errors):.4f} segundos')
# Share of predictions within each tolerance (in seconds).
for tolerance in (0.5, 1.0):
    print(f'Error dentro de ±{tolerance}s: {100*np.mean(abs_errors < tolerance):.1f}%')