## 1️⃣ Instalación de Dependencias (si es necesario)

In [25]:
# Ensure TensorFlow and Keras can be imported, installing them with pip when missing.
import subprocess
import sys


def _pip_install(package):
    """Quietly pip-install `package` using the interpreter that runs this kernel."""
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', package, '-q'])


try:
    import tensorflow as tf
except ImportError:
    # Not installed yet: install it, then retry the import.
    print("‚öôÔ∏è Instalando TensorFlow...")
    _pip_install('tensorflow')
    import tensorflow as tf
    print(f"‚úÖ TensorFlow {tf.__version__} instalado correctamente")
else:
    # Import succeeded on the first attempt: just report the version found.
    print(f"‚úÖ TensorFlow {tf.__version__} ya instalado")

try:
    import keras
except ImportError:
    print("‚öôÔ∏è Instalando Keras...")
    _pip_install('keras')
    import keras
    print(f"‚úÖ Keras {keras.__version__} instalado")
else:
    print(f"‚úÖ Keras {keras.__version__} disponible")

‚úÖ TensorFlow 2.20.0 ya instalado
‚úÖ Keras 3.12.0 disponible


---

## 2️⃣ Importación de Librerías

In [26]:
# Manipulaci√≥n de datos
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import json
import warnings
warnings.filterwarnings('ignore')

# Visualizaci√≥n
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Preprocesamiento
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Configuraci√≥n
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Reproducibilidad
np.random.seed(42)
tf.random.set_seed(42)

print(f"‚úÖ Librer√≠as importadas correctamente")
print(f"   TensorFlow: {tf.__version__}")
print(f"   Keras: {keras.__version__}")
print(f"   GPU disponible: {tf.config.list_physical_devices('GPU')}")

‚úÖ Librer√≠as importadas correctamente
   TensorFlow: 2.20.0
   Keras: 3.12.0
   GPU disponible: []


---

## 3️⃣ Carga de Datos Integrados

In [None]:
# Project paths. The base directory keeps its original default but can be
# overridden through the TECH_BASE_DIR environment variable, so the notebook
# can run on another machine without editing this cell.
import os

BASE_DIR = Path(os.environ.get('TECH_BASE_DIR', '/home/els4nchez/Videos/TECH'))
DATA_DIR = BASE_DIR / 'data'
UNIFICACION_DIR = BASE_DIR / 'unificacion'
DATA_PROCESADO_DIR = UNIFICACION_DIR / 'datos_procesados'
FIGURAS_DIR = UNIFICACION_DIR / 'figuras'
MODELOS_DIR = UNIFICACION_DIR / 'modelos'

# Create the models directory if it does not exist yet
MODELOS_DIR.mkdir(parents=True, exist_ok=True)

print("üìÇ Cargando dataset integrado...\n")

# Integrated dataset (produced by notebook 06)
ruta_datos = DATA_PROCESADO_DIR / 'datos_integrados_precios_sentimientos.csv'
if not ruta_datos.is_file():
    # Fail early with an actionable message instead of a bare pandas error.
    raise FileNotFoundError(
        f"Dataset integrado no encontrado: {ruta_datos} "
        "(ejecute primero el notebook 06 o defina TECH_BASE_DIR)"
    )

# The first CSV column is parsed as the datetime index. Sorting guarantees
# chronological order, which the positional train/val/test split relies on;
# it changes nothing when the file is already ordered.
df = pd.read_csv(ruta_datos, index_col=0, parse_dates=True).sort_index()

print(f"‚úÖ Dataset cargado: {df.shape[0]:,} d√≠as √ó {df.shape[1]} variables")
print(f"   Rango: {df.index.min().strftime('%Y-%m-%d')} ‚Üí {df.index.max().strftime('%Y-%m-%d')}")
print(f"\nüìä Columnas disponibles:")
print(f"   {list(df.columns)}")


üìÇ Cargando dataset integrado...

‚úÖ Dataset cargado: 2,273 d√≠as √ó 23 variables
   Rango: 2016-01-03 ‚Üí 2025-10-31

üìä Columnas disponibles:
   ['Open', 'High', 'Low', 'Close', 'Volume', 'Returns', 'Volatility_30', 'outlier_iqr', 'z_score', 'outlier_zscore', 'anomaly_score', 'outlier_iforest', 'outlier_count', 'outlier_consensus', 'sentiment_numeric', 'sentiment', 'sentiment_score', 'num_noticias', 'sentiment_ma7', 'sentiment_ma30', 'sentiment_extreme', 'has_outlier', 'outlier_AND_extreme_sentiment']

üîç Valores faltantes por columna:
Returns            1
Volatility_30     21
sentiment_ma7      6
sentiment_ma30    29
dtype: int64


Unnamed: 0,Open,High,Low,Close,Volume,Returns,Volatility_30,outlier_iqr,z_score,outlier_zscore,...,outlier_consensus,sentiment_numeric,sentiment,sentiment_score,num_noticias,sentiment_ma7,sentiment_ma30,sentiment_extreme,has_outlier,outlier_AND_extreme_sentiment
2016-01-03,1062.341,1066.321,1061.729,1063.059,1.61136,,,False,1.195449,False,...,False,0.083333,0.03622,0.813113,12,,,False,False,False
2016-01-04,1063.059,1083.488,1062.239,1072.979,76.56875,0.009288,,False,1.179487,False,...,False,0.153846,0.104399,0.835821,13,,,False,False,False
2016-01-05,1072.99,1082.048,1072.629,1077.632,72.43959,0.004327,,False,1.172,False,...,False,-0.263158,-0.226055,0.776322,19,,,False,False,False


---

## 4️⃣ Preparación de Features

In [28]:
# Feature sets used by the two models.

# Technical (price-derived) columns shared by both models
features_base = ['Open', 'High', 'Low', 'Close', 'Returns', 'Volatility_30']

# Same columns plus the sentiment signals
features_sentiment = [*features_base, 'sentiment', 'sentiment_ma7', 'sentiment_ma30']

# Column to predict
target = 'Close'

print("üìã CONFIGURACI√ìN DE FEATURES")
print("=" * 80)
for section_title, feature_list in (
    ("\nüîπ Modelo Base (sin sentimiento):", features_base),
    ("\nüî∏ Modelo con Sentimiento:", features_sentiment),
):
    print(section_title)
    print(f"   Features: {feature_list}")
    print(f"   Total: {len(feature_list)} variables")

print(f"\nüéØ Variable Objetivo: {target}")

# Keep only the modelling columns and drop every row with a NaN in any of
# them. features_sentiment is a superset of features_base, so one pass
# cleans the data for both models.
df_clean = df.loc[:, features_sentiment].dropna()

print(f"\nüßπ Despu√©s de limpieza:")
print(f"   Datos v√°lidos: {len(df_clean):,} d√≠as")
print(f"   Datos eliminados: {len(df) - len(df_clean):,} d√≠as")

üìã CONFIGURACI√ìN DE FEATURES

üîπ Modelo Base (sin sentimiento):
   Features: ['Open', 'High', 'Low', 'Close', 'Returns', 'Volatility_30']
   Total: 6 variables

üî∏ Modelo con Sentimiento:
   Features: ['Open', 'High', 'Low', 'Close', 'Returns', 'Volatility_30', 'sentiment', 'sentiment_ma7', 'sentiment_ma30']
   Total: 9 variables

üéØ Variable Objetivo: Close

üßπ Despu√©s de limpieza:
   Datos v√°lidos: 2,238 d√≠as
   Datos eliminados: 35 d√≠as


---

## 5️⃣ División Temporal de Datos (Train/Val/Test)

In [29]:
# Chronological split: first 60% train, next 20% validation, remainder test.
n_rows_clean = len(df_clean)
train_size = int(n_rows_clean * 0.60)
val_size = int(n_rows_clean * 0.20)
test_size = n_rows_clean - train_size - val_size  # absorbs the rounding remainder

# Positional slicing keeps the rows in their existing order (no shuffling).
val_end = train_size + val_size
df_train = df_clean.iloc[:train_size]
df_val = df_clean.iloc[train_size:val_end]
df_test = df_clean.iloc[val_end:]

# (console heading, legend label, line colour, frame) for each split
split_views = (
    ("\nüîπ Train (60%):", 'Train (60%)', 'blue', df_train),
    ("\nüî∏ Validation (20%):", 'Validation (20%)', 'orange', df_val),
    ("\nüî∫ Test (20%):", 'Test (20%)', 'green', df_test),
)

print("üìä DIVISI√ìN TEMPORAL DE DATOS")
print("=" * 80)
for heading, _legend, _colour, split_frame in split_views:
    first_day = split_frame.index.min().strftime('%Y-%m-%d')
    last_day = split_frame.index.max().strftime('%Y-%m-%d')
    print(heading)
    print(f"   Filas: {len(split_frame):,}")
    print(f"   Rango: {first_day} ‚Üí {last_day}")

print(f"\n‚úÖ Total: {len(df_clean):,} d√≠as")

# Plot the closing price of each split as one coloured line per split
fig = go.Figure()
for _heading, legend_label, line_colour, split_frame in split_views:
    fig.add_trace(go.Scatter(
        x=split_frame.index,
        y=split_frame['Close'],
        mode='lines',
        name=legend_label,
        line=dict(color=line_colour)
    ))

fig.update_layout(
    title='Divisi√≥n Temporal de Datos: Train / Validation / Test',
    xaxis_title='Fecha',
    yaxis_title='Precio de Cierre (USD)',
    template='plotly_white',
    height=500
)

fig.show()

üìä DIVISI√ìN TEMPORAL DE DATOS

üîπ Train (60%):
   Filas: 1,342
   Rango: 2016-02-02 ‚Üí 2022-01-15

üî∏ Validation (20%):
   Filas: 447
   Rango: 2022-01-17 ‚Üí 2024-05-01

üî∫ Test (20%):
   Filas: 449
   Rango: 2024-05-02 ‚Üí 2025-10-14

‚úÖ Total: 2,238 d√≠as


---

## 6️⃣ Normalización de Datos

In [30]:
# Each scaler is fitted on the training split only and then applied to all
# three splits, so no validation/test statistics leak into training.
# NOTE(review): because of that, validation/test values can fall outside
# [0, 1] whenever they exceed the range seen in training.
_splits = (df_train, df_val, df_test)

# Base (technical) features
scaler_base = MinMaxScaler().fit(df_train[features_base])
X_train_base_scaled, X_val_base_scaled, X_test_base_scaled = (
    scaler_base.transform(split[features_base]) for split in _splits
)

# Base + sentiment features
scaler_sentiment = MinMaxScaler().fit(df_train[features_sentiment])
X_train_sent_scaled, X_val_sent_scaled, X_test_sent_scaled = (
    scaler_sentiment.transform(split[features_sentiment]) for split in _splits
)

# The target gets its own scaler so predictions can be mapped back to prices
# with scaler_target.inverse_transform.
scaler_target = MinMaxScaler().fit(df_train[[target]])
y_train_scaled, y_val_scaled, y_test_scaled = (
    scaler_target.transform(split[[target]]) for split in _splits
)

print("‚úÖ Normalizaci√≥n completada (MinMaxScaler)")
print(f"\nüìä Rangos de normalizaci√≥n:")
for range_label, scaled_train in (
    ("Features base", X_train_base_scaled),
    ("Features sentiment", X_train_sent_scaled),
    ("Target", y_train_scaled),
):
    print(f"   {range_label}: [{scaled_train.min():.3f}, {scaled_train.max():.3f}]")

‚úÖ Normalizaci√≥n completada (MinMaxScaler)

üìä Rangos de normalizaci√≥n:
   Features base: [0.000, 1.000]
   Features sentiment: [0.000, 1.000]
   Target: [0.000, 1.000]


---

## 7️⃣ Creación de Secuencias para LSTM

In [31]:
def create_sequences(X, y, seq_length=60):
    """
    Build sliding-window sequences for an LSTM.

    Sample k pairs the window X[k : k + seq_length] with the target at
    position k + seq_length, i.e. the row right after the window.

    Args:
        X: Scaled features, array-like of shape (n_rows, n_features).
        y: Scaled target, array-like with at least n_rows entries along its
            first axis (1D, or 2D such as (n_rows, 1)).
        seq_length: Number of past rows in each window (lookback). Must be
            a positive integer.

    Returns:
        X_seq: Array of shape (n_rows - seq_length, seq_length, n_features).
        y_seq: Array of shape (n_rows - seq_length, *y.shape[1:]); a 2D
            (n_rows, 1) target therefore yields a 2D (n_samples, 1) result.
        When n_rows <= seq_length both arrays have zero samples but keep the
        trailing dimensions above, so downstream shape checks still work.

    Raises:
        ValueError: If seq_length is smaller than 1.
        IndexError: If y has fewer rows than X.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    # Accept lists and pandas objects as well as NumPy arrays.
    X = np.asarray(X)
    y = np.asarray(y)

    n_rows = len(X)
    if n_rows <= seq_length:
        # Not enough history for a single window: return empty arrays that
        # keep their rank instead of collapsing to shape (0,).
        empty_X = np.empty((0, seq_length) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_X, empty_y

    target_positions = range(seq_length, n_rows)
    X_seq = np.array([X[i - seq_length:i] for i in target_positions])
    # Index y element by element so a too-short y raises IndexError rather
    # than silently producing fewer targets than windows.
    y_seq = np.array([y[i] for i in target_positions])

    return X_seq, y_seq

# Lookback window: each sample sees the previous 60 rows
SEQ_LENGTH = 60

print(f"üîß Creando secuencias (lookback={SEQ_LENGTH} d√≠as)...\n")

# BASE model sequences; the targets built here are shared by both models
# because both predict the same scaled target.
X_train_base_seq, y_train_seq = create_sequences(X_train_base_scaled, y_train_scaled, SEQ_LENGTH)
X_val_base_seq, y_val_seq = create_sequences(X_val_base_scaled, y_val_scaled, SEQ_LENGTH)
X_test_base_seq, y_test_seq = create_sequences(X_test_base_scaled, y_test_scaled, SEQ_LENGTH)

# SENTIMENT model sequences: only the feature windows are kept
X_train_sent_seq = create_sequences(X_train_sent_scaled, y_train_scaled, SEQ_LENGTH)[0]
X_val_sent_seq = create_sequences(X_val_sent_scaled, y_val_scaled, SEQ_LENGTH)[0]
X_test_sent_seq = create_sequences(X_test_sent_scaled, y_test_scaled, SEQ_LENGTH)[0]

print("‚úÖ Secuencias creadas")
for shape_heading, (train_windows, val_windows, test_windows) in (
    ("\nüìä MODELO BASE (sin sentimiento):",
     (X_train_base_seq, X_val_base_seq, X_test_base_seq)),
    ("\nüìä MODELO CON SENTIMIENTO:",
     (X_train_sent_seq, X_val_sent_seq, X_test_sent_seq)),
):
    print(shape_heading)
    print(f"   Train: {train_windows.shape} ‚Üí {y_train_seq.shape}")
    print(f"   Val:   {val_windows.shape} ‚Üí {y_val_seq.shape}")
    print(f"   Test:  {test_windows.shape} ‚Üí {y_test_seq.shape}")

print(f"\nüí° Formato: (samples, seq_length, features)")
print(f"   - samples: n√∫mero de secuencias")
print(f"   - seq_length: {SEQ_LENGTH} d√≠as de historia")
print(f"   - features: {len(features_base)} (base) / {len(features_sentiment)} (sentiment)")

üîß Creando secuencias (lookback=60 d√≠as)...



‚úÖ Secuencias creadas

üìä MODELO BASE (sin sentimiento):
   Train: (1282, 60, 6) ‚Üí (1282, 1)
   Val:   (387, 60, 6) ‚Üí (387, 1)
   Test:  (389, 60, 6) ‚Üí (389, 1)

üìä MODELO CON SENTIMIENTO:
   Train: (1282, 60, 9) ‚Üí (1282, 1)
   Val:   (387, 60, 9) ‚Üí (387, 1)
   Test:  (389, 60, 9) ‚Üí (389, 1)

üí° Formato: (samples, seq_length, features)
   - samples: n√∫mero de secuencias
   - seq_length: 60 d√≠as de historia
   - features: 6 (base) / 9 (sentiment)


---

## 8️⃣ Construcción del Modelo LSTM Base (sin sentimiento)

In [32]:
def build_lstm_model(input_shape, model_name="LSTM", units=(256, 128, 64),
                     dropout=0.2, learning_rate=0.001):
    """
    Build and compile a stacked LSTM regressor.

    With the default arguments this is the 256-128-64 architecture: three
    LSTM layers, each followed by Dropout(0.2), and a single-unit Dense
    output, compiled with Adam (learning rate 0.001), MSE loss and an MAE
    metric.

    Args:
        input_shape: Tuple (seq_length, n_features).
        model_name: Name given to the Sequential model.
        units: Sizes of the stacked LSTM layers, first to last. Must contain
            at least one entry.
        dropout: Dropout rate applied after every LSTM layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled model.

    Raises:
        ValueError: If `units` is empty.
    """
    units = tuple(units)
    if not units:
        raise ValueError("units must contain at least one LSTM layer size")

    model = Sequential(name=model_name)

    # Input layer
    model.add(Input(shape=input_shape))

    # Stacked LSTM + Dropout pairs, named lstm_1/dropout_1, lstm_2/... Every
    # LSTM except the last returns the full sequence so the next LSTM gets a
    # 3D input; the last returns only its final state for the Dense head.
    n_layers = len(units)
    for position, layer_units in enumerate(units, start=1):
        model.add(LSTM(
            layer_units,
            return_sequences=position < n_layers,
            name=f'lstm_{position}'
        ))
        model.add(Dropout(dropout, name=f'dropout_{position}'))

    # Output layer: one value per sample
    model.add(Dense(1, name='output'))

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
        metrics=['mae']
    )

    return model

# Instantiate the baseline model (technical features only) and show its layers
print("üîß Construyendo Modelo LSTM Base (sin sentimiento)...\n")

base_input_shape = (SEQ_LENGTH, len(features_base))
model_base = build_lstm_model(input_shape=base_input_shape, model_name="LSTM_Base")

print("‚úÖ Modelo Base construido")
rule_line = "=" * 80
print(f"\n{rule_line}")
model_base.summary()
print(rule_line)

üîß Construyendo Modelo LSTM Base (sin sentimiento)...

‚úÖ Modelo Base construido

‚úÖ Modelo Base construido





---

## 9️⃣ Entrenamiento del Modelo Base

In [33]:
# Training callbacks for the baseline model; all three monitor val_loss.
best_base_checkpoint = str(MODELOS_DIR / 'lstm_base_best.keras')
callbacks_base = [
    # Stop after 15 epochs without improvement and restore the best weights
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
    # Save the model to disk only when val_loss improves
    ModelCheckpoint(filepath=best_base_checkpoint, monitor='val_loss', save_best_only=True, verbose=1),
    # Halve the learning rate after 5 stagnant epochs, with a floor of 1e-6
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1),
]

rule_line = "=" * 80
print("üöÄ Entrenando Modelo Base...\n")
print("\n".join([
    "Configuraci√≥n:",
    "  - Epochs: 100 (con EarlyStopping)",
    "  - Batch size: 32",
    "  - Learning rate: 0.001 (con ReduceLROnPlateau)",
    "  - Patience: 15 epochs\n",
    rule_line,
]))

# Fit on the training sequences, validating on the validation sequences
history_base = model_base.fit(
    x=X_train_base_seq,
    y=y_train_seq,
    validation_data=(X_val_base_seq, y_val_seq),
    epochs=100,
    batch_size=32,
    callbacks=callbacks_base,
    verbose=1,
)

print(f"\n{rule_line}")
print("‚úÖ Entrenamiento completado")

üöÄ Entrenando Modelo Base...

Configuraci√≥n:
  - Epochs: 100 (con EarlyStopping)
  - Batch size: 32
  - Learning rate: 0.001 (con ReduceLROnPlateau)
  - Patience: 15 epochs



Epoch 1/100
[1m41/41[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 89ms/step - loss: 0.0254 - mae: 0.0989
Epoch 1: val_loss improved from None to 0.00601, saving model to /home/els4nchez/Videos/TECH/unificacion/modelos/lstm_base_best.keras

Epoch 1: val_loss improved from None to 0.00601, saving model to /home/els4nchez/Videos/TECH/unificacion/modelos/lstm_base_best.keras
[1m41/41[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m9s[0m 112ms/step - loss: 0.0102 - mae: 0.0625 - val_loss: 0.0060 - val_mae: 0.0586 - learning_rate: 0.0010
Epoch 2/100
[1m41/41[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m9s[0m 112ms/step - loss: 0.0102 - mae: 0.0625 - val_loss: 0.0060 - val_mae: 0.0586 - learning_rate: 0.0010
Epoch 2/100
[1m41/41[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 90ms/step - loss: 0.0029 - mae: 0.0390

In [34]:
# Learning curves for the baseline model: loss on the left, MAE on the right.
# (history key, label, subplot column) for every plotted curve
curve_specs = (
    ('loss', 'Train Loss', 1),
    ('val_loss', 'Val Loss', 1),
    ('mae', 'Train MAE', 2),
    ('val_mae', 'Val MAE', 2),
)

fig = make_subplots(rows=1, cols=2, subplot_titles=('Loss (MSE)', 'MAE'))

for history_key, curve_label, panel_col in curve_specs:
    fig.add_trace(
        go.Scatter(y=history_base.history[history_key], mode='lines', name=curve_label),
        row=1, col=panel_col
    )

for panel_col, y_axis_title in ((1, "Loss"), (2, "MAE")):
    fig.update_xaxes(title_text="Epoch", row=1, col=panel_col)
    fig.update_yaxes(title_text=y_axis_title, row=1, col=panel_col)

fig.update_layout(
    title_text="Curvas de Aprendizaje - Modelo Base",
    template='plotly_white',
    height=400
)

fig.show()

# These are the values from the LAST epoch that ran. The training cell uses
# EarlyStopping(restore_best_weights=True), so the weights kept in the model
# may come from an earlier epoch than the one reported here.
print(f"\nüìä Resultados finales:")
for history_key, curve_label, _panel_col in curve_specs:
    print(f"   {curve_label}: {history_base.history[history_key][-1]:.6f}")


üìä Resultados finales:
   Train Loss: 0.001241
   Val Loss: 0.002267
   Train MAE: 0.022700
   Val MAE: 0.032848


---

## 🔟 Construcción y Entrenamiento del Modelo con Sentimiento

In [35]:
# Instantiate the model that also receives the sentiment features
print("üîß Construyendo Modelo LSTM con Sentimiento...\n")

sentiment_input_shape = (SEQ_LENGTH, len(features_sentiment))
model_sentiment = build_lstm_model(input_shape=sentiment_input_shape, model_name="LSTM_Sentiment")

print("‚úÖ Modelo con Sentimiento construido")
rule_line = "=" * 80
print(f"\n{rule_line}")
model_sentiment.summary()
print(rule_line)

üîß Construyendo Modelo LSTM con Sentimiento...

‚úÖ Modelo con Sentimiento construido

‚úÖ Modelo con Sentimiento construido





In [36]:
# Training callbacks for the sentiment model; same policy as the baseline
# run, with its own checkpoint file.
best_sentiment_checkpoint = str(MODELOS_DIR / 'lstm_sentiment_best.keras')
callbacks_sentiment = [
    # Stop after 15 epochs without improvement and restore the best weights
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
    # Save the model to disk only when val_loss improves
    ModelCheckpoint(filepath=best_sentiment_checkpoint, monitor='val_loss', save_best_only=True, verbose=1),
    # Halve the learning rate after 5 stagnant epochs, with a floor of 1e-6
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1),
]

rule_line = "=" * 80
print("üöÄ Entrenando Modelo con Sentimiento...\n")
print(rule_line)

# Fit on the sentiment-augmented sequences; targets are shared with the baseline
history_sentiment = model_sentiment.fit(
    x=X_train_sent_seq,
    y=y_train_seq,
    validation_data=(X_val_sent_seq, y_val_seq),
    epochs=100,
    batch_size=32,
    callbacks=callbacks_sentiment,
    verbose=1,
)

print(f"\n{rule_line}")
print("‚úÖ Entrenamiento completado")

üöÄ Entrenando Modelo con Sentimiento...

Epoch 1/100
Epoch 1/100


[1m41/41[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 85ms/step - loss: 0.0364 - mae: 0.1243
Epoch 1: val_loss improved from None to 0.00816, saving model to /home/els4nchez/Videos/TECH/unificacion/modelos/lstm_sentiment_best.keras

Epoch 1: val_loss improved from None to 0.00816, saving model to /home/els4nchez/Videos/TECH/unificacion/modelos/lstm_sentiment_best.keras
[1m41/41[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m8s[0m 106ms/step - loss: 0.0143 - mae: 0.0760 - val_loss: 0.0082 - val_mae: 0.0655 - learning_rate: 0.0010
Epoch 2/100
[1m41/41[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m8s[0m 106ms/step - loss: 0.0143 - mae: 0.0760 - val_loss: 0.0082 - val_mae: 0.0655 - learning_rate: 0.0010
Epoch 2/100
[1m41/41[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 89ms/step - loss: 0.0031 - mae: 0.0399
E

In [37]:
# Learning curves for the sentiment model: loss on the left, MAE on the right.
sentiment_history = history_sentiment.history

# (history key, label) grouped by the subplot column they are drawn in
curves_by_column = {
    1: (('loss', 'Train Loss'), ('val_loss', 'Val Loss')),
    2: (('mae', 'Train MAE'), ('val_mae', 'Val MAE')),
}
y_titles_by_column = {1: "Loss", 2: "MAE"}

fig = make_subplots(rows=1, cols=2, subplot_titles=('Loss (MSE)', 'MAE'))

for column, column_curves in curves_by_column.items():
    for metric_key, metric_label in column_curves:
        fig.add_trace(
            go.Scatter(y=sentiment_history[metric_key], mode='lines', name=metric_label),
            row=1, col=column
        )
    fig.update_xaxes(title_text="Epoch", row=1, col=column)
    fig.update_yaxes(title_text=y_titles_by_column[column], row=1, col=column)

fig.update_layout(
    title_text="Curvas de Aprendizaje - Modelo con Sentimiento",
    template='plotly_white',
    height=400
)

fig.show()

# These are the values from the LAST epoch that ran. The training cell uses
# EarlyStopping(restore_best_weights=True), so the weights kept in the model
# may come from an earlier epoch than the one reported here.
print(f"\nüìä Resultados finales:")
for column_curves in curves_by_column.values():
    for metric_key, metric_label in column_curves:
        print(f"   {metric_label}: {sentiment_history[metric_key][-1]:.6f}")


üìä Resultados finales:
   Train Loss: 0.001136
   Val Loss: 0.002345
   Train MAE: 0.022401
   Val MAE: 0.033658


---

## 1️⃣1️⃣ Evaluación en Conjunto de Test

In [38]:
# Predict on the test sequences and map everything back to price units
print("üîÆ Generando predicciones en conjunto de test...\n")

# Actual prices: undo the target scaling on the test targets
y_test_real = scaler_target.inverse_transform(y_test_seq)

# Baseline model
y_pred_base_scaled = model_base.predict(X_test_base_seq, verbose=0)
y_pred_base = scaler_target.inverse_transform(y_pred_base_scaled)

# Sentiment model
y_pred_sent_scaled = model_sentiment.predict(X_test_sent_seq, verbose=0)
y_pred_sent = scaler_target.inverse_transform(y_pred_sent_scaled)

print("‚úÖ Predicciones generadas")
print(f"   Shape: {y_pred_base.shape}")

üîÆ Generando predicciones en conjunto de test...



‚úÖ Predicciones generadas
   Shape: (389, 1)
‚úÖ Predicciones generadas
   Shape: (389, 1)


In [39]:
# Calcular m√©tricas de evaluaci√≥n
def calculate_metrics(y_true, y_pred, model_name):
    """
    Compute regression metrics for one model's predictions.

    Args:
        y_true: Actual values, array-like of shape (n,) or (n, 1).
        y_pred: Predicted values with the same number of elements as y_true.
        model_name: Label stored under the 'model' key of the result.

    Returns:
        dict with the keys 'model', 'MSE', 'RMSE', 'MAE', the R-squared key
        and 'MAPE'. All metric values are plain Python floats. MAPE is a
        percentage computed only over samples whose true value is non-zero;
        it is NaN when every true value is zero.

    Raises:
        ValueError: If y_true and y_pred do not hold the same number of
            elements.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # MAPE (Mean Absolute Percentage Error). Force both arrays to the same
    # shape first: subtracting an (n, 1) array from an (n,) array would
    # broadcast to an (n, n) matrix and silently give a wrong average.
    true_values = np.asarray(y_true, dtype=float)
    pred_values = np.asarray(y_pred, dtype=float).reshape(true_values.shape)

    # Leave out zero targets so the ratio is never a division by zero.
    nonzero = true_values != 0
    if nonzero.any():
        relative_errors = (true_values[nonzero] - pred_values[nonzero]) / true_values[nonzero]
        mape = np.mean(np.abs(relative_errors)) * 100
    else:
        mape = np.nan

    return {
        'model': model_name,
        'MSE': float(mse),
        'RMSE': float(rmse),
        'MAE': float(mae),
        'R¬≤': float(r2),
        'MAPE': float(mape)
    }

# Test-set metrics for both models, in price units
metrics_base = calculate_metrics(y_test_real, y_pred_base, 'LSTM Base')
metrics_sent = calculate_metrics(y_test_real, y_pred_sent, 'LSTM + Sentimiento')

# One row per model, for a side-by-side comparison
df_metrics = pd.DataFrame([metrics_base, metrics_sent])

rule_line = "=" * 80
print("üìä COMPARACI√ìN DE MODELOS - CONJUNTO DE TEST")
print(rule_line)
print(df_metrics.to_string(index=False))
print(rule_line)

# Relative change versus the baseline, in percent. For the error metrics
# (RMSE, MAE) a positive value means the sentiment model has the lower
# error; for R2 a positive value means it has the higher score.
mejora_rmse = (metrics_base['RMSE'] - metrics_sent['RMSE']) / metrics_base['RMSE'] * 100
mejora_mae = (metrics_base['MAE'] - metrics_sent['MAE']) / metrics_base['MAE'] * 100
mejora_r2 = (metrics_sent['R¬≤'] - metrics_base['R¬≤']) / abs(metrics_base['R¬≤']) * 100

print(f"\nüéØ MEJORA CON SENTIMIENTO:")
for metric_prefix, gain in (
    ("RMSE: ", mejora_rmse),
    ("MAE:  ", mejora_mae),
    ("R¬≤:   ", mejora_r2),
):
    verdict = '‚úÖ mejor' if gain > 0 else '‚ùå peor'
    print(f"   {metric_prefix}{gain:+.2f}% ({verdict})")

üìä COMPARACI√ìN DE MODELOS - CONJUNTO DE TEST
             model           MSE       RMSE        MAE        R¬≤      MAPE
         LSTM Base 310905.597806 557.589094 485.183397 -0.932172 15.142114
LSTM + Sentimiento 314052.328002 560.403719 489.214134 -0.951728 15.281935

üéØ MEJORA CON SENTIMIENTO:
   RMSE: -0.50% (‚ùå peor)
   MAE:  -0.83% (‚ùå peor)
   R¬≤:   -2.10% (‚ùå peor)


---

## 1️⃣2️⃣ Visualización de Predicciones

In [40]:
# The first SEQ_LENGTH test rows only serve as lookback context, so the
# predictions line up with the dates after them.
test_dates = df_test.index[SEQ_LENGTH:]

# (legend label, values, line style) for each curve, in drawing order
prediction_series = (
    ('Precio Real', y_test_real, dict(color='black', width=2)),
    ('LSTM Base', y_pred_base, dict(color='blue', width=1.5, dash='dash')),
    ('LSTM + Sentimiento', y_pred_sent, dict(color='red', width=1.5, dash='dot')),
)

# Actual price versus both models on one chart
fig = go.Figure()
for series_label, series_values, line_style in prediction_series:
    fig.add_trace(go.Scatter(
        x=test_dates,
        y=series_values.flatten(),
        mode='lines',
        name=series_label,
        line=line_style
    ))

fig.update_layout(
    title='üîÆ Predicciones vs Valores Reales - Conjunto de Test',
    xaxis_title='Fecha',
    yaxis_title='Precio de Cierre (USD)',
    template='plotly_white',
    height=600,
    hovermode='x unified'
)

fig.show()

In [41]:
# Residuals are actual minus predicted, so a positive value means the model
# predicted below the real price.
real_prices_flat = y_test_real.flatten()
errors_base = real_prices_flat - y_pred_base.flatten()
errors_sent = real_prices_flat - y_pred_sent.flatten()

fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=('Residuos - LSTM Base', 'Residuos - LSTM + Sentimiento'),
    vertical_spacing=0.1
)

# One subplot row per model: residual line plus a dashed zero reference
residual_panels = (
    (1, errors_base, 'Residuos Base', 'blue'),
    (2, errors_sent, 'Residuos Sent', 'red'),
)
for panel_row, residuals, trace_label, line_colour in residual_panels:
    fig.add_trace(
        go.Scatter(x=test_dates, y=residuals, mode='lines', name=trace_label, line=dict(color=line_colour)),
        row=panel_row, col=1
    )
    fig.add_hline(y=0, line_dash="dash", line_color="gray", row=panel_row, col=1)
    fig.update_yaxes(title_text="Error (USD)", row=panel_row, col=1)

# Only the bottom subplot gets the date axis title
fig.update_xaxes(title_text="Fecha", row=2, col=1)

fig.update_layout(
    title_text="An√°lisis de Residuos",
    template='plotly_white',
    height=600,
    showlegend=False
)

fig.show()

print(f"\nüìä Estad√≠sticas de errores:")
for stats_heading, residuals in (
    ("\nModelo Base:", errors_base),
    ("\nModelo con Sentimiento:", errors_sent),
):
    print(stats_heading)
    print(f"   Media: {residuals.mean():.2f} USD")
    print(f"   Std: {residuals.std():.2f} USD")
    print(f"   Min: {residuals.min():.2f} USD")
    print(f"   Max: {residuals.max():.2f} USD")


üìä Estad√≠sticas de errores:

Modelo Base:
   Media: 485.18 USD
   Std: 274.78 USD
   Min: 92.53 USD
   Max: 1376.94 USD

Modelo con Sentimiento:
   Media: 489.21 USD
   Std: 273.35 USD
   Min: 88.92 USD
   Max: 1373.03 USD


---

## 1️⃣3️⃣ Exportación de Resultados

In [42]:
# --- Predictions: one row per test date ------------------------------------
archivo_pred = DATA_PROCESADO_DIR / 'predicciones_lstm.csv'
df_predictions = pd.DataFrame({
    'fecha': test_dates,
    'precio_real': y_test_real.flatten(),
    'pred_base': y_pred_base.flatten(),
    'pred_sentiment': y_pred_sent.flatten(),
    'error_base': errors_base,
    'error_sentiment': errors_sent
})
df_predictions.to_csv(archivo_pred, index=False)
print(f"‚úÖ Predicciones exportadas: {archivo_pred}")


def _training_summary(history):
    """Return the epoch count and last-epoch train/val loss of a Keras History."""
    losses = history.history
    return {
        'epochs': len(losses['loss']),
        'final_train_loss': float(losses['loss'][-1]),
        'final_val_loss': float(losses['val_loss'][-1])
    }


# --- Metrics and run configuration -----------------------------------------
configuracion = {
    'seq_length': SEQ_LENGTH,
    'train_size': len(df_train),
    'val_size': len(df_val),
    'test_size': len(df_test),
    'features_base': features_base,
    'features_sentiment': features_sentiment,
    'arquitectura': '256-128-64',
    'dropout': 0.2,
    'optimizer': 'Adam',
    'learning_rate': 0.001
}
metricas_test = {
    'base': metrics_base,
    'sentiment': metrics_sent,
    'mejora_porcentual': {
        'rmse': float(mejora_rmse),
        'mae': float(mejora_mae),
        'r2': float(mejora_r2)
    }
}
resultados = {
    'configuracion': configuracion,
    'metricas_test': metricas_test,
    'historia_entrenamiento': {
        'base': _training_summary(history_base),
        'sentiment': _training_summary(history_sentiment)
    }
}

archivo_resultados = DATA_PROCESADO_DIR / 'resultados_lstm.json'
# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes
with open(archivo_resultados, 'w', encoding='utf-8') as salida_json:
    json.dump(resultados, salida_json, indent=2, ensure_ascii=False)

print(f"‚úÖ Resultados exportados: {archivo_resultados}")

# --- Final models ----------------------------------------------------------
for trained_model, model_filename in (
    (model_base, 'lstm_base_final.keras'),
    (model_sentiment, 'lstm_sentiment_final.keras'),
):
    trained_model.save(MODELOS_DIR / model_filename)
print(f"‚úÖ Modelos guardados en: {MODELOS_DIR}")

‚úÖ Predicciones exportadas: /home/els4nchez/Videos/TECH/unificacion/datos_procesados/predicciones_lstm.csv
‚úÖ Resultados exportados: /home/els4nchez/Videos/TECH/unificacion/datos_procesados/resultados_lstm.json
‚úÖ Modelos guardados en: /home/els4nchez/Videos/TECH/unificacion/modelos
‚úÖ Modelos guardados en: /home/els4nchez/Videos/TECH/unificacion/modelos


---

## 1️⃣4️⃣ Resumen Ejecutivo

In [43]:
print("""
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë         ü§ñ RESUMEN EJECUTIVO - MODELO LSTM INTEGRADO                   ‚ïë
‚ï†‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï£
‚ïë                                                                        ‚ïë
‚ïë  ‚úÖ MODELOS ENTRENADOS Y EVALUADOS                                     ‚ïë
‚ïë                                                                        ‚ïë
‚ïë  üìä CONFIGURACI√ìN:                                                     ‚ïë
‚ïë                                                                        ‚ïë""")

print(f"‚ïë  ‚Ä¢ Arquitectura: 256-128-64 LSTM units                            ‚ïë")
print(f"‚ïë  ‚Ä¢ Dropout: 0.2 por capa                                          ‚ïë")
print(f"‚ïë  ‚Ä¢ Secuencias: {SEQ_LENGTH} d√≠as de lookback                                  ‚ïë")
print(f"‚ïë  ‚Ä¢ Train/Val/Test: 60%/20%/20%                                    ‚ïë")

print("""
‚ïë                                                                        ‚ïë
‚ïë  üìà RESULTADOS EN TEST SET:                                            ‚ïë
‚ïë                                                                        ‚ïë""")

print(f"‚ïë  üîπ MODELO BASE (sin sentimiento):                                 ‚ïë")
print(f"‚ïë     RMSE: ${metrics_base['RMSE']:7.2f}                                            ‚ïë")
print(f"‚ïë     MAE:  ${metrics_base['MAE']:7.2f}                                            ‚ïë")
print(f"‚ïë     R¬≤:   {metrics_base['R¬≤']:7.4f}                                            ‚ïë")
print(f"‚ïë     MAPE: {metrics_base['MAPE']:6.2f}%                                             ‚ïë")

print("""
‚ïë                                                                        ‚ïë""")

print(f"‚ïë  üî∏ MODELO CON SENTIMIENTO:                                        ‚ïë")
print(f"‚ïë     RMSE: ${metrics_sent['RMSE']:7.2f}                                            ‚ïë")
print(f"‚ïë     MAE:  ${metrics_sent['MAE']:7.2f}                                            ‚ïë")
print(f"‚ïë     R¬≤:   {metrics_sent['R¬≤']:7.4f}                                            ‚ïë")
print(f"‚ïë     MAPE: {metrics_sent['MAPE']:6.2f}%                                             ‚ïë")

print("""
‚ïë                                                                        ‚ïë
‚ïë  üéØ MEJORA CON SENTIMIENTO:                                            ‚ïë
‚ïë                                                                        ‚ïë""")

print(f"‚ïë     RMSE: {mejora_rmse:+6.2f}% ({'‚úÖ MEJOR' if mejora_rmse > 0 else '‚ùå PEOR':10s})                            ‚ïë")
print(f"‚ïë     MAE:  {mejora_mae:+6.2f}% ({'‚úÖ MEJOR' if mejora_mae > 0 else '‚ùå PEOR':10s})                            ‚ïë")
print(f"‚ïë     R¬≤:   {mejora_r2:+6.2f}% ({'‚úÖ MEJOR' if mejora_r2 > 0 else '‚ùå PEOR':10s})                            ‚ïë")

print("""
‚ïë                                                                        ‚ïë
‚ïë  üìÇ ARCHIVOS GENERADOS:                                                ‚ïë
‚ïë                                                                        ‚ïë
‚ïë  ‚Ä¢ predicciones_lstm.csv                                              ‚ïë
‚ïë  ‚Ä¢ resultados_lstm.json                                               ‚ïë
‚ïë  ‚Ä¢ lstm_base_final.keras                                              ‚ïë
‚ïë  ‚Ä¢ lstm_sentiment_final.keras                                         ‚ïë
‚ïë                                                                        ‚ïë
‚ïë  ‚û°Ô∏è  SIGUIENTE PASO:                                                   ‚ïë
‚ïë     Notebook 08 - S√≠ntesis y Resultados para Paper Cient√≠fico        ‚ïë
‚ïë                                                                        ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
""")

# Final conclusion, chosen from the RMSE/MAE relative changes (in percent):
#   1. both metrics improved by any amount      -> "improves"
#   2. otherwise, either one improved by > 2%   -> "moderate improvement"
#   3. anything else                            -> "no significant improvement"
# NOTE(review): the first branch has no minimum threshold, so even a marginal
# gain in both metrics is reported as an improvement - confirm this is intended.
both_improved = mejora_rmse > 0 and mejora_mae > 0
one_clearly_improved = mejora_rmse > 2 or mejora_mae > 2

if both_improved:
    conclusion_lines = (
        "\n‚úÖ CONCLUSI√ìN: Los features de sentimiento MEJORAN las predicciones del modelo LSTM.",
        "   El an√°lisis de sentimientos de noticias tiene valor predictivo para el precio del oro.",
    )
elif one_clearly_improved:
    conclusion_lines = (
        "\n‚ö†Ô∏è  CONCLUSI√ìN: Los features de sentimiento muestran mejora moderada.",
        "   El an√°lisis de sentimientos aporta informaci√≥n, pero no es determinante.",
    )
else:
    conclusion_lines = (
        "\n‚ùå CONCLUSI√ìN: Los features de sentimiento NO mejoran significativamente las predicciones.",
        "   Los indicadores t√©cnicos tradicionales son suficientes para este modelo.",
    )

for conclusion_line in conclusion_lines:
    print(conclusion_line)


‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë         ü§ñ RESUMEN EJECUTIVO - MODELO LSTM INTEGRADO                   ‚ïë
‚ï†‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï£
‚ïë                                                                        ‚ïë
‚ïë  ‚úÖ MODELOS ENTRENADOS Y EVALUADOS                                     ‚ïë
‚ïë                                                                        ‚ïë
‚ïë  üìä CONFIGURACI√ìN:                                                     ‚ïë
‚ïë                                                                        ‚ïë
‚ïë  ‚Ä¢ Arquitectura: 256-128-64 LSTM units                            ‚