In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.pipeline import Pipeline
import joblib
import warnings
import os

warnings.filterwarnings('ignore')


# Configurar estilo de gráficos
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

In [47]:
# 1. CARGAR Y PREPARAR LOS DATOS

train_df = pd.read_csv('../data/processed/energy_data_processed.csv')
test_df = pd.read_csv('../data/processed/energy_data_processed_test.csv')

In [48]:
# 2. SEPARAR CARACTERÍSTICAS Y VARIABLE OBJETIVO
# La variable objetivo es 'Energy Consumption'
# Las características (X) son todas las demás columnas excepto 'Energy Consumption'

# Para el dataset de entrenamiento
y_train = train_df['Energy Consumption']
X_train = train_df.drop('Energy Consumption', axis=1)

# Para el dataset de prueba
y_test = test_df['Energy Consumption']
X_test = test_df.drop('Energy Consumption', axis=1)

print(f"Forma del dataset de entrenamiento: {X_train.shape}")
print(f"Forma del dataset de prueba: {X_test.shape}")
print(f"Variable objetivo: {y_train.name}")


Forma del dataset de entrenamiento: (1000, 9)
Forma del dataset de prueba: (1000, 9)
Variable objetivo: Energy Consumption


In [49]:
# 4. ESCALADO DE CARACTERÍSTICAS
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [50]:
# 5. MODELO SVR BÁSICO
print("\n=== SVR BÁSICO ===")
svr_basic = SVR(kernel='rbf', C=1.0, gamma='scale')
svr_basic.fit(X_train_scaled, y_train)

# Predicciones básicas
y_pred_basic = svr_basic.predict(X_test_scaled)

# Métricas básicas
mse_basic = mean_squared_error(y_test, y_pred_basic)
rmse_basic = np.sqrt(mse_basic)
r2_basic = r2_score(y_test, y_pred_basic)
mae_basic = mean_absolute_error(y_test, y_pred_basic)

print(f"MSE: {mse_basic:.4f}")
print(f"RMSE: {rmse_basic:.4f}")
print(f"R²: {r2_basic:.4f}")
print(f"MAE: {mae_basic:.4f}")


=== SVR BÁSICO ===
MSE: 803856.3422
RMSE: 896.5804
R²: 0.0762
MAE: 736.4387


In [51]:
# pip install scikit-optimize
from skopt import BayesSearchCV
from skopt.space import Real, Categorical

bayes_search = BayesSearchCV(
    SVR(),
    {
        'C': Real(0.1, 100, prior='log-uniform'),
        'gamma': Categorical(['scale', 'auto', 0.001, 0.01, 0.1]),
        'kernel': Categorical(['rbf', 'poly']),
        'epsilon': Real(0.01, 0.2)
    },
    n_iter=30,  # Solo 30 iteraciones inteligentes
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=1,
    random_state=42
)

bayes_search.fit(X_train_scaled, y_train)
print(f"Mejores parámetros: {bayes_search.best_params_}")
print(f"Mejor score CV: {-bayes_search.best_score_:.4f}")

Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fi

In [52]:
# 7. MODELO OPTIMIZADO
svr_optimized = bayes_search.best_estimator_

# Predicciones para entrenamiento y prueba
y_pred_train_optimized = svr_optimized.predict(X_train_scaled)
y_pred_test_optimized = svr_optimized.predict(X_test_scaled)

# Métricas del modelo optimizado
mse_train_opt = mean_squared_error(y_train, y_pred_train_optimized)
rmse_train_opt = np.sqrt(mse_train_opt)
r2_train_opt = r2_score(y_train, y_pred_train_optimized)
mae_train_opt = mean_absolute_error(y_train, y_pred_train_optimized)

# Métricas para prueba
mse_test_opt = mean_squared_error(y_test, y_pred_test_optimized)
rmse_test_opt = np.sqrt(mse_test_opt)
r2_test_opt = r2_score(y_test, y_pred_test_optimized)
mae_test_opt = mean_absolute_error(y_test, y_pred_test_optimized)

print(f"\n=== MODELO OPTIMIZADO (ENTRENAMIENTO) ===")
print(f"MSE: {mse_train_opt:.4f}")
print(f"RMSE: {rmse_train_opt:.4f}")
print(f"R²: {r2_train_opt:.4f}")
print(f"MAE: {mae_train_opt:.4f}")

print(f"\n=== MODELO OPTIMIZADO (PRUEBA) ===")
print(f"MSE: {mse_test_opt:.4f}")
print(f"RMSE: {rmse_test_opt:.4f}")
print(f"R²: {r2_test_opt:.4f}")
print(f"MAE: {mae_test_opt:.4f}")


=== MODELO OPTIMIZADO (ENTRENAMIENTO) ===
MSE: 475.5602
RMSE: 21.8073
R²: 0.9995
MAE: 15.3973

=== MODELO OPTIMIZADO (PRUEBA) ===
MSE: 475.5602
RMSE: 21.8073
R²: 0.9995
MAE: 15.3973


In [53]:
# Validación cruzada
cv_scores = cross_val_score(svr_optimized, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
print(f"\nCV RMSE: {np.sqrt(-cv_scores.mean()):.4f} (+/- {np.sqrt(cv_scores.std() * 2):.4f})")


CV RMSE: 35.8807 (+/- 17.4165)


In [54]:
# 7. GUARDAR RESULTADOS EN CSV
# Resultados de entrenamiento
train_results = pd.DataFrame({
    'Valores Reales': y_train,
    'Predicciones': y_pred_train_optimized,
    'Diferencia': y_train - y_pred_train_optimized
})
train_results.to_csv('../data/results/svr_predictions_train.csv', index=False)
print("\nPredicciones de entrenamiento guardadas en '../data/results/svr_predictions_train.csv'")

# Resultados de prueba
test_results = pd.DataFrame({
    'Valores Reales': y_test,
    'Predicciones': y_pred_test_optimized,
    'Diferencia': y_test - y_pred_test_optimized
})
test_results.to_csv('../data/results/svr_predictions_test.csv', index=False)
print("Predicciones de prueba guardadas en '../data/results/svr_predictions_test.csv'")


Predicciones de entrenamiento guardadas en '../data/results/svr_predictions_train.csv'
Predicciones de prueba guardadas en '../data/results/svr_predictions_test.csv'


In [55]:
# 8. COMPARACIÓN DE MODELOS CON DIFERENTES KERNELS
print("\n=== COMPARACIÓN DE KERNELS ===")
kernels = ['linear', 'rbf', 'poly', 'sigmoid']
kernel_results = {}

for kernel in kernels:
    svr_kernel = SVR(kernel=kernel, C=1.0, gamma='scale')
    svr_kernel.fit(X_train_scaled, y_train)
    y_pred_kernel = svr_kernel.predict(X_test_scaled)
    
    mse_kernel = mean_squared_error(y_test, y_pred_kernel)
    r2_kernel = r2_score(y_test, y_pred_kernel)
    
    kernel_results[kernel] = {'MSE': mse_kernel, 'R²': r2_kernel}
    print(f"{kernel.upper()} - MSE: {mse_kernel:.4f}, R²: {r2_kernel:.4f}")


=== COMPARACIÓN DE KERNELS ===
LINEAR - MSE: 927.4379, R²: 0.9989
RBF - MSE: 803856.3422, R²: 0.0762
POLY - MSE: 787666.7367, R²: 0.0948
SIGMOID - MSE: 709897.8898, R²: 0.1842


In [56]:
# 9. VISUALIZACIONES
output_dir = "../data/figures/"

# Visualizaciones para el conjunto de prueba
fig_test, axes_test = plt.subplots(2, 2, figsize=(15, 12))

# Gráfico 1: Valores reales vs predicciones (modelo optimizado - prueba)
axes_test[0,0].scatter(y_test, y_pred_test_optimized, alpha=0.6)
axes_test[0,0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
axes_test[0,0].set_xlabel('Valores Reales')
axes_test[0,0].set_ylabel('Predicciones')
axes_test[0,0].set_title(f'SVR Optimizado (Prueba) - R² = {r2_test_opt:.4f}')
axes_test[0,0].grid(True, alpha=0.3)

# Gráfico 2: Residuos (prueba)
residuos_test = y_test - y_pred_test_optimized
axes_test[0,1].scatter(y_pred_test_optimized, residuos_test, alpha=0.6)
axes_test[0,1].axhline(y=0, color='r', linestyle='--')
axes_test[0,1].set_xlabel('Predicciones')
axes_test[0,1].set_ylabel('Residuos')
axes_test[0,1].set_title('Gráfico de Residuos (Prueba)')
axes_test[0,1].grid(True, alpha=0.3)

# Gráfico 3: Comparación de kernels (MSE)
kernel_names = list(kernel_results.keys())
mse_values = [kernel_results[k]['MSE'] for k in kernel_names]
axes_test[1,0].bar(np.arange(len(kernel_names)), mse_values, alpha=0.7)
axes_test[1,0].set_xlabel('Kernel')
axes_test[1,0].set_ylabel('MSE')
axes_test[1,0].set_title('Comparación MSE por Kernel')
axes_test[1,0].set_xticks(np.arange(len(kernel_names)))
axes_test[1,0].set_xticklabels(kernel_names)

# Gráfico 4: Comparación de kernels (R²)
r2_values = [kernel_results[k]['R²'] for k in kernel_names]
axes_test[1,1].bar(np.arange(len(kernel_names)), r2_values, alpha=0.7, color='green')
axes_test[1,1].set_xlabel('Kernel')
axes_test[1,1].set_ylabel('R²')
axes_test[1,1].set_title('Comparación R² por Kernel')
axes_test[1,1].set_xticks(np.arange(len(kernel_names)))
axes_test[1,1].set_xticklabels(kernel_names)

plt.tight_layout()
fig_test.savefig(f"{output_dir}svr_comparative_analysis_test.png", dpi=300, bbox_inches='tight')
plt.close(fig_test)
print(f"📊 Gráfico de prueba guardado en: {output_dir}svr_comparative_analysis_test.png")

# Visualizaciones para el conjunto de entrenamiento
fig_train, axes_train = plt.subplots(1, 2, figsize=(15, 6))

# Gráfico 1: Valores reales vs predicciones (modelo optimizado - entrenamiento)
axes_train[0].scatter(y_train, y_pred_train_optimized, alpha=0.6)
axes_train[0].plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()], 'r--', lw=2)
axes_train[0].set_xlabel('Valores Reales')
axes_train[0].set_ylabel('Predicciones')
axes_train[0].set_title(f'SVR Optimizado (Entrenamiento) - R² = {r2_train_opt:.4f}')
axes_train[0].grid(True, alpha=0.3)

# Gráfico 2: Residuos (entrenamiento)
residuos_train = y_train - y_pred_train_optimized
axes_train[1].scatter(y_pred_train_optimized, residuos_train, alpha=0.6)
axes_train[1].axhline(y=0, color='r', linestyle='--')
axes_train[1].set_xlabel('Predicciones')
axes_train[1].set_ylabel('Residuos')
axes_train[1].set_title('Gráfico de Residuos (Entrenamiento)')
axes_train[1].grid(True, alpha=0.3)

plt.tight_layout()
fig_train.savefig(f"{output_dir}svr_comparative_analysis_train.png", dpi=300, bbox_inches='tight')
plt.close(fig_train)
print(f"📊 Gráfico de entrenamiento guardado en: {output_dir}svr_comparative_analysis_train.png")

📊 Gráfico de prueba guardado en: ../data/figures/svr_comparative_analysis_test.png
📊 Gráfico de entrenamiento guardado en: ../data/figures/svr_comparative_analysis_train.png


In [57]:
# 10. TABLA RESUMEN DE RESULTADOS
print("\n=== RESUMEN DE RESULTADOS ===")
resumen = pd.DataFrame({
    'Modelo': ['SVR Básico (Prueba)', 'SVR Optimizado (Entrenamiento)', 'SVR Optimizado (Prueba)'],
    'MSE': [mse_basic, mse_train_opt, mse_test_opt],
    'RMSE': [rmse_basic, rmse_train_opt, rmse_test_opt],
    'R²': [r2_basic, r2_train_opt, r2_test_opt],
    'MAE': [mae_basic, mae_train_opt, mae_test_opt]
})
print(resumen.round(4))


=== RESUMEN DE RESULTADOS ===
                           Modelo          MSE      RMSE      R²       MAE
0             SVR Básico (Prueba)  803856.3422  896.5804  0.0762  736.4387
1  SVR Optimizado (Entrenamiento)     475.5602   21.8073  0.9995   15.3973
2         SVR Optimizado (Prueba)     475.5602   21.8073  0.9995   15.3973


In [58]:
# 11. GUARDAR EL MODELO 
joblib.dump(svr_optimized, '../data/results/svr_model.pkl')
joblib.dump(scaler, '../data/results/svr_scaler.pkl')
print("\nModelo y scaler guardados como 'svr_model.pkl' y 'svr_scaler.pkl'")



Modelo y scaler guardados como 'svr_model.pkl' y 'svr_scaler.pkl'


In [59]:
# # Guardar en CSV
# df_test_results.to_csv('../data/results/svr_model.csv', index=False)

# print("✅ Archivo guardado como 'knn_predictions_test.csv' en la carpeta /data/results/")

In [60]:
# 12. FUNCIÓN PARA NUEVAS PREDICCIONES
def predecir_nuevos_datos(nuevos_datos, modelo=svr_optimized, escalador=scaler):
    """
    Función para hacer predicciones en nuevos datos

    Parameters:
    nuevos_datos: array-like o DataFrame, datos a predecir
    modelo: modelo SVR entrenado
    escalador: StandardScaler ajustado

    Returns:
    predicciones: array con las predicciones
    """
    # Convertir a NumPy si es un DataFrame para evitar advertencias
    if isinstance(nuevos_datos, pd.DataFrame):
        nuevos_datos = nuevos_datos.values
    datos_escalados = escalador.transform(nuevos_datos)
    predicciones = modelo.predict(datos_escalados)
    return predicciones

# Ejemplo de uso de la función
# nuevos_X = np.array([[1, 2, 3, 4]])  # Ejemplo
# prediccion = predecir_nuevos_datos(nuevos_X)
# print(f"Predicción para nuevos datos: {prediccion}")

print("\n¡SVR implementado exitosamente!")



¡SVR implementado exitosamente!
