In [18]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import joblib
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import KFold
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.regularizers import l2
from tensorflow.keras.optimizers import Adam


# Read the cleaned dataset from disk into a DataFrame.
cleaned_csv_path = 'data/clean/cleaned_data_final.csv'
data = pd.read_csv(cleaned_csv_path)

In [2]:
# Columns kept for modelling. The order is significant: it determines the
# column order of every frame derived from `df_filtered`.
columns_to_keep = [
    # per-source electricity columns
    'solar_electricity',
    'wind_electricity',
    'biofuel_electricity',
    'hydro_electricity',
    # demographic / economic / energy indicators
    'population',
    'gdp',
    'electricity_demand',
    'energy_per_capita',
    'energy_per_gdp',
    # per-source consumption columns
    'solar_consumption',
    'wind_consumption',
    'biofuel_consumption',
    'hydro_consumption',
    # identifiers
    'country',
    'iso_code',
    'year',
]

# Restrict the frame to those columns, then discard every row that has at
# least one missing value (imputation would be the alternative).
df_filtered = data[columns_to_keep].dropna()

In [3]:
# Integer-encode the two categorical identifier columns.
#
# Each column gets its OWN encoder. Re-fitting a single LabelEncoder on
# 'iso_code' would overwrite the classes learned for 'country', making it
# impossible to map country codes back to names (inverse_transform) later.
country_encoder = LabelEncoder()
iso_code_encoder = LabelEncoder()

df_filtered['country'] = country_encoder.fit_transform(df_filtered['country'])
df_filtered['iso_code'] = iso_code_encoder.fit_transform(df_filtered['iso_code'])

# Backward compatibility: previously `label_encoder` ended up fitted on
# 'iso_code' (the last fit), so keep that name bound to the same mapping.
label_encoder = iso_code_encoder

# NOTE(review): label encoding imposes an arbitrary ordering on countries and
# 'country' / 'iso_code' presumably carry the same information - confirm
# whether both are needed as model inputs.

In [4]:
# Targets: the four per-source consumption columns.
target_columns = ['solar_consumption', 'wind_consumption',
                  'biofuel_consumption', 'hydro_consumption']

# Columns excluded from the inputs in addition to the targets.
excluded_feature_columns = ['solar_electricity', 'wind_electricity',
                            'biofuel_electricity', 'hydro_electricity']

# Features must NOT contain the targets. Previously only the *_electricity
# columns were dropped, so X still held the *_consumption columns that make up
# y (target leakage): the network could simply copy its inputs to its outputs.
X = df_filtered.drop(columns=excluded_feature_columns + target_columns)
y = df_filtered[target_columns]

In [5]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Standardise the features. The scaler learns its statistics from the training
# split only and the same fitted transform is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Multi-output regression network: four ReLU hidden layers followed by a
# linear output layer with one unit per target column.
#
# The input width is taken from the scaled training matrix that is actually fed
# to the network. The first Dense layer used to carry a hard-coded
# `input_shape=(16,)`, which contradicted the Input layer, did not match the
# real number of features, and is deprecated in Keras 3; it has been removed.
dl_model = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(128, activation='relu', kernel_regularizer=l2(0.01)),  # L2-regularised first hidden layer
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(y_train.shape[1], activation='linear'),  # one linear output per target
])

# Mean squared error is optimised with Adam at an explicit learning rate.
dl_model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [8]:
# Make sure the output directory for saved models is present.
models_dir = 'final_models'
os.makedirs(models_dir, exist_ok=True)

In [9]:
# Checkpoint callback: the model is written to `checkpoint_path` only when the
# monitored validation loss reaches a new minimum.
checkpoint_path = 'final_models/deep_learning_best_model.keras'
checkpoint_options = dict(
    filepath=checkpoint_path,
    monitor='val_loss',    # quantity being tracked
    mode='min',            # lower is better
    save_best_only=True,   # never overwrite a better checkpoint
    verbose=1,
)
model_checkpoint = ModelCheckpoint(**checkpoint_options)

In [None]:
# Stop training once val_loss has not improved for 10 epochs and roll the
# in-memory model back to its best epoch, so that later predictions use the
# best weights rather than the last ones.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Train the deep-learning model.
# Validation data is carved out of the TRAINING split (validation_split) rather
# than using the test set: using X_test/y_test to drive checkpointing and early
# stopping would leak test information into model selection and bias the
# test-set metrics. Keras takes the last 20% of the training arrays for
# validation; the rows were already shuffled by train_test_split.
dl_history = dl_model.fit(
    X_train_scaled,
    y_train.to_numpy(),
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[model_checkpoint, early_stopping],
)

In [None]:
# Score the deep-learning model on the held-out test features.
dl_predictions = dl_model.predict(X_test_scaled)

# Regression metrics, each aggregated over all output columns.
dl_r2 = r2_score(y_test, dl_predictions)
dl_mse = mean_squared_error(y_test, dl_predictions)
dl_rmse = np.sqrt(dl_mse)  # RMSE derived from the MSE explicitly
dl_mae = mean_absolute_error(y_test, dl_predictions)

# Report the results.
print(f"DL Model - MAE: {dl_mae}, RMSE: {dl_rmse}, R²: {dl_r2}")



In [None]:
# K-fold cross-validation on the training split.

# 5 shuffled folds with a fixed seed for reproducibility.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metrics.
mae_list = []
rmse_list = []
r2_list = []

# Work from the UNSCALED training data: the scaler is re-fitted inside every
# fold on that fold's training rows only. Slicing the globally scaled matrix
# would let each validation fold influence the scaling statistics
# (preprocessing leakage) and make the CV estimate optimistic.
X_train_raw = X_train.to_numpy()
y_train_raw = y_train.to_numpy()

for train_index, val_index in kfold.split(X_train_raw):
    fold_scaler = StandardScaler()
    X_train_cv = fold_scaler.fit_transform(X_train_raw[train_index])
    X_val_cv = fold_scaler.transform(X_train_raw[val_index])
    y_train_cv, y_val_cv = y_train_raw[train_index], y_train_raw[val_index]

    # Fresh, untrained network for every fold.
    cv_model = Sequential([
        Input(shape=(X_train_cv.shape[1],)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(y_train_cv.shape[1], activation='linear'),
    ])
    cv_model.compile(optimizer='adam', loss='mean_squared_error')

    # Train silently. No validation_data is passed: with verbose=0 and no
    # callbacks its per-epoch result was computed and then thrown away.
    cv_model.fit(X_train_cv, y_train_cv, epochs=50, batch_size=32, verbose=0)

    # Score the fold on its held-out rows.
    y_val_pred = cv_model.predict(X_val_cv, verbose=0)

    mae = mean_absolute_error(y_val_cv, y_val_pred)
    rmse = np.sqrt(mean_squared_error(y_val_cv, y_val_pred))
    r2 = r2_score(y_val_cv, y_val_pred)

    mae_list.append(mae)
    rmse_list.append(rmse)
    r2_list.append(r2)

# Report the metrics averaged over the folds.
print(f"Cross-Validation Results - MAE: {np.mean(mae_list)}, RMSE: {np.mean(rmse_list)}, R²: {np.mean(r2_list)}")

# Final model.
# `model` was used here without ever being defined, which raises NameError on
# a fresh kernel (Restart & Run All). It is now built explicitly.
# NOTE(review): the architecture mirrors the network evaluated in the
# cross-validation loop of this cell - confirm that this is the intended final
# model (as opposed to re-using `dl_model`).
model = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(y_train.shape[1], activation='linear'),
])

# Compile the model; MAE is tracked as an additional metric.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train on the training split, holding out 20% of it for validation.
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Predict on the test features.
y_pred = model.predict(X_test_scaled)