In [None]:
import pandas as pd
import cv2
import numpy as np
from datetime import datetime, time
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, TimeDistributed, Concatenate, Dense, Dropout, Flatten
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers.legacy import RMSprop
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import os

# Función para buscar la imagen más cercana en un rango de segundos
def get_closest_image_path(timestamp, image_folder):
    """Return the path of the first image file that exists for a given minute.

    Candidate file names are built from the timestamp formatted down to the
    minute, followed by a two-digit second (00-59) and the ``.png`` extension.
    Seconds are probed in ascending order.

    Args:
        timestamp: Object exposing ``strftime`` (e.g. ``datetime`` or
            ``pandas.Timestamp``).
        image_folder: Directory in which the image files are looked up.

    Returns:
        The path of the first candidate that exists on disk, or ``None`` when
        none of the 60 candidates exists.
    """
    minute_stamp = timestamp.strftime("%Y%m%d%H%M")
    prefix = f'OR_ABI-L2-ACMF-M6_G16_s{minute_stamp}'
    candidates = (
        os.path.join(image_folder, f"{prefix}{sec:02d}.png")
        for sec in range(60)
    )
    # Lazily probe the candidates; stop at the first hit.
    return next((path for path in candidates if os.path.exists(path)), None)

# Cargar imágenes y valores GHI normalizados, filtrando las parejas completas
def load_images_and_ghi(df_filtrado, image_folder, scaler=None):
    """Load the image/GHI pairs for every row that has a readable image.

    For each row the matching image is looked up with
    ``get_closest_image_path`` and read with ``cv2.imread``. Rows without a
    usable image are reported on stdout and skipped, so all returned
    collections stay aligned with each other.

    Args:
        df_filtrado: DataFrame with the columns ``Timestamp``, ``GHI`` and
            ``TempAmb``.
        image_folder: Directory that holds the image files.
        scaler: Already fitted scaler used to transform the GHI values. When
            ``None`` a new ``MinMaxScaler`` is fitted on the loaded values.

    Returns:
        Tuple ``(images, ghi_scaled, scaler, timestamps, temp_ambient)``:
        the images resized to 64x64 and divided by 255 as an array, the scaled
        GHI values as a column array, the scaler that was used, and the lists
        of timestamps and ambient temperatures of the rows that were kept.
    """
    frames = []
    ghi_raw = []
    kept_timestamps = []
    kept_temps = []

    for _, record in df_filtrado.iterrows():
        ts = record['Timestamp']
        ghi = record['GHI']
        temp = record['TempAmb']
        path = get_closest_image_path(ts, image_folder)
        frame = None if not path else cv2.imread(path)

        # Skip rows with no image file or with a file that could not be read.
        if frame is None:
            print(f"Imagen no encontrada para timestamp: {ts}")
            continue

        frames.append(cv2.resize(frame, (64, 64)) / 255.0)
        ghi_raw.append(ghi)
        kept_timestamps.append(ts)
        kept_temps.append(temp)

    # Scalers operate on 2-D input, hence the single-column reshape.
    ghi_column = np.array(ghi_raw).reshape(-1, 1)
    if scaler is not None:
        ghi_scaled = scaler.transform(ghi_column)
    else:
        scaler = MinMaxScaler()
        ghi_scaled = scaler.fit_transform(ghi_column)

    return np.array(frames), ghi_scaled, scaler, kept_timestamps, kept_temps

# Crear secuencias de entrada y salida
def create_sequences(X_images, y_ghi, seq_len, num_pred):
    """Slice aligned image and GHI series into sliding input/target windows.

    Window ``i`` uses samples ``i .. i + seq_len - 1`` as input and the
    following ``num_pred`` GHI samples as target.

    Args:
        X_images: Sequence of images, indexed by time step.
        y_ghi: Sequence of GHI values aligned with ``X_images``.
        seq_len: Number of consecutive steps in each input window.
        num_pred: Number of steps to predict after each input window.

    Returns:
        Tuple ``(X_seq_images, X_seq_ghi, y_seq_ghi)`` of arrays whose first
        axis is the window index. All three are empty when the series is
        shorter than ``seq_len + num_pred``.
    """
    # The last valid start index is len - seq_len - num_pred (inclusive), so
    # the number of windows is that value plus one. Without the +1 the final
    # complete window would be silently dropped.
    n_windows = max(len(X_images) - seq_len - num_pred + 1, 0)
    starts = range(n_windows)

    X_seq_images = [X_images[i:i + seq_len] for i in starts]
    X_seq_ghi = [y_ghi[i:i + seq_len] for i in starts]
    y_seq_ghi = [y_ghi[i + seq_len:i + seq_len + num_pred] for i in starts]

    return np.array(X_seq_images), np.array(X_seq_ghi), np.array(y_seq_ghi)

# Función para graficar las predicciones junto con los valores reales
def plot_real_vs_predicted(timestamps, y_real, y_pred, title="Predicciones GHI vs Reales"):
    """Plot real and predicted GHI values against their timestamps.

    Both value arrays are flattened, and all three inputs are truncated to the
    length of the shortest one before plotting.

    Args:
        timestamps: Sliceable sequence of x-axis values.
        y_real: Array of real values (must provide ``flatten``).
        y_pred: Array of predicted values (must provide ``flatten``).
        title: Title shown above the figure.
    """
    real_flat = y_real.flatten()
    pred_flat = y_pred.flatten()

    # Trim everything to the common length so the series can share an x-axis.
    n_common = min(len(timestamps), len(real_flat), len(pred_flat))
    x_axis = timestamps[:n_common]
    real_flat = real_flat[:n_common]
    pred_flat = pred_flat[:n_common]

    plt.figure(figsize=(10, 6))
    plt.plot(x_axis, real_flat, label="Valores Reales", color='blue', marker='o')
    plt.plot(x_axis, pred_flat, label="Valores Predichos", color='red', linestyle='--', marker='x')
    plt.title(title)
    plt.xlabel("Hora")
    plt.ylabel("GHI")
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

def plot_training_history(history):
    """Plot the training and validation loss recorded during model fitting.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the ``'loss'`` and ``'val_loss'`` series (as returned by
            ``model.fit`` in this file).
    """
    # (history key, legend label) for each curve, drawn in this order.
    curves = (
        ('loss', 'Pérdida de Entrenamiento'),
        ('val_loss', 'Pérdida de Validación'),
    )

    plt.figure(figsize=(10, 6))
    for key, legend_text in curves:
        plt.plot(history.history[key], label=legend_text)
    plt.title("Pérdida del Modelo durante el Entrenamiento")
    plt.xlabel("Épocas")
    plt.ylabel("Pérdida (MSE)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Función para ajustar seq_len y num_pred en función del rango de tiempo
def calculate_seq_len_num_pred(start_date, end_date, data_frequency_minutes=10, min_seq_len=12, min_num_pred=5):
    """Derive the input window length and forecast horizon from a time range.

    The range is converted into a number of whole sampling intervals; a third
    of that count becomes the sequence length and a sixth becomes the number
    of predicted steps, each bounded below by its minimum.

    Args:
        start_date: Start of the range (datetime-like).
        end_date: End of the range (datetime-like).
        data_frequency_minutes: Sampling interval of the data, in minutes.
        min_seq_len: Lower bound for the returned sequence length.
        min_num_pred: Lower bound for the returned number of predictions.

    Returns:
        Tuple ``(seq_len, num_pred)``.
    """
    span_minutes = (end_date - start_date).total_seconds() / 60
    n_points = int(span_minutes // data_frequency_minutes)

    third = n_points // 3
    sixth = n_points // 6

    # Apply the lower bounds.
    seq_len = third if third > min_seq_len else min_seq_len
    num_pred = sixth if sixth > min_num_pred else min_num_pred
    return seq_len, num_pred

# Entrenamiento del modelo modificado para secuencias usando VGG16 y LSTM
def train_model_vgg16(X_images, y_ghi, seq_len, num_pred):
    """Build and fit a VGG16 + LSTM model on sliding windows of images and GHI.

    The series are windowed with ``create_sequences`` and split 80/20 into
    training and validation sets. Each frame of a window goes through a VGG16
    base whose weights are frozen; the flattened features are concatenated
    with the GHI history and fed to an LSTM followed by a dense layer with
    ``num_pred`` outputs. The model is trained on MSE with early stopping on
    the validation loss.

    Args:
        X_images: Image series, one 64x64x3 image per time step.
        y_ghi: GHI series aligned with ``X_images``.
        seq_len: Number of steps in each input window.
        num_pred: Number of steps predicted per window.

    Returns:
        Tuple ``(model, history)`` with the fitted model and the object
        returned by ``model.fit``.
    """
    seq_images, seq_ghi, seq_targets = create_sequences(X_images, y_ghi, seq_len, num_pred)

    from sklearn.model_selection import train_test_split

    # NOTE(review): the split is random over windows that overlap in time, so
    # validation windows presumably share samples with training windows -
    # confirm whether a chronological split is wanted instead.
    (train_images, val_images,
     train_ghi, val_ghi,
     train_targets, val_targets) = train_test_split(
        seq_images, seq_ghi, seq_targets, test_size=0.2, random_state=42)

    # Pre-trained convolutional base, kept frozen during training.
    frame_shape = (64, 64, 3)
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=frame_shape)
    backbone.trainable = False

    # Image branch: apply the base to every frame, then flatten per time step.
    frames_in = Input(shape=(seq_len,) + frame_shape)
    per_frame = TimeDistributed(backbone)(frames_in)
    per_frame = TimeDistributed(Flatten())(per_frame)

    # Numeric branch: the GHI history, one value per time step.
    ghi_in = Input(shape=(seq_len, 1))
    merged = Concatenate()([per_frame, ghi_in])

    hidden = LSTM(128, return_sequences=False, kernel_regularizer=l2(0.01))(merged)
    hidden = Dropout(0.3)(hidden)
    forecast = Dense(num_pred, kernel_regularizer=l2(0.01))(hidden)

    model = Model([frames_in, ghi_in], forecast)
    model.compile(optimizer=RMSprop(learning_rate=1e-4), loss='mse')

    # Stop once val_loss has not improved for 5 epochs and keep the best weights.
    stopper = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    history = model.fit(
        [train_images, train_ghi],
        train_targets,
        validation_data=([val_images, val_ghi], val_targets),
        epochs=30,
        batch_size=16,
        callbacks=[stopper],
    )

    return model, history

# Código principal
# NOTE(security): unpickling can execute arbitrary code; only load this file
# from a trusted source.
df = pd.read_pickle('df_GHI.plk')
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# Training and prediction date ranges (both bounds inclusive)
fecha_inicio_entrenamiento = '2023-01-01 00:00:00'
fecha_fin_entrenamiento = '2023-03-31 23:59:59'
fecha_inicio_prediccion = '2023-04-01 00:00:00'
fecha_fin_prediccion = '2023-04-05 23:59:59'

# Filter the data for each range
df_entrenamiento = df[(df['Timestamp'] >= fecha_inicio_entrenamiento) & (df['Timestamp'] <= fecha_fin_entrenamiento)]
df_prediccion = df[(df['Timestamp'] >= fecha_inicio_prediccion) & (df['Timestamp'] <= fecha_fin_prediccion)]

# Load the data; the test set reuses the scaler fitted on the training GHI
image_folder = 'Images/ACMF'
X_images_train, y_ghi_train, scaler_train, valid_timestamps_train, temp_ambient_train = load_images_and_ghi(df_entrenamiento, image_folder)
X_images_test, y_ghi_test, scaler_test, valid_timestamps_test, temp_ambient_test = load_images_and_ghi(df_prediccion, image_folder, scaler_train)

# NOTE(review): the window sizes are derived from the whole training span
# (about three months at the default 10-minute frequency), which yields
# windows of several thousand steps - longer than the five-day prediction
# range. Confirm that this is the intended range to pass here.
seq_len, num_pred = calculate_seq_len_num_pred(pd.to_datetime(fecha_inicio_entrenamiento), pd.to_datetime(fecha_fin_entrenamiento))

# Train the model
model, history = train_model_vgg16(X_images_train, y_ghi_train, seq_len, num_pred)
plot_training_history(history)

# Build the test windows. The true targets get their own name so they are not
# overwritten by the predictions.
X_images_seq_test, X_ghi_seq_test, y_ghi_true_test = create_sequences(X_images_test, y_ghi_test, seq_len, num_pred)

# Guard on the number of windows actually produced: having more than seq_len
# samples is not enough, since each window also needs num_pred target steps.
if len(X_images_seq_test) > 0:
    y_ghi_pred_test = model.predict([X_images_seq_test, X_ghi_seq_test])
    n_windows = len(y_ghi_pred_test)

    # De-normalise predictions and true targets to shape (n_windows, num_pred)
    y_ghi_pred_test_inversed = scaler_test.inverse_transform(y_ghi_pred_test.reshape(-1, 1)).reshape(-1, num_pred)
    y_ghi_test_inversed = scaler_test.inverse_transform(np.asarray(y_ghi_true_test).reshape(-1, 1)).reshape(-1, num_pred)

    # Metrics over every (window, horizon) pair; both arrays have equal length
    mse = mean_squared_error(y_ghi_test_inversed.flatten(), y_ghi_pred_test_inversed.flatten())
    mae = mean_absolute_error(y_ghi_test_inversed.flatten(), y_ghi_pred_test_inversed.flatten())
    print(f"MSE: {mse}, MAE: {mae}")

    # Index in the test series of the step forecast by window i at horizon h:
    # i + seq_len + h. Shape (n_windows, num_pred), same layout as the
    # prediction array.
    target_idx = np.arange(n_windows)[:, None] + seq_len + np.arange(num_pred)[None, :]

    # Plot the first-step-ahead forecast of every window: it gives exactly one
    # point per timestamp, so the x-axis is strictly increasing.
    first_step_timestamps = [valid_timestamps_test[i] for i in target_idx[:, 0]]
    plot_real_vs_predicted(first_step_timestamps, y_ghi_test_inversed[:, 0], y_ghi_pred_test_inversed[:, 0], title="Predicciones GHI vs Valores Reales")

    # One row per predicted value, paired with the timestamp and ambient
    # temperature of the step it forecasts. A timestamp appears once for each
    # window whose horizon reaches it.
    flat_idx = target_idx.ravel()
    pred_results_df = pd.DataFrame({
        'Timestamp': [valid_timestamps_test[i] for i in flat_idx],
        'GHI': y_ghi_pred_test_inversed.ravel(),
        'TempAmb': [temp_ambient_test[i] for i in flat_idx],
    })

    # Save the results to CSV with the required column structure
    pred_results_df.to_csv('predicciones_GHI_tempAmb.csv', index=False)
    print("Predicciones guardadas en 'predicciones_GHI_tempAmb.csv'")
else:
    print("No hay suficientes secuencias para generar predicciones.")