In [None]:
from google.colab import drive
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Mount Google Drive so the files stored there are reachable under /gdrive
drive.mount('/gdrive')

# Read the dataset from the project folder
ruta = '/gdrive/MyDrive/Colab_IA_Investigacion/'
nombre_csv = 'weather_dataset_preprocesado.csv'
df = pd.read_csv(ruta + nombre_csv)
df

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


Unnamed: 0,DATE_TIME,ObjectId,SYSTEM_ID,PANELS_REPORTING_MICRO_INVERTER,WATTS,WATT_HOUR,KILOWATT_HOUR,COMMUNITY_NAME,FORWARD_SORTATION_AREA
0,2016-11-02 07:15:00+00:00,452981,1068729,11,99,,,Eastern Passage,B3G
1,2016-11-02 07:20:00+00:00,452982,1068729,11,99,,,Eastern Passage,B3G
2,2016-11-02 07:25:00+00:00,452983,1068729,11,99,,,Eastern Passage,B3G
3,2016-11-02 07:30:00+00:00,452984,1068729,11,99,,,Eastern Passage,B3G
4,2016-11-02 07:35:00+00:00,452986,1068729,11,99,,,Eastern Passage,B3G
...,...,...,...,...,...,...,...,...,...
468436,2021-04-16 19:35:00+00:00,13501540,1575841,18,5,0.0,0.0,Halifax,B3M
468437,2021-04-16 19:40:00+00:00,13501540,1575841,18,5,0.0,0.0,Halifax,B3M
468438,2021-04-16 19:45:00+00:00,8504819,1450326,13,0,0.0,0.0,Halifax,B3K
468439,2021-04-16 19:50:00+00:00,8504819,1450326,13,0,0.0,0.0,Halifax,B3K


Sets de entrenamiento, validación y prueba

In [None]:
def train_val_test_split(serie, tr_size=0.8, vl_size=0.1, ts_size=0.1):
    """Split a series positionally into train, validation and test parts.

    No shuffling is performed: the first slice is the training set, the next
    slice is the validation set and everything that remains is the test set.

    Args:
        serie: Sliceable object exposing ``shape[0]`` (its number of samples).
        tr_size: Fraction of the samples assigned to the training set.
        vl_size: Fraction of the samples assigned to the validation set.
        ts_size: Not read by the computation; the test set always receives
            the samples left over after the train and validation slices.

    Returns:
        Tuple ``(train, val, test)`` of slices of ``serie``.
    """
    total = serie.shape[0]

    # Slice boundaries; fractional counts are truncated by int()
    fin_train = int(tr_size * total)
    fin_val = fin_train + int(vl_size * total)

    return serie[:fin_train], serie[fin_train:fin_val], serie[fin_val:]

Obtención de los datos de entrenamiento, validación y prueba


In [None]:
# Split the WATTS column into training, validation and test subsets
tr, vl, ts = train_val_test_split(df['WATTS'])

# Print the size of each subset, one per line
print(
    f'Tamaño set de entrenamiento: {tr.shape}',
    f'Tamaño set de validación: {vl.shape}',
    f'Tamaño set de prueba: {ts.shape}',
    sep='\n',
)

Tamaño set de entrenamiento: (374752,)
Tamaño set de validación: (46844,)
Tamaño set de prueba: (46845,)


Creación del dataset supervisado


In [None]:
def crear_dataset_supervisado(array, input_length, output_length):
    """Build sliding-window (X, Y) samples for supervised sequence learning.

    Each sample pairs ``input_length`` consecutive rows (all columns) with the
    ``output_length`` rows that immediately follow them, taken from the last
    column only.

    Args:
        array: 1-D (univariate) or 2-D (rows x features) array-like.
        input_length: Number of consecutive rows in each input window.
        output_length: Number of rows to predict after each input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays. With ``n = rows - input_length -
        output_length`` samples, ``X`` has shape ``(n, input_length, features)``
        and ``Y`` has shape ``(n, output_length, 1)``.
    """
    # Accept any array-like (list, Series, ndarray) and normalise to 2-D
    array = np.asarray(array)
    if array.ndim == 1:  # univariate series -> single feature column
        array = array.reshape(-1, 1)
    fils, cols = array.shape

    X, Y = [], []
    # NOTE(review): the window starting at fils - input_length - output_length
    # would also fit, but it is excluded here; kept as-is so the number of
    # generated samples does not change.
    for i in range(fils - input_length - output_length):
        fin_entrada = i + input_length
        # Use the function parameter, not the notebook-level INPUT_LENGTH global
        X.append(array[i:fin_entrada, 0:cols])
        Y.append(
            array[fin_entrada:fin_entrada + output_length, -1].reshape(output_length, 1)
        )

    return np.array(X), np.array(Y)

Obtención de los datos supervisados

In [None]:
# Hyperparameters INPUT_LENGTH and OUTPUT_LENGTH
INPUT_LENGTH = 5     # 5 consecutive 5-minute records (25 min) fed as input
OUTPUT_LENGTH = 1    # one record predicted after the window (minute 30, per the original note)

# Supervised datasets for training (x_tr, y_tr), validation (x_vl, y_vl)
# and test (x_ts, y_ts), built in that order
(x_tr, y_tr), (x_vl, y_vl), (x_ts, y_ts) = (
    crear_dataset_supervisado(subserie.values, INPUT_LENGTH, OUTPUT_LENGTH)
    for subserie in (tr, vl, ts)
)

print('Tamaños entrada (BATCHES x INPUT_LENGTH x FEATURES) y de salida (BATCHES x OUTPUT_LENGTH x FEATURES)')
print(
    f'Set de entrenamiento - x_tr: {x_tr.shape}, y_tr: {y_tr.shape}',
    f'Set de validación - x_vl: {x_vl.shape}, y_vl: {y_vl.shape}',
    f'Set de prueba - x_ts: {x_ts.shape}, y_ts: {y_ts.shape}',
    sep='\n',
)

Tamaños entrada (BATCHES x INPUT_LENGTH x FEATURES) y de salida (BATCHES x OUTPUT_LENGTH x FEATURES)
Set de entrenamiento - x_tr: (374746, 5, 1), y_tr: (374746, 1, 1)
Set de validación - x_vl: (46838, 5, 1), y_vl: (46838, 1, 1)
Set de prueba - x_ts: (46839, 5, 1), y_ts: (46839, 1, 1)


Escalamiento de los datos, es decir, normalizarlos a un rango que la red LSTM acepte; en este caso se usará el rango de -1 a 1.

In [None]:
def escalar_dataset(data_input):
    """Scale the train/validation/test arrays to the range [-1, 1].

    Scalers are fitted on the training arrays only and then applied to the
    validation and test arrays. All arrays are read from ``data_input``;
    nothing is taken from the notebook's global namespace.

    Args:
        data_input: Dict with keys 'x_tr', 'y_tr', 'x_vl', 'y_vl', 'x_ts' and
            'y_ts'. The x arrays are indexed as (samples, steps, features);
            for the y arrays only channel 0 of the last axis is scaled.

    Returns:
        Tuple ``(data_scaled, scaler)``: ``data_scaled`` is a dict with keys
        'x_tr_s', 'y_tr_s', 'x_vl_s', 'y_vl_s', 'x_ts_s' and 'y_ts_s';
        ``scaler`` is ``scalers[0]``.
    """
    # Pull every array from the argument rather than from globals
    x_tr, y_tr = data_input['x_tr'], data_input['y_tr']
    x_vl, y_vl = data_input['x_vl'], data_input['y_vl']
    x_ts, y_ts = data_input['x_ts'], data_input['y_ts']

    NFEATS = x_tr.shape[2]

    # One scaler per input feature
    scalers = [MinMaxScaler(feature_range=(-1, 1)) for _ in range(NFEATS)]

    # Arrays that will hold the scaled datasets
    x_tr_s = np.zeros(x_tr.shape)
    x_vl_s = np.zeros(x_vl.shape)
    x_ts_s = np.zeros(x_ts.shape)
    y_tr_s = np.zeros(y_tr.shape)
    y_vl_s = np.zeros(y_vl.shape)
    y_ts_s = np.zeros(y_ts.shape)

    # Scale the inputs: fit on training data, reuse the fit for val/test
    for i in range(NFEATS):
        x_tr_s[:, :, i] = scalers[i].fit_transform(x_tr[:, :, i])
        x_vl_s[:, :, i] = scalers[i].transform(x_vl[:, :, i])
        x_ts_s[:, :, i] = scalers[i].transform(x_ts[:, :, i])

    # Scale the targets. This re-fits the last scaler on y_tr, replacing the
    # fit it received from the last input feature in the loop above.
    y_tr_s[:, :, 0] = scalers[-1].fit_transform(y_tr[:, :, 0])
    y_vl_s[:, :, 0] = scalers[-1].transform(y_vl[:, :, 0])
    y_ts_s[:, :, 0] = scalers[-1].transform(y_ts[:, :, 0])

    # Assemble the output dictionary
    data_scaled = {
        'x_tr_s': x_tr_s, 'y_tr_s': y_tr_s,
        'x_vl_s': x_vl_s, 'y_vl_s': y_vl_s,
        'x_ts_s': x_ts_s, 'y_ts_s': y_ts_s,
    }

    # With a single feature scalers[0] is the same object as scalers[-1],
    # i.e. the scaler fitted on the targets.
    # NOTE(review): with several features scalers[0] is NOT the target
    # scaler — confirm which one callers need for inverse transforms.
    return data_scaled, scalers[0]

Obtención de los datos escalados de WATTS


In [None]:
# Bundle the supervised datasets under the keys escalar_dataset expects
data_in = dict(
    x_tr=x_tr, y_tr=y_tr,
    x_vl=x_vl, y_vl=y_vl,
    x_ts=x_ts, y_ts=y_ts,
)

data_s, scaler = escalar_dataset(data_in)

# Unpack the scaled arrays into individual variables
x_tr_s, y_tr_s, x_vl_s, y_vl_s, x_ts_s, y_ts_s = (
    data_s[clave]
    for clave in ('x_tr_s', 'y_tr_s', 'x_vl_s', 'y_vl_s', 'x_ts_s', 'y_ts_s')
)

Importación de librerías para la creación y el entrenamiento de la red LSTM

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import RMSprop
import tensorflow as tf
# Fix TensorFlow's global random seed so runs start from the same state
tf.random.set_seed(123)
# Request deterministic op implementations from TensorFlow
# NOTE(review): presumably trades some speed for reproducibility — confirm
tf.config.experimental.enable_op_determinism()
# Value passed as the first argument of the LSTM layer (its number of units)
N_UNITS = 128

Creación de la red LSTM

In [None]:
# Shape of one input sample for the LSTM: every axis of x_tr_s after the
# sample axis, i.e. (steps, features)
INPUT_SHAPE = tuple(x_tr_s.shape[1:3])

# Model definition: one LSTM layer followed by a linear Dense output layer
modelo = Sequential([
    LSTM(N_UNITS, input_shape=INPUT_SHAPE),
    Dense(OUTPUT_LENGTH, activation='linear'),
])

Compilación del modelo

In [None]:
# Loss function used to train and evaluate the model
def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error between predictions and targets.

    The squared differences are averaged over every element of the tensors
    passed in, and the square root of that mean is returned.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.

    Returns:
        Scalar tensor with the RMSE.
    """
    errores_cuadrados = tf.square(y_pred - y_true)
    return tf.math.sqrt(tf.math.reduce_mean(errores_cuadrados))

# Optimizer and learning rate
optimizador = RMSprop(learning_rate=5e-5)

# Compile the model with the custom RMSE loss
modelo.compile(optimizer=optimizador, loss=root_mean_squared_error)

Entrenamiento del Modelo

In [None]:
# Training configuration: number of passes over the training set and the
# number of samples per gradient update
EPOCHS = 80
BATCH_SIZE = 256

# Train the model; the returned History object is kept in `historia`
historia = modelo.fit(
    x_tr_s,
    y_tr_s,
    validation_data=(x_vl_s, y_vl_s),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=2,
)


Epoch 1/80
1464/1464 - 41s - loss: 0.2338 - val_loss: 0.1939 - 41s/epoch - 28ms/step
Epoch 2/80
1464/1464 - 39s - loss: 0.1680 - val_loss: 0.1892 - 39s/epoch - 27ms/step
Epoch 3/80
1464/1464 - 39s - loss: 0.1654 - val_loss: 0.1872 - 39s/epoch - 27ms/step
Epoch 4/80
1464/1464 - 40s - loss: 0.1639 - val_loss: 0.1858 - 40s/epoch - 28ms/step
Epoch 5/80
1464/1464 - 39s - loss: 0.1628 - val_loss: 0.1844 - 39s/epoch - 27ms/step
Epoch 6/80
1464/1464 - 39s - loss: 0.1616 - val_loss: 0.1831 - 39s/epoch - 27ms/step
Epoch 7/80
1464/1464 - 38s - loss: 0.1605 - val_loss: 0.1823 - 38s/epoch - 26ms/step
Epoch 8/80
1464/1464 - 41s - loss: 0.1599 - val_loss: 0.1818 - 41s/epoch - 28ms/step
Epoch 9/80
1464/1464 - 39s - loss: 0.1597 - val_loss: 0.1815 - 39s/epoch - 27ms/step
Epoch 10/80
1464/1464 - 38s - loss: 0.1594 - val_loss: 0.1815 - 38s/epoch - 26ms/step
Epoch 11/80
1464/1464 - 41s - loss: 0.1592 - val_loss: 0.1812 - 41s/epoch - 28ms/step
Epoch 12/80
1464/1464 - 39s - loss: 0.1591 - val_loss: 0.1811 -

Evaluación y desempeño del modelo


In [None]:
# Evaluate the compiled loss on each scaled subset, in train/val/test order
rmse_tr, rmse_vl, rmse_ts = (
    modelo.evaluate(x=entradas, y=salidas, verbose=0)
    for entradas, salidas in (
        (x_tr_s, y_tr_s),
        (x_vl_s, y_vl_s),
        (x_ts_s, y_ts_s),
    )
)

# Side-by-side report of the three losses
print('Comparativo desempeños:')
print(
    f'  RMSE train:\t {rmse_tr:.3f}',
    f'  RMSE val:\t {rmse_vl:.3f}',
    f'  RMSE test:\t {rmse_ts:.3f}',
    sep='\n',
)

Comparativo desempeños:
  RMSE train:	 0.095
  RMSE val:	 0.133
  RMSE test:	 0.093
