In [1]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
class BoilerDataSet(object):
    
    def __init__(self, num_steps, val_ratio=0.1):
        self.num_steps = num_steps  # time steps
        self.val_ratio = val_ratio  # train test ratio
        
        # Read csv file
        self.raw_data = pd.read_csv("./data/sim_train.csv", index_col='时间戳')

        # sort csv file
        cols = self.raw_data.columns.tolist()
        # print("origin len: {0}".format(len(cols)))
        cols = (cols[51:52] + cols[53:59] + cols [60:61] + cols[62:63] + cols[150:152]   # external input 
            + cols[0:50] + cols[52:53] + cols[122:139]  # Coal Pulverizing state
            + cols[50:51] + cols[59:60] + cols[61:62] + cols[63:101] + cols[112:114] + cols[118:122] + cols[139:145] + cols[146:149] + cols[152:158]    # Burning state
            + cols[101:112] + cols[114:118] + cols[145:146] + cols[149:150] # Steam Circulation state
            + cols[158:173] + cols[196:202] # Coal Pulverizing action
            + cols[173:192]                 # Burning action
            + cols[192:196])                # Steam Circulation action
        # print("ordered len: {0}".format(len(cols)))
        self.raw_data = self.raw_data[cols]

        # divide train set and valid set
        self.train_X, self.train_y, self.valid_X, self.valid_y = self.prepare_data(self.raw_data)

    def prepare_data(self, data):
        # split into groups of num_steps

        # 取出输入数据，学习num_steps步长的历史，iloc：通过行号获取行数据
        X = np.array([data.iloc[i: i + self.num_steps].values
                    for i in range(len(data) - self.num_steps)])

        # 取出输出数据，预测第num_steps步的值训练，ix / loc 可以通过行号和行标签进行索引
        # 这里只要对状态量进行预测即可，0-157列为 'A磨煤机电流':'大渣可燃物含量'
        y = np.array([data.iloc[i + self.num_steps, 158:203].values
                    for i in range(len(data) - self.num_steps)])

        train_size = int(len(X) * (1.0 - self.val_ratio))
        train_X, valid_X = X[:train_size], X[train_size:]
        train_y, valid_y = y[:train_size], y[train_size:]
        return train_X, train_y, valid_X, valid_y

    def generate_one_epoch(self, data_X, data_y, batch_size):
        num_batches = int(len(data_X)) // batch_size
        # if batch_size * num_batches < len(self.train_X):
        #     num_batches += 1

        batch_indices = list(range(num_batches))
        random.shuffle(batch_indices)
        for j in batch_indices:
            batch_X = data_X[j * batch_size: (j + 1) * batch_size]
            batch_y = data_y[j * batch_size: (j + 1) * batch_size]
            yield batch_X, batch_y

In [3]:
num_steps = 10
valid_ratio = 0.2

input_size = 202
num_neurons = 160
num_layers = 3
output_size = 44

learning_rate = 0.001
learning_rate_decay = 0.95

max_epoch = 50
batch_size = 1

save_log_iter = 10
display_iter = 20

tf.random.set_seed(2022)

In [4]:
def SimulatorRNNModel():
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=[num_steps, input_size], name="inputs"),
        tf.keras.layers.LSTM(256, return_sequences=True),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dense(output_size, name="targets")
    ])

    model.compile(
        loss=tf.keras.losses.MeanSquaredError(), 
        optimizer=tf.keras.optimizers.Nadam(learning_rate=learning_rate), 
        metrics=['mean_absolute_error']
    )

    return model

model = SimulatorRNNModel()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 256)           470016    
                                                                 
 lstm_1 (LSTM)               (None, 128)               197120    
                                                                 
 targets (Dense)             (None, 44)                5676      
                                                                 
Total params: 672,812
Trainable params: 672,812
Non-trainable params: 0
_________________________________________________________________


In [5]:
def fit_and_evaluate(model, train_X, train_y, valid_X, valid_y, learning_rate, batch_size=1, epochs=500):
    callback_list = [
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", 
            patience=50, 
            restore_best_weights=True
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath='./logs/LSTM/saved_models/model-{epoch:02d}-{val_loss:.4f}.h5',
            monitor="val_loss",
            verbose=1,
            save_weights_only=True,
            save_best_only=True,
        ),
        tf.keras.callbacks.TensorBoard(log_dir='./logs/')
    ]   
    
    history = model.fit(
        x=train_X, y=train_y,
        epochs=epochs, 
        validation_data=(valid_X, valid_y),
        callbacks=callback_list)
    valid_loss, valid_mae = model.evaluate(x=valid_X, y=valid_y) # Returns the loss value & metrics values for the model in test mode
    return valid_mae * 1e6  # valid mean absolute error

In [6]:
# read data
boiler_dataset = BoilerDataSet(num_steps=num_steps, val_ratio=valid_ratio)
train_X, train_y = boiler_dataset.train_X, boiler_dataset.train_y
valid_X, valid_y = boiler_dataset.valid_X, boiler_dataset.valid_y

In [7]:
print(train_y.shape, valid_y.shape) 

(8, 44) (2, 44)


In [8]:
fit_and_evaluate(model, train_X, train_y, valid_X, valid_y, learning_rate, batch_size, max_epoch)

Epoch 1/50
Epoch 1: val_loss improved from inf to 0.06152, saving model to ./logs/LSTM/saved_models\model-01-0.0615.h5
Epoch 2/50
Epoch 2: val_loss improved from 0.06152 to 0.04261, saving model to ./logs/LSTM/saved_models\model-02-0.0426.h5
Epoch 3/50
Epoch 3: val_loss improved from 0.04261 to 0.02872, saving model to ./logs/LSTM/saved_models\model-03-0.0287.h5
Epoch 4/50
Epoch 4: val_loss improved from 0.02872 to 0.01825, saving model to ./logs/LSTM/saved_models\model-04-0.0183.h5
Epoch 5/50
Epoch 5: val_loss improved from 0.01825 to 0.00937, saving model to ./logs/LSTM/saved_models\model-05-0.0094.h5
Epoch 6/50
Epoch 6: val_loss improved from 0.00937 to 0.00525, saving model to ./logs/LSTM/saved_models\model-06-0.0052.h5
Epoch 7/50
Epoch 7: val_loss improved from 0.00525 to 0.00496, saving model to ./logs/LSTM/saved_models\model-07-0.0050.h5
Epoch 8/50
Epoch 8: val_loss did not improve from 0.00496
Epoch 9/50
Epoch 9: val_loss improved from 0.00496 to 0.00391, saving model to ./logs

31899.48946237564