In [1]:
import os
# os.environ['CUDA_VISIBLE_DEVICES']='2'
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import shutil
import numpy as np

import tensorflow as tf
from tensorflow import keras

from py_file.MLP import MiddleLayer, OutputLayer
from py_file.pkl_data_ori import load_data
from py_file.transformer import PositionalEmbedding, Transformer_encoder

In [2]:
# load_data() comes from py_file.pkl_data_ori and yields six objects, unpacked here
# as the train / test / valid inputs followed by the train / test / valid targets.
# NOTE(review): the split order is taken from the variable names only; confirm it
# against load_data() itself.
x_train_ori, x_test_ori, x_valid_ori, y_train, y_test, y_valid = load_data()
# Sanity check: show the shape of every split.
print(x_train_ori.shape, x_test_ori.shape, x_valid_ori.shape)
print(y_train.shape, y_test.shape, y_valid.shape)

In [14]:
# MAE of each of the four models after pre-training.
# The values are hard-coded; the commented-out loop below shows how they could be
# recomputed by evaluating each saved model on the test split (presumably how they
# were obtained -- confirm before relying on them).
ori_mae = [0.194, 0.1994, 0.1963, 0.1972]
# for i in range(4):
#     model = tf.keras.models.load_model('./models/transformer_smooth_gelu_stack1_'+str(i)+'in4_best_model.h5',custom_objects={"PositionalEmbedding": PositionalEmbedding, "Transformer_encoder":Transformer_encoder})
#     mse_mae = model.evaluate(x_test_ori, y_test, batch_size=256)
#     ori_mae.append(mse_mae[1])

In [15]:
# Settings for the hand-written MLP that chooses between the saved models.
n_mid = 32  # hidden width handed to MiddleLayer / OutputLayer
n_out = 4  # size of the MLP output; also the number of saved models get_t() iterates over
eta = 0.000001  # value passed to layer.update() in update_params() (presumably the learning rate)
epochs = 5  # passes over the training data in the outer training loop
batch_size = 100000  # samples per batch in the outer training loop
# Accumulators: the training loop appends MLP outputs to `pred` and targets to `true`.
pred, true = [], []

# Axis 1 and axis 2 of the training input; the notebook slices one feature at a
# time along axis 2 and feeds each slice (axis 1 wide) to its own input layer.
window_size = x_train_ori.shape[1]
figures_size = x_train_ori.shape[2]

In [16]:
# Re-arrange the training input feature-first: one [:, :, k] slice per feature,
# stacked along a new leading axis so x_train[k] is the slice for feature k.
x_train = np.array(
    [x_train_ori[:, :, feature] for feature in range(figures_size)]
)

In [17]:
# Initialise the MLP.
# One *independent* MiddleLayer per feature. A comprehension is used instead of
# `[MiddleLayer(...)] * figures_size` because list multiplication repeats a
# reference to a single object: every branch would then share one layer, each
# forward/backward call would overwrite the same `y` / `grad_x` attributes, and
# update_params() would call update() figures_size times on that one layer.
InputLayers = [MiddleLayer(window_size, n_mid) for _ in range(figures_size)]

# The per-feature outputs are concatenated (n_mid * figures_size wide) and passed
# through a hidden layer and then the n_out-wide output layer.
DenseLayers = [MiddleLayer(n_mid * figures_size, n_mid),
            OutputLayer(n_mid, n_out)]

# Forward propagation
def forward_propagation(x_figures):
    """Run the per-feature input layers, then the dense stack, and return the output.

    Args:
        x_figures: indexable container where ``x_figures[k]`` is the input for
            ``InputLayers[k]``.

    Returns:
        The ``y`` attribute of the last layer in ``DenseLayers`` (or the
        concatenated branch outputs if ``DenseLayers`` is empty).
    """
    def _activate(layer, inputs):
        # Layers expose their result through the `y` attribute after forward().
        layer.forward(inputs)
        return layer.y

    branch_outputs = [
        _activate(InputLayers[k], x_figures[k]) for k in range(len(InputLayers))
    ]

    # Join the branch outputs side by side (axis 1) before the dense stack.
    hidden = np.concatenate(branch_outputs, axis=1)
    for dense in DenseLayers:
        hidden = _activate(dense, hidden)
    return hidden
    
# Backpropagation
def backpropagation(t):
    """Push ``t`` backwards through the dense stack and then the input layers.

    ``t`` is handed unchanged to the last dense layer's ``backward``; each layer's
    ``grad_x`` becomes the input of the layer before it. The gradient leaving the
    first dense layer is split into ``figures_size`` equal parts along axis 1,
    one part per input layer.

    Args:
        t: value passed to the final layer's ``backward`` (the training loop
            passes the target matrix).

    Returns:
        List with the ``grad_x`` of every input layer, in ``InputLayers`` order.
    """
    upstream = t
    for dense in DenseLayers[::-1]:
        dense.backward(upstream)
        upstream = dense.grad_x

    per_branch = np.split(upstream, figures_size, axis=1)

    input_grads = []
    for k in range(len(InputLayers)):
        branch = InputLayers[k]
        branch.backward(per_branch[k])
        input_grads.append(branch.grad_x)
    return input_grads

# Parameter update
def update_params():
    """Call ``update(eta)`` on every input layer and then on every dense layer."""
    for layer in [*InputLayers, *DenseLayers]:
        layer.update(eta)

# Error measurement
def get_error(x, t):
    """Return the cross-entropy between the network output for ``x`` and ``t``.

    The sum of ``t * log(y + 1e-7)`` is negated and divided by ``len(y)``, where
    ``y`` is ``forward_propagation(x)``; the 1e-7 keeps ``log`` away from zero.
    """
    probs = forward_propagation(x)
    # Cross-entropy error
    log_likelihood = np.sum(t * np.log(probs + 1e-7))
    return -log_likelihood / len(probs)

def get_n(mat):
    """Return the 1-based index of the column that wins the most rows of ``mat``.

    Each row votes for its arg-max position; the most frequent vote wins, and a
    tie goes to the value that was voted for first. The result is the winning
    position plus one.

    NOTE(review): a later cell of this notebook defines ``get_n`` again, returning
    the 0-based index; once that cell has run it replaces this definition.

    Args:
        mat: iterable of rows (e.g. a 2-D array of per-sample scores).

    Returns:
        int: winning position + 1.

    Raises:
        ValueError: if ``mat`` has no rows.
    """
    # Single-pass tally instead of `max(votes, key=votes.count)`, which rescans
    # the whole vote list for every element (quadratic in the number of rows).
    # Dicts keep insertion order, so max() still resolves ties in favour of the
    # value seen first.
    tally = {}
    for row in mat:
        vote = int(np.argmax(row))
        tally[vote] = tally.get(vote, 0) + 1
    return max(tally, key=tally.get) + 1

In [18]:
def get_model(kd=70, nh=70):
    """Build the (uncompiled) transformer regressor used for the saved models.

    The input shape (10, 15) and ``d_model=15`` are fixed here.

    Args:
        kd: ``key_dim`` forwarded to ``Transformer_encoder``.
        nh: ``num_heads`` forwarded to ``Transformer_encoder``.

    Returns:
        A ``keras.Sequential`` model ending in a single linear unit.
    """
    model = keras.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(10, 15)))
    model.add(PositionalEmbedding(d_model=15))
    model.add(Transformer_encoder(key_dim=kd, num_heads=nh))
    model.add(
        keras.layers.Dense(
            86,
            activation="gelu",
            kernel_regularizer=keras.regularizers.l1_l2(0.1, 0.1),
        )
    )
    model.add(keras.layers.SpatialDropout1D(0.2))
    model.add(keras.layers.GlobalAveragePooling1D(data_format='channels_last'))
    model.add(keras.layers.Dense(1, activation='linear'))
    return model


def get_temp_path(n):
    """Return the ``./temp`` checkpoint path for model number ``n``."""
    return f"./temp/transformer_smooth_gelu_stack1_{n}in4_best_model.h5"

def get_save_path(n):
    """Return the ``./models`` checkpoint path for model number ``n``."""
    return f"./models/transformer_smooth_gelu_stack1_{n}in4_best_model.h5"

def get_t(x_train_batch, y_train_batch, validation_data=None):
    """Fine-tune every saved model on one batch and build the selector target.

    For each of the ``n_out`` saved models: load it from ``get_save_path(i)``,
    train it for 5 epochs on the batch, record how much its validation MAE
    changed between the first and the last of those epochs, and save the result
    to ``get_temp_path(i)``. If at least one model's validation MAE went down,
    the model with the largest decrease is promoted (its temp file is moved over
    its saved file) and a one-hot target marking it is returned for every sample.

    Args:
        x_train_batch: inputs of the batch; must expose ``.shape[0]``.
        y_train_batch: targets of the batch.
        validation_data: optional ``(x, y)`` pair used to score the fine-tuned
            models. Defaults to ``(x_test_ori, y_test)``.
            NOTE(review): the default scores and promotes models on the *test*
            split, which leaks test data into model selection; consider passing
            ``(x_valid_ori, y_valid)`` instead.

    Returns:
        ``(t, flag)``. When some model improved, ``t`` has shape
        ``(x_train_batch.shape[0], n_out)`` with a 1 in the winner's column and
        ``flag`` is True. Otherwise ``t`` is ``np.zeros(n_out)`` and ``flag`` is
        False; callers are expected to skip the batch.
    """
    if validation_data is None:
        validation_data = (x_test_ori, y_test)

    custom_objects = {"PositionalEmbedding": PositionalEmbedding,
                      "Transformer_encoder": Transformer_encoder}

    diff_arr = []
    for i in range(n_out):
        temp_model = tf.keras.models.load_model(get_save_path(i), custom_objects=custom_objects)
        temp_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), loss='mse', metrics=['mae'])
        print(f"Traing model_{i+1}: ")

        history = temp_model.fit(x_train_batch, y_train_batch,
                                 validation_data=validation_data,
                                 batch_size=32, epochs=5, verbose=1)
        val_mae = history.history['val_mae']
        # Negative means the validation MAE dropped between epoch 1 and epoch 5.
        diff = val_mae[-1] - val_mae[0]

        print(f"val: {diff}")
        diff_arr.append(diff)

        temp_model.save(get_temp_path(i))

        # Release Keras' global state after every model, on every path. Clearing
        # only when a model improved lets state pile up across batches in which
        # no model improves.
        del temp_model
        tf.keras.backend.clear_session()

    min_diff = min(diff_arr)
    if min_diff >= 0:
        # No model improved: nothing is promoted and there is no usable target.
        return np.zeros(n_out), False

    min_index = diff_arr.index(min_diff)
    one_hot = np.zeros(n_out)
    one_hot[min_index] = 1
    # One identical one-hot row per sample, built without a list of N copies.
    t = np.tile(one_hot, (x_train_batch.shape[0], 1))

    # Promote the winner: its fine-tuned weights replace the saved checkpoint.
    shutil.move(get_temp_path(min_index), get_save_path(min_index))
    return t, True

In [19]:
# Train the selector MLP. Each batch first fine-tunes the saved models via get_t(),
# which also produces the target t_mb; the MLP is only stepped when get_t() reports
# that some model improved (flag is True).
# NOTE(review): integer division drops any trailing samples that do not fill a
# whole batch, and yields zero batches if the data set is smaller than batch_size.
n_batch = x_train_ori.shape[0]//batch_size
flag = True

for e in range(epochs):
    print(f"-----Epoch_{e+1}-----")
    # Feature-first buffer for one batch; refilled in place for every batch.
    x_mb = np.zeros((figures_size, batch_size, window_size))
    # n and j are always equal here (enumerate over a range).
    for n, j in enumerate(range(n_batch)):
        print(f"-----batch {n+1}-----")
        x_mb_ori = x_train_ori[j*batch_size:(j+1)*batch_size]
        # Fine-tunes and saves the transformer models as a side effect.
        t_mb,flag = get_t(x_mb_ori, y_train[j*batch_size:(j+1)*batch_size])
        # Copy this batch's rows of every feature into the buffer.
        for i in range(figures_size):
            x_mb[i] = x_train[i, j*batch_size:(j+1)*batch_size, :]
        if flag:
            true.append(t_mb)
            p = forward_propagation(x_mb)
            pred.append(p)
            # The target itself is handed to backpropagation(); the forward pass
            # above must come first because the layers keep their state from it.
            backpropagation(t_mb)
            update_params()

-----Epoch_1-----
-----batch 1-----
Traing model_1: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: 0.0006446540355682373
Traing model_2: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: 0.009596288204193115
Traing model_3: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: 0.001091986894607544
Traing model_4: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: 0.002717643976211548
-----batch 2-----
Traing model_1: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: 0.0026337355375289917
Traing model_2: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: -0.007837727665901184
Traing model_3: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: -0.0060104429721832275
Traing model_4: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: -0.0008137822151184082
-----Epoch_2-----
-----batch 1-----
Traing model_1: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
val: 0.001156449317932129
Traing model_2: 
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

In [23]:
def get_n(mat):
    """Return the 0-based index of the column that wins the most rows of ``mat``.

    Each row votes for its arg-max position; the most frequent vote wins, and a
    tie goes to the value that was voted for first.

    NOTE(review): this redefines ``get_n`` from an earlier cell, which returned
    the same index plus one; after this cell runs, the 0-based version is the one
    in effect.

    Args:
        mat: iterable of rows (e.g. a 2-D array of per-sample scores).

    Returns:
        int: winning position (0-based).

    Raises:
        ValueError: if ``mat`` has no rows.
    """
    # Single-pass tally instead of `max(votes, key=votes.count)`, which rescans
    # the whole vote list for every element (quadratic in the number of rows).
    # Dicts keep insertion order, so max() still resolves ties in favour of the
    # value seen first.
    tally = {}
    for row in mat:
        vote = int(np.argmax(row))
        tally[vote] = tally.get(vote, 0) + 1

    return max(tally, key=tally.get)

# Run the selector MLP over the test split in fixed-size batches and report which
# model it picks for each batch.
test_batch_size = 10000
# Feature-first copy of the test input: x_test[k] is the [:, :, k] slice.
x_test = []
for i in range(figures_size):
    x_test.append(x_test_ori[:, :, i])
x_test = np.array(x_test)

# NOTE(review): this overwrites the n_batch set by the training cell, and integer
# division drops any trailing samples that do not fill a whole batch.
n_batch = len(x_test_ori)//test_batch_size
for j in range(n_batch):
    print(f"-----batch {j+1}-----")

    x_mb_test = np.zeros((figures_size, test_batch_size, window_size))

    for i in range(figures_size):
        x_mb_test[i] = x_test[i, j*test_batch_size:(j+1)*test_batch_size, :]

    y_test_batch = y_test[j*test_batch_size:(j+1)*test_batch_size]

    p = forward_propagation(x_mb_test)
    # NOTE(review): `pred` is the same list the training loop appends to, so test
    # and training outputs end up mixed in it.
    pred.append(p)

    # get_n() here is the 0-based version defined at the top of this cell; the
    # message below adds 1 for display.
    n = get_n(p)
    print(f"Predicting by model {n+1}!")

    # NOTE(review): the disabled evaluation below refers to x_test_batch, which is
    # not defined anywhere in this cell.
    # pred_model = tf.keras.models.load_model(get_save_path(n),custom_objects={"PositionalEmbedding": PositionalEmbedding, "Transformer_encoder":Transformer_encoder})
    # pred_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), loss='mse',metrics=['mae'])
    # x_test_batch = x_test_batch.reshape(x_test_batch.shape[0],10,15)
    # pred_model.evaluate(x_test_batch, y_test_batch)

-----batch 1-----
Predicting by model 2!
-----batch 2-----
Predicting by model 2!
-----batch 3-----
Predicting by model 2!
-----batch 4-----
Predicting by model 2!
-----batch 5-----
Predicting by model 2!
-----batch 6-----
Predicting by model 2!
-----batch 7-----
Predicting by model 2!
-----batch 8-----
Predicting by model 2!
-----batch 9-----
Predicting by model 2!
-----batch 10-----
Predicting by model 2!
-----batch 11-----
Predicting by model 2!
-----batch 12-----
Predicting by model 2!
-----batch 13-----
Predicting by model 2!
-----batch 14-----
Predicting by model 2!
-----batch 15-----
Predicting by model 2!


: 