# In [3]:
#
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers, utils, backend
import matplotlib.pyplot as plt

import xlrd
import time
import os

from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.compat.v1.keras.backend import set_session
# Hide every GPU so TensorFlow runs on the CPU; comment this line out to use the GPU.
# It has to run before the session below is created: the CUDA runtime reads
# CUDA_VISIBLE_DEVICES once, when TensorFlow first initialises its devices, so
# setting it afterwards has no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

tf.keras.backend.clear_session()  # For easy reset of notebook state.
# Only relevant when the GPU is enabled: cap this process at 90% of GPU memory.
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.9)
config_proto = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
# Switch off grappler's arithmetic optimisation pass for this session.
off = rewriter_config_pb2.RewriterConfig.OFF
config_proto.graph_options.rewrite_options.arithmetic_optimization = off
sess = tf.compat.v1.Session(config=config_proto)
set_session(sess)

# Input data
INPUT_SIZE = [290, 7]  # [time_steps, input_vector]
TIME_STEPS = INPUT_SIZE[0]
INPUT_NUM = INPUT_SIZE[1]
# [top-level steps, time_steps, input_vector]; the feature size is tied to
# INPUT_NUM so the two cannot drift apart.
ENTIRE_INPUT_SIZE = [5, TIME_STEPS, INPUT_NUM]
ENTIRE_TIME_STEPS = ENTIRE_INPUT_SIZE[0]  # total number of top-level steps

# Local encoder
LSTM1_INPUT_SIZE = 58  # length of one local time segment
LSTM1_SIZE1 = 30
LSTM1_SIZE2 = 20
LSTM1_DENSE_SIZE = 20

# Local decoder
LSTM2_SIZE1 = LSTM1_DENSE_SIZE
LSTM2_SIZE2 = 30

# Global encoder
# The local encoder reshapes TIME_STEPS into whole segments of
# LSTM1_INPUT_SIZE steps, so the division must be exact.
if TIME_STEPS % LSTM1_INPUT_SIZE:
    raise ValueError(
        f"TIME_STEPS ({TIME_STEPS}) must be a multiple of "
        f"LSTM1_INPUT_SIZE ({LSTM1_INPUT_SIZE})"
    )
LSTM3_INPUT_SIZE = TIME_STEPS // LSTM1_INPUT_SIZE  # number of local segments seen by the global encoder
LSTM3_INPUT_FEATURE = LSTM1_DENSE_SIZE * 2  # local state_h and state_c concatenated
LSTM3_SIZE1 = 40
LSTM3_SIZE2 = 30
LSTM3_DENSE_SIZE = 30

# Global decoder
LSTM4_SIZE1 = LSTM3_DENSE_SIZE
LSTM4_SIZE2 = 40

# Top-level temporal encoder
LSTM5_INPUT_SIZE = ENTIRE_INPUT_SIZE[0]
LSTM5_SIZE1 = 60
LSTM5_SIZE2 = 45
LSTM5_DENSE_SIZE = 45

# Decision head
DENSE_SIZE = 600
OUTPUT_SIZE = 3

# Training settings
BATCH_SIZE = 128
PREDICTOR_TRAIN_BATCH1 = 15  # passed to fit() as the number of epochs

TRAINDATA_LOADPATH = 'tool_wear_data_4/train_data.npy'  # training data
TRAINLABEL_LOADPATH = 'tool_wear_data_4/train_label.npy'  # training labels
TESTDATA_LOADPATH = 'tool_wear_data_4/test_data.npy'  # validation data
TESTLABEL_LOADPATH = 'tool_wear_data_4/test_label.npy'  # validation labels
SUMMARY_PATH = './logs'  # log directory

############################## Dataset loading ########################################
X_train = np.load(TRAINDATA_LOADPATH)
Y_train = np.load(TRAINLABEL_LOADPATH)
X_test = np.load(TESTDATA_LOADPATH)
Y_test = np.load(TESTLABEL_LOADPATH)

# The model's Input layers are built with a fixed batch_size=BATCH_SIZE, so each
# array is cut down to a whole number of batches.
# The feature trim uses `...` so it always applies to the LAST axis: a plain
# `[:, :, :INPUT_NUM]` would trim the time axis of a 4-D
# (samples, steps, time_steps, features) array instead of its features.
# NOTE(review): X and Y are truncated by their own lengths, which assumes the
# data and label files hold the same number of samples - confirm.
X_train = X_train[:X_train.shape[0] - X_train.shape[0] % BATCH_SIZE, ..., :INPUT_NUM]
Y_train = Y_train[:Y_train.shape[0] - Y_train.shape[0] % BATCH_SIZE, :OUTPUT_SIZE]
X_test = X_test[:X_test.shape[0] - X_test.shape[0] % BATCH_SIZE, ..., :INPUT_NUM]
Y_test = Y_test[:Y_test.shape[0] - Y_test.shape[0] % BATCH_SIZE, :OUTPUT_SIZE]

print('X_train', np.shape(X_train))


################### Local encoder ####################################
def adjust_range(x):
    """Min-max rescale ``x`` along axis 1 into approximately [-0.9, 0.9].

    The minimum and maximum are taken over axis 1 with ``keepdims=True`` so
    they broadcast against ``x`` (axis 0 is the batch axis). The ``1e-6``
    term keeps the division finite when a slice is constant.

    Returns a tensor with the same shape as ``x``.
    """
    lower = backend.min(x, axis=1, keepdims=True)
    upper = backend.max(x, axis=1, keepdims=True)
    span = upper - lower + 1e-6
    return (x - lower) / span * 1.8 - 0.9

# Build the local encoder: normalise the input window, cut it into fixed-length
# segments and encode every segment with a shared two-layer LSTM.
# Tensors inside a model must be connected through layers; anything that is not
# a layer output has to be brought back in through a new Input.
input_img = layers.Input(shape=INPUT_SIZE, batch_size=BATCH_SIZE, name = 'input')# shape=[time_steps, input_vector]
# Normalisation runs before the reshape, so min/max are taken over the whole window.
x = layers.Lambda(adjust_range, name = 'adjust_range')(input_img)
# Split the time axis into LSTM3_INPUT_SIZE segments of LSTM1_INPUT_SIZE steps each.
x = layers.Reshape([LSTM3_INPUT_SIZE, LSTM1_INPUT_SIZE, INPUT_NUM])(x)
# TimeDistributed applies the same LSTM to every segment independently.
x = layers.TimeDistributed(layers.LSTM(LSTM1_SIZE1, return_sequences=True))(x)
# Keep only the final hidden and cell states of the second LSTM; its output is discarded.
_, local_state_h, local_state_c = layers.TimeDistributed(layers.LSTM(LSTM1_SIZE2, return_sequences=False, return_state=True))(x)
local_state_h = layers.TimeDistributed(layers.Dense(LSTM1_DENSE_SIZE, name = 'state_h'))(local_state_h)# [batch, LSTM3_INPUT_SIZE, LSTM1_DENSE_SIZE]
local_state_c = layers.TimeDistributed(layers.Dense(LSTM1_DENSE_SIZE, name = 'state_c'))(local_state_c)
print('local_state_h', local_state_h)
local_encoder = models.Model(inputs=input_img, outputs=[local_state_h, local_state_c], name='local_encoder_model')# local LSTM encoder

################### Global encoder ##################################
# Takes the local encoder's states as input and outputs global_state_h, global_state_c.
def global_encoder_model_construct(name):
    """Build the two-layer LSTM global encoder and return it as a Keras model.

    The model input has shape [LSTM3_INPUT_SIZE, LSTM3_INPUT_FEATURE] with a
    fixed batch size of BATCH_SIZE. The final hidden and cell states of the
    second LSTM are each projected by a Dense layer of LSTM3_DENSE_SIZE units,
    so both outputs have shape [batch, LSTM3_DENSE_SIZE].

    Args:
        name: name given to the returned model.

    Returns:
        A ``models.Model`` mapping the input to ``[state_h, state_c]``.
    """
    encoder_in = layers.Input(
        shape=[LSTM3_INPUT_SIZE, LSTM3_INPUT_FEATURE],
        batch_size=BATCH_SIZE,
        name='global_encoder_input',
    )
    sequence = layers.LSTM(
        LSTM3_SIZE1, return_sequences=True, name='global_encoded_LSTM1'
    )(encoder_in)
    # Only the final states are used; the LSTM output itself is dropped.
    _, final_h, final_c = layers.LSTM(
        LSTM3_SIZE2,
        return_sequences=False,
        return_state=True,
        name='global_encoded_LSTM2',
    )(sequence)
    projected_h = layers.Dense(LSTM3_DENSE_SIZE, name='global_state_h')(final_h)
    print('global_state_h', projected_h)
    projected_c = layers.Dense(LSTM3_DENSE_SIZE, name='global_state_c')(final_c)
    return models.Model(inputs=encoder_in, outputs=[projected_h, projected_c], name=name)

global_encoder = global_encoder_model_construct('global_encoder_model')# global LSTM encoder

# Feature-extraction network: local encoder -> global encoder -> one feature vector.
# Join the local hidden and cell states along the feature axis (axis 2).
global_encoder_input_union = layers.Concatenate(axis = 2, name='global_encoder_input_union')([local_state_h, local_state_c])
global_state_h, global_state_c = global_encoder(global_encoder_input_union)
# Join the two global states into a single vector per sample.
feature_extracted = layers.Concatenate(axis = 1)([global_state_h, global_state_c])
extract_network = models.Model(inputs=input_img, outputs=feature_extracted, name='extract_network')

################### Top-level encoder ##################################
# Takes the per-step features from the extraction network and outputs one feature vector.
def top_encoder_model_construct(name):
    """Build the two-layer LSTM top-level encoder and return it as a Keras model.

    The model input has shape [LSTM5_INPUT_SIZE, LSTM3_DENSE_SIZE*2] with a
    fixed batch size of BATCH_SIZE. The last output of the second LSTM is
    projected by a Dense layer of LSTM5_DENSE_SIZE units, giving an output of
    shape [batch, LSTM5_DENSE_SIZE].

    Args:
        name: name given to the returned model.

    Returns:
        A ``models.Model`` mapping the input sequence to the projected state.
    """
    encoder_in = layers.Input(
        shape=[LSTM5_INPUT_SIZE, LSTM3_DENSE_SIZE*2],
        batch_size=BATCH_SIZE,
        name='top_encoder_input',
    )
    sequence = layers.LSTM(
        LSTM5_SIZE1, return_sequences=True, name='top_encoded_LSTM1'
    )(encoder_in)
    last_output = layers.LSTM(
        LSTM5_SIZE2, return_sequences=False, name='top_encoded_LSTM2'
    )(sequence)
    projected = layers.Dense(LSTM5_DENSE_SIZE, name='top_state_h')(last_output)
    return models.Model(inputs=encoder_in, outputs=projected, name=name)
top_encoder = top_encoder_model_construct('top_encoder_model')  # top-level LSTM encoder

################### Top-level LSTM input ######################################
entire_input = layers.Input(shape=ENTIRE_INPUT_SIZE, batch_size=BATCH_SIZE, name='entire_input')


def _select_step(index):
    """Return a Lambda layer that picks slice ``index`` along axis 1 of a 4-D tensor.

    A factory is used so each lambda closes over its own ``index``; lambdas
    written directly inside a loop would all see the loop's final value.
    """
    return layers.Lambda(lambda x: x[:, index, :, :])


# One window per top-level step, driven by ENTIRE_TIME_STEPS rather than five
# copy-pasted chains. The three passes keep the original layer creation order:
# all slices, then all extract_network calls, then all reshapes.
top_inputs = [_select_step(step)(entire_input) for step in range(ENTIRE_TIME_STEPS)]
# extract_network is one shared model, so every step uses the same weights.
step_features = [extract_network(top_input) for top_input in top_inputs]
# Give each feature vector a length-1 step axis so they can be stacked in order.
step_features = [layers.Reshape([1, LSTM3_DENSE_SIZE*2])(step_feature)
                 for step_feature in step_features]
# Result: [batch, ENTIRE_TIME_STEPS, LSTM3_DENSE_SIZE*2].
# Concatenate needs at least two inputs, so ENTIRE_TIME_STEPS must be >= 2.
feature = layers.Concatenate(axis=1)(step_features)

################### Decision head ######################################
# Disabled variant that also concatenated a variance-style input (var_input_layer
# is not defined in this file):
# var_input = layers.Lambda(var_input_layer, name = 'var_input_layer')(entire_input)
# var_input = layers.Lambda(lambda x:x/10)(var_input)
# #var_model = models.Model(inputs=entire_input, outputs=var_input, name='var_input')
# top_state_h = top_encoder(feature)
# LSTM_encoder = models.Model(inputs=entire_input, outputs=top_state_h, name='LSTM_encoder')
# x = layers.Concatenate(axis = 1)([top_state_h, var_input])

# Encode the sequence of per-step features into a single vector.
top_state_h = top_encoder(feature)
# Sub-model exposing the encoder output for the same input.
LSTM_encoder = models.Model(inputs=entire_input, outputs=top_state_h, name='LSTM_encoder')
x = layers.Dense(DENSE_SIZE, activation='tanh')(top_state_h)
# Sub-model exposing the hidden dense activations (taken before dropout).
dense_layer1 = models.Model(inputs=entire_input, outputs=x, name='dense_layer1')
x = layers.Dropout(0.5)(x)
# tanh bounds every output to (-1, 1).
# NOTE(review): presumably the labels are scaled into that range - confirm.
output = layers.Dense(OUTPUT_SIZE, name='output', activation='tanh')(x)
predictor = models.Model(inputs=entire_input, outputs=output, name='predictor_model')
print('output', output)

############################################# Model training #########################################################################
time_start = time.time()

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
adam = keras.optimizers.Adam(learning_rate=0.0001)
# Multiply the learning rate by 0.2 when val_loss stops improving, down to 1e-5.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, min_lr=0.00001, mode='auto')

# predictor
predictor.compile(optimizer=adam, loss='logcosh')

# Validate on the arrays loaded from TESTDATA_LOADPATH / TESTLABEL_LOADPATH;
# the previous X_validation / Y_validation names were never defined.
# reduce_lr is passed via `callbacks`, otherwise it has no effect.
# NOTE(review): an earlier comment here said the loss merges `decoded` and
# `local_decoded` errors; no such outputs exist in this script, the only loss
# is the predictor's log-cosh.
history = predictor.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                        epochs=PREDICTOR_TRAIN_BATCH1, batch_size=BATCH_SIZE, shuffle=True, verbose=1,
                        callbacks=[reduce_lr])

#for layer in predictor.layers:
#    print(layer.name)
#    print(predictor.get_layer(layer.name).get_weights())

print('time1 =  ', time.time()-time_start)


# Output recorded when this cell was run:
# MemoryError: Unable to allocate 1.60 GiB for an array with shape (429506560,) and data type float32