In [None]:
import os
import math
import numpy as np
import pandas as pd
import datetime as dt
from numpy import newaxis
import datetime as dt
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.models import Sequential, load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
import json
import time
import matplotlib.pyplot as plt
from keras.models import  load_model
from  sklearn import  metrics
import itertools 

In [None]:
class Timer():
	"""Minimal wall-clock stopwatch that reports the elapsed time on stdout."""

	def __init__(self):
		# No reference time exists until start() has been called.
		self.start_dt = None

	def start(self):
		"""Remember the current time as the reference point."""
		self.start_dt = dt.datetime.now()

	def stop(self):
		"""Print the time elapsed since the most recent start() call."""
		elapsed = dt.datetime.now() - self.start_dt
		print('Time taken: %s' % elapsed)

class Model():
	"""Thin wrapper around a Keras ``Sequential`` network built from a config dict."""

	def __init__(self):
		self.model = Sequential()

	def load_model(self, filepath):
		"""Replace the wrapped network with the one stored at ``filepath``."""
		print('[Model] Loading model from file %s' % filepath)
		self.model = load_model(filepath)

	def build_model(self, configs):
		"""Append the configured layers to the wrapped network and compile it.

		``configs['model']['layers']`` is a list of dicts. Each dict needs a
		``'type'`` key (``'dense'``, ``'lstm'`` or ``'dropout'``) and may carry
		``'neurons'``, ``'rate'``, ``'activation'``, ``'return_seq'``,
		``'input_timesteps'`` and ``'input_dim'``; missing keys default to None.
		``configs['model']['loss']`` and ``configs['model']['optimizer']`` are
		forwarded to ``compile``.

		Returns the compiled Keras model (also kept in ``self.model``).
		"""
		timer = Timer()
		timer.start()

		for layer in configs['model']['layers']:
			layer_type = layer['type']
			neurons = layer.get('neurons')
			dropout_rate = layer.get('rate')
			activation = layer.get('activation')
			return_seq = layer.get('return_seq')
			input_timesteps = layer.get('input_timesteps')
			input_dim = layer.get('input_dim')

			if layer_type == 'dense':
				self.model.add(Dense(neurons, activation=activation))
			elif layer_type == 'lstm':
				lstm_kwargs = {'return_sequences': return_seq}
				# Only declare an input shape when the config provides one;
				# otherwise the layer takes its shape from the previous layer.
				if input_timesteps is not None or input_dim is not None:
					lstm_kwargs['input_shape'] = (input_timesteps, input_dim)
				self.model.add(LSTM(neurons, **lstm_kwargs))
			elif layer_type == 'dropout':
				self.model.add(Dropout(dropout_rate))
			else:
				# Unknown types are still skipped, but no longer silently.
				print('[Model] Warning: ignoring unknown layer type %r' % (layer_type,))

		self.model.compile(loss=configs['model']['loss'], optimizer=configs['model']['optimizer'])

		print('[Model] Model Compiled')
		timer.stop()

		return self.model

	def train(self, x, y,x_val,y_val, epochs, batch_size, save_dir):
		"""Fit the network with early stopping and checkpointing.

		Args:
			x, y: training inputs and targets.
			x_val, y_val: validation inputs and targets passed to ``fit``.
			epochs: maximum number of epochs.
			batch_size: mini-batch size.
			save_dir: directory for the checkpoint file; created if missing.

		The checkpoint is named ``<ddmmYYYY-HHMMSS>-e<epochs>.h5`` using the
		time at which training starts.
		"""
		timer = Timer()
		timer.start()
		print('[Model] %s epochs, %s batch size' % (epochs, batch_size))

		# The checkpoint cannot be written into a directory that does not exist.
		if save_dir:
			os.makedirs(save_dir, exist_ok=True)

		save_fname = os.path.join(save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))
		# NOTE(review): early stopping watches 'val_loss' while the checkpoint
		# keeps the epoch with the best *training* 'loss' - confirm this
		# asymmetry is intended.
		callbacks = [
			EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=4),
			ModelCheckpoint(filepath=save_fname, monitor='loss', save_best_only=True)
		]

		self.model.fit(
			x,
			y,
			validation_data =(x_val,y_val),
			epochs=epochs,
			batch_size=batch_size,
			callbacks=callbacks
		)

		timer.stop()

	def predict_point_by_point(self, data,debug=False):
		"""Predict one value per input window and return them as a flat 1-D array.

		With a truthy ``debug`` the shapes of the input, the raw prediction and
		the flattened prediction are printed as well.
		"""
		print('[Model] Predicting Point-by-Point...')
		if debug:
			print (np.array(data).shape)
		predicted = self.model.predict(data)
		if debug:
			print (np.array(predicted).shape)
		predicted = np.reshape(predicted, (predicted.size,))
		if debug:
			print (np.array(predicted).shape)
		return predicted

In [None]:
class DataLoader():
    """Load selected CSV columns and slice them into sliding windows.

    In every window the first selected column supplies the target (taken from
    the window's last row) and the remaining columns supply the features
    (taken from all rows but the last).
    """

    def __init__(self, filename, split, cols):
        """Read ``filename`` and split its rows into a train and a test part.

        Args:
            filename: path of the CSV file.
            split: fraction of rows (from the top) that goes to the train part.
            cols: column names to keep; the first one is the target column.
        """
        dataframe = pd.read_csv(filename)
        i_split = int(len(dataframe) * split)

        selected = dataframe.get(cols)
        values = selected.values
        self.data_train = values[:i_split]
        self.data_test  = values[i_split:]
        self.all_data = selected
        self.len_all  = len(self.all_data)
        self.len_train  = len(self.data_train)
        self.len_test   = len(self.data_test)
        self.len_train_windows = None

    def _make_windows(self, values, n_rows, seq_len, normalise):
        """Return a float array (n_windows, seq_len, n_cols) of sliding windows.

        ``n_rows - seq_len`` windows are produced. When that count is not
        positive an empty, correctly shaped array is returned.
        """
        values = np.asarray(values, dtype=float)
        n_windows = n_rows - seq_len
        if n_windows <= 0:
            return np.empty((0, seq_len) + values.shape[1:], dtype=float)

        windows = np.array([values[i:i + seq_len] for i in range(n_windows)])
        if normalise:
            windows = self.normalise_windows(windows, single_window=False)
        return windows

    def get_all_data(self,seq_len,normalise):
        """Window the complete data set (train and test rows together).

        Returns ``(x, y)`` with ``x`` of shape (n, seq_len - 1, n_cols - 1) and
        ``y`` of shape (n, 1).
        """
        # Slice the ndarray, not the DataFrame: same values, far cheaper.
        data_windows = self._make_windows(self.all_data.values, self.len_all, seq_len, normalise)
        x = data_windows[:, :-1,1:]
        y = data_windows[:, -1, [0]]
        return x,y

    def get_test_data(self, seq_len, normalise):
        """Window the test rows; ``normalise`` is honoured like in the other getters.

        Returns ``(x, y)`` with ``x`` of shape (n, seq_len - 1, n_cols - 1) and
        ``y`` of shape (n, 1).
        """
        data_windows = self._make_windows(self.data_test, self.len_test, seq_len, normalise)
        x = data_windows[:, :-1,1:]
        y = data_windows[:, -1, [0]]
        return x,y

    def get_train_data(self, seq_len, normalise):
        """Window the train rows and return ``(x, y)`` as stacked arrays."""
        pairs = [self._next_window(i, seq_len, normalise)
                 for i in range(self.len_train - seq_len)]
        data_x = [x for x, _ in pairs]
        data_y = [y for _, y in pairs]
        return np.array(data_x), np.array(data_y)

    def _next_window(self, i, seq_len, normalise):
        """Return ``(x, y)`` for the train window that starts at row ``i``."""
        window = self.data_train[i:i+seq_len]
        window = self.normalise_windows(window, single_window=True)[0] if normalise else window
        x = window[:-1,1:]
        y = window[-1, [0]]
        return x, y

    def normalise_windows(self, window_data, single_window=False):
        """Rescale every window column to ``value / first_value_of_column - 1``.

        Args:
            window_data: one 2-D window (``single_window=True``) or an iterable
                of 2-D windows.
            single_window: whether ``window_data`` is a single window.

        Returns:
            Array of shape (n_windows, seq_len, n_cols).

        Raises:
            ZeroDivisionError: if the first row of a window contains a zero.
        """
        windows = [window_data] if single_window else window_data

        normalised_data = []
        for window in windows:
            window = np.asarray(window, dtype=float)
            base = window[0]
            if np.any(base == 0):
                # Keep the exception type of plain float division.
                raise ZeroDivisionError('cannot normalise a window whose first row contains 0')
            normalised_data.append(window / base - 1)
        return np.array(normalised_data)

In [None]:
# Plot the results
def plot_results(predicted_data, true_data, save_path='results_2.png'):
    """Plot predictions against the true series and save the figure.

    Each predicted point is a one-step prediction made from a known sequence.

    Args:
        predicted_data: sequence of predicted values.
        true_data: sequence of observed values.
        save_path: file the figure is written to (default ``'results_2.png'``).
    """
    # One figure carrying both the size and the face colour, drawn through a
    # single Axes object rather than a mix of `ax.` and `plt.` calls.
    fig, ax = plt.subplots(figsize=(16, 5), facecolor='white')
    ax.plot(true_data, label='True Data')
    ax.plot(predicted_data, label='Prediction')

    ax.set_xlabel('Time Series')
    ax.set_ylabel('Water Level')
    ax.legend()
    fig.savefig(save_path)

def GetRMSE(y_hat,y_test):
    """Root-mean-square error of predictions ``y_hat`` against targets ``y_test``."""
    mean_squared = metrics.mean_squared_error(y_test, y_hat)
    return np.sqrt(mean_squared)

def GetMAE(y_hat,y_test):
    """Mean absolute error of predictions ``y_hat`` against targets ``y_test``."""
    return metrics.mean_absolute_error(y_test, y_hat)

def GetMAPE(y_hat,y_test):
    """Mean absolute percentage error, in percent.

    Both inputs may be lists or arrays of any matching layout. They are
    flattened first, so a ``(n,)`` prediction paired with a ``(n, 1)`` target
    is compared element by element instead of being broadcast to ``(n, n)``.

    Zeros in ``y_test`` produce ``inf``/``nan``; use ``GetMAPE_Order`` to skip them.
    """
    predicted = np.asarray(y_hat, dtype=float).ravel()
    observed = np.asarray(y_test, dtype=float).ravel()
    return np.mean(np.abs((predicted - observed) / observed)) * 100

def GetMAPE_Order(y_hat,y_test):
    """Mean absolute percentage error (percent) over samples whose target is non-zero.

    Inputs may be lists or arrays; both are flattened and must have the same
    number of elements. Returns ``nan`` when every target is zero.
    """
    predicted = np.asarray(y_hat, dtype=float).ravel()
    observed = np.asarray(y_test, dtype=float).ravel()
    # A boolean mask drops the zero targets regardless of the input layout.
    non_zero = observed != 0
    predicted = predicted[non_zero]
    observed = observed[non_zero]
    return np.mean(np.abs((predicted - observed) / observed)) * 100

def nash_sutcliffe(obs, sim):
    """Nash-Sutcliffe efficiency: ``1 - sum((obs - sim)^2) / sum((obs - mean(obs))^2)``.

    ``obs`` and ``sim`` may be lists or arrays; both are flattened so that
    differently shaped inputs of equal size are compared element by element.
    A constant ``obs`` makes the denominator zero and yields ``nan``/``-inf``.
    """
    observed = np.asarray(obs, dtype=float).ravel()
    simulated = np.asarray(sim, dtype=float).ravel()
    residual_ss = np.sum((observed - simulated) ** 2)
    total_ss = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - residual_ss / total_ss

def calculate_kge(observations, simulations):
    """Kling-Gupta efficiency of ``simulations`` against ``observations``.

    ``KGE = 1 - sqrt((r - 1)^2 + (alpha - 1)^2 + (beta - 1)^2)`` where ``r`` is
    the linear correlation, ``alpha`` the ratio of standard deviations and
    ``beta`` the ratio of means (simulated / observed).

    Inputs may be lists or arrays and are flattened first, so that
    ``np.corrcoef`` sees two variables rather than one variable per row.
    """
    observed = np.asarray(observations, dtype=float).ravel()
    simulated = np.asarray(simulations, dtype=float).ravel()

    r = np.corrcoef(observed, simulated)[0, 1]
    alpha = np.std(simulated) / np.std(observed)    # variability ratio
    beta = np.mean(simulated) / np.mean(observed)   # bias ratio

    kge = 1 - np.sqrt((r - 1)**2 + (alpha - 1)**2 + (beta - 1)**2)
    return kge

def get_normalize_parameters(train_year, test_year,all_year,filename = "C:/Users/ljm/Desktop/LSTM模型的各种尝试/data/数据处理/2013-2021,1days训练测试.xlsx" ):
    """Mean and standard deviation of the target column over the training span.

    The Excel sheet is read, the leading ``(all_year - train_year - test_year)
    / all_year`` share of its rows is skipped, and the statistics are taken
    over the first ``train_year / (train_year + test_year)`` share of the
    remaining rows of the sheet's second column (the first column is ignored).

    Args:
        train_year: number of years used for training.
        test_year: number of years used for testing.
        all_year: number of years covered by the whole sheet.
        filename: path of the Excel file.

    Returns:
        ``(miu, sigma, split_propotion)``: mean, population standard deviation
        and the train fraction ``train_year / (train_year + test_year)``.
    """
    data_train = pd.read_excel(filename)
    first_row = int((all_year - train_year - test_year) * data_train.shape[0] / all_year)
    # Only the second column is ever returned, so only that one is evaluated;
    # extra (possibly non-numeric) columns no longer matter.
    target = data_train.iloc[first_row:, 1]

    split_propotion = train_year/(train_year+test_year)
    n_train = int(split_propotion * target.shape[0])
    train_part = target.iloc[:n_train]

    miu = np.mean(train_part)
    variance = np.var(train_part)
    return miu, variance**0.5, split_propotion

def F_normalise(prediction,y_test,sigma,miu):
    """Undo standardisation: map every value ``v`` to ``sigma * v + miu``.

    Returns two lists, the rescaled predictions and the rescaled targets;
    both contain ``len(prediction)`` entries.
    """
    count = len(prediction)
    restored_prediction = [sigma * prediction[k] + miu for k in range(count)]
    restored_y_test = [sigma * y_test[k] + miu for k in range(count)]
    return restored_prediction, restored_y_test

## Train

In [None]:
# Grid search: train one LSTM per hyper-parameter combination and keep the
# combination with the lowest MAE (in de-standardised units).
# Imported here so this cell stays runnable on its own.
from keras import backend as keras_backend

miu,sigma,split_propotion = get_normalize_parameters(train_year = 5,test_year = 3,all_year = 8,filename = "C:/Users/ljm/Desktop/LSTM模型的各种尝试/data/数据处理/5-3-2-2d-Hankou-train.xlsx")
print(miu,sigma,split_propotion)
s_lenghth = [10,30,60]      # sequence (window) lengths
b_size = [4,8,32,128]       # batch sizes
net1 = [8,32,128]           # units of the 1st LSTM layer
net2 = [8,32,128]           # units of the 2nd LSTM layer
net3 = [8,32,128]           # units of the 3rd LSTM layer
d_out = [0.2,0.3]           # dropout rates
grid_params = list(itertools.product(s_lenghth, b_size, net1,net2,net3,d_out))

# Start from +inf / None so the summary below is always defined, even when
# every run scores worse than an arbitrary threshold or the grid is empty.
best_MAE = float('inf')
RMSE_with = float('inf')
best_params = None
MAE_list = []
for params in grid_params:
	# Drop the graph state of the previous model; otherwise memory grows with
	# every one of the len(grid_params) networks built in this loop.
	keras_backend.clear_session()

	configs = {
		"data": {
			"filename": "数据处理/不同robust/5-3-2-self/Robust0.45-5-3-2-self-train.csv",
			"columns": [
				"螺山日均水位",
				"自相关螺山水位",
				"枝城日均流量",
				"津市（二）日均流量",
				"湘潭日均流量",
				"桃源日均流量",
				"桃江（二）日均流量"],
			"sequence_length": params[0],"train_test_split": split_propotion,"normalise": False},
		"training": {"epochs": 1000,"batch_size": params[1]},
		"model": {"loss": "mse","optimizer": "adam","save_dir": "5-3-2-2-self-0.45",
			"layers": [
				# input_dim 6 = the 7 columns above minus the target column;
				# input_timesteps is sequence_length - 1 because the last row
				# of every window only supplies the target.
				{"type": "lstm","neurons": params[2],"input_timesteps": params[0]-1,"input_dim": 6,"return_seq": True},
				{"type": "dropout","rate": params[5]},
				{"type": "lstm","neurons": params[3],"return_seq": True},
				{"type": "lstm","neurons": params[4],"return_seq": False},
				{"type": "dropout","rate": params[5]},
				{"type": "dense","neurons": 1,"activation": "linear"}]}}

	data = DataLoader(os.path.join('data', configs['data']['filename']),configs['data']['train_test_split'],configs['data']['columns'])

	model = Model()
	mymodel = model.build_model(configs)

	x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],normalise=configs['data']['normalise'])
	x_test, y_test = data.get_test_data(seq_len=configs['data']['sequence_length'],normalise=configs['data']['normalise'])

	# NOTE(review): the test split doubles as validation data (it drives early
	# stopping) and is also what the best parameters are selected on below, so
	# the reported MAE/RMSE are optimistic - confirm this is acceptable.
	model.train(x,y,x_test,y_test,
	epochs = configs['training']['epochs'],
	batch_size = configs['training']['batch_size'],
	save_dir = configs['model']['save_dir'])

	predictions_pointbypoint = model.predict_point_by_point(x_test,debug=False)
	true_predictions_pointbypoint,true_y_test = F_normalise(prediction = predictions_pointbypoint,y_test=y_test,sigma=sigma,miu=miu)
	MAE_index = GetMAE(true_predictions_pointbypoint, true_y_test)
	MAE_list.append(MAE_index)

	if MAE_index < best_MAE:
		best_MAE = MAE_index
		RMSE_with = GetRMSE(true_predictions_pointbypoint, true_y_test)
		best_params = params
print(best_MAE)
print(RMSE_with)
print(best_params)
# Row i of the sheet belongs to grid_params[i].
df_MAE = pd.DataFrame(MAE_list)
df_MAE.to_excel('MAE_5-3-2-2d-self-0.45.xlsx',index=False)

## Test and Plot

### Test Single Model

In [None]:
# Evaluate one saved model on the complete data set, overall and per water-level class.
model_one = load_model('C:/Users/ljm/Desktop/LSTM模型的各种尝试/5-3-2-2-self/28012024-144125-e1000.h5')
data = DataLoader(
    os.path.join('data', configs['data']['filename']),
    configs['data']['train_test_split'],
    configs['data']['columns']
)

x_test_all, y_test_all = data.get_all_data(
    seq_len=configs['data']['sequence_length'],
    normalise=configs['data']['normalise']
    )
predictions_pointbypoint_all = model_one.predict(x_test_all)

# Undo the standardisation. Both series are flattened to 1-D arrays so that the
# array-based metrics below (e.g. nash_sutcliffe) can subtract them directly.
true_predictions_pointbypoint_all = sigma * np.asarray(predictions_pointbypoint_all, dtype=float).ravel() + miu
true_y_test_all = sigma * np.asarray(y_test_all, dtype=float).ravel() + miu

plot_results(true_predictions_pointbypoint_all,true_y_test_all)
print("全部数据MAE(绝对误差的平均值）为", GetMAE(true_predictions_pointbypoint_all, true_y_test_all))
print("全部数据RMSE（均方根误差）为", GetRMSE(true_predictions_pointbypoint_all, true_y_test_all))
print(nash_sutcliffe(true_y_test_all,true_predictions_pointbypoint_all))

# Split the samples into three mutually exclusive classes by observed level:
# high (>= miu + sigma), low (<= miu - sigma) and ordinary (everything else).
high_mask = true_y_test_all >= (miu + sigma)
low_mask = (true_y_test_all <= (miu - sigma)) & ~high_mask
ordinary_mask = ~(high_mask | low_mask)

high_level_true = true_y_test_all[high_mask]
high_level_pre = true_predictions_pointbypoint_all[high_mask]
ordinary_level_true = true_y_test_all[ordinary_mask]
ordinary_level_pre = true_predictions_pointbypoint_all[ordinary_mask]
low_level_true = true_y_test_all[low_mask]
low_level_pre = true_predictions_pointbypoint_all[low_mask]


def _print_level_metrics(label, predicted, observed):
    """Print MAE, RMSE and NSE for one class, or a notice when the class is empty."""
    if len(observed) == 0:
        print(label, 'no samples')
        return
    print(label, GetMAE(predicted, observed), GetRMSE(predicted, observed), nash_sutcliffe(observed, predicted))


_print_level_metrics('High', high_level_pre, high_level_true)
_print_level_metrics('Ordinary', ordinary_level_pre, ordinary_level_true)
_print_level_metrics('Low', low_level_pre, low_level_true)

        

### Test Multiple Models

In [None]:
# Evaluate every saved model of every model directory on the test split of the
# data file paired with that directory, and collect one MAE column per directory.
base_data_dir = 'C:/Users/ljm/Desktop/LSTM模型的各种尝试/data/数据处理/不同robust/5-3-2-train'
root_dir = 'C:/Users/ljm/Desktop/LSTM模型的各种尝试/5-3-2-2-Robust'


def _sorted_entries(folder, want_dirs):
    """Names of the direct sub-directories (or plain files) of ``folder``, sorted."""
    names = []
    for name in os.listdir(folder):
        if os.path.isdir(os.path.join(folder, name)) == want_dirs:
            names.append(name)
    return sorted(names)


def _checkpoint_sort_key(file_name):
    """Order checkpoints by the '%d%m%Y-%H%M%S' stamp that starts their name.

    Names without such a stamp sort last, by name.
    """
    try:
        stamp = dt.datetime.strptime(file_name[:15], '%d%m%Y-%H%M%S')
    except ValueError:
        stamp = dt.datetime.max
    return (stamp, file_name)


# Directory listings come back in no guaranteed order, so both lists are sorted
# to make the directory <-> data-file pairing deterministic.
# NOTE(review): the i-th model directory is paired with the i-th data file by
# sorted name - confirm the naming makes these line up.
data_name_list = _sorted_entries(base_data_dir, want_dirs=False)
model_dir_list = _sorted_entries(root_dir, want_dirs=True)
if len(model_dir_list) > len(data_name_list):
    raise ValueError('%d model directories but only %d data files in %s'
                     % (len(model_dir_list), len(data_name_list), base_data_dir))

# Fallback only: sequence length by grid position (first grid factor).
sequence_length_list = [params[0] for params in grid_params]

mae_columns = {}
for dir_name, data_name in zip(model_dir_list, data_name_list):
    true_dir = os.path.join(root_dir, dir_name)
    print(dir_name)
    print(data_name)

    # The data file is the same for all models of a directory: load it once.
    data = DataLoader(os.path.join(base_data_dir, data_name),configs['data']['train_test_split'],
    configs['data']['columns'])

    model_files = sorted(_sorted_entries(true_dir, want_dirs=False), key=_checkpoint_sort_key)
    mae_values = []
    for position, file_name in enumerate(model_files):
        model_one = load_model(os.path.join(true_dir, file_name))

        # The first LSTM layer is built with sequence_length - 1 time steps, so
        # the window length can be read back from the model instead of relying
        # on the order in which the files are listed.
        timesteps = model_one.input_shape[1]
        seq_len = timesteps + 1 if timesteps is not None else sequence_length_list[position]

        # Original note: use "all" for the prediction set and "test" for the training set.
        x_test_all, y_test_all = data.get_test_data(seq_len=seq_len,
        normalise=configs['data']['normalise'])
        predictions_pointbypoint_all = model_one.predict(x_test_all)

        # Undo the standardisation on flattened arrays.
        true_predictions_pointbypoint_all = sigma * np.asarray(predictions_pointbypoint_all, dtype=float).ravel() + miu
        true_y_test_all = sigma * np.asarray(y_test_all, dtype=float).ravel() + miu
        mae_values.append(GetMAE(true_predictions_pointbypoint_all, true_y_test_all))

    # Collect the per-directory MAE column; shorter columns are NaN-padded below.
    mae_columns[dir_name] = pd.Series(mae_values, dtype=float)

MAE_all_df = pd.DataFrame(mae_columns)
MAE_all_df.to_excel("5-3-2-2-train-robust.xlsx",index = False)
            

In [None]:
# Look up the hyper-parameters and the checkpoint files of the 8 lowest-MAE runs.
data = pd.read_excel("MAE_5-3-2-2d-Hankou.xlsx")
MAE_list = list(data.iloc[:,0])
best_8 = np.argsort(MAE_list)[:8]

# Row i of the MAE sheet is taken to belong to grid_params[i].
sequence_len_list = []
for models_num in best_8:
    if models_num < len(grid_params):
        gp = grid_params[models_num]
        print(gp)
        sequence_len_list.append(gp[0])


root_dir = 'C:/Users/ljm/Desktop/LSTM模型的各种尝试/5-3-2-2-0.15'
# Every file below root_dir, in the order os.walk yields them.
# NOTE(review): this presumes the walk order matches the grid order - confirm.
walked_files = [file for root, dirs, files in os.walk(root_dir) for file in files]
for models_num in best_8:
    if models_num < len(walked_files):
        print(walked_files[models_num])