In [5]:
# 1.BP神经网络
#   见 'MPLClassifier.ipynb'
# 2.RBF神经网络
from IPython.display import Latex
from IPython.display import display, Math, Latex


def print_latex(latex_str):
    """Show ``latex_str`` as rich LaTeX output in the notebook.

    The string is wrapped in an ``IPython.display.Latex`` object and handed
    to ``display``; nothing is returned.
    """
    rendered = Latex(latex_str)
    display(rendered)
    
    
# Notes on the RBF network, rendered one display object per entry:
# the two-layer mapping, why only nearby hidden units fire, and how the
# centres, widths and output weights are determined.
for rbf_note in (
    r'RBF神经网络经过这样两层变化: $\left\{\begin{array}{l}R_i(X)=exp(-||X-C_i||^2/2\sigma_i^2),\qquad i=1,\cdots,m\\\hat{y}_k=\sum\limits_{i=1}^{m}\omega_{ik}R_i(X),\qquad k=1,\cdots,p\end{array}\right.$',
    r'这样只有小部分靠近中心的隐藏层神经元被激活($R_i(X)$随着其中范数增大,指数减少)',
    r'确定基函数中心$C_i$. 一般采用K均值聚类法.',
    r'确定基函数宽度$\sigma_i$. 通常令它等于基函数中心与子样本集中样本模式之间的平均距离',
    r'确定权值$\omega_{ik}$. 采用最小均方误差测度.',
):
    print_latex(rbf_note)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

In [1]:
# RBF神经网络(based on tensorflow): https://github.com/shiluqiang/RBF_NN_tensorflow/blob/master/RBF_tensorflow.py
# 由于数据量小, 本人的不熟悉等原因, 预测结果并不理想.

import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
import pandas as pd
from sklearn import preprocessing

class RBF_NN():
    """Radial-basis-function network trained with the TensorFlow 1.x graph API.

    The hidden layer holds ``hidden_nodes`` Gaussian units whose centres are
    initialised with K-means; a single linear output unit follows.
    """

    def __init__(self, hidden_nodes, input_data_trainX, input_data_trainY):
        """Store the network size and the training data.

        Args:
            hidden_nodes: number of hidden (RBF) units, also the number of
                K-means clusters.
            input_data_trainX: 2-D array of sample features (rows = samples).
            input_data_trainY: 1-D array with one target value per sample.
        """
        self.hidden_nodes = hidden_nodes
        self.input_data_trainX = input_data_trainX
        self.input_data_trainY = input_data_trainY

    def fit(self, n_train=19, epochs=200, learning_rate=0.05, log_every=20):
        """Train the network and print an error report for every sample.

        Args:
            n_train: the first ``n_train`` samples are used for training; the
                report printed at the end still covers all samples.
            epochs: number of passes over the training samples.
            learning_rate: step size handed to the Adam optimizer.
            log_every: print the mean training loss every ``log_every``
                epochs; a falsy value disables the logging.

        Side effects:
            Prints the loss log, the summed absolute relative error and the
            per-sample report. The learned parameters are stored on the
            instance as ``weights_``, ``bias_``, ``centers_`` and ``widths_``.
        """
        features = self.input_data_trainX
        targets = self.input_data_trainY
        n_input = features.shape[1]

        # 1. Placeholders: the graph below is fed one sample at a time.
        X = tf.placeholder('float', [None, n_input], name='X')
        Y = tf.placeholder('float', [None, 1], name='Y')

        # 2. Parameters.
        # RBF centres start at the K-means cluster centres of the data held
        # by this instance (not of a module-level global).
        kms = KMeans(n_clusters=self.hidden_nodes, random_state=None)
        kms.fit(features)
        c = tf.cast(tf.Variable(kms.cluster_centers_), tf.float32)
        delta = tf.Variable(tf.random_normal(shape=(1, self.hidden_nodes)), name='delta')
        # Hidden-to-output weights and bias.
        W = tf.Variable(tf.random_normal(shape=(self.hidden_nodes, 1)), name='W')
        b = tf.Variable(tf.random_normal(shape=(1, 1)), name='b')

        # 3. Forward pass.
        # Squared distance between the (tiled) sample and every centre; the
        # tiling assumes exactly one sample is fed per run.
        dist = tf.reduce_sum(
            tf.square(tf.subtract(tf.tile(X, [self.hidden_nodes, 1]), c)), axis=1)
        delta_2 = tf.square(delta)
        RBF_OUT = tf.exp(tf.multiply(-1.0, tf.divide(dist, tf.multiply(2.0, delta_2))))
        output_in = tf.matmul(RBF_OUT, W) + b

        # 4. Mean-squared-error loss, minimised with Adam.
        loss = tf.reduce_mean(tf.pow(Y - output_in, 2))
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        # 5. Training on the first n_train samples.
        trX = features[:n_train]
        trY = targets[:n_train]
        n_samples = trX.shape[0]

        def feed_for(index):
            # One (1, n_input) feature row and its (1, 1) target.
            return {X: np.reshape(trX[index], (1, n_input)), Y: [[trY[index]]]}

        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            for epoch in range(epochs):
                for i in range(n_samples):
                    sess.run(train_op, feed_dict=feed_for(i))
                if log_every and epoch % log_every == 0:
                    # Average the loss over every training sample (index j,
                    # not the stale index left over from the training loop).
                    total_loss = 0.0
                    for j in range(n_samples):
                        total_loss += sess.run(loss, feed_dict=feed_for(j))
                    print('Loss function at step %d is %s' % (epoch, total_loss / n_samples))

            print('Training complete!')

            # Keep the learned parameters instead of discarding them.
            self.weights_ = W.eval()
            self.bias_ = b.eval()
            self.centers_ = c.eval()
            self.widths_ = delta.eval()

            # Predict every sample (training subset and the rest alike).
            pred = [
                sess.run(output_in, feed_dict={X: np.reshape(row, (1, n_input))})[0][0]
                for row in features
            ]

        relative_error_sum, report = self._build_report(features, pred, targets)
        print('Relative Error Sum: {:.4%}'.format(relative_error_sum))
        print(report)

    @staticmethod
    def _build_report(inputs, pred, target):
        """Build the per-sample error table.

        Args:
            inputs: iterable of feature rows; each is shown via ``str``.
            pred: predicted values, one per row.
            target: reference values, one per row.

        Returns:
            ``(relative_error_sum, report)`` where ``relative_error_sum`` is
            the sum of absolute relative errors and ``report`` is a DataFrame
            of formatted strings with the columns 'Input', 'Pred Out',
            'Raw Out', 'Error' and 'Relative Error'.
        """
        pred = np.asarray(pred, dtype=float).ravel()
        target = np.asarray(target, dtype=float).ravel()
        error = pred - target
        # A zero target yields inf/nan here, exactly as plain division does.
        relative_error = error / target
        report = pd.DataFrame({
            'Input': [str(row) for row in inputs],
            'Pred Out': ['{:.4f}'.format(v) for v in pred],
            'Raw Out': ['{:.1f}'.format(v) for v in target],
            'Error': ['{:.4f}'.format(v) for v in error],
            'Relative Error': ['{:.4%}'.format(v) for v in relative_error],
        }, columns=['Input', 'Pred Out', 'Raw Out', 'Error', 'Relative Error'])
        return float(np.abs(relative_error).sum()), report

    def save_model(self, file_name, weights):
        """Write a 2-D weight matrix to a text file.

        Each matrix row becomes one line; values are converted with ``str``
        and separated by tabs.

        Args:
            file_name (str): path of the file to (over)write.
            weights: 2-D array or matrix supporting ``weights[i, j]``.
        """
        n_rows, n_cols = np.shape(weights)
        # The context manager closes the file even if a write fails.
        with open(file_name, 'w') as f_w:
            for i in range(n_rows):
                f_w.write('\t'.join(str(weights[i, j]) for j in range(n_cols)) + '\n')
            

# Load the reservoir-runoff table and standardise every column (features and
# target alike) with sklearn's `preprocessing.scale`.
data = preprocessing.scale(np.loadtxt('15.D 水库年径流与因子特征.txt'))

# The last column is the target; all preceding columns are features.
trainX, trainY = data[:, :-1], data[:, -1]
input_data_trainX, input_data_trainY = trainX, trainY

# Train an RBF network with 10 hidden units on the scaled data.
rbf = RBF_NN(10, input_data_trainX, input_data_trainY)
rbf.fit()

ModuleNotFoundError: No module named 'tensorflow'

In [159]:
# BP神经网络(based on keras)

# numpy/pandas are imported here as well so the cell does not depend on an
# earlier cell having run successfully.
import numpy as np
import pandas as pd
from keras.callbacks import LambdaCallback
from keras.layers import Activation, Dense
from keras.models import Sequential

data = np.loadtxt('15.D 水库年径流与因子特征.txt')
X = data[:, :-1]  # every column but the last is a feature
Y = data[:, -1]   # the last column is the target
n = 5
# The first n samples are left out of training; the report below still
# covers all samples.
trainX = X[n:]
trainY = Y[n:]

model = Sequential()
# Only the first layer needs the input size; later layers infer it.
model.add(Dense(12, input_dim=X.shape[1]))
model.add(Activation('relu'))
model.add(Dense(5))
model.add(Activation('relu'))
model.add(Dense(1))  # single linear output unit
model.compile(loss='mean_squared_error', optimizer='Adam')


def eposh_callback(epoch, logs):
    """Print the training loss at the end of every 200th epoch."""
    if epoch % 200 == 0:
        print('At the end of epoch: {}, with loss: {}'.format(epoch, logs['loss']))


# `LambdaCallback` is imported by name: `from keras.models import ...` alone
# does not bind the name `keras`, so `keras.callbacks...` would be undefined.
batch_print_callback = LambdaCallback(on_epoch_end=eposh_callback)
model.fit(trainX, trainY, epochs=1000, batch_size=5, verbose=0,
          callbacks=[batch_print_callback])  # 1000 training epochs

pred = np.array(model.predict(X)).flatten()
error = np.subtract(pred, Y)
# A zero target yields inf/nan here, exactly as in plain division.
relative_error = np.divide(error, Y)

df_columns = ['Input', 'Pred Out', 'Raw Out', 'Error', 'Relative Error']
# Build the table from numeric columns instead of coercing everything to
# strings through np.c_ (which also relied on the removed `np.float` alias).
df = pd.DataFrame({
    'Input': [str(x) for x in X],
    'Pred Out': pred,
    'Raw Out': Y,
    'Error': error,
    'Relative Error': relative_error,
}, columns=df_columns)
print('Relative Error Sum: {:.4%}'.format(df['Relative Error'].abs().sum()))
df['Pred Out'] = df['Pred Out'].map('{:.4f}'.format)
df['Raw Out'] = df['Raw Out'].map('{:.1f}'.format)
df['Error'] = df['Error'].map('{:.4f}'.format)
df['Relative Error'] = df['Relative Error'].map('{:.4%}'.format)
print(df)

At the end of epoch: 0, with loss: 921.4054565429688
At the end of epoch: 200, with loss: 57.29780197143555
At the end of epoch: 400, with loss: 45.08501116434733
At the end of epoch: 600, with loss: 34.48600959777832
At the end of epoch: 800, with loss: 24.012651761372883
Relative Error Sum: 928.5028%
                    Input Pred Out Raw Out    Error Relative Error
0   [15.6  5.6  3.5 25.5]  32.5505    22.9   9.6505       42.1419%
1   [27.8  4.3  1.   7.7]  22.2071    23.4  -1.1929       -5.0979%
2   [35.2  3.  38.1  3.7]  41.1654    36.8   4.3654       11.8625%
3   [10.2  3.4  3.5  7.4]  13.3210    22.0  -8.6790      -39.4500%
4   [29.1 33.2  1.6 24. ]  47.9056     6.4  41.5056      648.5251%
5   [10.2 11.6  2.2 26.7]  29.9794    29.4   0.5794        1.9707%
6   [35.4  4.1  1.3  7. ]  25.4290    26.2  -0.7710       -2.9428%
7       [8.7 3.5 7.5 5. ]  14.4933    20.9  -6.4067      -30.6539%
8   [25.4  0.7 22.2 35.4]  25.4681    26.5  -1.0319       -3.8939%
9   [15.3  6.   2.  17.5] 