In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

def rand(a, b):
    return (b - a) * np.random.random() + a

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

class BP:
    def __init__(self, layer, iter, max_error):
        self.input_n = layer[0]  # 输入层的节点个数 d
        self.hidden_n = layer[1]  # 隐藏层的节点个数 q
        self.output_n = layer[2]  # 输出层的节点个数 l
        self.gj = []
        self.eh = []
        self.input_weights = []   # 输入层与隐藏层的权值矩阵
        self.output_weights = []  # 隐藏层与输出层的权值矩阵
        self.iter = iter          # 最大迭代次数
        self.max_error = max_error  # 停止的误差范围

        # for i in range(self.input_n + 1):
        #     tmp = []
        #     for j in range(self.hidden_n):
        #         tmp.append(rand(-0.2, 0.2))
        #     self.input_weights.append(tmp)
        #
        # for i in range(self.hidden_n + 1):
        #     tmp = []
        #     for j in range(self.output_n):
        #         tmp.append(rand(-0.2, 0.2))
        #     self.output_weights.append(tmp)
        # self.input_weights = np.array(self.input_weights)
        # self.output_weights = np.array(self.output_weights)

        # 初始化一个(d+1) * q的矩阵，多加的1是将隐藏层的阀值加入到矩阵运算中
        self.input_weights = np.random.random((self.input_n + 1, self.hidden_n))
        # 初始话一个(q+1) * l的矩阵，多加的1是将输出层的阀值加入到矩阵中简化计算
        self.output_weights = np.random.random((self.hidden_n + 1, self.output_n))

        self.gj = np.zeros(layer[2])
        self.eh = np.zeros(layer[1])

    #  正向传播与反向传播
    def forword_backword(self, xj, y, learning_rate=0.1):
        xj = np.array(xj)
        y = np.array(y)
        input = np.ones((1, xj.shape[0] + 1))
        input[:, :-1] = xj
        x = input
        # ah = np.dot(x, self.input_weights)
        ah = x.dot(self.input_weights)
        bh = sigmoid(ah)

        input = np.ones((1, self.hidden_n + 1))
        input[:, :-1] = bh
        bh = input

        bj = np.dot(bh, self.output_weights)
        yj = sigmoid(bj)

        error = yj - y
        self.gj = error * sigmoid_derivative(yj)

        # wg = np.dot(self.output_weights, self.gj)

        wg = np.dot(self.gj, self.output_weights.T)
        wg1 = 0.0
        for i in range(len(wg[0]) - 1):
            wg1 += wg[0][i]
        self.eh = bh * (1 - bh) * wg1
        self.eh = self.eh[:, :-1]

        #  更新输出层权值w，因为权值矩阵的最后一行表示的是阀值多以循环只到倒数第二行
        for i in range(self.output_weights.shape[0] - 1):
            for j in range(self.output_weights.shape[1]):
                self.output_weights[i][j] -= learning_rate * self.gj[0][j] * bh[0][i]

        #  更新输出层阀值b，权值矩阵的最后一行表示的是阀值
        for j in range(self.output_weights.shape[1]):
            self.output_weights[-1][j] -= learning_rate * self.gj[0][j]

        #  更新输入层权值w
        for i in range(self.input_weights.shape[0] - 1):
            for j in range(self.input_weights.shape[1]):
                self.input_weights[i][j] -= learning_rate * self.eh[0][j] * xj[i]

        # 更新输入层阀值b
        for j in range(self.input_weights.shape[1]):
            self.input_weights[-1][j] -= learning_rate * self.eh[0][j]
        return error

    def fit(self, X, y):

        for i in range(self.iter):
            error = 0.0
            for j in range(len(X)):
                error += self.forword_backword(X[j], y[j])
            error = error.sum()
            if abs(error) <= self.max_error:
                break

    def predict(self, x_test):
        x_test = np.array(x_test)
        tmp = np.ones((x_test.shape[0], self.input_n + 1))
        tmp[:, :-1] = x_test
        x_test = tmp
        an = np.dot(x_test, self.input_weights)
        bh = sigmoid(an)
        #  多加的1用来与阀值相乘
        tmp = np.ones((bh.shape[0], bh.shape[1] + 1))
        tmp[:, : -1] = bh
        bh = tmp
        bj = np.dot(bh, self.output_weights)
        yj = sigmoid(bj)
        print(yj)
        return yj

if __name__ == '__main__':
    #  指定神经网络输入层，隐藏层，输出层的元素个数
    layer = [20, 20, 1]
    dataset_xt = pd.read_excel(r"...\Molecular_Descriptor_adjust.xlsx",sheet_name="training",
                          usecols=['MDEC-23','LipoaffinityIndex','minsssN','maxssO','maxHsOH','minHsOH','C1SP2','BCUTc-1l', 
                                   'MLogP', 'minsOH', 'VC-5', 'TopoPSA', 'ATSc3', 'minHBint5', 'nHBAcc', 'MLFER_A', 'hmin',
                                   'MDEC-33', 'nC', 'WTPT-5','ad_plc'])
    X=dataset_xt.iloc[:,0:20].values
    y=dataset_xt.iloc[:,20].values
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    # x_test = [[2, 3],
    #           [2, 2]]
    #  设置最大的迭代次数，以及最大误差值
    bp = BP(layer, 500, 0.0001)
    bp.fit(X, y)
    bp.predict(X)


[[0.68703662]
 [0.75168847]
 [0.74213491]
 ...
 [0.5339708 ]
 [0.61863429]
 [0.65680131]]


In [10]:
y_pre=bp.predict(X)
MAPE=0
for i in range(1974):
    MAPE+=(y[i][0]-y_pre[i][0])/y[i][0]
print(y_pre,y.shape)
print(MAPE)

[[0.68703662]
 [0.75168847]
 [0.74213491]
 ...
 [0.5339708 ]
 [0.61863429]
 [0.65680131]]


IndexError: invalid index to scalar variable.

In [14]:
import xlwt
y_pre=pd.DataFrame(y_pre)
writer = pd.ExcelWriter('B.xlsx')
y_pre.to_excel(writer, 'sheet1', float_format='%.5f')
writer.save()

writer.close()

  warn("Calling close() on already closed file.")


In [16]:
dataset_xte = pd.read_excel(r"C:\Users\Yuang\Desktop\pxh本人\ipynb文件\Molecular_Descriptor_adjust.xlsx",sheet_name="test",
                          usecols=['MDEC-23','LipoaffinityIndex','minsssN','maxssO','maxHsOH','minHsOH','C1SP2','BCUTc-1l', 
                                   'MLogP', 'minsOH', 'VC-5', 'TopoPSA', 'ATSc3', 'minHBint5', 'nHBAcc', 'MLFER_A', 'hmin',
                                   'MDEC-33', 'nC', 'WTPT-5'])
Xte=dataset_xte.iloc[:,0:20].values
scaler = StandardScaler()
Xte = scaler.fit_transform(Xte)
print(Xte)

[[ 5.89474251e-02  1.26619482e+00  2.34085529e-01 -6.34164643e-01
   9.26133497e-02 -4.53269379e-01  4.49805605e-01 -8.09243918e-01
   4.70124841e-01  8.11104229e-02  6.95296712e-01 -1.66005654e+00
   1.03948841e-01 -3.73580393e-01  3.49893362e-01  1.06573606e+00
   1.92411720e+00 -1.57214387e-01 -1.30234885e-01 -6.15501316e-01]
 [-5.88036509e-01  3.93123310e-02  3.89092410e-01 -4.48736385e-01
  -4.88188776e-01 -4.53269379e-01  6.00677430e-01 -8.09243918e-01
   1.59522721e-01  9.78858101e-01  5.78848210e-01  2.55498499e-02
  -2.53653507e-01 -4.56968874e-01 -4.10744381e-01  6.46280042e-01
   5.11506634e-01 -1.30367668e-01 -4.12773968e-01 -6.15501316e-01]
 [-5.16149405e-01  8.43583910e-04  3.86297319e-01 -4.48736385e-01
  -4.88188776e-01 -4.53269379e-01  5.70376227e-01 -8.09243918e-01
   1.69977291e-01  9.57377708e-01  6.04424351e-01 -1.77178159e-01
  -2.32526118e-01 -4.56968874e-01 -4.10744381e-01  7.33262406e-01
   8.67659910e-01 -1.30367668e-01 -3.63699152e-01 -6.15501316e-01]
 [-5.88

In [18]:
yte_pre=bp.predict(Xte)
yte_pre=pd.DataFrame(yte_pre)
writer = pd.ExcelWriter('A.xlsx')
yte_pre.to_excel(writer, 'sheet1', float_format='%.5f')
writer.save()

writer.close()

[[0.69626262]
 [0.46482978]
 [0.51484118]
 [0.48575784]
 [0.48366807]
 [0.44838333]
 [0.57244256]
 [0.51384158]
 [0.53125195]
 [0.59136777]
 [0.57516225]
 [0.6034618 ]
 [0.63031096]
 [0.55224484]
 [0.72543767]
 [0.74193587]
 [0.54763759]
 [0.38247376]
 [0.43671753]
 [0.42056852]
 [0.56103699]
 [0.4070803 ]
 [0.42229379]
 [0.42442641]
 [0.37814513]
 [0.38355898]
 [0.62024187]
 [0.58945396]
 [0.60121922]
 [0.59053257]
 [0.56471835]
 [0.56597974]
 [0.52114517]
 [0.60613261]
 [0.52113742]
 [0.61297178]
 [0.59193674]
 [0.53906691]
 [0.67442484]
 [0.63542867]
 [0.63946398]
 [0.64018223]
 [0.62358783]
 [0.55835075]
 [0.63946398]
 [0.57138828]
 [0.65842689]
 [0.59154431]
 [0.66142927]
 [0.55319868]]
