In [1]:
import numpy as np
import time

# 导入数据
from sklearn.datasets import load_wine
wine = load_wine()
feature = wine.data
target = wine.target

In [2]:
# min-max normalization
feature = (feature - feature.min(axis=0)) / (feature.max(axis=0) - feature.min(axis=0))

In [3]:
# use a BP neural network to classify the wine data

# 将target0，1，2转换为one-hot编码[0,0,1],[0,1,0],[1,0,0]
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse=False)
target = encoder.fit_transform(target.reshape(-1, 1))

# 划分训练集和测试集
# 取前150个样本作为训练集，后28个样本作为测试集
feature_train = feature[:150]
target_train = target[:150]
feature_test = feature[150:]
target_test = target[150:]

In [4]:
feature.shape

(178, 13)

In [5]:
target.shape

(178, 3)

In [6]:
class Net:
    def __init__(self, input_size, hidden_size=10, output_size=3):
        # 输入层到隐藏层的权重和偏置
        self.w1 = np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros(hidden_size)
        # 隐藏层到输出层的权重和偏置
        self.w2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros(output_size)

    def forward(self, x):
        # 输入层到隐藏层
        self.z1 = x.dot(self.w1) + self.b1
        # 第一层激活函数使用tanh
        self.a1 = np.tanh(self.z1)
        # 隐藏层到输出层
        self.z2 = self.a1.dot(self.w2) + self.b2
        # 隐藏层到输出层的激活函数使用sigmoid
        self.a2 = 1 / (1 + np.exp(-self.z2))

    # 反向传播
    def backward(self, x, y, lr=0.01):
        delta2 = self.a2 - y
        delta1 = delta2.dot(self.w2.T) * (1 - np.tanh(self.z1) ** 2)
        self.w2 -= self.a1.T.dot(delta2) * lr
        self.b2 -= np.sum(delta2, axis=0) * lr
        self.w1 -= x.T.dot(delta1) * lr
        self.b1 -= np.sum(delta1, axis=0) * lr

    def train(self, x, y, lr=0.01):
        self.forward(x)
        self.backward(x, y, lr)
    
    def predict(self, x):
        self.forward(x)
        return self.a2

In [7]:
# 交叉熵损失函数
def loss(y, y_hat):
    return -np.sum(y * np.log(y_hat))

In [8]:
# 创建神经网络
New = Net(13, 15, 3)

# 训练
t1 = time.time()
for i in range(1000):
    New.train(feature_train, target_train, 0.3)
    if i % 100 == 0:
        print("epoch", i, "loss", loss(target_train, New.predict(feature_train)))
t2 = time.time()
print("training time:", t2 - t1)

# 预测
prid = New.predict(feature_test)
# 计算准确率
print("accuracy: ",np.mean(np.argmax(prid, axis=1) == np.argmax(target_test, axis=1)))

epoch 0 loss 651.3707893116007
epoch 100 loss 34108.022581234436
epoch 200 loss 22904.688827503673
epoch 300 loss 22098.973703014923
epoch 400 loss 29601.14515496017
epoch 500 loss 21772.46077131911
epoch 600 loss 1.782894388806898
epoch 700 loss 43236.79315706817
epoch 800 loss 4358.050911498412
epoch 900 loss 5.060947439546613e-09
training time: 0.049237728118896484
accuracy:  0.0


In [9]:
feature

array([[0.84210526, 0.1916996 , 0.57219251, ..., 0.45528455, 0.97069597,
        0.56134094],
       [0.57105263, 0.2055336 , 0.4171123 , ..., 0.46341463, 0.78021978,
        0.55064194],
       [0.56052632, 0.3201581 , 0.70053476, ..., 0.44715447, 0.6959707 ,
        0.64693295],
       ...,
       [0.58947368, 0.69960474, 0.48128342, ..., 0.08943089, 0.10622711,
        0.39728959],
       [0.56315789, 0.36561265, 0.54010695, ..., 0.09756098, 0.12820513,
        0.40085592],
       [0.81578947, 0.66403162, 0.73796791, ..., 0.10569106, 0.12087912,
        0.20114123]])