In [None]:
'''
@ author: haijun xiong
@ date  : 2021/9/24
'''
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

In [None]:
def softmax(x, axis=1):
    """Numerically stable softmax along ``axis``.

    Args:
        x: Array of scores. With the default ``axis=1`` it must be at least
            2-D and each row is normalised independently.
        axis: Axis along which the probabilities sum to 1 (default 1, i.e.
            one distribution per row of a 2-D array).

    Returns:
        Array with the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shift by the maximum of *each* slice, not the global maximum: softmax is
    # invariant to a per-slice constant, and a global shift lets slices far
    # below the global max underflow to all zeros (0 / 0 -> NaN).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=axis, keepdims=True)

def make_one_hot(x, n):
    """One-hot encode integer class labels.

    Args:
        x: Array of integer labels; its first dimension is the sample count.
        n: Number of classes, i.e. the width of the encoded matrix.

    Returns:
        Float array of shape ``(x.shape[0], n)`` where row ``i`` is zero
        everywhere except for a 1 in column ``x[i]``.
    """
    encoded = np.zeros((x.shape[0], n))
    for row, label in enumerate(x):
        encoded[row, label] = 1
    return encoded

In [None]:
class ReLU:
    """Rectified linear unit: ``max(x, 0)`` element-wise.

    ``forward`` caches the mask of non-positive inputs so that ``backward``
    can return the element-wise derivative for the same batch.
    """

    def __init__(self):
        self.mask = None  # True where the last forward input was <= 0
        self.out = None   # output of the last forward call

    def forward(self, x):
        """Return a copy of ``x`` with every non-positive entry set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        self.out = out
        return out

    def backward(self):
        """Return d(out)/d(x) for the last forward input.

        The derivative is 1 where the input was positive and 0 elsewhere
        (the value at exactly 0 follows the forward mask, i.e. 0). Returning
        the forward output here, as was done before, scales the gradient by
        the activation value instead of gating it.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if self.mask is None:
            raise RuntimeError("ReLU.backward() called before forward()")
        return np.where(self.mask, 0.0, 1.0)
class Sigmoid:
    """Logistic sigmoid activation that caches its output for the backward pass."""

    def __init__(self):
        # Result of the most recent forward() call; None until then.
        self.out = None

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` element-wise and cache it on ``self.out``."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self):
        """Return the element-wise derivative ``(1 - s) * s`` of the cached output ``s``."""
        activated = self.out
        return (1.0 - activated) * activated
class Tanh:
    """Hyperbolic tangent activation that caches its output for the backward pass."""

    def __init__(self):
        self.out = None  # output of the last forward call

    def forward(self, x):
        """Return ``tanh(x)`` element-wise and cache it on ``self.out``.

        ``np.tanh`` is used instead of the explicit
        ``(1 - exp(-2x)) / (1 + exp(-2x))`` formula because the latter
        overflows to ``-inf / inf = NaN`` for large negative inputs.
        """
        out = np.tanh(x)
        self.out = out
        return out

    def backward(self):
        """Return the element-wise derivative ``1 - tanh(x)**2`` from the cached output."""
        dx = 1 - np.power(self.out, 2)
        return dx

class LeakyRelu:
    """Leaky ReLU: ``x`` for positive inputs, ``alpha * x`` otherwise.

    Args:
        alpha: Slope applied to non-positive inputs. Defaults to 0.01 so the
            class can be constructed without arguments.
    """

    def __init__(self, alpha=0.01):
        self.out, self.alpha = None, alpha
        self.mask = None  # True where the last forward input was <= 0

    def forward(self, x):
        """Return a copy of ``x`` with non-positive entries scaled by ``alpha``."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = x[self.mask] * self.alpha
        self.out = out
        return out

    def backward(self):
        """Return d(out)/d(x) for the last forward input.

        The slope is selected with the same mask ``forward`` used, rather
        than by testing the sign of the output: with ``alpha == 0`` (or when
        ``alpha * x`` underflows) the output of a negative input is zero and
        a sign test on it would report slope 1.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if self.mask is None:
            raise RuntimeError("LeakyRelu.backward() called before forward()")
        return np.where(self.mask, self.alpha, 1.0)

class ELU:
    """Exponential linear unit: ``x`` for ``x >= 0``, ``alpha * (exp(x) - 1)`` otherwise."""

    def __init__(self, alpha=0.1):
        self.alpha = alpha
        # Input and output of the most recent forward() call.
        self.x = None
        self.out = None

    def forward(self, x):
        """Apply the activation element-wise, caching both input and output."""
        self.x = x
        negative_branch = self.alpha * (np.exp(x) - 1)
        self.out = np.where(x >= 0.0, x, negative_branch)
        return self.out

    def backward(self):
        """Return the element-wise derivative for the cached input.

        For negative inputs the derivative ``alpha * exp(x)`` is obtained as
        ``out + alpha`` from the cached output.
        """
        is_non_negative = self.x >= 0.0
        return np.where(is_non_negative, 1, self.out + self.alpha)

class SELU:
    """Scaled exponential linear unit.

    Computes ``scale * x`` for ``x >= 0`` and
    ``scale * alpha * (exp(x) - 1)`` otherwise, using the fixed ``alpha`` and
    ``scale`` constants set in ``__init__``.
    """

    def __init__(self):
        self.alpha = 1.6732632423543772848170429916717
        self.scale = 1.0507009873554804934193349852946
        self.x = None    # input of the last forward call
        self.out = None  # output of the last forward call

    def forward(self, x):
        """Apply the activation element-wise, caching both input and output."""
        self.x = x
        self.out = self.scale * np.where(x >= 0.0, x, self.alpha * (np.exp(x) - 1))
        return self.out

    def backward(self):
        """Return the element-wise derivative for the cached input.

        The derivative is ``scale`` for ``x >= 0`` and
        ``scale * alpha * exp(x)`` otherwise. It must read the cached
        ``self.x``; a bare ``x`` is not defined in this method.
        """
        return self.scale * np.where(self.x >= 0.0, 1, self.alpha * np.exp(self.x))

In [None]:
class bpNN:
    """Fully connected network with one hidden layer and a softmax output.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of hidden units.
        output_dim: Number of output classes.
        learning_rate: Step size used by ``train``.
        grad: Name of the hidden activation, matched case-insensitively:
            'sigmoid', 'tanh', 'relu', 'leakyrelu', 'elu' or 'selu'.

    Raises:
        ValueError: if ``grad`` is not one of the supported names.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate, grad = 'sigmoid'):
        self.params = {
            'W1': np.random.random([input_dim, hidden_dim]),
            'b1': np.zeros((1, hidden_dim)),
            'W2': np.random.random([hidden_dim, output_dim]),
            'b2': np.zeros((1, output_dim)),
        }
        self.lr = learning_rate
        self.AF = None
        # Case-insensitive lookup so 'relu' and 'ReLU', 'ELU' and 'elu' all work.
        name = str(grad).lower()
        if name == 'sigmoid':
            self.AF = Sigmoid()
        elif name == 'tanh':
            self.AF = Tanh()
        elif name == 'relu':
            self.AF = ReLU()
        elif name == 'leakyrelu':
            # Pass the slope explicitly: LeakyRelu takes alpha as an argument.
            self.AF = LeakyRelu(0.01)
        elif name == 'elu':
            self.AF = ELU()
        elif name == 'selu':
            self.AF = SELU()
        else:
            # Fail here rather than with an AttributeError on None in z_a().
            raise ValueError(
                "unknown activation %r; expected one of 'sigmoid', 'tanh', "
                "'relu', 'leakyrelu', 'elu', 'selu'" % (grad,)
            )

    def z_a(self, x):
        """Run the forward pass.

        Hidden layer: ``z1 = x W1 + b1``, ``a1 = AF(z1)``;
        output layer: ``z2 = a1 W2 + b2``, ``probs = softmax(z2)``.

        Returns:
            Tuple ``(z1, a1, z2, probs)``.
        """
        z1 = np.dot(x, self.params['W1']) + self.params['b1']
        a1 = self.AF.forward(z1)
        z2 = np.dot(a1, self.params['W2']) + self.params['b2']
        probs = softmax(z2)
        return z1, a1, z2, probs

    def predict(self, x):
        """Return the index of the highest-probability class for each row of ``x``."""
        probs = self.z_a(x)[-1]
        return np.argmax(probs, axis=1)

    def accuracy(self, x, y):
        """Return the fraction of rows of ``x`` whose predicted class equals ``y``."""
        predict_y = self.predict(x)
        return np.mean(predict_y == y)

    def train(self, x, y):
        """Perform one gradient-descent step on the batch ``(x, y)``.

        Args:
            x: Input batch, one sample per row.
            y: Targets with the same shape as the softmax output
                (one-hot rows), since the output error is ``probs - y``.
        """
        z1, a1, z2, probs = self.z_a(x)
        # Output-layer error for softmax combined with cross-entropy.
        delta2 = probs - y
        # Back-propagate through W2 and the hidden activation. AF.backward()
        # relies on the values cached by the forward pass in z_a() above.
        delta1 = np.dot(delta2, self.params['W2'].T) * self.AF.backward()

        # Gradients are summed (not averaged) over the batch.
        grads = {
            'W1': np.dot(x.T, delta1),
            'b1': np.sum(delta1, axis=0),
            'W2': np.dot(a1.T, delta2),
            'b2': np.sum(delta2, axis=0),
        }
        for key in self.params.keys():
            self.params[key] -= self.lr * grads[key]

In [None]:
def draw_p(X, y, net):
    """Plot the class regions predicted by ``net`` with the samples overlaid.

    Args:
        X: Sample array; columns 0 and 1 are used as the plot axes.
        y: Per-sample values used to colour the scatter points.
        net: Object exposing ``predict(points)`` that returns one label per row.
    """
    pad, step = .5, 0.01
    x_lo, x_hi = X[:, 0].min() - pad, X[:, 0].max() + pad
    y_lo, y_hi = X[:, 1].min() - pad, X[:, 1].max() + pad
    grid_x, grid_y = np.meshgrid(np.arange(x_lo, x_hi, step), np.arange(y_lo, y_hi, step))
    # Evaluate the network on every grid point, then fold the flat predictions
    # back into the grid's shape.
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    region = np.floor(net.predict(grid_points) * 1.99999).reshape(grid_x.shape)
    # Predicted decision regions, with the samples drawn on top.
    plt.contourf(grid_x, grid_y, region, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)

def draw(X, y, name='yuanshi.png'):
    """Scatter-plot the samples coloured by ``y`` and save the figure to ``name``."""
    first_feature, second_feature = X[:, 0], X[:, 1]
    plt.scatter(first_feature, second_feature, s=40, c=y, cmap=plt.cm.Spectral)
    plt.savefig(name)

In [None]:
num = 50000
train_num = int(num * 0.8)
SEED = 42  # fixed seed so the generated dataset is identical on every run
X, y = datasets.make_moons(num, noise=0.1, random_state=SEED)  # generate the dataset

# First 80% of the samples for training, the remaining 20% for testing.
train_X, train_y = np.array(X[:train_num]), np.array(y[:train_num])
test_X, test_y = np.array(X[train_num:]), np.array(y[train_num:])
draw(train_X, train_y)

In [None]:
# bpNN draws its initial weights from NumPy's global RNG; seed it so that
# Restart & Run All reproduces the same trained network.
np.random.seed(0)
net = bpNN(2, 64, 2, 0.0001, 'sigmoid')
epochs = 500
batch_size = 1000
# One-hot encode the labels once instead of re-encoding every batch in every
# epoch; slicing the encoded matrix gives the same rows as encoding a slice.
train_y_one_hot = make_one_hot(train_y, 2)
for i in range(epochs):
    for j in range(0, train_num, batch_size):
        net.train(train_X[j:j+batch_size], train_y_one_hot[j:j+batch_size])

In [None]:
# Fraction of the test samples (those not used for training) that the trained
# network classifies correctly; the bare name displays it as the cell output.
acc = net.accuracy(test_X, test_y)
acc

In [None]:
# Plot the network's predicted class regions with the test samples overlaid.
draw_p(test_X, test_y, net)