In [1]:



import sys

sys.path.append('../')

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import HuichuanFlow as hf



In [2]:
from sklearn.datasets import make_circles

# Fixed seed so that Restart & Run All regenerates the same dataset.
SEED = 42
# NOTE(review): presumably HuichuanFlow draws the initial values of
# `init=True` Variables from NumPy's global RNG; if so, seeding it here also
# makes the initial weights reproducible -- confirm against the library.
np.random.seed(SEED)

# Concentric-circle data: each row of X holds two features, y is a 1/0 class label
X, y = make_circles(200, noise=0.1, factor=0.2, random_state=SEED)
y = y * 2 - 1  # map the labels {0, 1} to {-1, 1}

# Whether to add the quadratic terms to the feature vector
use_quadratic = True

# First-order terms: a 2-dimensional vector (2x1 matrix), fed per sample
x1 = hf.core.Variable(dim=(2, 1), init=False, trainable=False)

# Label node (1x1), fed per sample
label = hf.core.Variable(dim=(1, 1), init=False, trainable=False)

# Bias (1x1), trainable
b = hf.core.Variable(dim=(1, 1), init=True, trainable=True)

# Build the feature vector and weights depending on use_quadratic
if use_quadratic:

    # Multiply x1 by itself reshaped to 1x2 (its transpose) to get the 2x2
    # matrix of second-order terms, then flatten it to a 4x1 vector
    x2 = hf.ops.Reshape(
            hf.ops.MatMul(x1, hf.ops.Reshape(x1, shape=(1, 2))),
            shape=(4, 1)
            )

    # Concatenate first- and second-order terms into a 6x1 vector
    x = hf.ops.Concat(x1, x2)

    # Weight vector for the 6 features (1x6 matrix)
    w = hf.core.Variable(dim=(1, 6), init=True, trainable=True)

else:

    # The feature vector is just the first-order terms
    x = x1

    # Weight vector for the 2 features (1x2 matrix)
    w = hf.core.Variable(dim=(1, 2), init=True, trainable=True)


In [5]:
# Linear part: w * x + b
weighted_features = hf.ops.MatMul(w, x)
output = hf.ops.Add(weighted_features, b)

# Predicted probability: logistic function applied to the linear output
predict = hf.ops.Logistic(output)

# Loss: LogLoss applied to the product label * output
label_times_output = hf.ops.MatMul(label, output)
loss = hf.ops.loss.LogLoss(label_times_output)

# Optimizer over the default graph, minimizing `loss`
learning_rate = 0.001
optimizer = hf.optimizer.Adam(hf.default_graph, loss, learning_rate)

# Metric nodes.
# NOTE(review): these are fed the raw linear `output`, not the probability
# `predict` -- confirm this is the input the hf metric ops expect.
accuracy = hf.ops.metrics.Accuracy(output, label)
precision = hf.ops.metrics.Precision(output, label)
recall = hf.ops.metrics.Recall(output, label)
auc = hf.ops.metrics.ROC_AUC(output, label)

# Number of samples processed between parameter updates
batch_size = 8


In [7]:
# Number of samples processed between parameter updates
batch_size = 8

for epoch in range(200):

    # Samples processed since the last optimizer.update()
    batch_count = 0

    for i in range(len(X)):

        # Feed one sample: X[i] becomes a 2x1 column matrix, y[i] a 1x1 matrix.
        # np.asmatrix is used instead of the np.mat alias, which NumPy 2.0 removed.
        x1.set_value(np.asmatrix(X[i]).T)
        label.set_value(np.asmatrix(y[i]))

        # NOTE(review): presumably runs forward/backward for this sample and
        # accumulates its gradient -- confirm against hf.optimizer.
        optimizer.one_step()

        batch_count += 1

        # Update the parameters once batch_size samples have been processed
        if batch_count >= batch_size:
            optimizer.update()
            batch_count = 0

    # Evaluate on the training set: collect the predicted probability per sample
    pred = []
    for i in range(len(X)):

        x1.set_value(np.asmatrix(X[i]).T)
        label.set_value(np.asmatrix(y[i]))

        predict.forward()
        pred.append(predict.value[0, 0])

    # Threshold at 0.5 and map {False, True} to {-1, 1}.
    # Builtin int replaces np.int, which was deprecated in NumPy 1.20 and
    # removed in 1.24; it selects the same dtype.
    pred = (np.array(pred) > 0.5).astype(int) * 2 - 1

    # Fraction of samples whose predicted label matches y.
    # Note: this rebinds `accuracy`, which an earlier cell bound to the
    # hf.ops.metrics.Accuracy node.
    accuracy = (y == pred).astype(int).sum() / len(X)
    print("epoch: {:d}, accuracy: {:.3f}".format(epoch + 1, accuracy))

epoch: 1, accuracy: 0.895
epoch: 2, accuracy: 0.930
epoch: 3, accuracy: 0.920
epoch: 4, accuracy: 0.915
epoch: 5, accuracy: 0.915
epoch: 6, accuracy: 0.925
epoch: 7, accuracy: 0.945
epoch: 8, accuracy: 0.950
epoch: 9, accuracy: 0.955
epoch: 10, accuracy: 0.965
epoch: 11, accuracy: 0.970
epoch: 12, accuracy: 0.975
epoch: 13, accuracy: 0.975
epoch: 14, accuracy: 0.980
epoch: 15, accuracy: 0.990
epoch: 16, accuracy: 0.990
epoch: 17, accuracy: 0.990
epoch: 18, accuracy: 0.995
epoch: 19, accuracy: 0.995
epoch: 20, accuracy: 0.995
epoch: 21, accuracy: 0.995
epoch: 22, accuracy: 0.995
epoch: 23, accuracy: 0.995
epoch: 24, accuracy: 0.995
epoch: 25, accuracy: 0.995
epoch: 26, accuracy: 0.995
epoch: 27, accuracy: 0.995
epoch: 28, accuracy: 0.995
epoch: 29, accuracy: 0.995
epoch: 30, accuracy: 0.995
epoch: 31, accuracy: 0.995
epoch: 32, accuracy: 0.995
epoch: 33, accuracy: 0.995
epoch: 34, accuracy: 0.995
epoch: 35, accuracy: 0.995
epoch: 36, accuracy: 0.995
epoch: 37, accuracy: 0.995
epoch: 38,