In [1]:



import sys

sys.path.append('../')

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import HuichuanFlow as hf

from HuichuanFlow.trainer import SimpleTrainer


In [2]:
from sklearn.datasets import make_circles

# 获取同心圆状分布的数据，X的每行包含两个特征，y是1/0类别标签
X, y = make_circles(200, noise=0.1, factor=0.2)
y = y * 2 - 1  # 将标签转化为1/-1

# 是否使用二次项
use_quadratic = False

# 一次项，2维向量（2x1矩阵）
x1 = hf.core.Variable(dim=(2, 1), init=False, trainable=False)

# 标签
label = hf.core.Variable(dim=(1, 1), init=False, trainable=False)

# 偏置
b = hf.core.Variable(dim=(1, 1), init=True, trainable=True)

# 根据是否使用二次项区别处理
if use_quadratic:

    # 将一次项与自己的转置相乘，得到二次项2x2矩阵，再转成4维向量（4x1矩阵）
    x2 = hf.ops.Reshape(
            hf.ops.MatMul(x1, hf.ops.Reshape(x1, shape=(1, 2))),
            shape=(4, 1)
            )

    # 将一次和二次项连接成6维向量（6x1矩阵）
    x = hf.ops.Concat(x1, x2)
    
    # 权值向量是6维（1x6矩阵）
    w = hf.core.Variable(dim=(1, 6), init=True, trainable=True)
    
else:
    
    # 特征向量就是一次项
    x = x1
    
    # 权值向量是2维（1x2矩阵）
    w = hf.core.Variable(dim=(1, 2), init=True, trainable=True)


In [3]:
# 线性部分
output = hf.ops.Add(hf.ops.MatMul(w, x), b)

# 预测概率
predict = hf.ops.Logistic(output)

# 损失函数
loss = hf.ops.loss.LogLoss(hf.ops.MatMul(label, output))

learning_rate = 0.001

optimizer = hf.optimizer.Adam(hf.default_graph, loss, learning_rate)
accuracy = hf.ops.metrics.Accuracy(predict, label)
precision = hf.ops.metrics.Precision(predict, label)
recall = hf.ops.metrics.Recall(predict, label)
auc = hf.ops.metrics.ROC_AUC(predict, label)

batch_size = 8


In [4]:
batch_size = 16

trainer = SimpleTrainer([x1], label,
                        loss, optimizer, epoches=20, batch=16,eval_on_train=True,metrics_ops=[accuracy,precision,recall,auc])
train_inputs = {x1.name: X}
trainer.train_and_eval(train_inputs, y, train_inputs, y)


[INIT] Variable weights init finished
- Epoch [1] train start, batch size: 8, train data size: 1
-- iteration [99] finished, time cost: 0.04  and loss value: 0.692835
-- iteration [199] finished, time cost: 0.03  and loss value: 0.692779
- Epoch [1] train finished, time cost: 0.07
Epoch [1] evaluation metrics Accuracy: 0.3800 Precision: 0.3065 Recall: 0.1900 ROC_AUC: 0.5093 
- Epoch [2] train start, batch size: 8, train data size: 1
-- iteration [99] finished, time cost: 0.04  and loss value: 0.693184
-- iteration [199] finished, time cost: 0.04  and loss value: 0.693265
- Epoch [2] train finished, time cost: 0.07
Epoch [2] evaluation metrics Accuracy: 0.3150 Precision: 0.0000 Recall: 0.0000 ROC_AUC: 0.5053 
- Epoch [3] train start, batch size: 8, train data size: 1
-- iteration [99] finished, time cost: 0.03  and loss value: 0.693465
-- iteration [199] finished, time cost: 0.03  and loss value: 0.693682
- Epoch [3] train finished, time cost: 0.06
Epoch [3] evaluation metrics Accuracy:

In [16]:
from HuichuanFlow_serving.exporter import Exporter

exporter = Exporter()
sig = exporter.signature('Variable:0', 'Logistic:6')


In [18]:

saver = hf.trainer.Saver('./epoches20')
saver.save(model_file_name='my_model.json',
           weights_file_name='my_weights.npz',service_signature=sig)

Save model into file: ./epoches20/my_model.json
Save weights to file: ./epoches20/my_weights.npz
