In [371]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
import tensorflow as tf

In [372]:
breast_dataset = load_breast_cancer()
breast = pd.DataFrame(breast_dataset.data, columns=breast_dataset.feature_names)
breast['y'] = breast_dataset.target

In [373]:
breast.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,y
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [374]:
test_dataset = breast.iloc[-int(0.2*len(breast)):,:]
train_dataset = breast.iloc[:-int(0.2*len(breast)),:]
print(test_dataset.shape, train_dataset.shape)

(113, 31) (456, 31)


In [375]:
test_dataset = tf.convert_to_tensor(test_dataset)
X_test = test_dataset[:,:-1]
y_test = test_dataset[:,-1]
train_dataset = tf.convert_to_tensor(train_dataset)
X_train = train_dataset[:,:-1]
y_train = train_dataset[:,-1]

In [376]:
def load_array(data_arrays, batch_size, is_train=True):
    dataset = tf.data.Dataset.from_tensor_slices(data_arrays)
    if is_train:
        dataset = dataset.shuffle(buffer_size=len(data_arrays))
    dataset = dataset.batch(batch_size)
    return dataset

In [377]:
batch_size = 16
data_iter = load_array((train_dataset[:,:-1],train_dataset[:,-1]), batch_size, True)

In [378]:
for data in data_iter:
    print(data)
    break

(<tf.Tensor: shape=(16, 30), dtype=float64, numpy=
array([[1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
        3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
        8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
        3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
        1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, 1.326e+03, 8.474e-02, 7.864e-02,
        8.690e-02, 7.017e-02, 1.812e-01, 5.667e-02, 5.435e-01, 7.339e-01,
        3.398e+00, 7.408e+01, 5.225e-03, 1.308e-02, 1.860e-02, 1.340e-02,
        1.389e-02, 3.532e-03, 2.499e+01, 2.341e+01, 1.588e+02, 1.956e+03,
        1.238e-01, 1.866e-01, 2.416e-01, 1.860e-01, 2.750e-01, 8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, 1.203e+03, 1.096e-01, 1.599e-01,
        1.974e-01, 1.279e-01, 2.069e-01, 5.999e-02, 7.456e-01, 7.869e-01,
        4.585e+00, 9.403e+01, 6.150e-03, 4.006e-02, 3.832e-

In [379]:
# 初始化参数
W = tf.Variable(tf.zeros((30,1)), trainable=True)
b =tf.Variable(tf.zeros(1), trainable=True)

In [380]:
# 定义模型
def net(X, W, b):
    res = tf.matmul(tf.cast(X, dtype=W.dtype), W)+b
    res = 1/(1+tf.exp(-res))
    return res

In [381]:
# 定义损失函数
def loss(y_hat,y):
    y = tf.cast(y, dtype=y_hat.dtype)
    y = tf.reshape(y, y_hat.shape) 
    l = -y*tf.math.log(y_hat)-(1-y)*tf.math.log(1-y_hat)
    return l

In [382]:
# 定义优化算法
def sgd(params, grads, lr, batch_size):
    for param, grad in zip(params, grads):
        param.assign_sub(lr*grad/batch_size)

In [383]:
def evaluate(y_pred, y):
    ones = tf.ones_like(y_pred)
    zeros = tf.zeros_like(y_pred)
    output = tf.where(y_pred > 0.5 , ones, zeros)
    y = tf.reshape(y, output.shape)
    res = tf.reduce_sum(tf.where(y == tf.cast(output, dtype=y.dtype), ones, zeros))/y.shape[0]
    return res.numpy()

In [384]:
lr = 0.00001
batch_size = 16
for epoch in range(10):
    for X, y in data_iter:
        with tf.GradientTape() as tape:
            y_hat = net(tf.cast(X,dtype=W.dtype), W, b)
            l = loss(y_hat, y)
        dW, db = tape.gradient(l, [W,b])
        # print(dW, db)
        sgd([W, b],[dW, db], lr, batch_size)
    train_loss = loss(net(X_train, W, b), y_train)
    train_acc = evaluate(net(X_train, W, b), y_train)
    test_loss = loss(net(X_test, W, b), y_test)
    test_acc = evaluate(net(X_test, W, b), y_test)
    print('epoch:{}, train_loss:{}, train_acc:{}, test_loss:{}, test_acc:{}'\
        .format(epoch+1, float(tf.reduce_mean(train_loss)), train_acc,
                         float(tf.reduce_mean(test_loss)), test_acc))

epoch:1, train_loss:0.5787349343299866, train_acc:0.7609649300575256, test_loss:0.5272122025489807, test_acc:0.8318583965301514
epoch:2, train_loss:0.48076796531677246, train_acc:0.8377193212509155, test_loss:0.5559106469154358, test_acc:0.7345132827758789
epoch:3, train_loss:0.4356544315814972, train_acc:0.8991228342056274, test_loss:0.44666680693626404, test_acc:0.8938053250312805
epoch:4, train_loss:0.4431581199169159, train_acc:0.8355262875556946, test_loss:0.3857197165489197, test_acc:0.9203540086746216
epoch:5, train_loss:0.40765273571014404, train_acc:0.8662280440330505, test_loss:0.3692420423030853, test_acc:0.9292035102844238
epoch:6, train_loss:0.3808024823665619, train_acc:0.8947368264198303, test_loss:0.361583411693573, test_acc:0.9380530714988708
epoch:7, train_loss:0.3713128864765167, train_acc:0.8947368264198303, test_loss:0.3448375165462494, test_acc:0.9380530714988708
epoch:8, train_loss:0.3611350953578949, train_acc:0.8947368264198303, test_loss:0.3333091139793396, te