In [163]:
import numpy as np

# 加载数据
def load_data ():
    fr = open('./data.txt')

    X, y = [], []
    for linestr in fr.readlines():
        arr = linestr.strip().split()
        X.append([1.0, float(arr[0]), float(arr[1])])
        y.append([int(arr[2])])

    X = np.array(X)
    y = np.array(y)
    
    fr.close()
    return X, y
    
X_data, y_data = load_data()

X = X_data[:80]
y = y_data[:80]

test_X = X_data[80:]
test_y = y_data[80:]

In [164]:
# sigmoid function
def sigmoid (z):
    return 1 / (1 + np.exp(-z))

# loss function a = h(x) = sigmoid(z) => predicting data
def cost_fn (y, a):
    epsilon = 1e-5
    # return -y * log(a) - (1 - y) * log(1 - a) 
    return np.mean(- y * np.log2(a + epsilon) - (1 - y) * np.log2(1 - a + epsilon))

# dloss / dz
def dloss_dz_derivatives (y, a):
    return y - a

In [184]:
# 1. z = w.T * x + b = w.T * x (x0 = 1)
# 2. a = sigmoid(z)
# 3. dw -= alpha * dw
# dw = (y - a) x 或 (y - h(x)) x

def train ():
    m, n = X.shape # 100 samples, 3 features (2 features + x0 = 1)
    alpha = 0.02
    loop_num = 500
    
    # init Weights 3 * 1 [[w0], [w1], [w2]]
    W = np.random.rand(n, 1)
    L_arr = []
    
    for i in range(loop_num):
        Z = np.dot(X, W)
        A = sigmoid(Z)

        # ***** A - y ***** 不要写成 y - A
        dL_dw = 1 / m * np.dot(X.T, A - y)
        W = W - alpha * dL_dw
        
        # evaluate
        if (i % 10 == 0):
            L_arr.append(cost_fn(y, sigmoid(np.dot(X, W))))

    print(L_arr)
    return W

train_W = train()

# 训练值
train_y = sigmoid(np.dot(X, train_W))
# train_accuracy = 100 - np.mean(train_y - y) * 100

print("Train accuracy: {} %", train_accuracy)

[3.169220489745994, 0.8536751761446391, 0.6746967249226475, 0.6606572974812407, 0.6558653388073652, 0.6519910276981655, 0.6482876044244957, 0.6446743645024064, 0.6411400103623832, 0.6376809452501829, 0.6342945056285535, 0.6309782051069254, 0.6277296494568029, 0.6245465266023735, 0.6214266047226145, 0.6183677310311091, 0.6153678304490208, 0.6124249041155945, 0.6095370277544916, 0.6067023499189683, 0.6039190901373414, 0.6011855369784109, 0.59850004605486, 0.5958610379810738, 0.5932669963003049, 0.5907164653946214, 0.5882080483896686, 0.5857404050649117, 0.5833122497787546, 0.5809223494167245, 0.5785695213697973, 0.5762526315489045, 0.5739705924407107, 0.5717223612088846, 0.5695069378442984, 0.5673233633668785, 0.5651707180811965, 0.563048119887318, 0.5609547226479219, 0.5588897146122651, 0.5568523168971737, 0.5548417820249082, 0.5528573925174574, 0.550898459546573, 0.548964321638642, 0.5470543434333196, 0.5451679144947087, 0.5433044481737437, 0.5414633805203544, 0.5396441692439123]
Train

In [185]:
# testing set to predict
def test ():
    test_A = sigmoid(np.dot(test_X, train_W))
    print(test_A, test_y)
    # print("Test accuracy: {} %", 100 - np.mean(test_A - test_y) * 100)

test()

[[0.32149744]
 [0.88551685]
 [0.29412386]
 [0.51273615]
 [0.65503091]
 [0.84669685]
 [0.52585871]
 [0.15644639]
 [0.61866606]
 [0.81626727]
 [0.84775686]
 [0.17289354]
 [0.22297694]
 [0.7789005 ]
 [0.7133681 ]
 [0.7250758 ]
 [0.20793201]
 [0.67629675]
 [0.32174415]
 [0.06646688]] [[0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]]


In [162]:
def draw ():
    m = X.shape[0] # 样本的个数                                    
    positive_x, positive_y = [], []
    negative_x, negative_y = [], []
    for i in range(m):
        _x, _y = X[i][1], X[i][2]
        if y[i] == 1: # 正样本
            positive_x.append(_x)
            positive_y.append(_y)
        else:  #0为负样本
            negative_x.append(_x)
            negative_y.append(_y)
    
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(positive_x, positive_y, s = 20, c = 'red', marker = 's',alpha=.5)
    ax.scatter(negative_x, negative_y, s = 20, c = 'green',alpha=.5)       
    
    plt.title('positive: red; negative: green')                                                
    plt.show()
    
# draw()