In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from utils import sigmoid, get_batch, normalization


In [3]:
def relu(z):
    """Rectified linear unit: element-wise ``max(0, z)``.

    Args:
        z: scalar or array-like of pre-activations.

    Returns:
        ``np.maximum(0, z)`` -- negative entries become 0, the rest pass through.
    """
    return np.maximum(0, z)

In [4]:
def propogate(w,b,x):
    """Run a forward pass through every layer and return the last activation.

    Layer ``i`` computes ``sigmoid(np.dot(w[i], a) + b[i])`` where ``a`` is the
    output of the previous layer (``x`` for the first one).  The sigmoid is
    applied on every layer, the last one included.

    Args:
        w: sequence of weight matrices, one per layer.
        b: sequence of bias terms, indexed in step with ``w``.
        x: network input; returned unchanged when ``w`` is empty.

    Returns:
        The activation produced by the final layer.
    """
    activation = x
    for layer, weights in enumerate(w):
        activation = sigmoid(np.dot(weights, activation) + b[layer])
    return activation

In [5]:
def test(w,b,x,y):
    """Return the classification accuracy of the network on ``(x, y)``.

    The network output is thresholded at 0.5 (strictly greater -> 1.0,
    otherwise 0.0) and compared with ``y``; accuracy is one minus the mean
    absolute difference between the two.

    Args:
        w: per-layer weight matrices, passed straight to ``propogate``.
        b: per-layer biases, passed straight to ``propogate``.
        x: inputs to classify.
        y: reference labels, compared element-wise with the predictions.

    Returns:
        ``1 - mean(|prediction - y|)``.
    """
    scores = propogate(w, b, x)
    labels = np.where(scores > 0.5, 1., 0.)
    error_rate = np.mean(np.abs(labels - y))
    return 1 - error_rate

In [6]:

# Training split, read from the .npy files under data/.
X, Y = np.load('data/train_x.npy'), np.load('data/train_y.npy')

# Second split; the training loop passes it to test() for the reported test accuracy.
Xt, Yt = np.load('data/test_x.npy'), np.load('data/test_y.npy')

# Quick shape check of the training arrays.
for caption, array in (('the shape of X is', X), ('the shape of Y is', Y)):
    print(caption, array.shape)

the shape of X is (510, 48896)
the shape of Y is (1, 48896)


In [18]:

# --- Hyperparameters ---------------------------------------------------------
epsilon = 1e-9              # added inside the log() calls of the loss so log(0) never occurs
TRAIN_STEP = 100            # number of epochs (outer-loop iterations) to train for
lr = 0.25                   # initial learning rate; the training loop rebinds lr every epoch
alpha = lr                  # untouched copy of the initial rate, used as the base of the decay
BATCH_SIZE = 128            # batch size handed to get_batch
LEARNING_RATE_DECAY = 0.25  # decay factor in lr = alpha / (1 + decay * epoch)
BETA = 0.9                  # weight of the previous value in the gradient moving averages
SEED = 0                    # seed for weight initialisation, so a fresh run starts from the same weights

# --- Network dimensions ------------------------------------------------------
INPUT_NODE = 510            # rows of X (see the printed shape of X)
OUTPUT_NODE = 1
HIDDEN_LAYER = 1020

# A dedicated generator keeps the initial weights reproducible across
# "Restart & Run All" without touching NumPy's global random state.
init_rng = np.random.RandomState(SEED)

# Layer 1: small random weights, zero biases, zeroed momentum buffers.
w1 = init_rng.randn(HIDDEN_LAYER, INPUT_NODE) * 0.01
b1 = np.zeros(shape=[HIDDEN_LAYER, 1])
v_dw1 = np.zeros(shape=[HIDDEN_LAYER, INPUT_NODE])
v_db1 = np.zeros(shape=[HIDDEN_LAYER, 1])

# Layer 2 (output): same scheme.
w2 = init_rng.randn(OUTPUT_NODE, HIDDEN_LAYER) * 0.01
b2 = np.zeros(shape=[OUTPUT_NODE, 1])

v_dw2 = np.zeros(shape=[OUTPUT_NODE, HIDDEN_LAYER])
v_db2 = np.zeros(shape=[OUTPUT_NODE, 1])


# Mini-batch training of the two-layer network using moving-average (momentum)
# gradient updates and a per-epoch learning-rate decay.
losses = []  # loss values sampled at each logging step below
k = 1        # running count of processed batches across all epochs (1-based)
for epoch in range(TRAIN_STEP):
    # Inverse-time decay of the learning rate, recomputed from the base rate alpha.
    lr = alpha / (1 + LEARNING_RATE_DECAY * epoch)
    index = 0
    while True:
        # get_batch comes from utils; presumably it returns the next slice of
        # (X, Y) plus the advanced cursor -- TODO confirm. A None x ends the epoch.
        x, y, index = get_batch(index, BATCH_SIZE, X, Y)
        if x is None:
            break
        
        # Number of columns of y; Y is printed as (1, 48896), so columns are
        # presumably samples -- confirm get_batch slices along that axis.
        m = y.shape[1]
        
        # Forward pass, layer 1.
        z1 = np.dot(w1, x) + b1
        a1 = sigmoid(z1)
        
        # Forward pass, layer 2 (network output).
        z2 = np.dot(w2, a1) + b2
        a = sigmoid(z2)
        
        # Binary cross-entropy; epsilon keeps both log() arguments away from 0.
        cross_entropy = (1 - y) * np.log(1 - a + epsilon) + y * np.log(a + epsilon)
        loss = - np.mean(cross_entropy)
        
        # Output-layer gradients. dz2 = a - y is the cross-entropy gradient w.r.t.
        # z2 assuming utils.sigmoid is the logistic function -- TODO confirm.
        dz2 = a - y
        dw2 = np.dot(dz2, a1.T) / m
        db2 = np.mean(dz2, axis=1, keepdims=True)
        
        # Back-propagate through layer 1; a1 * (1 - a1) is the logistic derivative
        # (same assumption about utils.sigmoid as above).
        da1 = np.dot(w2.T, dz2)
        dz1 = da1 * a1 * (1-a1)
        dw1 = np.dot(dz1, x.T) / m
        db1 = np.mean(dz1, axis=1, keepdims=True)
        
        # Exponential moving averages of the gradients (no bias correction applied).
        v_dw1 = BETA * v_dw1 + (1 - BETA) * dw1
        v_db1 = BETA * v_db1 + (1 - BETA) * db1
        v_dw2 = BETA * v_dw2 + (1 - BETA) * dw2
        v_db2 = BETA * v_db2 + (1 - BETA) * db2
        
        # In-place parameter step along the averaged gradients.
        w2 -= lr * v_dw2
        b2 -= lr * v_db2
        w1 -= lr * v_dw1
        b1 -= lr * v_db1
        
        # Every 500 batches: record the loss and report accuracy. The loss was
        # computed before this batch's update, whereas both accuracies are
        # evaluated with the freshly updated weights.
        if k % 500 == 0:
            losses.append(loss)
            w = [w1,w2]
            b = [b1,b2]
            accury = test(w,b,Xt,Yt)   # accuracy on the full (Xt, Yt) set
            train_acc = test(w,b,x,y)  # accuracy on the current mini-batch only
            print('Training on %d batch,train set accuty is %.3f, loss is %.3f. Test Accury is %.3f'\
                  %(k,train_acc, loss, accury))
        k += 1
        
    

# Plot the sampled mini-batch losses against the batch number at which each
# one was recorded. The training loop appends to `losses` once every 500
# batches (k = 500, 1000, ...), so plotting against the bare list index would
# mislabel the 'batch' axis by a factor of 500.
LOSS_LOG_INTERVAL = 500  # must match the `k % 500` logging step of the training loop
logged_batches = [LOSS_LOG_INTERVAL * (i + 1) for i in range(len(losses))]

fig, ax = plt.subplots()
ax.plot(logged_batches, losses)
ax.set_title('batch loss')
ax.set_xlabel('batch')
ax.set_ylabel('loss')
plt.show()
        



Training on 500 batch,train set accuty is 0.797, loss is 0.492. Test Accury is 0.796
Training on 1000 batch,train set accuty is 0.820, loss is 0.472. Test Accury is 0.805
Training on 1500 batch,train set accuty is 0.820, loss is 0.435. Test Accury is 0.813
Training on 2000 batch,train set accuty is 0.820, loss is 0.379. Test Accury is 0.810
Training on 2500 batch,train set accuty is 0.812, loss is 0.401. Test Accury is 0.801
Training on 3000 batch,train set accuty is 0.789, loss is 0.453. Test Accury is 0.798
Training on 3500 batch,train set accuty is 0.805, loss is 0.408. Test Accury is 0.810
Training on 4000 batch,train set accuty is 0.797, loss is 0.470. Test Accury is 0.804
Training on 4500 batch,train set accuty is 0.797, loss is 0.382. Test Accury is 0.809
Training on 5000 batch,train set accuty is 0.758, loss is 0.446. Test Accury is 0.807
Training on 5500 batch,train set accuty is 0.844, loss is 0.382. Test Accury is 0.810
Training on 6000 batch,train set accuty is 0.812, loss 

KeyboardInterrupt: 

In [19]:
# Read the CSV at data/X_test as float32, skip its first column and
# transpose the remaining values.
path_test = 'data/X_test'
dft = pd.read_csv(path_test, dtype=np.float32)
Xtest = np.transpose(dft.iloc[:, 1:].values)
dft = None  # drop the reference to the DataFrame; it is not needed any more
# normalization() comes from utils; presumably it rescales the features the
# same way the training data was prepared -- TODO confirm.
Xtest = normalization(Xtest)

In [20]:
# Use the trained model to predict labels for Xtest.
weight = [w1,w2]
bias = [b1,b2]
a = propogate(weight, bias, Xtest)
# Same decision rule as test(): class 1 only when the activation is strictly
# greater than 0.5. The previous `a < 0.5 -> 0 else 1` form labelled a score of
# exactly 0.5 (or NaN) as 1, disagreeing with the rule used for the reported
# accuracies.
predict = np.where(a > 0.5, 1, 0)
print('prediction shape is ', predict.shape)

prediction shape is  (1, 27622)


In [21]:
# The output file is named after the hidden-layer width and the current lr.
# NOTE(review): lr is rebound every epoch by the training loop, so this is the
# decayed rate from the last epoch that ran, not the initial rate kept in
# alpha -- confirm that this is the intended file name.
path_result = 'data/result_{}_{}.csv'.format(HIDDEN_LAYER,lr)

# One row per prediction in a single 'label' column; to_csv also writes the
# default integer index as the first CSV column.
result = pd.DataFrame(np.transpose(predict), columns=['label'])
result.to_csv(path_result)