In [1]:
import numpy as np
import pandas as pd
import random as rd
import math
import copy
from scipy.io import savemat

In [2]:
def sigmoid(x):
    """Element-wise logistic function, evaluated in a numerically stable way.

    Entries greater than zero are computed as ``1 / (1 + exp(-x))`` and the
    remaining entries as ``exp(x) / (1 + exp(x))``, so ``exp`` is only ever
    evaluated on non-positive arguments and cannot overflow.

    # Arguments
        x: NumPy array of real values.
    # Returns
        Array of the same shape as ``x`` holding the sigmoid of each entry.
    """
    is_positive = x > 0
    is_non_positive = ~is_positive

    # Branch for entries > 0: exp(-x) is written only where the mask holds;
    # the other slots keep their initial 0 and are blanked out afterwards.
    exp_of_negated = np.zeros_like(x, dtype='float64')
    np.exp(-x, exp_of_negated, where=is_positive)
    upper_branch = 1 / (1 + exp_of_negated)
    upper_branch[is_non_positive] = 0

    # Branch for entries <= 0: same idea with exp(x).
    exp_of_input = np.zeros_like(x, dtype='float64')
    np.exp(x, exp_of_input, where=is_non_positive)
    lower_branch = exp_of_input / (1 + exp_of_input)
    lower_branch[is_positive] = 0

    # Each entry is non-zero in exactly one branch, so the sum merges them.
    return upper_branch + lower_branch
def invSigmoid(x):
    """Apply the logit (inverse sigmoid), log(p / (1 - p)), to every item of x.

    # Arguments
        x: iterable of numbers; each must make p / (1 - p) positive and finite.
    # Returns
        A Python list with one logit per input item.
    """
    logits = []
    for p in x:
        logits.append(math.log(p / (1 - p)))
    return logits
def sigmoid_prime(sigmoidx):
    """Derivative of the sigmoid expressed through its own output.

    # Arguments
        sigmoidx: value(s) s = sigmoid(x) that were already computed.
    # Returns
        The element-wise product s * (1 - s).
    """
    complement = 1 - sigmoidx
    # np.multiply keeps the product element-wise for every array type.
    return np.multiply(sigmoidx, complement)

In [3]:
class ANN:
    """Fully connected feed-forward network for scalar regression.

    Hidden layers use the module-level ``sigmoid``; the last layer is linear
    (its pre-activation is the prediction). Training minimises the squared
    error 0.5 * (prediction - target)**2 with plain or mini-batch gradient
    descent. Data sets are 2-D arrays whose last column is the target and
    whose other columns are the input features.

    All internal math uses plain ndarrays and the ``@`` operator.
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        # Arguments
            sizes: layer widths including the input layer,
                e.g. [n_inputs, 80, 1] for one hidden layer of 80 units.
        """
        # Number of layers, counting the input layer ([n, 80, 1] -> 3).
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Index i holds the parameters that feed layer i + 1.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def _feed_forward(self, x):
        """Run one sample through the network.

        # Returns
            (activations, z): ``activations`` is the input column followed by
            every hidden-layer activation; ``z`` is the linear output column.
        # Raises
            ValueError: if the network has no weight layers.
        """
        if not self.weights:
            raise ValueError("ANN needs at least an input and an output layer")
        a = np.asarray(x, dtype='float64').reshape(-1, 1)
        activations = [a]
        last = len(self.weights) - 1
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = w @ a + b
            # The output layer is linear, so its sigmoid is never needed.
            if i < last:
                a = sigmoid(z)
                activations.append(a)
        return activations, z

    def forward_pass(self, x):
        """Return the network prediction for one sample as a Python float.

        # Arguments
            x: 1-D sequence of input features.
        # Raises
            ValueError: if the output layer has more than one unit.
        """
        _, z = self._feed_forward(x)
        # .item() is the supported way to turn a one-element array into a scalar.
        return float(z.item())

    def backPropagation(self, x, y):
        """Compute the squared-error gradients for a single sample.

        # Arguments
            x: 1-D sequence of input features.
            y: target value.
        # Returns
            (grad_w, grad_b): lists shaped like ``self.weights`` and
            ``self.biases``. The bias gradient of a layer equals its error
            term (delta).
        """
        activations, z = self._feed_forward(x)
        layer_count = len(self.weights)
        grad_b = [None] * layer_count
        # Linear output + squared error: the output delta is the residual.
        grad_b[-1] = z - np.asarray(y, dtype='float64').reshape(-1, 1)
        # Propagate the error backwards through the sigmoid hidden layers.
        for j in range(layer_count - 2, -1, -1):
            propagated = self.weights[j + 1].T @ grad_b[j + 1]
            grad_b[j] = np.multiply(sigmoid_prime(activations[j + 1]), propagated)
        grad_w = [d @ a.T for d, a in zip(grad_b, activations)]
        return grad_w, grad_b

    def SGD(self, train_set, validation_set, batch_size, lr=1e-3, epochs=1e3):
        """Mini-batch stochastic gradient descent with best-model tracking.

        After every epoch the network is scored on ``validation_set``; a deep
        copy is kept whenever the score improves and one progress line is
        printed. ``train_set`` itself is not modified: each epoch draws a
        fresh random row order instead of shuffling the array in place.

        # Arguments
            train_set, validation_set: 2-D arrays, target in the last column.
            batch_size: number of rows per mini-batch.
            lr: learning rate used for every mini-batch update.
            epochs: number of passes over ``train_set`` (truncated to int).
        # Returns
            Deep copy of the network at its best validation score, or None if
            no epoch produced a comparable score (e.g. ``epochs`` is 0).
        """
        variate = float('inf')
        besta = None
        row_count = train_set.shape[0]
        for j in range(int(epochs)):
            order = np.random.permutation(row_count)
            for k in range(0, row_count, batch_size):
                # Use the lr argument, not a notebook-level global.
                self.minibatch(train_set[order[k:k + batch_size]], lr)
            curVar = self.validate(validation_set)
            if curVar < variate:
                variate = curVar
                besta = copy.deepcopy(self)
            print("Epoch {} : {}, best : {}".format(j, curVar, variate))
        return besta

    def gradient_descent(self, x, y, lr=1e-3):
        """Take one gradient step on a single sample."""
        grad_w, grad_b = self.backPropagation(x, y)
        self.weights = [w - lr * gw for w, gw in zip(self.weights, grad_w)]
        self.biases = [b - lr * gb for b, gb in zip(self.biases, grad_b)]

    def train(self, train_set, lr=1):
        """Run one pass of per-sample gradient descent over ``train_set``."""
        for row in train_set:
            self.gradient_descent(row[:-1], row[-1], lr)

    def minibatch(self, batch, lr=1e-3):
        """Apply one update using the gradient averaged over ``batch``.

        An empty batch is a no-op (it would otherwise divide by zero).
        """
        batchsize = len(batch)
        if batchsize == 0:
            return
        grad_b = [np.zeros(b.shape) for b in self.biases]
        grad_w = [np.zeros(w.shape) for w in self.weights]
        for row in batch:
            delta_gw, delta_gb = self.backPropagation(row[:-1], row[-1])
            grad_w = [gw + dgw for gw, dgw in zip(grad_w, delta_gw)]
            grad_b = [gb + dgb for gb, dgb in zip(grad_b, delta_gb)]
        step = lr / batchsize
        self.weights = [w - step * gw for w, gw in zip(self.weights, grad_w)]
        self.biases = [b - step * gb for b, gb in zip(self.biases, grad_b)]

    def validate(self, validation_set):
        """Return the mean squared prediction error over ``validation_set``.

        # Raises
            ValueError: if ``validation_set`` has no rows.
        """
        if validation_set.shape[0] == 0:
            raise ValueError("validation_set is empty")
        predictions = np.array(
            [self.forward_pass(row[:-1]) for row in validation_set],
            dtype='float64',
        )
        targets = np.asarray(validation_set[:, -1], dtype='float64')
        return float(np.mean(np.square(predictions - targets)))
def preprocess(dataSet):
    """Turn a raw DataFrame into the numeric array used for training.

    The ``dteday`` column is converted to an integer with ``date2int`` and
    then divided by the largest such integer in this frame. The first column
    is dropped from the result.

    The work is done on a copy, so the caller's DataFrame is left untouched
    and the function can safely be called again on the same frame.

    # Arguments
        dataSet: pandas DataFrame with a ``dteday`` column of '/'-separated
            date strings.
    # Returns
        ``values`` of the processed frame without its first column.
    """
    frame = dataSet.copy()
    day_numbers = frame.dteday.map(date2int)
    maxdte = float(day_numbers.max())
    # Scale by the maximum of this frame only (not a global constant).
    frame['dteday'] = day_numbers.astype('float64') / maxdte
    return frame.values[:, 1:]
def random_choice(train_set, batchSize):
    """Draw ``batchSize`` rows from ``train_set`` uniformly, with replacement.

    # Arguments
        train_set: 2-D array of samples (one per row).
        batchSize: number of rows to draw.
    # Returns
        Array with ``batchSize`` rows copied from ``train_set``.
    """
    # Honour batchSize instead of a hard-coded sample size.
    picks = np.random.randint(0, train_set.shape[0], batchSize)
    return train_set[picks, :]
def date2int(str):
    """Collapse a '/'-separated date string into a single integer.

    The string is split on '/', every piece is parsed as an int, and the
    result is ``first + 30 * second + third``.

    NOTE(review): the field order of the input is not visible here; if the
    first field is a year, dates from different years can map to the same
    value - confirm this encoding is intended.
    """
    fields = list(map(int, str.split('/')))
    return fields[0] + 30 * fields[1] + fields[2]
def calcVar(ds, a):
    """Variance of the prediction residuals of model ``a`` on data set ``ds``.

    # Arguments
        ds: 2-D array; the last column is the target, the rest are inputs.
        a: object exposing ``forward_pass(features)``.
    # Returns
        ``np.var`` of (prediction - target) over all rows.
    """
    predictions = [a.forward_pass(ds[i][:-1]) for i in range(ds.shape[0])]
    residuals = np.matrix(predictions) - ds[:, -1]
    return np.var(residuals)
def split(dataSet, proportion):
    """Randomly split the rows of ``dataSet`` into two parts.

    The caller's array is not shuffled in place; the rows are selected
    through a random permutation of the row indices instead.

    # Arguments
        dataSet: 2-D array of samples (one per row).
        proportion: fraction of rows that go into the first part.
    # Returns
        (first, second): arrays holding int(n_rows * proportion) rows and the
        remaining rows respectively.
    """
    row_count = dataSet.shape[0]
    order = np.random.permutation(row_count)
    train_num = int(row_count * proportion)
    return dataSet[order[:train_num], :], dataSet[order[train_num:], :]
def compute_eta_t(eta_min, eta_max, T_cur, Ti):
    '''Cosine-annealed learning rate ("Equation (5)" in the original note).

    # Arguments
        eta_min: value reached when T_cur == Ti.
        eta_max: value returned when T_cur == 0.
        T_cur: current position within the cycle.
        Ti: length of the cycle (must be non-zero).
    # Returns
        eta_min + 0.5 * (eta_max - eta_min) * (1 + cos(pi * T_cur / Ti))
    '''
    span = eta_max - eta_min
    cosine_term = np.cos(np.pi * T_cur / Ti) + 1
    return eta_min + 0.5 * span * cosine_term

In [170]:
# Single hidden layer
# NOTE(review): this cell carries execution count 170 and its saved output ends
# in a KeyboardInterrupt; the same statements appear again in the cell after it.
# Load the training CSV and turn it into an array (target in the last column).
ds=preprocess(pd.read_csv('1/train.csv'))
# Every column except the last one is an input feature.
inputNode=ds.shape[1]-1
learningRate=1e-3
# Layout: inputNode inputs -> 80 hidden units -> 1 output.
a=ANN([inputNode,80,1])
# eta is the learning rate handed to SGD below.
eta=learningRate
# Batch size 50, 500 epochs; the same array serves as training and validation set.
besta = a.SGD(ds,ds,50,eta,500)
# Score of the best snapshot returned by SGD, measured on the training data.
deviate = besta.validate(ds)


Epoch 0 : 5.184699927087171, best : 5.184699927087171
Epoch 1 : 5.185320934696573, best : 5.184699927087171
Epoch 2 : 5.1897861272743935, best : 5.184699927087171
Epoch 3 : 5.188395022009575, best : 5.184699927087171
Epoch 4 : 5.1913010611787795, best : 5.184699927087171
Epoch 5 : 5.184654216966735, best : 5.184654216966735
Epoch 6 : 5.185151767360343, best : 5.184654216966735
Epoch 7 : 5.184843698738812, best : 5.184654216966735
Epoch 8 : 5.194510669615333, best : 5.184654216966735
Epoch 9 : 5.185382628921789, best : 5.184654216966735
Epoch 10 : 5.184845206607574, best : 5.184654216966735
Epoch 11 : 5.185052143691567, best : 5.184654216966735
Epoch 12 : 5.1855402383750775, best : 5.184654216966735
Epoch 13 : 5.184777911071453, best : 5.184654216966735
Epoch 14 : 5.189230782612575, best : 5.184654216966735
Epoch 15 : 5.184632527515261, best : 5.184632527515261
Epoch 16 : 5.184717019617555, best : 5.184632527515261
Epoch 17 : 5.18471487803163, best : 5.184632527515261
Epoch 18 : 5.18462

KeyboardInterrupt: 

In [4]:
# Single hidden layer
# Load the training CSV and turn it into an array (target in the last column).
ds=preprocess(pd.read_csv('1/train.csv'))
# Every column except the last one is an input feature.
inputNode=ds.shape[1]-1
learningRate=1e-3
# Layout: inputNode inputs -> 80 hidden units -> 1 output.
a=ANN([inputNode,80,1])
# eta is the learning rate handed to SGD below.
eta=learningRate
# Batch size 50, 500 epochs; the same array serves as training and validation set.
besta = a.SGD(ds,ds,50,eta,500)
# Score of the best snapshot returned by SGD, measured on the training data.
deviate = besta.validate(ds)

Epoch 0 : 3731.399002965248, best : 3731.399002965248
Epoch 1 : 3455.7027497779786, best : 3455.7027497779786
Epoch 2 : 3262.5818749714977, best : 3262.5818749714977
Epoch 3 : 3098.6884143850843, best : 3098.6884143850843
Epoch 4 : 2959.447578615952, best : 2959.447578615952
Epoch 5 : 2837.595655166326, best : 2837.595655166326
Epoch 6 : 2729.9021784009574, best : 2729.9021784009574
Epoch 7 : 2633.266948150433, best : 2633.266948150433
Epoch 8 : 2546.6996187187183, best : 2546.6996187187183
Epoch 9 : 2468.858439715912, best : 2468.858439715912
Epoch 10 : 2399.7505400161963, best : 2399.7505400161963
Epoch 11 : 2337.1754729561826, best : 2337.1754729561826
Epoch 12 : 2279.483879020513, best : 2279.483879020513
Epoch 13 : 2222.9832825446024, best : 2222.9832825446024
Epoch 14 : 2169.2252165920986, best : 2169.2252165920986
Epoch 15 : 2128.290493522791, best : 2128.290493522791
Epoch 16 : 2094.718135939303, best : 2094.718135939303
Epoch 17 : 2065.2800800987125, best : 2065.2800800987125


Epoch 150 : 1495.5363013450522, best : 1495.5363013450522
Epoch 151 : 1494.3812352784369, best : 1494.3812352784369
Epoch 152 : 1493.16399692541, best : 1493.16399692541
Epoch 153 : 1492.1570915255163, best : 1492.1570915255163
Epoch 154 : 1490.690893168663, best : 1490.690893168663
Epoch 155 : 1489.334440159818, best : 1489.334440159818
Epoch 156 : 1488.127255971954, best : 1488.127255971954
Epoch 157 : 1486.9402245452452, best : 1486.9402245452452
Epoch 158 : 1485.7603772666005, best : 1485.7603772666005
Epoch 159 : 1484.578713089713, best : 1484.578713089713
Epoch 160 : 1483.417026229954, best : 1483.417026229954
Epoch 161 : 1482.2712311428072, best : 1482.2712311428072
Epoch 162 : 1481.0779769608305, best : 1481.0779769608305
Epoch 163 : 1479.9427274104064, best : 1479.9427274104064
Epoch 164 : 1478.7524635724067, best : 1478.7524635724067
Epoch 165 : 1477.7129222485676, best : 1477.7129222485676
Epoch 166 : 1476.4809640624048, best : 1476.4809640624048
Epoch 167 : 1475.42441186308

Epoch 296 : 1368.916062411, best : 1368.916062411
Epoch 297 : 1367.8314498441769, best : 1367.8314498441769
Epoch 298 : 1367.2264717758494, best : 1367.2264717758494
Epoch 299 : 1366.5369309324597, best : 1366.5369309324597
Epoch 300 : 1366.0945352255553, best : 1366.0945352255553
Epoch 301 : 1365.2854195404084, best : 1365.2854195404084
Epoch 302 : 1364.8721441431846, best : 1364.8721441431846
Epoch 303 : 1364.378817903385, best : 1364.378817903385
Epoch 304 : 1363.7556602234042, best : 1363.7556602234042
Epoch 305 : 1363.107820702402, best : 1363.107820702402
Epoch 306 : 1362.808657556001, best : 1362.808657556001
Epoch 307 : 1362.1885264234984, best : 1362.1885264234984
Epoch 308 : 1361.8833968987667, best : 1361.8833968987667
Epoch 309 : 1361.2832044799222, best : 1361.2832044799222
Epoch 310 : 1360.7272416418375, best : 1360.7272416418375
Epoch 311 : 1359.859141643872, best : 1359.859141643872
Epoch 312 : 1359.3150237755526, best : 1359.3150237755526
Epoch 313 : 1358.6875566127294

Epoch 440 : 1299.9054807543478, best : 1299.9054807543478
Epoch 441 : 1299.574831673488, best : 1299.574831673488
Epoch 442 : 1299.856697709628, best : 1299.574831673488
Epoch 443 : 1299.6049093178572, best : 1299.574831673488
Epoch 444 : 1298.5822596545008, best : 1298.5822596545008
Epoch 445 : 1298.5337020445224, best : 1298.5337020445224
Epoch 446 : 1297.597486413764, best : 1297.597486413764
Epoch 447 : 1297.196200853059, best : 1297.196200853059
Epoch 448 : 1296.7804800743236, best : 1296.7804800743236
Epoch 449 : 1296.4637233734843, best : 1296.4637233734843
Epoch 450 : 1296.2492844906824, best : 1296.2492844906824
Epoch 451 : 1297.0095142455802, best : 1296.2492844906824
Epoch 452 : 1295.3310633237795, best : 1295.3310633237795
Epoch 453 : 1294.9723583045854, best : 1294.9723583045854
Epoch 454 : 1294.5123472393211, best : 1294.5123472393211
Epoch 455 : 1294.3257946851095, best : 1294.3257946851095
Epoch 456 : 1293.8972492269036, best : 1293.8972492269036
Epoch 457 : 1294.846474

In [5]:
# Preprocess the test CSV, drop its last column and force a float64 array.
vs = preprocess(pd.read_csv('1/test.csv'))[:, :-1].astype('float64')
# NOTE(review): predictions come from `a`, not from the `besta` snapshot
# returned by SGD - confirm that this is intended.
checkres = [a.forward_pass(sample) for sample in vs]
checkres

[41.5581151909705,
 78.86138202351329,
 74.15446167281712,
 75.39905093019868,
 59.829747258514956]