In [90]:
from sklearn.datasets import load_digits
import numpy as np
from utils import *
# --- Model parameters -------------------------------------------------------
# Kernel geometry: `kernelsize` kernels, each stored as a flat vector of
# krow * kcol weights drawn uniformly from [-0.5, 0.5).
krow = 3
kcol = 2
kernelsize = 10
convweight = [np.random.random(krow * kcol) - 0.5 for _ in range(kernelsize)]
# Fully connected layer mapping 42 features to 10 outputs, same uniform init.
fcweight = np.random.random((42, 10)) - 0.5

# --- Data -------------------------------------------------------------------
digits = load_digits()

# Hold out the final 100 samples for evaluation; everything before is training.
X = digits['images'][:-100]
Y = digits['target'][:-100]
X_te = digits['images'][-100:]
Y_te = digits['target'][-100:]

# Patch matrices of the training images, computed once up front so the
# training loop can reuse them.
# NOTE(review): conv_2d_to_matrix_batch is defined further down in this file;
# on a fresh top-to-bottom run this call only resolves if `from utils import *`
# also provides it -- confirm.
X_batch_sum = conv_2d_to_matrix_batch(X, krow, kcol)

# One-hot encode the training labels: row n holds a single 1 in column Y[n].
Y_dummy = np.zeros((len(X), 10))
Y_dummy[np.arange(len(X)), Y] = 1
    
    
def conv_1d_to_2d(data, row, column):
    """Lay a flat sequence out row by row as a (row, column) float array.

    Args:
        data: sequence of row * column numeric values.
        row: number of rows in the result.
        column: number of columns in the result.

    Returns:
        A new float array of shape (row, column) where element [i, j] is
        data[i * column + j], or None when len(data) != row * column.
    """
    if len(data) != row * column:
        # Length mismatch is reported by returning None, not by raising.
        return None
    # np.array copies, so the result never aliases the caller's buffer.
    return np.array(data, dtype=float).reshape(row, column)


def conv_2d_to_matrix(data, kernelrow, kernelcol):
    """Collect every kernel-sized window of a 2-D array as a flattened row.

    Windows are visited with the row offset as the outer loop and the column
    offset as the inner loop, so the rows of the result are in row-major
    order of the window's top-left corner.

    Args:
        data: 2-D array to slide the window over.
        kernelrow: window height.
        kernelcol: window width.

    Returns:
        Array with one row per window position and kernelrow * kernelcol
        columns (an empty array when no window fits).
    """
    windows = [
        data[top:top + kernelrow, left:left + kernelcol].flatten()
        for top in range(0, data.shape[0] - kernelrow + 1)
        for left in range(0, data.shape[1] - kernelcol + 1)
    ]
    return np.array(windows)



def conv_2d_to_matrix_batch(data, kernelrow, kernelcol):
    """Build the window matrix of every 2-D array in a batch.

    Args:
        data: iterable of 2-D arrays.
        kernelrow: window height.
        kernelcol: window width.

    Returns:
        np.array stacking the conv_2d_to_matrix result of each item, in
        input order.
    """
    return np.array(
        [conv_2d_to_matrix(image, kernelrow, kernelcol) for image in data]
    )

def recover_matrix_to_2d(data, row, col):
    """Fold a flat sequence back into a (row, col) float array, row by row.

    Args:
        data: sequence of row * col numeric values.
        row: number of rows in the result.
        col: number of columns in the result.

    Returns:
        A new float array of shape (row, col) filled in row-major order from
        data, or None when len(data) != row * col.
    """
    if len(data) != row * col:
        # Length mismatch is reported by returning None, not by raising.
        return None
    # np.array copies, so the result never aliases the caller's buffer.
    return np.array(data, dtype=float).reshape(row, col)

def recover_matrix_to_2d_batch(data, row, col):
    """Apply recover_matrix_to_2d to every flat vector in a batch.

    Args:
        data: iterable of flat sequences.
        row: number of rows of each recovered matrix.
        col: number of columns of each recovered matrix.

    Returns:
        np.array of the per-item results, in input order.
    """
    return np.array([recover_matrix_to_2d(flat, row, col) for flat in data])


# Convolve one 2-D array with the kernel list and fold the result back to 2-D.
def conv(data, kernellist):
    """Apply the summed kernels to every window of a single 2-D array.

    The kernels in kernellist are added element-wise into one kernel before
    the dot product, so the output is a single map rather than one map per
    kernel. Window size comes from the module-level krow and kcol.

    Args:
        data: 2-D array to convolve.
        kernellist: sequence of flattened kernels of length krow * kcol.

    Returns:
        The result of recover_matrix_to_2d for a
        (data.shape[0] - krow + 1, data.shape[1] - kcol + 1) layout.
    """
    out_rows = data.shape[0] - krow + 1
    out_cols = data.shape[1] - kcol + 1
    combined_kernel = np.sum(kernellist, axis=0)
    windows = conv_2d_to_matrix(data, krow, kcol)
    responses = np.dot(windows, combined_kernel)
    return recover_matrix_to_2d(responses, out_rows, out_cols)

def conv_batch(data, kernellist):
    """Apply the summed kernels to every window of every array in a batch.

    The kernels in kernellist are added element-wise into one kernel, which
    is then contracted against the last axis of the batched window matrix.
    Window size comes from the module-level krow and kcol.

    Args:
        data: iterable of 2-D arrays (presumably all the same shape, so the
            per-item window matrices stack into one 3-D array -- confirm).
        kernellist: sequence of flattened kernels of length krow * kcol.

    Returns:
        Array with one row per input item and one column per window
        position.
    """
    x_batch_sum = conv_2d_to_matrix_batch(data, krow, kcol)
    kernelmtx = np.sum(kernellist, axis=0)
    # Contract the window axis (2) of the batch with the kernel's only axis.
    return np.tensordot(x_batch_sum, kernelmtx, axes=([2], [0]))

def conv_batch_sum(data, data_cov, kernellist):
    """Apply the summed kernels to an already-built batched window matrix.

    Args:
        data: unused; kept only so existing call sites keep working.
        data_cov: 3-D array whose axis 2 holds the flattened windows (the
            kind of array conv_2d_to_matrix_batch returns).
        kernellist: sequence of flattened kernels whose length matches
            axis 2 of data_cov.

    Returns:
        Array of shape data_cov.shape[:2]: each window dotted with the
        element-wise sum of the kernels.
    """
    kernelmtx = np.sum(kernellist, axis=0)
    # Contract the window axis (2) of the batch with the kernel's only axis.
    return np.tensordot(data_cov, kernelmtx, axes=([2], [0]))
    

def forward(x):
    """Run the inference pass: convolution, relu, fully connected, softmax.

    Reads the module-level convweight, fcweight and dropoutrate. relu and
    softmax are not defined in this file (presumably they come from utils).

    Args:
        x: batch of 2-D arrays accepted by conv_batch.

    Returns:
        Whatever softmax returns for the (len(x), fcweight.shape[1]) score
        matrix.
    """
    # One feature row per input; the width is taken from fcweight so it always
    # agrees with the dot product below instead of a hard-coded count.
    features = relu(conv_batch(x, convweight).reshape(len(x), fcweight.shape[0]))
    # No mask is drawn here; activations are scaled by the keep probability
    # (1 - dropoutrate) instead.
    return softmax(np.dot(features * (1 - dropoutrate), fcweight))
    
# Hyper-parameters.
regu = 0.01        # coefficient of the weight-decay term added to each gradient
lr = 0.05          # gradient-descent step size
dropoutrate = 0.0  # probability that a feature is zeroed during training

# Full-batch training: batchsize equals the training-set size, so the inner
# loop body runs once per epoch.
batchsize = len(X)
for epoch in range(0, 3000):
    # Keep-mask over the 42 features, redrawn every epoch: 1 where the uniform
    # draw exceeds dropoutrate, 0 otherwise.
    dropout = (np.random.random(42) > dropoutrate).astype(float)
    for start in range(0, X.shape[0], batchsize):
        x_batch = X[start:start + batchsize, :]
        y_batch = Y[start:start + batchsize]
        y_dummy_batch = Y_dummy[start:start + batchsize, :]
        # Window matrices were precomputed once; only slice them here.
        x_batch_sum = X_batch_sum[start:start + batchsize]

        # Forward pass: convolution, dropout mask, relu.
        x_conv = conv_batch_sum(x_batch, x_batch_sum, convweight)
        z = relu(x_conv * dropout)
        # Fully connected layer followed by softmax.
        a2 = z.dot(fcweight)
        pre_batch = softmax(a2)
        batch_loss = softmax_loss(pre_batch, y_batch)
        if epoch % 100 == 0:
            # Single-argument print(...) prints the same under Python 2 and 3.
            print(z.shape)
            print('train loss:' + str(batch_loss) + ' eval loss:' + str(softmax_loss(forward(X_te), Y_te)))

        # Backward pass.
        # Gradient w.r.t. the pre-softmax scores, averaged over the batch.
        grad_a2 = (pre_batch - y_dummy_batch) / batchsize
        grad_w2 = z.T.dot(grad_a2) + regu * fcweight
        grad_z = grad_a2.dot(fcweight.T)
        # 1 where the activation is positive, 0 elsewhere; features zeroed by
        # the dropout mask have z == 0 and therefore get no gradient.
        relu_mask = (z > 0).astype(z.dtype)
        # NOTE(review): grad_a2 is already divided by batchsize, and this line
        # divides by the batch length again, so the kernel gradient is scaled
        # by 1/N twice -- confirm whether that is intended.
        grad_conv = np.tensordot(x_batch_sum, grad_z * relu_mask, ([0, 1], [0, 1])) / len(x_batch_sum)

        # Parameter updates. Every kernel receives the same data gradient
        # because the forward pass only uses the sum of the kernels.
        fcweight -= lr * grad_w2
        for kernel in convweight:
            kernel -= lr * (grad_conv + regu * kernel)

(1697, 42)
train loss:20.5113407079 eval loss:19.6545267282
(1697, 42)
train loss:0.368484793092 eval loss:0.703866205814
(1697, 42)
train loss:0.238472972884 eval loss:0.519355169035
(1697, 42)
train loss:0.204480297011 eval loss:0.464013192043
(1697, 42)
train loss:0.191033693011 eval loss:0.430641246484
(1697, 42)
train loss:0.184067894679 eval loss:0.408347595945
(1697, 42)
train loss:0.179382738478 eval loss:0.393079875083
(1697, 42)
train loss:0.175878325136 eval loss:0.381687933082


KeyboardInterrupt: 