In [None]:
import os
import sys
import time
import numpy as np
import pickle
import theano
import theano.tensor as T
import matplotlib.pyplot as plt
import random
import csv

from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from logistic_sgd2 import LogisticRegression, load_data
from mlp import HiddenLayer
from PIL import Image


# Read the pickled dataset bundle. Only the file handle needs the `with`
# scope; the unpacking below works on the already-loaded object.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open('.pkl','rb') as f1:
    datasets1 = pickle.load(f1)

# The first entry of the bundle is the (inputs, labels) training pair.
train_set_x, train_set_y = datasets1[0]

# Materialise the inputs as an array, then flatten every sample into one row.
train_set_x = np.array(train_set_x)
train_set_x = train_set_x.reshape((train_set_x.shape[0], -1))

In [None]:

class LeNetConvPoolLayer(object):
    """Convolution + max-pooling layer of a CNN.

    The layer convolves its input with a bank of filters, max-pools the
    result, adds one bias per output feature map and applies tanh.

    Attributes set by the constructor:
        W      -- shared variable holding the filters (shape `filter_shape`).
        b      -- shared variable holding one bias per filter.
        output -- symbolic expression tanh(max_pool(conv(input, W)) + b).
        params -- [W, b], the trainable parameters of the layer.
    """
    def __init__(self, rng, input, image_shape, filter_shape, poolsize=(2, 2)):
        """Build the symbolic graph of the layer.

        Args:
            rng: random generator exposing `uniform(low, high, size)`
                (e.g. a `numpy.random.RandomState`), used to initialise W.
            input: symbolic image tensor fed to `conv.conv2d`.
            image_shape: shape of `input`, forwarded to `conv.conv2d`;
                entry 1 (number of input feature maps) must match
                `filter_shape[1]`.
            filter_shape: shape of the filter bank, forwarded to
                `conv.conv2d`; entry 0 is the number of filters (and
                therefore the number of biases).
            poolsize: downsampling factor passed to `max_pool_2d` as `ds`.

        Raises:
            AssertionError: if `image_shape[1] != filter_shape[1]`.
        """
        assert image_shape[1] == filter_shape[1]

        # Number of inputs feeding each hidden unit, and number of units each
        # weight feeds gradients back from (reduced by the pooling factor).
        fan_in = np.prod(filter_shape[1:])
        # Explicit floor division: this is what the original Python 2 `/`
        # computed on integers, and it keeps the result an integer on Python 3.
        fan_out = filter_shape[0] * np.prod(filter_shape[2:]) // np.prod(poolsize)

        # Initialise the filters uniformly in [-W_bound, W_bound].
        W_bound = np.sqrt(6.0 / (fan_in + fan_out))
        self.W = theano.shared(
            np.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                       dtype=theano.config.floatX),
            borrow=True)

        # One bias per output feature map, initialised to zero.
        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        # The original passed `borrow=T` (the theano.tensor module), which
        # only worked because a module object is truthy; use the boolean.
        self.b = theano.shared(value=b_values, borrow=True)

        # Convolve the input feature maps with the filters.
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape)

        # Downsample each feature map with max-pooling.
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True)

        # Add the bias (broadcast over batch, rows and columns via
        # dimshuffle) and apply the tanh non-linearity.
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        self.params = [self.W, self.b]


def _load_split_as_shared(data):
    """Flatten each sample of `data` to one row and wrap it in a shared variable.

    Returns a `(shared_variable, n_samples)` pair; the shared variable holds
    the data as a 2-D `theano.config.floatX` array.
    """
    rows = np.array(data)
    rows = rows.reshape(rows.shape[0], -1)
    shared = theano.shared(np.asarray(rows, dtype=theano.config.floatX),
                           borrow=True)
    return shared, rows.shape[0]


def _as_int_labels(shared_labels):
    """Return a shared floatX label matrix as a flat symbolic int32 vector."""
    return T.cast(shared_labels.flatten(), 'int32')


def evaluate_lenet5(learning_rate=0.0005, n_epochs=100,
                    dataset="/", batch_size=400, output_prefix=None):
    """Train a two-conv-layer CNN with minibatch SGD and log its error rates.

    The network is conv/pool -> conv/pool -> tanh hidden layer (300 units)
    -> 2-class logistic regression. Training uses early stopping driven by
    the validation error; whenever the validation error improves, the test
    error is measured as well.

    Args:
        learning_rate: step size of the plain SGD update.
        n_epochs: maximum number of passes over the training set.
        dataset: path prefix. The original code never used this argument;
            it now serves as the default for `output_prefix`.
        batch_size: number of samples per minibatch. Samples beyond the last
            full minibatch of each split are ignored.
        output_prefix: prefix of the three output files. Defaults to
            `dataset`. The files are
            `<prefix>.csv` (one row per test sample with the value returned
            by `layer3.makefile1`),
            `<prefix>_validation_error.txt` and `<prefix>_test_error.txt`
            (tab-separated "epoch<TAB>error %" lines).

    Returns:
        None. Results are printed and written to the files listed above.
    """
    rng = np.random.RandomState(23455)

    # The original referenced `path`, `fp1` and `fp2` without ever defining
    # them (NameError on a fresh kernel). All outputs are now derived from a
    # single prefix owned by this function.
    # NOTE(review): the log file names below are new; adjust if an external
    # consumer expects different names.
    if output_prefix is None:
        output_prefix = dataset

    # Load the training data.
    # NOTE(review): both pickles are read from the same literal '.pkl' path,
    # which looks like a placeholder -- confirm the intended file names.
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # from a trusted source.
    with open('.pkl','rb') as f1:
        datasets1 = pickle.load(f1)

    with open('.pkl','rb') as f2:
        datasets2 = pickle.load(f2)

    train_pair = datasets1[0]
    valid_pair = datasets1[1]
    test_pair = datasets2[0]

    # Inputs and labels are both stored as floatX shared variables; labels
    # are cast back to int32 symbolically where they are used.
    train_set_x, train_dimension = _load_split_as_shared(train_pair[0])
    train_set_y, _ = _load_split_as_shared(train_pair[1])
    valid_set_x, valid_dimension = _load_split_as_shared(valid_pair[0])
    valid_set_y, _ = _load_split_as_shared(valid_pair[1])
    test_set_x, test_dimension = _load_split_as_shared(test_pair[0])
    test_set_y, _ = _load_split_as_shared(test_pair[1])

    train_set_y = _as_int_labels(train_set_y)
    valid_set_y = _as_int_labels(valid_set_y)
    test_set_y = _as_int_labels(test_set_y)

    # Explicit floor division keeps the batch counts integral on both
    # Python 2 and Python 3.
    n_train_batches = train_dimension // batch_size
    n_valid_batches = valid_dimension // batch_size
    n_test_batches = test_dimension // batch_size

    index = T.lscalar()  # minibatch index
    x = T.matrix('x')    # flattened input images, one row per sample
    y = T.ivector('y')   # integer class labels

    # The original reshaped the input to 1 channel while declaring 3 input
    # channels in image_shape/filter_shape; the two must agree.
    # NOTE(review): assumes each sample holds 3 * 29 * 29 values -- confirm
    # against the pickled data.
    n_channels, img_rows, img_cols = 3, 29, 29
    input_shape = (batch_size, n_channels, img_rows, img_cols)

    layer0_input = x.reshape(input_shape)

    # 29 - 6 + 1 = 24 after the convolution, 24 / 2 = 12 after pooling.
    layer0 = LeNetConvPoolLayer(rng,
                input=layer0_input,
                image_shape=input_shape,
                filter_shape=(20, n_channels, 6, 6),
                poolsize=(2, 2))

    # 12 - 5 + 1 = 8 after the convolution, 8 / 2 = 4 after pooling.
    layer1 = LeNetConvPoolLayer(rng,
                input=layer0.output,
                image_shape=(batch_size, 20, 12, 12),
                filter_shape=(50, 20, 5, 5),
                poolsize=(2, 2))

    # Flatten the 50 feature maps of 4x4 into one row per sample.
    layer2_input = layer1.output.flatten(2)

    layer2 = HiddenLayer(rng,
        input=layer2_input,
        n_in=50 * 4 * 4,
        n_out=300,
        activation=T.tanh)

    # Softmax classification layer.
    layer3 = LogisticRegression(input=layer2.output, n_in=300, n_out=2)

    # Symbolic cost minimised during training.
    cost = layer3.negative_log_likelihood(y)

    # NOTE(review): `makefile1` is defined in logistic_sgd2 and is not
    # visible here; its per-sample output is what gets written to the CSV.
    value = layer3.makefile1(y)

    def batch_givens(set_x, set_y):
        """Substitute x and y with the `index`-th minibatch of a split."""
        start = index * batch_size
        stop = (index + 1) * batch_size
        return {x: set_x[start:stop], y: set_y[start:stop]}

    # Error rate on one test minibatch.
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens=batch_givens(test_set_x, test_set_y))

    # Per-sample output of `makefile1` on one test minibatch.
    value_model = theano.function(
        [index],
        value,
        givens=batch_givens(test_set_x, test_set_y))

    # Error rate on one validation minibatch.
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens=batch_givens(valid_set_x, valid_set_y))

    # Trainable parameters and their plain SGD updates.
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y))

    # Early-stopping parameters.
    patience = 10000              # minimum number of iterations to run
    patience_increase = 2         # patience grows to iteration * this factor
    improvement_threshold = 0.95  # relative improvement counted as significant
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0

    epoch = 0
    done_looping = False

    # Open every output file up front so that a bad prefix fails before the
    # (long) training starts, and so that all handles are closed even if
    # training raises.
    with open(output_prefix + '.csv', 'w') as f, \
            open(output_prefix + '_validation_error.txt', 'w') as fp1, \
            open(output_prefix + '_test_error.txt', 'w') as fp2:
        Write = csv.writer(f)

        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1

            for minibatch_index in range(n_train_batches):
                train_model(minibatch_index)
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    print("epoch %i, minibatch %i/%i, validation error %f %%" % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100))
                    fp1.write("%d\t%f\n" % (epoch, this_validation_loss * 100))

                    if this_validation_loss < best_validation_loss:
                        # Extend the patience only for a significant
                        # improvement.
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iteration * patience_increase)
                            print("*** iter %d / patience %d" % (iteration, patience))
                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        # Measure the test error of the new best model.
                        test_losses = [test_model(i)
                                       for i in range(n_test_batches)]
                        test_score = np.mean(test_losses)
                        print("    epoch %i, minibatch %i/%i, test error of best model %f %%" % (epoch, minibatch_index + 1, n_train_batches, test_score * 100))
                        fp2.write("%d\t%f\n" % (epoch, test_score * 100))

                # Stop once the patience is exhausted.
                if patience <= iteration:
                    done_looping = True
                    break

        print("Optimization complete.")

        # Dump one CSV row per test sample with the final model's output.
        for i in range(n_test_batches):
            batch_values = np.array(value_model(i))
            for j in range(batch_size):
                Write.writerow([batch_values[j]])

    print("Best validation score of %f %% obtained at iteration %i, with test performance %f %%" % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0))
    

if __name__ == '__main__':
    # Script entry point: run training/evaluation with the project's
    # dataset path prefix.
    evaluate_lenet5(
        dataset="/home/dl-box/Liver/Cyst_data1016",
    )