In [4]:
from __future__ import print_function
import sys
import os
import time
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
import lasagne
from lasagne import layers
from IPython import display
from lasagne.regularization import regularize_layer_params_weighted, l2, l1
from lasagne.regularization import regularize_layer_params
import matplotlib.pyplot as plt
import random
from collections import Counter

In [5]:
def load_data(dataName):
    """Load a ``<dataName>_TRAIN`` / ``<dataName>_TEST`` pair of text files.

    Both files are read as comma-separated numeric text. Column 0 holds the
    class label, the remaining columns hold the series values.

    Labels are shifted to start at 0 for the two layouts handled here:
    training labels whose minimum is 1 are decremented by one, and training
    labels whose minimum is -1 are mapped through ``(y + 1) // 2`` (so -1/1
    becomes 0/1). The same transformation is applied to the test labels.

    Parameters
    ----------
    dataName : str
        Path prefix shared by the ``_TRAIN`` and ``_TEST`` files.

    Returns
    -------
    tuple
        ``(trX, trY, teX, teY, length, num_cls)`` where ``trX``/``teX`` are
        float32 arrays of shape ``(n_samples, 1, length)``, ``trY``/``teY``
        are int32 label vectors, ``length`` is the number of value columns
        and ``num_cls`` is the number of distinct training labels.
    """
    # ndmin=2 keeps a single-row file two-dimensional so the column slicing
    # below keeps working.
    train_set = np.loadtxt(dataName + '_TRAIN', delimiter=',', ndmin=2)
    test_set = np.loadtxt(dataName + '_TEST', delimiter=',', ndmin=2)

    trY = train_set[:, 0].astype('int32')
    teY = test_set[:, 0].astype('int32')

    lowest_label = trY.min()
    if lowest_label == 1:
        trY = trY - 1
        teY = teY - 1
    elif lowest_label == -1:
        # Floor division keeps the labels integral. Plain "/" is true division
        # on Python 3 and would silently turn them into float64.
        trY = ((trY + 1) // 2).astype('int32')
        teY = ((teY + 1) // 2).astype('int32')

    trX = train_set[:, 1:].astype('float32')
    teX = test_set[:, 1:].astype('float32')
    num_cls = len(np.unique(trY))

    # Add a singleton middle axis: (n_samples, length) -> (n_samples, 1, length).
    length = trX.shape[1]
    trX = trX.reshape(-1, 1, length)
    teX = teX.reshape(-1, 1, length)

    print('dataset ' + dataName + ' class distribution:')
    # tolist() yields plain Python ints so the Counter prints cleanly.
    print(Counter(np.append(trY, teY).tolist()))
    return (trX, trY, teX, teY, length, num_cls)

In [7]:
def GCRNN(input_var, length, num_cls, batch_norm = True):
    """Assemble the convolutional + bidirectional-GRU classifier.

    Three Conv1D -> MaxPool1D (-> optional BatchNorm) stages feed two stacked
    bidirectional GRU stages. A dropout layer followed by a softmax dense
    layer sits on top of the full output sequence of the second GRU stage.

    :param input_var: symbolic variable bound to the input layer.
    :param length: size of the last input axis; the input layer is declared
        with shape ``(None, 1, length)``.
    :param num_cls: number of units of the softmax output layer.
    :param batch_norm: when true, a BatchNormLayer follows every pooling layer.
    :return: the final softmax DenseLayer.
    """
    pool_width = 2
    # (number of filters, filter size) of the three convolutional stages
    conv_stages = [(5, 21), (10, 11), (20, 5)]

    features = layers.InputLayer(shape=(None, 1, length), input_var=input_var)

    # CONV -> POOL (-> BATCHNORM), repeated once per entry of conv_stages
    for n_filters, width in conv_stages:
        features = layers.Conv1DLayer(incoming=features, num_filters=n_filters,
                                      filter_size=width, pad='same')
        features = layers.MaxPool1DLayer(incoming=features, pool_size=pool_width)
        if batch_norm:
            features = layers.BatchNormLayer(incoming=features)

    # The recurrent layers need the last two axes swapped.
    sequence = layers.dimshuffle(features, (0, 2, 1))

    # Two stacked bidirectional stages: a forward and a backward GRU read the
    # same input and their full output sequences are concatenated on axis 2.
    for _ in range(2):
        forward = layers.GRULayer(incoming=sequence, num_units=20,
                                  only_return_final=False)
        backward = layers.GRULayer(incoming=sequence, num_units=20,
                                   only_return_final=False, backwards=True)
        sequence = layers.ConcatLayer([forward, backward], axis=2)

    dropped = layers.dropout(sequence, p=.5)
    return layers.DenseLayer(dropped, num_units=num_cls,
                             nonlinearity=lasagne.nonlinearities.softmax)

In [8]:
## The iterate_minibatches function is taken from the Lasagne tutorial.
## License: MIT
# ############################# Batch iterator ###############################
# This is just a simple helper function iterating over training data in
# mini-batches of a particular size, optionally in random order. It assumes
# data is available as numpy arrays. For big datasets, you could load numpy
# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your
# own custom data iteration function. For small datasets, you can also copy
# them to GPU at once for slightly improved performance. This would involve
# several changes in the main program, though, and is not demonstrated here.
# Notice that this function returns only mini-batches of size `batchsize`.
# If the size of the data is not a multiple of `batchsize`, it will not
# return the last (remaining) mini-batch.

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield ``(inputs, targets)`` mini-batches of exactly ``batchsize`` items.

    ``inputs`` and ``targets`` must have the same length. With
    ``shuffle=True`` the samples are visited in a random order drawn from
    ``np.random``; otherwise consecutive slices are yielded. A trailing
    remainder shorter than ``batchsize`` is not yielded.
    """
    assert len(inputs) == len(targets)
    n_samples = len(inputs)

    order = None
    if shuffle:
        order = np.arange(n_samples)
        np.random.shuffle(order)

    for first in range(0, n_samples - batchsize + 1, batchsize):
        last = first + batchsize
        # Shuffled: pick by index array. Sequential: use a plain slice.
        picked = order[first:last] if shuffle else slice(first, last)
        yield inputs[picked], targets[picked]

In [38]:
def _ensure_dir(path):
    """Create directory ``path`` if it does not exist yet."""
    if not os.path.exists(path):
        os.makedirs(path)


def train_GCRNN( dataName, num_epochs = 100, learning_rate = 0.01, batchsize =50, alpha = 0.8, lasso_l = 0.01, save_model = True):
    """Train the GCRNN classifier on one dataset and write its artefacts.

    The loss is the mean categorical cross-entropy plus two penalties on the
    output layer: an L1 term weighted by ``(1 - alpha) * lasso_l`` and a
    group term (sum of per-group L2 norms) weighted by
    ``alpha * lasso_l * sqrt(group_size)``. Parameters are updated with
    Adagrad. After every epoch the test set is evaluated and a summary line
    is printed.

    Files written (``<name>`` is the base name of ``dataName``):

    * ``GCRNN_visualization/<name>_<alpha>_<lasso_l>_group_l2_coef`` and
      ``..._important_region.png``
    * ``GCRNN_models/<name>_<alpha>_<lasso_l>_epoch<num_epochs>.npz``
      (only when ``save_model`` is true)
    * ``GCRNN_testing_results/<name>_<alpha>_<lasso_l>_epoch<num_epochs>_test_acc``
      and ``GCRNN_testing_results/<name>_test_acc.png``

    Parameters
    ----------
    dataName : str
        Path prefix handed to ``load_data``.
    num_epochs : int
        Number of passes over the training set.
    learning_rate : float
        Adagrad learning rate.
    batchsize : int
        Mini-batch size; capped at the number of training samples.
    alpha : float
        Mixing weight between the group penalty (``alpha``) and the L1
        penalty (``1 - alpha``).
    lasso_l : float
        Overall strength of both penalties.
    save_model : bool
        Whether to store the trained parameter values as an ``.npz`` file.

    Returns
    -------
    list
        Test accuracy after each epoch, in epoch order.
    """
    print('Loading data...')
    trX, trY, teX, teY, length, num_cls = load_data(dataName)
    X = T.tensor3('X') #Input
    Y = T.ivector('Y') #Target

    network = GCRNN(X, length, num_cls)
    last_layer = layers.get_all_layers(network)[-1]

    # Training loss: cross-entropy on the stochastic (dropout-enabled) output.
    prediction = layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, Y).mean()

    # The dense layer receives 40 values per recurrent step (20 forward +
    # 20 backward GRU units in GCRNN), so reshaping its weight matrix to rows
    # of 40 * num_cls puts all weights of one step into one group.
    group_size = 40 * num_cls
    group_norms = T.sqrt(T.sum(last_layer.get_params()[0].reshape((-1, group_size))**2, axis = 1))

    l1_penalty = regularize_layer_params(last_layer, l1) * (1 - alpha) * lasso_l
    group_penalty = T.sum(group_norms) * alpha * lasso_l * T.sqrt(group_size)
    loss = loss + l1_penalty + group_penalty

    params = layers.get_all_params(network, trainable = True)
    updates = lasagne.updates.adagrad(loss, params, learning_rate = learning_rate)

    # Deterministic pass (dropout disabled) for evaluation.
    test_prediction = layers.get_output(network, deterministic = True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, Y).mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis = 1), Y), dtype = theano.config.floatX)

    train_fn = theano.function([X, Y], loss, updates = updates)
    test_fn = theano.function([X, Y], [test_loss, test_acc])

    # iterate_minibatches only yields full batches. A batch size larger than
    # the training set would yield nothing and make the per-epoch average
    # below divide by zero, so cap it.
    batchsize = min(batchsize, len(trX))

    print("Starting training...")
    test_loss_epoches = []
    test_acc_epoches = []

    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0

        for inputs, targets in iterate_minibatches(trX, trY, batchsize, shuffle = True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        te_loss, te_acc = test_fn(teX, teY)
        test_loss_epoches.append(te_loss)
        test_acc_epoches.append(te_acc)

        # write results for this epoch
        print("Epoch {} of {}".format(epoch + 1, num_epochs), end = "")
        print("    training loss:{:.6f}".format(train_err/train_batches), end="")
        print("    testing loss:{:.6f}".format(float(te_loss)),  end="")
        print("    testing accuracy:{:.2f}%".format(100*te_acc))

    # Output files are named after the dataset's base name so a dataName that
    # contains directories does not point into non-existent sub-folders.
    run_tag = os.path.basename(dataName) + '_' + str(alpha) + '_' + str(lasso_l)

    ## visualize the group coefficient to reveal important time series regions
    _ensure_dir('GCRNN_visualization')
    group_coef = group_norms.eval()
    np.savetxt(os.path.join('GCRNN_visualization', run_tag + '_group_l2_coef'), group_coef, delimiter=',')

    fig = plt.figure(figsize=(20, 8), dpi=500)
    sample_ind = random.sample(range(len(trY)), len(trY))
    color = ['b', 'r', 'g', 'c', 'm', 'y', 'k', 'w']

    # Top panel: every training series, coloured by class.
    t = range(1, trX.shape[2] + 1)
    fig_a = fig.add_subplot(211)
    for i in sample_ind:
        # Wrap around the palette so more than len(color) classes cannot
        # raise an IndexError.
        c = color[trY[i] % len(color)]
        fig_a.plot(t, trX[i][0], color = c, alpha = 0.4, ls = '-')

    # Bottom panel: one group norm per recurrent step, each repeated 8 times
    # (three pooling layers of size 2 in GCRNN) to stretch it back towards
    # the input resolution.
    fig_b = fig.add_subplot(212)
    group_l2 = np.repeat(group_coef, 8)
    fig_b.plot(group_l2)
    plt.savefig(os.path.join('GCRNN_visualization', run_tag + '_important_region.png'), dpi = 500)

    ## save the model
    if save_model:
        _ensure_dir('GCRNN_models')
        # Saved on every run, not only when the directory had to be created.
        np.savez(os.path.join('GCRNN_models', run_tag + '_epoch' + str(num_epochs) + '.npz'),
                 *layers.get_all_param_values(network))

    ## save the testing accuracy
    _ensure_dir('GCRNN_testing_results')
    np.savetxt(os.path.join('GCRNN_testing_results', run_tag + '_epoch' + str(num_epochs) + '_test_acc'),
               test_acc_epoches, delimiter=',')

    fig = plt.figure(figsize=(8, 8), dpi=100)
    # The label gives plt.legend an entry to show.
    plt.plot(test_acc_epoches, marker='+', color='b', label='testing accuracy')
    plt.legend(loc='lower right', prop={'size':10})
    plt.xlabel('epoch')
    plt.ylabel('testing accuracy')
    plt.title(dataName)
    plt.savefig(os.path.join('GCRNN_testing_results', os.path.basename(dataName) + '_test_acc.png'), dpi = 500)

    return(test_acc_epoches)

In [40]:
# Train on the OSULeaf dataset for 10 epochs; the returned list of per-epoch
# test accuracies becomes the cell's output.
train_GCRNN(num_epochs=10, dataName='OSULeaf')

Loading data...
dataset OSULeaf class distribution:
Counter({3: 97, 1: 84, 4: 82, 2: 75, 0: 66, 5: 38})
Starting training...
Epoch 1 of 10    training loss:4.657432    testing loss:1.663544    testing accuracy:34.71%
Epoch 2 of 10    training loss:3.121305    testing loss:1.441896    testing accuracy:43.39%
Epoch 3 of 10    training loss:2.341209    testing loss:1.426589    testing accuracy:41.32%
Epoch 4 of 10    training loss:1.920253    testing loss:1.303332    testing accuracy:44.63%
Epoch 5 of 10    training loss:1.718176    testing loss:1.203909    testing accuracy:49.59%
Epoch 6 of 10    training loss:1.776080    testing loss:1.236831    testing accuracy:50.00%
Epoch 7 of 10    training loss:1.498159    testing loss:1.100256    testing accuracy:53.31%
Epoch 8 of 10    training loss:1.421882    testing loss:1.207689    testing accuracy:57.02%
Epoch 9 of 10    training loss:1.474748    testing loss:1.084002    testing accuracy:57.02%
Epoch 10 of 10    training loss:1.392699    tes

[array(0.34710743801652894),
 array(0.43388429752066116),
 array(0.4132231404958678),
 array(0.4462809917355372),
 array(0.49586776859504134),
 array(0.5),
 array(0.5330578512396694),
 array(0.5702479338842975),
 array(0.5702479338842975),
 array(0.5785123966942148)]