In [10]:
import os
import numpy as np
import theano
import lasagne

import common as common
import nn_utilities as nn

In [11]:
# --- Project layout ---------------------------------------------------------
# Every location is derived from the directory the notebook was started in.
current_dir = os.getcwd()
data_dir = os.path.join(current_dir, '..', 'data')
results_dir = os.path.join(current_dir, '..', 'results')
samples_dir = os.path.join(data_dir, 'segmented')
params_dir = os.path.join(data_dir, 'params')
dm_dir = os.path.join(data_dir, 'distance_matrices', 'dtw')

# Echo the resolved locations so a wrong working directory is spotted early.
print('Current directory:\t' + current_dir)
print('Results directory:\t' + results_dir)
print('Data directory:\t\t' + data_dir)
print('Samples directory:\t' + samples_dir)
print('DM directory:\t\t' + dm_dir)
print('Params directory:\t' + params_dir)

# Loaded once from the segmented-samples directory and handed to
# nn.prepare_dataset by the experiment cells.
# NOTE(review): presumably the reference labels/transcriptions - confirm in common.py.
references = common.load_references(samples_dir)

# Names of the feature sets to evaluate. Each name is passed to
# common.get_data(dm_dir, ...) and is also used as a prefix of the cached
# parameter file names.
features = [
    'ste_10_10_norm',
    'sti_10_10_norm',
    'stzcr_10_10_norm',
    'ste_sti_stzcr_10_10_norm',
    'log_fb_en_25_10_ham_norm',
    'log_fb_en_25_10_ham_deltas_norm',
    'mfcc_25_10_ham_norm',
    'mfcc_25_10_ham_deltas_norm',
]

Current directory:	/Users/martin.majer/PycharmProjects/BP/scripts
Results directory:	/Users/martin.majer/PycharmProjects/BP/scripts/../results
Data directory:		/Users/martin.majer/PycharmProjects/BP/scripts/../data
Samples directory:	/Users/martin.majer/PycharmProjects/BP/scripts/../data/segmented
DM directory:		/Users/martin.majer/PycharmProjects/BP/scripts/../data/distance_matrices/dtw
Params directory:	/Users/martin.majer/PycharmProjects/BP/scripts/../data/params


In [12]:
# Speaker identifiers as strings: '1' .. '6'.
speakers = [str(x + 1) for x in xrange(6)]

# Reproducibility: fix the random state so that Restart & Run All yields the
# same trained networks. Lasagne draws weight initialisations (and dropout
# seeds) from its own module-level RNG, so it is seeded as well.
# NOTE(review): assumes nn_utilities relies on NumPy's global RNG for any
# shuffling it does - confirm in nn_utilities.py.
seed = 42
np.random.seed(seed)
lasagne.random.set_rng(np.random.RandomState(seed))

# Network architecture, forwarded unchanged to nn.build_mlp / nn.train /
# nn.dump_info. None for the drop_* values is what this notebook has always
# passed (presumably "no dropout" - confirm in nn_utilities.py).
num_units = 200
depth = 2
drop_input = None
drop_hidden = None

# Training schedule, forwarded to nn.train.
num_epochs = 1500
batch_count = 10

# When True, the model description is dumped below and every freshly trained
# network is stored as an .npz file in params_dir by the experiment cells.
save_params = False

if save_params:
    nn.dump_info(os.path.join(params_dir, 'nn_model.txt'), depth, num_units, drop_input, drop_hidden)

## Single speaker

In [13]:
# Single-speaker experiment: for every feature set and every speaker, obtain a
# network (cached parameters if an .npz file exists, otherwise train one) and
# record its accuracy on that speaker's test keys. The per-feature list ends
# with the mean over all speakers.
#
# NOTE(review): test_keys is passed to nn.prepare_dataset twice, and the
# printed shapes of X_val and X_test are identical, so the validation set
# appears to be the test set. If nn.train uses the validation data for model
# selection, the reported accuracy is optimistic - confirm in nn_utilities.py.
accuracy_data = {}

for feature in features:
    print('{0: <15}{1}\n'.format('Features used:', feature))
    distance_matrix, ref_keys_all, test_keys_all = common.get_data(dm_dir, feature)

    feature_accuracies = []

    for speaker in speakers:
        print('{0: <12}{1}'.format('Speaker:', speaker))
        ref_keys = common.get_speaker_keys(ref_keys_all, speaker)
        test_keys = common.get_speaker_keys(test_keys_all, speaker)

        X_train, y_train, X_val, y_val, X_test, y_test = nn.prepare_dataset(
            ref_keys, ref_keys, test_keys, test_keys, distance_matrix, references)

        input_dim = X_train.shape[1]
        output_dim = y_train.shape[1]

        params_file = os.path.join(params_dir, feature + '_' + speaker + '.npz')
        try:
            # Only the cache read is guarded: an IOError here means "no cached
            # parameters", and nothing else should be interpreted that way.
            with np.load(params_file) as fr:
                param_values = [fr['arr_%d' % i] for i in range(len(fr.files))]
        except IOError:
            print('Starting training...')
            network = nn.train(num_epochs, batch_count, X_train, y_train, X_val, y_val,
                               input_dim, output_dim, depth, num_units, drop_input, drop_hidden)
            if save_params:
                np.savez(params_file, *lasagne.layers.get_all_param_values(network))
        else:
            print('Loading model parameters...')
            # Errors raised while rebuilding the network now propagate instead
            # of silently triggering a retrain.
            network = nn.build_mlp(input_dim, output_dim, depth, num_units, drop_input, drop_hidden)
            lasagne.layers.set_all_param_values(network, param_values)

        print('\nComputing recognition accuracy...\n')
        accuracy = nn.compute_accuracy(X_test, y_test, network)
        feature_accuracies.append(accuracy)

    # Per-speaker accuracies followed by their mean (matches the 'Mean' column).
    accuracy_data[feature] = feature_accuracies + [np.mean(feature_accuracies)]

nn_single = common.create_dataframe(accuracy_data, speakers + ['Mean'])

Features used: ste_10_10_norm

Speaker:    1
X_train:    (10, 10).float64        y_train:    (10, 10).int32
X_val:      (30, 10).float64        y_val:      (30, 10).int32
X_test:     (30, 10).float64        y_test:     (30, 10).int32

Starting training...
Epoch 50 of 1500 took 0.006s
	training loss:			2.195524
	validation loss:		2.247280
	validation accuracy:		40.00 %
Epoch 100 of 1500 took 0.007s
	training loss:			2.043599
	validation loss:		2.162918
	validation accuracy:		33.33 %
Epoch 150 of 1500 took 0.004s
	training loss:			1.737665
	validation loss:		1.988023
	validation accuracy:		36.67 %
Epoch 200 of 1500 took 0.004s
	training loss:			1.281727
	validation loss:		1.732069
	validation accuracy:		33.33 %
Epoch 250 of 1500 took 0.004s
	training loss:			0.901625
	validation loss:		1.552492
	validation accuracy:		53.33 %
Epoch 300 of 1500 took 0.002s
	training loss:			0.648904
	validation loss:		1.472622
	validation accuracy:		56.67 %
Epoch 350 of 1500 took 0.007s
	training loss:			0

In [14]:
# Show the single-speaker accuracy table: one row per feature set, one column
# per speaker plus the 'Mean' column.
nn_single

Unnamed: 0,1,2,3,4,5,6,Mean
log_fb_en_25_10_ham_deltas_norm,96.666667,86.666667,43.333333,66.666667,73.333333,66.666667,72.222222
log_fb_en_25_10_ham_norm,93.333333,93.333333,96.666667,90.0,93.333333,90.0,92.777778
mfcc_25_10_ham_deltas_norm,93.333333,83.333333,80.0,90.0,83.333333,90.0,86.666667
mfcc_25_10_ham_norm,96.666667,96.666667,100.0,100.0,93.333333,90.0,96.111111
ste_10_10_norm,53.333333,26.666667,46.666667,40.0,36.666667,50.0,42.222222
ste_sti_stzcr_10_10_norm,93.333333,80.0,76.666667,83.333333,66.666667,73.333333,78.888889
sti_10_10_norm,50.0,43.333333,53.333333,53.333333,46.666667,53.333333,50.0
stzcr_10_10_norm,53.333333,43.333333,53.333333,43.333333,23.333333,53.333333,45.0


In [15]:
# Persist the single-speaker accuracy table as CSV in the results directory.
csv_name = os.path.join(results_dir, 'nn_single_speaker.csv')

# Create the results directory on first use so that a fresh checkout does not
# fail at write time, after all networks have already been trained.
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)

with open(csv_name, 'w') as fw:
    nn_single.to_csv(fw)

## All speakers

In [16]:
# All-speakers experiment: for every feature set, pool the reference and test
# keys of all speakers, obtain one network (cached parameters if an .npz file
# exists, otherwise train one) and record its accuracy on the pooled test keys.
#
# NOTE(review): test_keys is passed to nn.prepare_dataset twice, and the
# printed shapes of X_val and X_test are identical, so the validation set
# appears to be the test set. If nn.train uses the validation data for model
# selection, the reported accuracy is optimistic - confirm in nn_utilities.py.
accuracy_data = {}

for feature in features:
    print('{0: <15}{1}\n'.format('Features used:', feature))
    distance_matrix, ref_keys_all, test_keys_all = common.get_data(dm_dir, feature)

    ref_keys = []
    test_keys = []

    # Concatenate the per-speaker key lists in speaker order.
    for speaker in speakers:
        ref_keys.extend(common.get_speaker_keys(ref_keys_all, speaker))
        test_keys.extend(common.get_speaker_keys(test_keys_all, speaker))

    X_train, y_train, X_val, y_val, X_test, y_test = nn.prepare_dataset(
        ref_keys, ref_keys, test_keys, test_keys, distance_matrix, references)

    input_dim = X_train.shape[1]
    output_dim = y_train.shape[1]

    params_file = os.path.join(params_dir, feature + '_all.npz')
    try:
        # Only the cache read is guarded: an IOError here means "no cached
        # parameters", and nothing else should be interpreted that way.
        with np.load(params_file) as fr:
            param_values = [fr['arr_%d' % i] for i in range(len(fr.files))]
    except IOError:
        print('Starting training...')
        network = nn.train(num_epochs, batch_count, X_train, y_train, X_val, y_val,
                           input_dim, output_dim, depth, num_units, drop_input, drop_hidden)
        if save_params:
            np.savez(params_file, *lasagne.layers.get_all_param_values(network))
    else:
        print('Loading model parameters...')
        # Errors raised while rebuilding the network now propagate instead of
        # silently triggering a retrain.
        network = nn.build_mlp(input_dim, output_dim, depth, num_units, drop_input, drop_hidden)
        lasagne.layers.set_all_param_values(network, param_values)

    print('\nComputing recognition accuracy...\n')
    accuracy = nn.compute_accuracy(X_test, y_test, network)
    accuracy_data[feature] = accuracy

nn_all = common.create_dataframe(accuracy_data, ['All speakers'])

Features used: ste_10_10_norm

X_train:    (60, 60).float64        y_train:    (60, 60).int32
X_val:     (180, 60).float64        y_val:     (180, 60).int32
X_test:    (180, 60).float64        y_test:    (180, 60).int32

Starting training...
Epoch 50 of 1500 took 0.016s
	training loss:			1.407733
	validation loss:		1.737536
	validation accuracy:		34.44 %
Epoch 100 of 1500 took 0.016s
	training loss:			0.957347
	validation loss:		1.667984
	validation accuracy:		36.67 %
Epoch 150 of 1500 took 0.016s
	training loss:			0.572726
	validation loss:		1.764706
	validation accuracy:		47.78 %
Epoch 200 of 1500 took 0.016s
	training loss:			0.266883
	validation loss:		1.977736
	validation accuracy:		48.33 %
Epoch 250 of 1500 took 0.016s
	training loss:			0.115774
	validation loss:		2.162463
	validation accuracy:		48.89 %
Epoch 300 of 1500 took 0.016s
	training loss:			0.068822
	validation loss:		2.407658
	validation accuracy:		48.89 %
Epoch 350 of 1500 took 0.017s
	training loss:			0.039698
	valid

In [17]:
# Show the pooled accuracy table: one row per feature set and a single
# 'All speakers' column.
nn_all

Unnamed: 0,All speakers
log_fb_en_25_10_ham_deltas_norm,10.0
log_fb_en_25_10_ham_norm,90.555556
mfcc_25_10_ham_deltas_norm,10.0
mfcc_25_10_ham_norm,86.111111
ste_10_10_norm,51.666667
ste_sti_stzcr_10_10_norm,76.666667
sti_10_10_norm,56.111111
stzcr_10_10_norm,57.777778


In [18]:
# Persist the all-speakers accuracy table as CSV in the results directory.
csv_name = os.path.join(results_dir, 'nn_all_speakers.csv')

# Create the results directory on first use so that a fresh checkout does not
# fail at write time, after all networks have already been trained.
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)

with open(csv_name, 'w') as fw:
    nn_all.to_csv(fw)