In [18]:
import os
import numpy as np
import common as common
import svm_utilities as svm

In [19]:
# Project directories, all resolved relative to the notebook's working directory.
current_dir = os.getcwd()
results_dir = os.path.join(current_dir, '..', 'results')
data_dir = os.path.join(current_dir, '..', 'data')
samples_dir = os.path.join(data_dir, 'segmented')
dm_dir = os.path.join(data_dir, 'distance_matrices', 'dtw')

# A single parenthesised, %-formatted argument prints the same text under the
# Python 2 print statement and the Python 3 print function. Each label ends in
# a tab, so the old comma form added no separating space either.
print('Current directory:\t%s' % current_dir)
print('Results directory:\t%s' % results_dir)
print('Data directory:\t\t%s' % data_dir)
print('Samples directory:\t%s' % samples_dir)
print('Dist. mat. directory:\t%s' % dm_dir)

# Reference data loaded from the segmented samples directory; it is passed to
# svm.get_targets() by the evaluation cells.
references = common.load_references(samples_dir)
# Speaker identifiers '1' .. '6' as strings; range() exists on Python 2 and 3.
speakers = [str(x + 1) for x in range(6)]

Current directory:	/Users/martin.majer/PycharmProjects/BP/scripts
Results directory:	/Users/martin.majer/PycharmProjects/BP/scripts/../results
Data directory:		/Users/martin.majer/PycharmProjects/BP/scripts/../data
Samples directory:	/Users/martin.majer/PycharmProjects/BP/scripts/../data/segmented
Dist. mat. directory:	/Users/martin.majer/PycharmProjects/BP/scripts/../data/distance_matrices/dtw


In [20]:
# Names of the normalised feature sets that the evaluation cells iterate over.
fts_norm = [
    'ste_10_10_norm',
    'sti_10_10_norm',
    'stzcr_10_10_norm',
    'ste_sti_stzcr_10_10_norm',
    'log_fb_en_25_10_ham_norm',
    'log_fb_en_25_10_ham_deltas_norm',
    'mfcc_25_10_ham_norm',
    'mfcc_25_10_ham_deltas_norm',
]

## Single speaker

In [21]:
# Single-speaker evaluation: for every feature set, each speaker's test keys
# are predicted from a model fitted on that same speaker's reference keys.
features = fts_norm
accuracy_data = {}

for feature in features:
    distance_matrix, ref_keys_all, test_keys_all = common.get_data(dm_dir, feature)

    speaker_scores = []

    for speaker in speakers:
        train_keys = common.get_speaker_keys(ref_keys_all, speaker)
        eval_keys = common.get_speaker_keys(test_keys_all, speaker)

        # Fitting data: array built from the reference keys paired with
        # themselves, plus the targets of those reference keys.
        train_matrix = svm.dict_to_array(train_keys, train_keys, distance_matrix)
        train_targets = svm.get_targets(train_keys, references)

        # Prediction input: reference keys paired with the test keys; the
        # targets of the test keys serve as the expected answers.
        eval_matrix = svm.dict_to_array(train_keys, eval_keys, distance_matrix)
        eval_targets = svm.get_targets(eval_keys, references)

        predicted = svm.fit_and_predict(train_matrix, train_targets, eval_matrix)
        speaker_scores.append(svm.calculate_accuracy(predicted, eval_targets))

    # One value per speaker followed by their mean (the 'Mean' column).
    accuracy_data[feature] = speaker_scores + [np.mean(speaker_scores)]

svm_single = common.create_dataframe(accuracy_data, speakers + ['Mean'])
svm_single

Unnamed: 0,1,2,3,4,5,6,Mean
log_fb_en_25_10_ham_deltas_norm,60.0,46.666667,20.0,20.0,26.666667,30.0,33.888889
log_fb_en_25_10_ham_norm,80.0,90.0,80.0,83.333333,76.666667,86.666667,82.777778
mfcc_25_10_ham_deltas_norm,60.0,46.666667,30.0,30.0,26.666667,36.666667,38.333333
mfcc_25_10_ham_norm,70.0,56.666667,76.666667,76.666667,70.0,73.333333,70.555556
ste_10_10_norm,60.0,33.333333,46.666667,43.333333,50.0,53.333333,47.777778
ste_sti_stzcr_10_10_norm,90.0,76.666667,73.333333,60.0,66.666667,73.333333,73.333333
sti_10_10_norm,73.333333,40.0,43.333333,60.0,53.333333,66.666667,56.111111
stzcr_10_10_norm,60.0,63.333333,53.333333,50.0,40.0,50.0,52.777778


In [22]:
csv_name = os.path.join(results_dir, 'svm_single_speaker.csv')

# Give pandas the path rather than a handle opened with plain open(..., 'w'):
# pandas asks that file objects be opened with newline='', and with a path it
# opens and closes the file itself.
svm_single.to_csv(csv_name)

## All speakers

In [23]:
# All-speakers evaluation: per feature set, one model is fitted on the pooled
# reference keys of every speaker and scored on the pooled test keys.
features = fts_norm
accuracy_data = {}

for feature in features:
    distance_matrix, ref_keys_all, test_keys_all = common.get_data(dm_dir, feature)

    # Pool the keys speaker by speaker, in the order given by `speakers`.
    pooled_ref_keys = []
    pooled_test_keys = []
    for speaker in speakers:
        pooled_ref_keys += common.get_speaker_keys(ref_keys_all, speaker)
        pooled_test_keys += common.get_speaker_keys(test_keys_all, speaker)

    # Fitting data: pooled reference keys paired with themselves.
    train_matrix = svm.dict_to_array(pooled_ref_keys, pooled_ref_keys, distance_matrix)
    train_targets = svm.get_targets(pooled_ref_keys, references)

    # Prediction input: pooled reference keys paired with pooled test keys.
    eval_matrix = svm.dict_to_array(pooled_ref_keys, pooled_test_keys, distance_matrix)
    eval_targets = svm.get_targets(pooled_test_keys, references)

    predicted = svm.fit_and_predict(train_matrix, train_targets, eval_matrix)

    # A single accuracy value per feature set (the 'All speakers' column).
    accuracy_data[feature] = svm.calculate_accuracy(predicted, eval_targets)

svm_all = common.create_dataframe(accuracy_data, ['All speakers'])
svm_all

Unnamed: 0,All speakers
log_fb_en_25_10_ham_deltas_norm,30.0
log_fb_en_25_10_ham_norm,74.444444
mfcc_25_10_ham_deltas_norm,33.333333
mfcc_25_10_ham_norm,52.777778
ste_10_10_norm,29.444444
ste_sti_stzcr_10_10_norm,76.111111
sti_10_10_norm,46.111111
stzcr_10_10_norm,53.333333


In [24]:
csv_name = os.path.join(results_dir, 'svm_all_speakers.csv')

# Give pandas the path rather than a handle opened with plain open(..., 'w'):
# pandas asks that file objects be opened with newline='', and with a path it
# opens and closes the file itself.
svm_all.to_csv(csv_name)

## All test data against one speaker's reference data

In [25]:
# Per feature set, a model is fitted on one speaker's reference keys at a time
# and scored on the pooled test keys of every speaker.
features = fts_norm
accuracy_data = {}

for feature in features:
    distance_matrix, ref_keys_all, test_keys_all = common.get_data(dm_dir, feature)

    feature_accuracies = []
    test_keys = []

    # Pool the test keys of all speakers, in the order given by `speakers`.
    for speaker in speakers:
        test_keys.extend(common.get_speaker_keys(test_keys_all, speaker))

    # The pooled test set is identical for every speaker's model, so its
    # expected targets are computed once per feature, not once per speaker.
    ref = svm.get_targets(test_keys, references)

    for speaker in speakers:
        ref_keys = common.get_speaker_keys(ref_keys_all, speaker)

        # Fitting data: this speaker's reference keys paired with themselves.
        X = svm.dict_to_array(ref_keys, ref_keys, distance_matrix)
        y = svm.get_targets(ref_keys, references)

        # Prediction input: this speaker's reference keys paired with the
        # pooled test keys.
        z = svm.dict_to_array(ref_keys, test_keys, distance_matrix)

        pred = svm.fit_and_predict(X, y, z)
        acc = svm.calculate_accuracy(pred, ref)
        feature_accuracies.append(acc)

    # Append the mean over speakers as the final ('Mean') column.
    feature_accuracies.append(np.mean(feature_accuracies))
    accuracy_data[feature] = feature_accuracies

svm_all_test_per_speaker = common.create_dataframe(accuracy_data, speakers + ['Mean'])
svm_all_test_per_speaker

Unnamed: 0,1,2,3,4,5,6,Mean
log_fb_en_25_10_ham_deltas_norm,39.444444,37.222222,13.333333,18.333333,20.0,27.777778,26.018519
log_fb_en_25_10_ham_norm,62.222222,65.0,64.444444,62.222222,65.555556,66.666667,64.351852
mfcc_25_10_ham_deltas_norm,37.222222,35.0,15.0,21.111111,18.888889,24.444444,25.277778
mfcc_25_10_ham_norm,50.555556,46.666667,43.333333,41.111111,35.0,53.333333,45.0
ste_10_10_norm,28.333333,23.888889,28.888889,27.222222,21.111111,27.222222,26.111111
ste_sti_stzcr_10_10_norm,66.666667,56.666667,59.444444,48.333333,55.555556,60.555556,57.87037
sti_10_10_norm,36.666667,33.333333,35.555556,37.222222,33.333333,43.888889,36.666667
stzcr_10_10_norm,50.555556,47.222222,44.444444,42.777778,46.111111,48.888889,46.666667


In [26]:
csv_name = os.path.join(results_dir, 'svm_all_test_per_speaker.csv')

# Give pandas the path rather than a handle opened with plain open(..., 'w'):
# pandas asks that file objects be opened with newline='', and with a path it
# opens and closes the file itself.
svm_all_test_per_speaker.to_csv(csv_name)

## All reference data against one speaker's test data

In [27]:
# Per feature set, the model is fitted on the pooled reference keys of every
# speaker and scored separately on each speaker's test keys.
features = fts_norm
accuracy_data = {}

for feature in features:
    distance_matrix, ref_keys_all, test_keys_all = common.get_data(dm_dir, feature)

    feature_accuracies = []
    ref_keys = []

    # Pool the reference keys of all speakers, in the order given by `speakers`.
    for speaker in speakers:
        ref_keys.extend(common.get_speaker_keys(ref_keys_all, speaker))

    # The fitting data depends only on the pooled reference keys, so it is
    # built once per feature instead of once per speaker.
    # NOTE(review): assumes svm.fit_and_predict does not modify X or y in
    # place - confirm against svm_utilities.
    X = svm.dict_to_array(ref_keys, ref_keys, distance_matrix)
    y = svm.get_targets(ref_keys, references)

    for speaker in speakers:
        test_keys = common.get_speaker_keys(test_keys_all, speaker)

        # Prediction input: pooled reference keys paired with this speaker's
        # test keys; the targets of those test keys are the expected answers.
        z = svm.dict_to_array(ref_keys, test_keys, distance_matrix)
        ref = svm.get_targets(test_keys, references)

        pred = svm.fit_and_predict(X, y, z)
        acc = svm.calculate_accuracy(pred, ref)
        feature_accuracies.append(acc)

    # Append the mean over speakers as the final ('Mean') column.
    feature_accuracies.append(np.mean(feature_accuracies))
    accuracy_data[feature] = feature_accuracies

svm_all_ref_per_speaker = common.create_dataframe(accuracy_data, speakers + ['Mean'])
svm_all_ref_per_speaker

Unnamed: 0,1,2,3,4,5,6,Mean
log_fb_en_25_10_ham_deltas_norm,33.333333,30.0,26.666667,30.0,26.666667,33.333333,30.0
log_fb_en_25_10_ham_norm,80.0,80.0,83.333333,66.666667,63.333333,73.333333,74.444444
mfcc_25_10_ham_deltas_norm,30.0,36.666667,36.666667,30.0,30.0,36.666667,33.333333
mfcc_25_10_ham_norm,50.0,60.0,60.0,46.666667,46.666667,53.333333,52.777778
ste_10_10_norm,30.0,36.666667,26.666667,36.666667,16.666667,30.0,29.444444
ste_sti_stzcr_10_10_norm,86.666667,73.333333,73.333333,76.666667,70.0,76.666667,76.111111
sti_10_10_norm,43.333333,60.0,40.0,46.666667,33.333333,53.333333,46.111111
stzcr_10_10_norm,53.333333,63.333333,56.666667,50.0,46.666667,50.0,53.333333


In [28]:
csv_name = os.path.join(results_dir, 'svm_all_ref_per_speaker.csv')

# Give pandas the path rather than a handle opened with plain open(..., 'w'):
# pandas asks that file objects be opened with newline='', and with a path it
# opens and closes the file itself.
svm_all_ref_per_speaker.to_csv(csv_name)