In [1]:
import numpy as np
import pandas as pd
from network import *
from data_process import *
from util import *

import itertools
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# Expose only the GPU with index 2 to CUDA for this process.
# NOTE(review): `os` is not imported explicitly in this cell; presumably it is
# pulled in by one of the star imports above -- confirm, or add `import os`.
os.environ['CUDA_VISIBLE_DEVICES'] = "2"


In [8]:
def get_classes_name():
    """Return the entries of ``dic_ESC50()`` for keys 0..49, in key order.

    Presumably these are the ESC-50 class names indexed by class id
    (inferred from the helper's name -- confirm against ``dic_ESC50``).
    """
    index_to_name = dic_ESC50()
    return [index_to_name[class_idx] for class_idx in range(50)]


def get_probabilities(model_wave, model_logmel, test_pkl):
    """Collect per-window outputs of the waveform and log-mel models.

    Every record loaded from ``test_pkl`` is cut into sliding windows of
    66150 samples with a hop of ``int(44100 * 0.2)`` samples. Windows whose
    peak absolute amplitude is below 0.005 are discarded as silent. Each
    remaining window is passed to ``model_wave`` as raw samples and to
    ``model_logmel`` as a stacked (log-mel, delta) feature.

    Args:
        model_wave: model called on a float tensor of shape
            (n_windows, 1, 66150). It is switched to eval mode here.
        model_logmel: model called on a float tensor built from
            ``np.stack((logmel, delta))`` per window, i.e.
            (n_windows, 2, n_mels, frames). It is switched to eval mode here.
        test_pkl: path handed to ``load_data``. Each loaded item must provide
            the keys ``'key'``, ``'label'`` and ``'data'``.

    Returns:
        A 2-D float64 numpy array with one row per kept window: the columns
        of the wave-model output, followed by the columns of the log-mel
        model output, followed by the record label. Rows are shuffled with
        ``np.random.shuffle`` (unseeded, so the order differs between runs).
        If no window is kept at all, an empty array of shape (0, 101) is
        returned.

    Notes:
        Records that contain no non-silent window are reported and skipped;
        previously they raised an indexing error on the empty window array.
        Both models and all inputs are moved to / expected on the GPU.
    """
    model_wave.eval()
    model_logmel.eval()

    win_size = 66150
    stride = int(44100 * 0.2)
    sampleSet = load_data(test_pkl)

    # Per-record result blocks; concatenated once at the end instead of
    # re-allocating the full result array on every iteration.
    outputs = []
    for item in sampleSet:
        print(item['key'])
        record_data = item['data']
        wins_data = []
        feats = []
        for j in range(0, len(record_data) - win_size + 1, stride):
            win_data = record_data[j: j + win_size]

            # Skip the window if the cropped region is silent.
            if np.max(np.abs(win_data)) < 0.005:
                continue

            wins_data.append(win_data)

            # n_mels=64 rows; presumably 442 frames with librosa's default
            # centering, of which the first 441 are kept below.
            melspec = librosa.feature.melspectrogram(win_data, 44100, n_fft=2048, hop_length=150, n_mels=64)
            logmel = librosa.logamplitude(melspec)[:, :441]
            delta = librosa.feature.delta(logmel)
            feats.append(np.stack((logmel, delta)))

        if not wins_data:
            # Nothing to feed to the models: the record is too short or
            # entirely below the silence threshold.
            print('skipped (no non-silent window): %s' % item['key'])
            continue

        # Add the channel axis expected by the waveform model.
        wins_data = np.array(wins_data)[:, np.newaxis, :]
        feats = np.array(feats)

        data_wave = torch.from_numpy(wins_data).type(torch.FloatTensor).cuda()
        data_logmel = torch.from_numpy(feats).type(torch.FloatTensor).cuda()

        # volatile=True: inference only, no autograd graph is kept.
        data_wave = Variable(data_wave, volatile=True)
        data_logmel = Variable(data_logmel, volatile=True)

        output_wave = model_wave(data_wave).data.cpu().numpy()
        output_logmel = model_logmel(data_logmel).data.cpu().numpy()

        # One label per window so it can be appended as the last column.
        label_seg = np.repeat(item['label'], output_wave.shape[0])
        outputs.append(np.c_[(output_wave, output_logmel, label_seg)])

    if not outputs:
        # Preserve the historical empty-result shape (100 outputs + label).
        return np.zeros((0, 101))

    output_total = np.concatenate(outputs, axis=0).astype(np.float64)
    np.random.shuffle(output_total)
    return output_total



### 存储两个网络输出的概率

In [11]:
# For each of the 5 folds: load the waveform and log-mel checkpoints, run
# get_probabilities and write the returned array to a comma-separated file.
test_acc = []
for fold_num in range(5):
    model_wave_name = '../model/M9_fold%d_v1_epoch160.pkl' % fold_num
    model_logmel_name = '../model/M9Logmel_fold%d_v1_epoch100.pkl' % fold_num

    model_wave = torch.load(model_wave_name)
    model_logmel = torch.load(model_logmel_name)

    # Despite the variable name this is the *training* split of the fold;
    # use 'fold<N>_test.cPickle' (and rename the output file) to export
    # the test split instead.
    testPkl = '../data_wave_44100/fold%d_train.cPickle' % fold_num

    probabilities = get_probabilities(model_wave, model_logmel, testPkl)

    filename = 'probabilities.%d.train.txt' % fold_num
    np.savetxt(filename, probabilities, delimiter=',')
    print('file saved in %s' % filename)


3-139331-A
4-172180-A
2-158746-B
3-110536-A
3-163607-A
4-218199-E
2-32834-A
4-128659-A
4-169508-A
4-132816-A
3-170015-A
4-180453-A
4-170078-A
4-202298-A
2-78562-A
3-203377-A
3-120526-B
3-166422-A
4-181628-A
2-122104-B
3-93010-A
3-193767-A
3-181278-A
4-147658-A
4-130891-A
4-218304-A
3-134699-A
2-77347-A
4-216211-A
4-172736-B
4-210309-A
2-102414-F
3-156393-A
4-149940-A
2-104168-A
2-141563-A
4-175846-A
4-102871-A
4-157611-A
2-82274-B
3-163727-A
4-201300-A
2-123896-A
4-197871-A
2-166644-A
4-188191-A
2-60180-A
2-109231-A
3-161010-A
4-189833-A
3-182710-A
4-212728-A
3-104958-A
2-76408-B
3-141559-A
3-127890-A
2-78799-A
2-109759-B
2-108017-A
3-182025-A
3-150231-A
3-151213-A
3-151080-A
2-91912-A
4-164206-A
4-90014-A
3-110913-D
4-204683-A
2-85139-A
4-164064-B
2-122820-B
2-102852-A
3-152039-A
4-172742-A
2-103423-A
4-151242-A
3-128512-B
3-166546-A
4-181286-A
3-104632-A
2-133863-A
2-106019-A
4-204119-A
4-198841-A
2-102567-C
4-195497-A
2-59565-A
2-250710-A
3-108451-B
4-160036-A
4-191246-B
4-218199-F


### multiscale特征单独训练后概率相加

In [7]:
# Evaluate the three separately trained multiscale models (srf / mrf / lrf)
# jointly on the test split of every fold via test_joint3; per the section
# heading their probabilities are summed. Per-fold accuracy is collected in
# test_acc.
test_acc = []
for fold_num in range(5):
    model_srf_name = '../model/M9_srf_fold%d_v1_epoch160.pkl' % fold_num
    model_mrf_name = '../model/M9_mrf_fold%d_v1_epoch160.pkl' % fold_num
    model_lrf_name = '../model/M9_lrf_fold%d_v1_epoch160.pkl' % fold_num

    model_srf = torch.load(model_srf_name)
    model_mrf = torch.load(model_mrf_name)
    model_lrf = torch.load(model_lrf_name)

    testPkl = '../data_wave_44100/fold%d_test.cPickle' % fold_num
    acc, cnf_matrix = test_joint3(model_srf, model_mrf, model_lrf, testPkl)
    test_acc.append(acc)



Test set: Average loss: 51.0608, TestACC: 270/400 67.50%


Test set: Average loss: 47.6853, TestACC: 263/400 65.75%


Test set: Average loss: 41.5801, TestACC: 279/400 69.75%


Test set: Average loss: 33.7102, TestACC: 285/400 71.25%


Test set: Average loss: 43.5532, TestACC: 264/400 66.00%



### multiscale特征单独训练后与logmel模型概率相加

In [10]:
# Evaluate the three multiscale models (srf / mrf / lrf) together with the
# log-mel model on the test split of every fold via test_joint4; per the
# section heading their probabilities are summed. Per-fold accuracy is
# collected in test_acc.
test_acc = []
for fold_num in range(5):
    model_srf_name = '../model/M9_srf_fold%d_v1_epoch160.pkl' % fold_num
    model_mrf_name = '../model/M9_mrf_fold%d_v1_epoch160.pkl' % fold_num
    model_lrf_name = '../model/M9_lrf_fold%d_v1_epoch160.pkl' % fold_num
    model_logmel_name = '../model/M9Logmel_fold%d_v1_epoch100.pkl' % fold_num

    model_srf = torch.load(model_srf_name)
    model_mrf = torch.load(model_mrf_name)
    model_lrf = torch.load(model_lrf_name)
    model_logmel = torch.load(model_logmel_name)

    testPkl = '../data_wave_44100/fold%d_test.cPickle' % fold_num
    acc, cnf_matrix = test_joint4(model_srf, model_mrf, model_lrf, model_logmel, testPkl)
    test_acc.append(acc)



Test set: Average loss: 52.0070, TestACC: 303/400 75.75%


Test set: Average loss: 52.7684, TestACC: 293/400 73.25%


Test set: Average loss: 47.2480, TestACC: 308/400 77.00%


Test set: Average loss: 38.8772, TestACC: 303/400 75.75%


Test set: Average loss: 48.9153, TestACC: 289/400 72.25%



In [11]:
# Summarise the per-fold test accuracies in a one-row table, followed by
# their mean and standard deviation.
# Work on a copy: appending the statistics to test_acc itself would make the
# cell non-idempotent (a second run would average the previous average and
# std as if they were folds, and the column count would no longer match).
fold_acc = list(test_acc)
summary_row = fold_acc + [np.average(fold_acc), np.std(fold_acc)]

# 'fold0' ... 'fold<k-1>' for however many folds were evaluated.
fold_columns = list(map('fold{}'.format, range(len(fold_acc))))

df = pd.DataFrame(
    [summary_row],
    index=['Test'],
    columns=fold_columns + ['average', 'std'],
)
df.head()

Unnamed: 0,fold0,fold1,fold2,fold3,fold4,average,std
Test,77.5,72.0,76.5,76.25,71.75,74.8,2.425902


# 