### 学習用 : チューニング用 : テスト用 のデータ生成

### 学習用データを用いて学習

In [55]:
import random

# Fixed seed so the train / tune / test split is reproducible between runs.
random.seed(0)

# Note-name prefixes; they are used to build the '<note>/<note><id>.wav' paths.
piano_notes = ['do', 're', 'mi', 'fa', 'so', 'ra']
# Recording ids 0..23, shared by every note.
piano_all_sounds = [*range(24)]

# 17 ids for training, 5 for tuning, and the remaining 2 for testing.
piano_train_sounds = random.sample(piano_all_sounds, 17)

# Ids not used for training; the tuning ids are drawn from these.
set_tune = set(piano_all_sounds) - set(piano_train_sounds)
piano_tune_sounds = random.sample(list(set_tune), 5)

# Ids used for neither training nor tuning; these become the test ids.
set_test = set(piano_all_sounds) - set(piano_train_sounds) - set(piano_tune_sounds)
piano_test_sounds = random.sample(list(set_test), 2)

# Report every split in ascending id order.
for split_name, split_ids in (
    ("all_sounds", piano_all_sounds),
    ("train_sounds", piano_train_sounds),
    ("tune_sounds", piano_tune_sounds),
    ("test_sounds", piano_test_sounds),
):
    print(f"{split_name} : {sorted(split_ids)}")




all_sounds : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
train_sounds : [1, 2, 3, 4, 5, 8, 9, 10, 12, 13, 15, 16, 18, 19, 20, 21, 23]
tune_sounds : [6, 7, 11, 14, 22]
test_sounds : [0, 17]


In [None]:
import scipy.io.wavfile as wav
import librosa
from sklearn.svm import SVC
import numpy

def get_mfcc(fname):
    """Load an audio file and return its MFCC feature matrix.

    Args:
        fname: Path of the audio file, handed straight to ``librosa.load``.

    Returns:
        The array produced by ``librosa.feature.mfcc`` with librosa's default
        settings. Callers in this file transpose it (``mfcc.T``) and use
        ``mfcc.shape[1]`` as the number of samples to label.
    """
    y, sr = librosa.load(fname)
    # Pass by keyword: librosa >= 0.10 made these parameters keyword-only, so
    # the positional form raises TypeError there. Keywords also work on older
    # releases, so this stays backward-compatible.
    return librosa.feature.mfcc(y=y, sr=sr)

if __name__ == '__main__':

    # Per-clip label vectors and feature matrices; each list is concatenated
    # into a single array once every training clip has been read.
    piano_note_training = []
    piano_sound_training = []

    # The position of a note in piano_notes is its integer class label.
    for note_index, piano_note in enumerate(piano_notes):
        print('\nReading data of {}...\n'.format(piano_note))
        for piano_sound in piano_train_sounds:
            wav_path = '{}/{}{}.wav'.format(piano_note, piano_note, piano_sound)
            print(wav_path)

            # Extract MFCCs. The original note here said "173 dimensions" --
            # presumably the number of frames per clip; TODO confirm.
            mfcc = get_mfcc(wav_path)
            # Transpose so that every column of mfcc becomes one sample row.
            piano_sound_training.append(mfcc.T)

            # One label per sample row. The builtin int replaces numpy.int,
            # which was only an alias for it and was removed in NumPy 1.24.
            label = numpy.full((mfcc.shape[1], ), note_index, dtype=int)
            piano_note_training.append(label)

    piano_sound_training = numpy.concatenate(piano_sound_training)
    piano_note_training = numpy.concatenate(piano_note_training)
    print('done.\n')


Reading data of do...

do/do12.wav
do/do13.wav
do/do1.wav
do/do8.wav
do/do16.wav
do/do15.wav
do/do23.wav
do/do9.wav
do/do18.wav
do/do5.wav
do/do19.wav
do/do3.wav
do/do20.wav
do/do2.wav
do/do4.wav
do/do10.wav
do/do21.wav

Reading data of re...

re/re12.wav
re/re13.wav
re/re1.wav
re/re8.wav
re/re16.wav
re/re15.wav
re/re23.wav
re/re9.wav
re/re18.wav
re/re5.wav
re/re19.wav
re/re3.wav
re/re20.wav
re/re2.wav
re/re4.wav
re/re10.wav
re/re21.wav

Reading data of mi...

mi/mi12.wav
mi/mi13.wav
mi/mi1.wav
mi/mi8.wav
mi/mi16.wav
mi/mi15.wav
mi/mi23.wav
mi/mi9.wav
mi/mi18.wav
mi/mi5.wav
mi/mi19.wav
mi/mi3.wav
mi/mi20.wav
mi/mi2.wav
mi/mi4.wav
mi/mi10.wav
mi/mi21.wav

Reading data of fa...

fa/fa12.wav
fa/fa13.wav
fa/fa1.wav
fa/fa8.wav
fa/fa16.wav
fa/fa15.wav
fa/fa23.wav
fa/fa9.wav
fa/fa18.wav
fa/fa5.wav
fa/fa19.wav
fa/fa3.wav
fa/fa20.wav
fa/fa2.wav
fa/fa4.wav
fa/fa10.wav
fa/fa21.wav

Reading data of so...

so/so12.wav
so/so13.wav
so/so1.wav
so/so8.wav
so/so16.wav
so/so15.wav
so/so23.wav
so/so9.wav

### パラメータチューニング
初期値 : 70%  
1e-1 : 63.3%  
1e-2 : 90.0%  
1e-3 : 96.6%  
1e-4 : 96.6%  
1e-5 : 96.6%  
1e-6 : 46.6%  
1e-7 : 23.3%  

In [84]:
# Choosing gamma: fit one SVC per candidate value and score it on the tuning clips.

gamma_list = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7]

for gamma in gamma_list:
    print(f'\n----- gamma={gamma} -----\n')
    svc = SVC(gamma=gamma)
    svc.fit(piano_sound_training, piano_note_training)
    print('----- Learning Done -----\n')

    # Accuracy bookkeeping, reset for every gamma.
    sounds_num, correct_sounds, correct_rate = 0, 0, 0.0

    for piano_note in piano_notes:
        for piano_sound in piano_tune_sounds:
            sounds_num += 1
            mfcc = get_mfcc(f'{piano_note}/{piano_note}{piano_sound}.wav')
            # predict() yields one label per row of mfcc.T; the most frequent
            # label is taken as the note for the whole clip.
            prediction = svc.predict(mfcc.T)
            counts = numpy.bincount(prediction)
            result = piano_notes[counts.argmax()]  # note decision
            original_title = f'{piano_note}'
            print(f'original note is {original_title}.\n Prediction of note is {result}.')

            if result != original_title:
                print('false')
                continue
            print('ture')
            correct_sounds += 1
    correct_rate = correct_sounds / sounds_num
    print(f'\n correct rate : {correct_rate*100}%.')


----- gamma=0.1 -----

----- Learning Done -----

original note is do.
 Prediction of note is do.
ture
original note is do.
 Prediction of note is do.
ture
original note is do.
 Prediction of note is do.
ture
original note is do.
 Prediction of note is do.
ture
original note is do.
 Prediction of note is do.
ture
original note is re.
 Prediction of note is re.
ture
original note is re.
 Prediction of note is do.
false
original note is re.
 Prediction of note is re.
ture
original note is re.
 Prediction of note is do.
false
original note is re.
 Prediction of note is re.
ture
original note is mi.
 Prediction of note is do.
false
original note is mi.
 Prediction of note is mi.
ture
original note is mi.
 Prediction of note is do.
false
original note is mi.
 Prediction of note is mi.
ture
original note is mi.
 Prediction of note is do.
false
original note is fa.
 Prediction of note is fa.
ture
original note is fa.
 Prediction of note is fa.
ture
original note is fa.
 Prediction of note is

original note is ra.
 Prediction of note is ra.
ture
original note is ra.
 Prediction of note is ra.
ture
original note is ra.
 Prediction of note is ra.
ture

 correct rate : 96.66666666666667%.

----- gamma=1e-06 -----

----- Learning Done -----

original note is do.
 Prediction of note is do.
ture
original note is do.
 Prediction of note is fa.
false
original note is do.
 Prediction of note is do.
ture
original note is do.
 Prediction of note is fa.
false
original note is do.
 Prediction of note is mi.
false
original note is re.
 Prediction of note is mi.
false
original note is re.
 Prediction of note is mi.
false
original note is re.
 Prediction of note is mi.
false
original note is re.
 Prediction of note is mi.
false
original note is re.
 Prediction of note is mi.
false
original note is mi.
 Prediction of note is mi.
ture
original note is mi.
 Prediction of note is mi.
ture
original note is mi.
 Prediction of note is mi.
ture
original note is mi.
 Prediction of note is do.
false


### 評価

In [85]:
# Final evaluation: refit with gamma fixed at 1e-4 and score the test clips.
svc = SVC(gamma=1e-4)
svc.fit(piano_sound_training, piano_note_training)
print('Learning Done\n')

# Accuracy bookkeeping.
sounds_num, correct_sounds, correct_rate = 0, 0, 0.0

for piano_note in piano_notes:
    for piano_sound in piano_test_sounds:
        sounds_num += 1
        mfcc = get_mfcc(f'{piano_note}/{piano_note}{piano_sound}.wav')
        # predict() yields one label per row of mfcc.T; the most frequent
        # label is taken as the note for the whole clip.
        prediction = svc.predict(mfcc.T)
        counts = numpy.bincount(prediction)
        result = piano_notes[counts.argmax()]  # note decision
        original_title = f'{piano_note}'
        print(f'original note is {original_title}.\n Prediction of note is {result}.')

        if result != original_title:
            print('false')
            continue
        print('ture')
        correct_sounds += 1

correct_rate = correct_sounds / sounds_num
print(f'\n correct rate : {correct_rate*100}%.')

Learning Done

original note is do.
 Prediction of note is do.
ture
original note is do.
 Prediction of note is do.
ture
original note is re.
 Prediction of note is re.
ture
original note is re.
 Prediction of note is re.
ture
original note is mi.
 Prediction of note is mi.
ture
original note is mi.
 Prediction of note is mi.
ture
original note is fa.
 Prediction of note is fa.
ture
original note is fa.
 Prediction of note is fa.
ture
original note is so.
 Prediction of note is so.
ture
original note is so.
 Prediction of note is so.
ture
original note is ra.
 Prediction of note is ra.
ture
original note is ra.
 Prediction of note is ra.
ture

 correct rate : 100.0%.
