In [5]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from note_seq.midi_io import midi_file_to_note_sequence
import torch
import dill

In [6]:
def get_logmel(audio_numpy, sr, n_mels=128, fmax=12000) :
    """Return the log-scaled mel spectrogram of an audio signal.

    Parameters
    ----------
    audio_numpy : audio samples, passed to librosa as ``y``.
    sr : sampling rate of ``audio_numpy``.
    n_mels : number of mel bands (default 128).
    fmax : upper frequency bound of the mel filter bank (default 12000).

    Returns
    -------
    The mel power spectrogram converted with ``librosa.power_to_db`` using
    ``ref=np.max``, i.e. scaled relative to the loudest bin of this signal.

    NOTE(review): if callers load audio at librosa's default sample rate,
    fmax=12000 may lie above the Nyquist frequency - confirm this is intended.
    """
    mel_power = librosa.feature.melspectrogram(
        y=audio_numpy, sr=sr, n_mels=n_mels, fmax=fmax
    )
    return librosa.power_to_db(mel_power, ref=np.max)

In [11]:
def preprocess_OF(num, data_dir='dataset/full') :
    """Load clip ``num`` and build its frame-level onset / instrument targets.

    Reads ``{data_dir}/beatbox/{num}.wav`` and ``{data_dir}/midi/{num}.mid``.

    Parameters
    ----------
    num : clip identifier used in both file names.
    data_dir : dataset root directory (default ``'dataset/full'``).

    Returns
    -------
    logmel : torch.Tensor built from ``get_logmel``; its second dimension is
        used as the number of frames.
    onset : 1-D tensor with one entry per frame, 1 at each note's start frame.
    frameset : 1-D tensor with one entry per frame holding the instrument id
        of the note covering that frame, 0 where no note is active.
    v : frames per second of MIDI time (``n_frames / midi.total_time``), the
        factor used to convert note times to frame indices.

    Raises
    ------
    ValueError : if the MIDI file has zero total duration.
    KeyError : if a note with at least one labelled frame has a pitch that is
        not in the instrument map.
    """
    note_instruments = {36 : 1, 39 : 2, 42 : 3, 49 : 4}

    # input preprocess
    y, sr = librosa.load(f'{data_dir}/beatbox/{num}.wav')
    logmel = torch.Tensor(get_logmel(y, sr))
    n_frames = logmel.shape[1]

    midi = midi_file_to_note_sequence(f'{data_dir}/midi/{num}.mid')
    if midi.total_time <= 0 :
        # Would otherwise surface as an opaque ZeroDivisionError below.
        raise ValueError(f'MIDI for clip {num} has no duration; cannot align notes to frames')
    # The MIDI duration is stretched over the whole spectrogram.
    v = n_frames/midi.total_time
    onset = torch.zeros(n_frames)
    frameset = torch.zeros(n_frames)

    for note in midi.notes :
        # A note starting exactly at total_time maps to index n_frames, which
        # is out of range; clamp it onto the last frame.
        start = min(int(note.start_time*v), n_frames - 1)
        onset[start] = 1

        # NOTE(review): the "- 1" leaves the last frame(s) of every note
        # unlabelled, presumably to separate back-to-back notes - confirm.
        stop = int(note.end_time*v) - 1
        if stop > start :
            frameset[start:stop] = note_instruments[note.pitch]
    return logmel, onset, frameset, v

In [None]:
# preprocess_OF(1)

In [14]:
# MIDI pitch -> instrument class id. Id 0 is not used here; preprocess_OF
# leaves frames without a note at 0.
# NOTE(review): the pitches look like General MIDI percussion keys
# (36 kick, 39 clap, 42 closed hi-hat, 49 crash) - confirm against the dataset.
# preprocess_OF defines its own local copy of this mapping and does not read
# this global.
note_instruments = {
    36 : 1,
    39 : 2,
    42 : 3,
    49 : 4,
}

In [15]:
# Number of spectrogram frames per example: the dataset-building cells pad or
# slice every clip to exactly PATCH frames.
PATCH = 300

In [20]:
# Build the training split from clips 1..4375.
# Every clip becomes one or two examples of exactly PATCH frames:
#   * up to PATCH frames      -> one example, zero-padded on the right
#   * up to 2 * PATCH frames  -> two examples: the first and the last PATCH
#                                frames (they overlap when the clip is shorter
#                                than 2 * PATCH)
#   * longer                  -> reported and skipped
# NOTE(review): the spectrogram is in dB relative to the clip maximum, so a
# padding value of 0 is the loudest level rather than silence - confirm that
# this is what the model should see.
input_train = []
output_onset_train = []
output_frameset_train = []
vs = []

for i in range(1, 4376) :
    input_, output_onset_, output_frameset_, v = preprocess_OF(i)
    n_frames = input_.shape[1]

    if n_frames <= PATCH :
        padded_input = torch.zeros((input_.shape[0], PATCH))
        padded_input[:, :n_frames] = input_

        padded_onset = torch.zeros(PATCH)
        padded_onset[:output_onset_.shape[0]] = output_onset_

        padded_frameset = torch.zeros(PATCH)
        padded_frameset[:output_frameset_.shape[0]] = output_frameset_

        patches = [(padded_input, padded_onset, padded_frameset)]

    elif n_frames <= 2 * PATCH :
        patches = [
            (input_[:, :PATCH], output_onset_[:PATCH], output_frameset_[:PATCH]),
            (input_[:, -PATCH:], output_onset_[-PATCH:], output_frameset_[-PATCH:]),
        ]

    else :
        # Still best-effort (the clip is dropped), but say which one and why.
        print(f'Error: clip {i} has {n_frames} frames (more than {2 * PATCH}); skipped')
        patches = []

    for patch_input, patch_onset, patch_frameset in patches :
        input_train.append(patch_input.numpy())
        output_onset_train.append(patch_onset.numpy())
        output_frameset_train.append(patch_frameset.numpy())
        # One entry per example so every saved tensor has the same length.
        vs.append(v)

    if i%100 == 0:
        print(i, end=' ')
    if i%1000 == 0:
        print()

# Stack once in numpy before converting: building a tensor directly from a
# list of arrays is very slow. `with` makes sure each file is flushed and
# closed.
for filename, examples in (
    ('logmels_patch300_train', input_train),
    ('onset_patch300_train', output_onset_train),
    ('frameset_patch300_train', output_frameset_train),
    ('v_patch300_train', vs),
) :
    with open(filename, 'wb') as f :
        dill.dump(torch.from_numpy(np.array(examples, dtype=np.float32)), f)
print('Done')

100 200 300 400 500 600 700 800 900 1000 
1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 
2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 
3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 
4100 4200 4300 Done


In [21]:
# Build the test split from clips 4376..6250.
# Every clip becomes one or two examples of exactly PATCH frames:
#   * up to PATCH frames      -> one example, zero-padded on the right
#   * up to 2 * PATCH frames  -> two examples: the first and the last PATCH
#                                frames (they overlap when the clip is shorter
#                                than 2 * PATCH)
#   * longer                  -> reported and skipped
# NOTE(review): the spectrogram is in dB relative to the clip maximum, so a
# padding value of 0 is the loudest level rather than silence - confirm that
# this is what the model should see.
# NOTE(review): these lists hold TEST data despite the `_train` suffix; the
# names are kept so any later cell that reads them keeps working.
input_train = []
output_onset_train = []
output_frameset_train = []
vs = []

for i in range(4376, 6251) :
    input_, output_onset_, output_frameset_, v = preprocess_OF(i)
    n_frames = input_.shape[1]

    if n_frames <= PATCH :
        padded_input = torch.zeros((input_.shape[0], PATCH))
        padded_input[:, :n_frames] = input_

        padded_onset = torch.zeros(PATCH)
        padded_onset[:output_onset_.shape[0]] = output_onset_

        padded_frameset = torch.zeros(PATCH)
        padded_frameset[:output_frameset_.shape[0]] = output_frameset_

        patches = [(padded_input, padded_onset, padded_frameset)]

    elif n_frames <= 2 * PATCH :
        patches = [
            (input_[:, :PATCH], output_onset_[:PATCH], output_frameset_[:PATCH]),
            (input_[:, -PATCH:], output_onset_[-PATCH:], output_frameset_[-PATCH:]),
        ]

    else :
        # Still best-effort (the clip is dropped), but say which one and why.
        print(f'Error: clip {i} has {n_frames} frames (more than {2 * PATCH}); skipped')
        patches = []

    for patch_input, patch_onset, patch_frameset in patches :
        input_train.append(patch_input.numpy())
        output_onset_train.append(patch_onset.numpy())
        output_frameset_train.append(patch_frameset.numpy())
        # One entry per example so every saved tensor has the same length.
        vs.append(v)

    if i%100 == 0:
        print(i, end=' ')
    if i%1000 == 0:
        print()

# Stack once in numpy before converting: building a tensor directly from a
# list of arrays is very slow. `with` makes sure each file is flushed and
# closed.
# NOTE(review): 'v_test' does not follow the '<name>_patch300_<split>' pattern
# of the other files (the training cell writes 'v_patch300_train'); the name
# is kept so existing loaders keep working.
for filename, examples in (
    ('logmels_patch300_test', input_train),
    ('onset_patch300_test', output_onset_train),
    ('frameset_patch300_test', output_frameset_train),
    ('v_test', vs),
) :
    with open(filename, 'wb') as f :
        dill.dump(torch.from_numpy(np.array(examples, dtype=np.float32)), f)
print('Done')

4400 4500 4600 4700 4800 4900 5000 
5100 5200 5300 5400 5500 5600 5700 5800 5900 6000 
6100 6200 Done
