# Imports and constants

In [8]:
# import statements

import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
import matplotlib.pyplot as plt
from customScripts import utilities as util
from customScripts import features as feat
from customScripts import onset

import librosa

In [9]:
# parameters

# audio sampling rate, in samples per second (alternative value: 96000)
sampling_rate = 100000

# size of one analysis frame, in samples (alternative value: 2048)
frame_length = 2000

# offset, in samples, between each pair of consecutive frames (alternative value: 1024)
hop_length = 1000

# how many mel frequency bins to use (alternative value: 80)
bin_number = 40

# desired number of frames per second (alternative value: 2);
# marked "not used" by the author
ground_thruth_conversion_const = 100

# input data -- currently disabled, kept for reference
#file_paths = ['music_data/shortName.flac']
#grount_truth_paths = ['music_data/shortName.onsets']
# prediction_paths = ['predictions/Muppets-02-01-01.csv', 'predictions/Muppets-02-04-04.csv', 'predictions/Muppets-03-04-03.csv']
# file_lengths = [1547, 1548, 1539] #in seconds

# File loading and feature extraction

In [10]:
# Collect the file paths for the training set. util.get_file_paths hands back
# six values, unpacked here as: onsets / beats / bpm ground-truth paths,
# followed by onsets / beats / bpm audio paths.
(
    train_onsets_gt_paths,
    train_beats_gt_paths,
    train_bpm_gt_paths,
    train_onsets_audio_paths,
    train_beats_audio_paths,
    train_bpm_audio_paths,
) = util.get_file_paths('music_data/train')

# Same six path collections for the test set.
(
    test_onsets_gt_paths,
    test_beats_gt_paths,
    test_bpm_gt_paths,
    test_onsets_audio_paths,
    test_beats_audio_paths,
    test_bpm_audio_paths,
) = util.get_file_paths('music_data/test')

In [11]:
# Load one training clip, passing the configured sampling_rate to the loader.
# (another candidate clip: ah_development_guitar_2684_TexasMusicForge_Dandelion_pt1)
single_audio = util.load_audio(
    'music_data/train/al_Media-103515(9.1-19.1).flac',
    sampling_rate,
)

In [12]:
# Spectrogram of the single clip, built from the frame / hop / bin parameters,
# together with its transpose.
single_spectrogram = feat.compute_spectrogram(
    single_audio,
    frame_length,
    hop_length,
    bin_number,
)
single_spectrogram_T = single_spectrogram.transpose()

# Length of each orientation: the transpose is reported as the frame count,
# the untransposed spectrogram as the bin count.
print(
    'frame number:',
    len(single_spectrogram_T),
)
print(
    'bin number:',
    len(single_spectrogram),
)

frame number: 1001
bin number: 40


In [13]:
# Ground truth for the single clip: for each annotation kind (onsets, beats,
# bpm) name the annotation file and pass it to the matching util loader.
#
# NOTE(review): the second loader argument is '/n' (a slash followed by n),
# not the newline escape '\n' -- confirm which one the util loaders expect.

single_gt_path_onsets = 'music_data/train/al_Media-103515(9.1-19.1).onsets'
sigle_gt_onsets = util.load_onsets_gt(single_gt_path_onsets, '/n')

single_gt_path_beats = 'music_data/train/al_Media-103515(9.1-19.1).beats'
sigle_gt_beats = util.load_beats_gt(single_gt_path_beats, '/n')

single_gt_path_bpm = 'music_data/train/al_Media-103515(9.1-19.1).bpm'
single_gt_bpm = util.load_bpm_gt(single_gt_path_bpm, '/n')

# Show the three loaded values, one per line.
print(sigle_gt_onsets, sigle_gt_beats, single_gt_bpm, sep='\n')

[0.02, 0.3, 0.4, 0.67, 0.76, 1.02, 1.15, 1.16, 1.4, 1.44, 1.52, 1.8, 1.91, 1.92, 2.17, 2.26, 2.52, 2.55, 2.65, 2.71, 2.93, 3.02, 3.3, 3.39, 3.6, 3.76, 4.02, 4.13, 4.38, 4.42, 4.5, 4.76, 4.88, 5.25, 5.5, 5.54, 5.63, 5.89, 5.99, 6.27, 6.38, 6.75, 7.02, 7.13, 7.4, 7.49, 7.51, 7.88, 8.13, 8.24, 8.27, 8.49, 8.63, 8.9, 9.01, 9.26, 9.37, 9.63, 9.74, 9.87]
[]
160.0


In [None]:
# Load the training audio referenced by train_onsets_audio_paths,
# passing the configured sampling_rate to the loader.
train_onsets_audio = util.load_audios(
    train_onsets_audio_paths,
    sampling_rate,
)

In [None]:
# compute onsets audio spectrogram

In [None]:
# compute ground truth for onsets audio

In [None]:
# Load the training audio referenced by train_beats_audio_paths,
# passing the configured sampling_rate to the loader.
train_beats_audio = util.load_audios(
    train_beats_audio_paths,
    sampling_rate,
)

In [None]:
# compute beats audio spectrogram

In [None]:
# compute ground truth for beats audio

In [None]:
# Load the training audio referenced by train_bpm_audio_paths,
# passing the configured sampling_rate to the loader.
train_bpm_audio = util.load_audios(
    train_bpm_audio_paths,
    sampling_rate,
)

In [None]:
# compute bpm audio spectrogram

In [None]:
# Ground truth for the bpm training files, loaded from train_bpm_gt_paths.
# NOTE(review): the second argument is '/n' (a slash followed by n), not the
# newline escape '\n' -- confirm which one util.load_ground_truths expects.
train_bpm_gt = util.load_ground_truths(
    train_bpm_gt_paths,
    '/n',
)

# Dump the loaded values for inspection.
print(train_bpm_gt)

In [None]:
# Load ground truth, pad it with zeroes and adjust it to the frame number.
#
# NOTE: the code below is disabled -- it is wrapped in a bare string literal,
# so nothing in it runs. It relies on grount_truth_paths and file_lengths,
# which exist only as commented-out lines in the parameters cell, and on
# spectrogram_1T, which is not defined in the visible part of this notebook.

# episode 1
"""
gt_1 = util.load_ground_truth(grount_truth_paths[0], '/n')
gt_1 = util.compute_0_padded_gt(gt_1, file_lengths[0])
gt_1 = util.adjust_gt_to_frames(gt_1, ground_thruth_conversion_const)
# append zeros so that lenght equals the frames list length
for i in range(len(spectrogram_1T) - len(gt_1)):
    gt_1.append(0)
print('ground truth size:',len(gt_1))
"""

In [None]:
# Concatenate training data.
#
# NOTE: disabled -- the statements are wrapped in a bare string literal and do
# not run. They expect gt_1, gt_2 and spectrogram_1T / _2T / _3T, none of which
# is assigned by live code in the visible part of this notebook.
"""
gt_train = gt_1 + gt_2

spectrogram_train = np.concatenate((spectrogram_1T, spectrogram_2T), axis=0)

spectrogram_predict = spectrogram_3T

#spectrogram_train[3195] == spectrogram_2T[100]
"""
# Debugging aid (commented out): print the indices where gt_train equals 1.
#for i,g in enumerate(gt_train):
#    if g == 1:
#        print(i)

# Classic onset detection method

In [16]:
# Classic onset detection on the single clip.
# compute_odf presumably returns an onset detection function (one value per
# spectrogram frame) and apply_threshold appears to suppress values below the
# given threshold (1500) -- confirm both in customScripts.onset.
odf_single = onset.compute_odf(single_spectrogram)
peaks_single = onset.apply_threshold(odf_single, 1500)
#print(peaks_single)

# Print every positive entry as "<time in seconds>    <value>".
# The time of frame i is derived from the configured parameters
# (i * hop_length / sampling_rate) instead of a hard-coded frame rate of 100,
# so the timestamps stay correct if sampling_rate or hop_length are changed.
# With the current values (100000 / 1000) the printed numbers are unchanged.
for i, p in enumerate(peaks_single):
    if p > 0:
        print(i * hop_length / sampling_rate, '   ', p)

# NOTE(review): this prints the *path* of the onsets ground-truth file, not the
# loaded onset times (sigle_gt_onsets) -- confirm which one was intended for
# comparing against the peaks above.
print(single_gt_path_onsets)

# Disabled follow-up step (note: `o` is not a name imported by this notebook):
# maxima = o.pick_local_peaks(peaks)

# print(maxima)

0.01     1742.6392129428284
0.02     1600.7588761630718
0.3     1801.2446125331671
0.75     1822.5924735585686
0.76     1632.8056901210032
1.15     1587.9560521283174
1.49     7875.300035069225
1.5     1882.0180840719413
1.52     2043.8718512218793
2.26     1666.8560254506956
3.02     1507.6319824731281
3.74     1752.6967106371621
3.75     2720.5941962364595
3.76     3136.860407678414
4.37     1554.4353469450339
4.47     1595.3874809142633
4.49     3007.067819069324
4.5     2256.413859780441
4.75     1554.6523644829608
4.86     4732.329759478317
5.21     1566.2847433313234
5.6     1507.600289405688
5.97     2772.0946463335895
5.98     1911.2935683658259
5.99     3146.4133197320093
6.72     1958.0668389468638
7.46     2374.6976953303392
8.6     1595.6899785886158
9.71     2053.5047510169798
9.74     1856.5917227166296
10.0     7823.449531895277
music_data/train/al_Media-103515(9.1-19.1).onsets


# Classifier training and prediction for onsets

In [None]:
# use a Multi-layer Perceptron classifier with default parameters
#clf = MLPClassifier()

In [None]:
#clf.fit(spectrogram_train, gt_train)

In [None]:
#prediction = clf.predict(spectrogram_predict)

In [None]:
#np.savetxt(prediction_paths[2], prediction, delimiter="/n")

# Evaluation for onset detection

In [None]:
# Precision of the onset predictions under micro / macro / weighted averaging.
# NOTE: disabled -- wrapped in a bare string literal, so nothing here runs.
# gt_3 and prediction are not assigned by live code in the visible part of
# this notebook.
"""
print('precision (micro average):',precision_score(gt_3, prediction, average='micro'))
print('precision (macro average):',precision_score(gt_3, prediction, average='macro'))
print('precision (weighted average):',precision_score(gt_3, prediction, average='weighted'))
"""

In [None]:
# Recall of the onset predictions under micro / macro / weighted averaging.
# NOTE: disabled -- wrapped in a bare string literal, so nothing here runs.
# gt_3 and prediction are not assigned by live code in the visible part of
# this notebook.
"""
print('recall (micro average):',recall_score(gt_3, prediction, average='micro'))
print('recall (macro average):',recall_score(gt_3, prediction, average='macro'))
print('recall (weighted average):',recall_score(gt_3, prediction, average='weighted'))
"""

In [None]:
# F1 score of the onset predictions under micro / macro / weighted averaging.
# NOTE: disabled -- wrapped in a bare string literal, so nothing here runs.
# gt_3 and prediction are not assigned by live code in the visible part of
# this notebook.
"""
print('f1 (micro average):',f1_score(gt_3, prediction, average='micro'))
print('f1 (macro average):',f1_score(gt_3, prediction, average='macro'))
print('f1 (weighted average):',f1_score(gt_3, prediction, average='weighted'))
"""

In [None]:
# ROC curve and area under the curve for the onset predictions.
# NOTE: disabled -- wrapped in a bare string literal, so nothing here runs.
# gt_3 and prediction are not assigned by live code in the visible part of
# this notebook.
# NOTE(review): roc_curve is normally given scores or probabilities rather
# than hard class predictions; if prediction comes from clf.predict(...), the
# curve has a single operating point -- confirm before re-enabling.
"""
# calculate false positive / true positive rate and area under curve
fpr, tpr, threshold = roc_curve(gt_3, prediction)
roc_auc = auc(fpr, tpr)

# plot ROC curve
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
"""