In [1]:
import librosa
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd

path_to_ogg = 'openmic-2018/audio/'

# convert all .ogg files to log-scaled mel spectrograms

# load label file
# sample_key,instrument,relevance,num_responses
names = ['sample_key', 'instrument', 'relevance', 'num_responses']
labels = pd.read_csv('openmic-2018/openmic-2018-aggregated-labels.csv', delimiter=',', names=names, header=0)


In [2]:
# get all unique labels
unique_labels = labels.instrument.unique()
print(unique_labels)

['clarinet' 'flute' 'trumpet' 'saxophone' 'voice' 'accordion' 'ukulele'
 'mallet_percussion' 'piano' 'guitar' 'mandolin' 'banjo' 'synthesizer'
 'trombone' 'organ' 'drums' 'bass' 'cymbals' 'cello' 'violin']


In [3]:
# Architectural constants.
NUM_FRAMES = 96  # Frames in input mel-spectrogram patch.
NUM_BANDS = 64  # Frequency bands in input mel-spectrogram patch.
EMBEDDING_SIZE = 128  # Size of embedding layer.

# Hyperparameters used in feature and example generation.
SAMPLE_RATE = 16000
STFT_WINDOW_LENGTH_SECONDS = 0.025
STFT_HOP_LENGTH_SECONDS = 0.010
NUM_MEL_BINS = NUM_BANDS
MEL_MIN_HZ = 125
MEL_MAX_HZ = 7500
LOG_OFFSET = 0.01  # Offset used for stabilized log of input mel-spectrogram.
EXAMPLE_WINDOW_SECONDS = 0.96  # Each example contains 96 10ms frames
EXAMPLE_HOP_SECONDS = 0.96     # with zero overlap.

In [5]:
import vggish_input

# for saving to csv along with file name
labels_to_save = []

# loop through all .ogg files
# loop through all subfolders
for folder in os.listdir(path_to_ogg):
    full_path = path_to_ogg + folder

    print(f'Processing folder {folder}')

    for filename in os.listdir(full_path):
        if filename.endswith(".ogg"):
            # load audio file and resample to 16000 Hz
            y, sr = librosa.load(path_to_ogg + '/' + folder + '/' + filename, sr=SAMPLE_RATE)

            # get mel-spectrogram
            spec = vggish_input.waveform_to_examples(y, sr)

            np.save('spectrograms/' + filename[:-4] + '.npy', spec)

            # get the labels for this file
            file_labels = labels.loc[labels['sample_key'] == filename[:-4]]

            label = np.zeros(len(unique_labels))

            for i in range(len(file_labels)):
                label[np.where(unique_labels == file_labels.iloc[i]['instrument'])] = 1

            labels_to_save.append(np.append(filename[:-4], label))

            continue
        else:
            continue

# print shape of spectrogram
print(spec.shape)


# save labels to csv
labels_to_save = np.array(labels_to_save)

# add header
# ['filename' 'clarinet' 'flute' 'trumpet' 'saxophone' 'voice' 'accordion' 'ukulele' 'mallet_percussion' 'piano' 'guitar' 'mandolin' 'banjo' 'synthesizer' 'trombone' 'organ' 'drums' 'bass' 'cymbals' 'cello' 'violin']
headers = np.append(['filename'], unique_labels)
labels_to_save = np.vstack((headers, labels_to_save))

np.savetxt('labels.csv', labels_to_save, delimiter=',', fmt='%s')

Processing folder 132
Processing folder 130
Processing folder 029
Processing folder 039
Processing folder 155
Processing folder 149
Processing folder 057
Processing folder 002
Processing folder 083
Processing folder 006
Processing folder 126
Processing folder 097
Processing folder 125
Processing folder 145
Processing folder 076
Processing folder 139
Processing folder 142
Processing folder 037
Processing folder 104
Processing folder 059
Processing folder 140
Processing folder 080
Processing folder 010
Processing folder 075
Processing folder 115
Processing folder 043
Processing folder 033
Processing folder 063
Processing folder 124
Processing folder 018
Processing folder 131
Processing folder 091
Processing folder 047
Processing folder 154
Processing folder 100
Processing folder 118
Processing folder 056
Processing folder 086
Processing folder 088
Processing folder 081
Processing folder 078
Processing folder 090
Processing folder 027
Processing folder 045
Processing folder 141
Processing