<a href="https://colab.research.google.com/github/QColeman97/AudioTagger/blob/master/CreateDataFiles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# This cell contains preferred data pre-processing methods

from google.colab import drive
drive.mount('/content/drive')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import librosa
from librosa import display
import os
import glob


# Root folder (relative to the mounted Drive) holding the dataset files
input_path = 'drive/My Drive/AudioTaggerData/'

train_files_path = input_path + 'FSDKaggle2018.audio_train'
test_files_path = input_path + 'FSDKaggle2018.audio_test'
train_csv_path = (input_path +
                  'FSDKaggle2018.meta/train_post_competition.csv')
test_csv_path = (input_path +
                 'FSDKaggle2018.meta/' +
                 'test_post_competition_scoring_clips.csv')

# Data preprocessing part

df_train = pd.read_csv(train_csv_path)
df_test = pd.read_csv(test_csv_path)

# Class names in order of first appearance in the training CSV; a label's
# position in this array is its integer class index.
unique_labels = df_train.label.unique()
num_class = len(unique_labels)
label2index = {label: index for index, label in enumerate(unique_labels)}

# Lookup tables: first CSV column -> second CSV column.  get_labels() uses
# them as "wav file name -> class name".
# `pd.Series.from_csv` (deprecated in pandas 0.21, removed in 1.0) did the
# same thing: first column as index, next column as values.  Building the
# mapping from the DataFrames loaded above works on every pandas version and
# avoids parsing each CSV a second time.
train_dict = df_train.set_index(df_train.columns[0]).iloc[:, 0].to_dict()
test_dict = df_test.set_index(df_test.columns[0]).iloc[:, 0].to_dict()


# array of labels in number form (0 = hi-hat, 1 = saxophone, etc)
label_emb_indices = np.array([label2index[label] for label in df_train.label])

# Test labels use the training label -> index mapping, so a test label that
# never occurs in the training CSV raises KeyError here.
label_emb_test_indices = np.array([label2index[label] for label in df_test.label])


def pre_process(pathname):
    """Load one audio file and convert it to a log-mel spectrogram.

    Pipeline: load at 32 kHz -> trim leading/trailing silence -> time-stretch
    by a factor of 2 -> STFT magnitude -> perceptual weighting -> mel
    filterbank (32 bands) -> dB scaling.

    Args:
        pathname: Path of the audio file to load.

    Returns:
        The log-mel spectrogram as a NumPy array.  Callers in this file
        treat axis 1 as the time axis.
    """
    sampling_rate = 32000
    hop_length = 192
    fmax = None
    n_mels = 32
    n_fft = 1024

    # Resample every clip to the same rate so frame counts are comparable.
    y, _ = librosa.load(pathname, sr=sampling_rate)
    # Drop leading/trailing silence; the trim indices are not needed.
    y, _ = librosa.effects.trim(y)

    # Stretch for low time-scale (for 32 n_mels and 64 time restriction).
    # `rate` is passed by keyword: newer librosa releases make it
    # keyword-only, and older releases accept the keyword as well.
    y = librosa.effects.time_stretch(y, rate=2.0)

    # Amplitudes of STFT
    stft = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                               window='hann', center=True,
                               pad_mode='reflect'))

    # Center frequency of each FFT bin, needed by the perceptual weighting
    freqs = librosa.core.fft_frequencies(sr=sampling_rate, n_fft=n_fft)
    # Perceptually weight the spectrogram - better visual
    # NOTE(review): librosa documents the first argument as a *power*
    # spectrogram; `stft*2` doubles the magnitude instead of squaring it
    # (`stft**2`).  Left unchanged so the generated features stay the same -
    # confirm the intent before changing.
    stft = librosa.perceptual_weighting(stft*2, freqs, ref=1.0, amin=1e-10,
                                        top_db=99.0)

    # Convert into a mel-scaled spectrogram
    # (S is supplied directly, so no further STFT is computed here)
    mel_spect = librosa.feature.melspectrogram(S=stft, sr=sampling_rate,
                                               n_mels=n_mels, fmax=fmax)

    # Convert mel-scaled spectrogram into log-mel spectrogram
    log_mel_spect = librosa.core.power_to_db(mel_spect)

    return np.asarray(log_mel_spect)


# Get data in the form of a file from .wav files (preferred)
def get_data(pathname, training=True):
    """Write the spectrograms of every .wav file in a folder to a text file.

    Each clip is run through ``pre_process``, padded or cropped to exactly
    64 time frames, transposed so that each row is one time frame, and
    appended to the output file with ``np.savetxt``.  A blank line separates
    consecutive clips.

    Args:
        pathname: Folder that is searched (non-recursively) for ``*.wav``.
        training: If true the output goes to ``Audio.train``, otherwise to
            ``Audio.test`` (both in the current working directory).

    Note:
        Clips are written in the order returned by ``glob.glob``;
        ``get_labels`` relies on the same ordering to keep rows aligned.
    """
    time_restriction = 64  # fixed number of time frames per clip
    file_list = glob.glob(os.path.join(pathname, '*.wav'))
    out_name = 'Audio.train' if training else 'Audio.test'

    # The context manager closes the file even if a clip fails to process.
    with open(out_name, 'w') as data_f:
        for i, wav_path in enumerate(file_list):
            print("%04d / %d | %s" % (i + 1, len(file_list), wav_path))

            spectrogram = pre_process(wav_path)

            n_frames = spectrogram.shape[1]
            if time_restriction >= n_frames:
                # Too short: extend the time axis on the right, filling
                # each row with that row's minimum value.
                pad_amount = time_restriction - n_frames
                spectrogram = np.pad(spectrogram, ((0, 0), (0, pad_amount)),
                                     'minimum')
            else:
                # Too long: keep only the first `time_restriction` frames.
                spectrogram = spectrogram[:, :time_restriction]

            # One text row per time frame
            np.savetxt(data_f, spectrogram.transpose())
            data_f.write('\n')


# Get data in the form of a file from .csv file (preferred)
def get_labels(pathname, training=True):
    """Write a one-hot label line for every .wav file in a folder.

    For each file the class name is looked up by file name in the
    module-level ``train_dict`` / ``test_dict``, converted to an index via
    ``label2index``, and written as a space-separated one-hot vector.

    Args:
        pathname: Folder that is searched (non-recursively) for ``*.wav``.
        training: If true, use ``train_dict`` and write ``Labels.train``;
            otherwise use ``test_dict`` and write ``Labels.test``.

    Raises:
        KeyError: If a file name is missing from the lookup table or its
            class name is missing from ``label2index``.

    Note:
        Lines are written in the order returned by ``glob.glob``, the same
        ordering ``get_data`` uses for the spectrograms.
    """
    file_list = glob.glob(os.path.join(pathname, '*.wav'))
    out_name = 'Labels.train' if training else 'Labels.test'

    # The context manager closes the file even if a lookup fails.
    with open(out_name, 'w') as labels_f:
        for wav_path in file_list:
            # Size the vector from the label table instead of a literal 41
            # so it always matches the number of known classes.
            label = np.zeros((len(label2index),))
            # basename() also copes with platform-specific path separators
            fname = os.path.basename(wav_path)
            categ = train_dict[fname] if training else test_dict[fname]
            label[label2index[categ]] = 1
            labels_f.write(' '.join([str(x) for x in label]) + '\n')


# Build the feature files first (Audio.train, Audio.test), then the label
# files (Labels.train, Labels.test); within each pair the training split is
# written before the test split.
for build_files in (get_data, get_labels):
    build_files(train_files_path)
    build_files(test_files_path, training = False)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  infer_datetime_format=infer_datetime_format)
