In [1]:
%%time
import numpy as np 
import pandas as pd
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import time
from tqdm import tqdm, tqdm_notebook; tqdm.pandas()
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.model_selection import train_test_split
from filtr import Filtr
import random

# Machine Learning
import tensorflow as tf
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints, optimizers, layers
from keras.layers import (Dense, Bidirectional, CuDNNLSTM, ELU,
                          Dropout, LeakyReLU, Conv1D, BatchNormalization)
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


Wall time: 54.2 s


# Data prep
- load each audio file into a list that is named after the category (ex. Kick = [], snare = [])
- read each sample from each list and convert it to audio data
- convert the audio data into a mel spectrogram and also add its category number as well
- add the category list into a training list and shuffle it
- seperate the features from the labels and pickle the data for the model

In [2]:
# load audio files
Data_path = "D:\\Documents\\Atom\\myrepos\\Filtr\\Filtr\\Audio"
Categories = ['Kick','Snare']
os.chdir("D:\\Documents\\Atom\\myrepos\\Filtr\\Filtr")
f = Filtr(Data_path,dest = None)

snare = []
kick = []

for category in Categories:
    path = os.path.join(Data_path,category)
    files = f.files(path)
    os.chdir(path)
    print(category)
    for file in files:
        if ('DK' or 'Kick' or 'kick') in file:
            full_path = os.path.join(path,file)
            kick.append(full_path)
        elif ("DS" or 'snare') in file:
            full_path = os.path.join(path,file)
            snare.append(full_path) 

Kick
Snare


In [107]:
# read each sample as audio data
import soundfile as sf
import sys

def trim(y,samples = 44100):
    if 0 < len(y): # workaround: 0 length causes error
        y, _ = librosa.effects.trim(y)
    return y

def audio_data(training_names,type = None,category = None):
    audio_info = []
    error = 0
    
    
    for name in training_names:
        try:
            # loading the data
            
            data, samplerate = sf.read(name)
            #print(f"Data before: {data}\n")
            
            data = data[:, 0]
            #print(f"Data after: {data}")
            
            # optional trim silence
            data = trim(data)
            
            audio_info.append(data)
            #print(f"filename: {name}")
            
        except:
            #print(e)
            error += 1
            #print(f"there was an error loading this file: {name}")
            #print("Unexpected error:", sys.exc_info()[0])
            #print()
            #raise
            pass
            
    print(f"there was an error loading {error} files")    
    return np.array(audio_info)

In [108]:
%%time
kick_data = audio_data(kick,category = 'kick')
snare_data = audio_data(snare,category = 'snare')

there was an error loading 171 files
there was an error loading 382 files
Wall time: 21.2 s


In [109]:
print(len(kick_data))
print(len(snare_data))

2976
4584


18109

In [3]:
# convert audio data into a Mel spectrogram 
def mel(audio,sr = 44100,size = 150):
    spectrograms = []
    error = []
    try:
        for data in audio[:5]:
            mel = librosa.feature.melspectrogram(y = data, sr = sr)
            mel = librosa.power_to_db(mel,ref=np.max).astype(np.float32)
            #mel.resize((size,size))
            spectrograms.append(mel) 
    except:
        error.append(data)
        pass
    if len(error) > 0:
        print(f"there was {len(error)} errors. this is the data \n{error}")
    return spectrograms   

In [4]:
%%time
kmels = mel(kick_data)
smels = mel(snare_data)
#test =[[1,2,3,4,5],['test']]

NameError: name 'kick_data' is not defined

In [502]:
#librosa.display.specshow(smels[0][0],y_axis = 'mel',x_axis = 's',sr=44100)

In [138]:
def normalize(img):
    '''
    Normalizes an array 
    (subtract mean and divide by standard deviation)
    '''
    eps = 0.001
    if np.std(img) != 0:
        img = (img - np.mean(img)) / np.std(img)
    else:
        img = (img - np.mean(img)) / eps
    return img

def train_data(audio_data,categories,data_limit = None):
    '''
    This function takes in audio data as a list and the categories as a list and returns a list of training data anolng with
    its labels.
    
    -------------------
    parameters
    -------------------
        audio_data: the audio data list is an array of each of the categories' audio data.
            for example:
                L1 = [1,2,3,4] -> category1
                L2 = [5,6,7,8] -> category2
                train_data(audio_data = [L1,L2])
        
        categories: a list of categories used to determine class labels. The categories must be passed in as the same order
        as the audio data passed in and it also must be the same category as used when .
            for example:
            train_data([L1,L2],[category1,category2])
        
        data_limit: The amount of trainig data to add for each category. If a number isn't provided it will use the 
        minimum between all of the audio data lists.
                
    
    '''
    train = []
    x = []
    for data in audio_data:
        x.append(len(data))
    
    print(x)    
    data_limit = min(x) if data_limit is None else data_limit
    print(f"Data limit: {data_limit}")

    #print(audio_data[1][0][1])
    for datas in audio_data:
        for category in categories:
            y = 0
            for data in datas:
                data = normalize(data)
                
                class_num = categories.index(category)
                train.append([data,class_num])
                if y == data_limit:
                    break
                y += 1
            if y == data_limit:
                break
                    
    random.seed(66)                
    random.shuffle(train)                
    return train

In [139]:
%%time
training = train_data([kmels,smels],['kick','snare'])

[5, 5]
Data limit: 5
Wall time: 7.28 ms


In [15]:
#librosa.display.specshow(X[1],y_axis = 'mel',x_axis = 's',sr=44100)

In [141]:
training


[array([[ 2.2676501,  1.9019651,  1.009496 , ..., -0.8894738, -1.2905978,
         -1.1797109],
        [ 2.1392572,  1.8092521,  0.9670504, ..., -1.2245531, -1.4794313,
         -1.8999516],
        [ 1.9215908,  1.5356799,  0.9195003, ..., -1.6863644, -1.8874991,
         -2.0193913],
        ...,
        [ 1.8183211,  1.5977492,  1.3123639, ..., -2.0250356, -2.0459478,
         -2.0471504],
        [ 1.9065216,  1.7477198,  1.5279608, ..., -1.9491813, -1.9058753,
         -2.018233 ],
        [ 1.9437186,  1.8265319,  1.582386 , ..., -1.9550165, -1.9999135,
         -2.023254 ]], dtype=float32), 0]

In [142]:
X = []
y = []

for feat,label in training:
    #print(feat)
    X.append(np.resize(feat,(128,128)))
    y.append(label)
    

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [78]:
import pickle
os.chdir("D:\\Documents\\Atom\\myrepos\\Filtr\\Filtr")
with open('X.pickle','wb') as file:
    pickle.dump(X,file)
    
with open('y.pickle','wb') as file:
    pickle.dump(y,file)

