# <font color='#12284A' style="font-family: andale mono, monospace; font-size: 28px; text-transform: uppercase">Drum Audio Sample Classifier</font><a class='anchor' id='top'></a>


In [57]:
# Imports modules
# TensorFlow may throw optimization errors. This should not affect this page.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import models, layers
import librosa
from IPython.display import Audio
import os
import warnings
warnings.filterwarnings("ignore")

# Working directory of the running kernel. The saved-model path below is built from it,
# so the kernel must be started in the directory that contains saved_model/.
abs_path = os.getcwd()

# Custom layers

class TripleChannel(layers.Layer):
    """Custom Keras layer that repeats its input three times along the last axis.

    The input tensor is concatenated with itself on axis -1, so the size of the
    last dimension of the output is three times that of the input.
    """

    def call(self, x):
        # Three references to the same tensor, joined along the final axis
        copies = [x] * 3
        return tf.concat(copies, axis=-1)

# Loads the saved deep learning model from <working directory>/saved_model/.
# TripleChannel is passed through custom_objects so Keras can rebuild the custom layer.
# NOTE(security): safe_mode=False permits unsafe (lambda) deserialization, which can
# execute arbitrary code while loading. Only load model files from a trusted source.
model = tf.keras.models.load_model(
    os.path.join(abs_path, "saved_model", "model_raw_last.keras"),
    custom_objects={"TripleChannel": TripleChannel},
    safe_mode=False
)
# Re-compiles the loaded model with the Adam optimizer, sparse categorical
# cross-entropy loss, and accuracy as the tracked metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Number of audio samples each clip is padded or truncated to before prediction
# (one second of audio at the 22050 Hz rate used when loading files).
LENGTH = 22050

# Converts an audio file location into the input structure expected by the model
def sample_preparer(location):
    """Load an audio file and shape it into one fixed-length raw-waveform sample.

    The file is loaded with librosa at 22050 Hz, leading and trailing silence is
    trimmed (top_db=50), and the signal is zero-padded at the end or truncated so
    that it is exactly LENGTH samples long. No STFT/spectrogram is computed here;
    the raw waveform is what gets returned.

    Parameters
    ----------
    location
        Path of the audio file; passed straight to librosa.load.

    Returns
    -------
    list
        Single-element list holding a NumPy array of shape (LENGTH, 1). Wrapping
        the list in np.array() gives a batch of shape (1, LENGTH, 1).
    """
    # librosa.load already resamples to the requested rate, so a second
    # resampling step to the same rate would be a no-op and is omitted.
    y, _ = librosa.load(location, sr=22050)
    y, _ = librosa.effects.trim(y, top_db=50)

    if len(y) < LENGTH:
        # Too short: append zeros until the clip is LENGTH samples long
        y = np.pad(y, (0, LENGTH - len(y)))
    else:
        # Long enough: keep only the first LENGTH samples
        y = y[:LENGTH]

    # Adds a trailing axis of size 1 and wraps the sample in a list
    return [y.reshape((LENGTH, 1))]

After running the previous code block, please run the following cell and provide an input file path.

In [69]:
# Asks the user to input the path of the audio file needing classification

# Available file paths are listed below; copy one into the input prompt upon request
# Clap: dataset/samples_reserved/input_clap.wav
# Closed Hat: dataset/samples_reserved/input_closedhat.wav
# Kick: dataset/samples_reserved/input_kick.wav
# Open Hat: dataset/samples_reserved/input_openhat.wav
# Snare: dataset/samples_reserved/input_snare.wav

# dataset/additional_reserved/snares/577131__deadrobotmusic__dr-snare-129.wav
# dataset/additional_reserved/snares/577170__deadrobotmusic__dr-snare-015.wav

location = input("Input Audio File Path: ")

# Removes surrounding whitespace and any single or double quotes wrapped around
# the file path (pasted paths are often quoted and may carry stray spaces)
location = location.strip().strip('"\'').strip()

# Fails early with a clear message instead of a low-level audio decoding error
if not os.path.isfile(location):
    raise FileNotFoundError(f"Audio file not found: {location!r}")

# Loads the clip for playback in the notebook
y, sr = librosa.load(location)

Audio(data=y, rate=sr)

Input Audio File Path:  dataset/additional_reserved/snares/577170__deadrobotmusic__dr-snare-015.wav


In [70]:
# Audio file is fed into the model, and a prediction for classification is returned.
# np.array() turns the single-sample list into a batch with one entry.
prediction = model.predict(np.array(sample_preparer(location)))

# Unneeded data is removed, only the index of the highest predicted result is required
type_num = np.argmax(prediction, axis=1)

# Class names by index — presumably the label order used during training; verify
drum_types = ['Clap', 'Closed Hat', 'Kick', 'Open Hat', 'Snare']

# Numeric Drum Classification is converted to String name of drum and outputted.
# type_num is a one-element array, so its element is indexed explicitly: calling
# int() directly on an array with ndim > 0 is deprecated since NumPy 1.25.
print(f"\nDrum Sample is: {drum_types[int(type_num[0])]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

Drum Sample is: Snare
