In [None]:
import os
from os import listdir

import librosa
import librosa.display
import numpy as np

In [None]:
# Directory that holds the audio files to process. The empty string is only a
# placeholder: set it to a real directory path before running the cells below.
audio_dir = ''

In [None]:
# Function to extract important features from the audio files
def feature_extraction(dir):
    """Build one fixed-length feature vector per audio file in ``dir``.

    Every entry of ``dir`` is loaded with librosa and summarised by averaging
    each feature over time. The per-file vector is the concatenation, in this
    order, of: MFCCs (``n_mfcc=40``), chromagram, mel-scaled spectrogram,
    spectral contrast and tonal centroid (tonnetz) features.

    Parameters
    ----------
    dir : str
        Directory containing the audio files. A trailing path separator is
        optional. (The name shadows the ``dir`` builtin; it is kept so
        existing keyword callers keep working.)

    Returns
    -------
    numpy.ndarray
        One row per file, ordered by sorted file name. An empty directory
        yields an empty array.
        NOTE(review): presumably 193 values per row with librosa's default
        settings - confirm against the installed librosa version.
    """
    features = []

    # os.listdir returns names in arbitrary order; sorting makes row i map to
    # the same file on every run, so labels can be aligned reliably.
    for audio_file in sorted(listdir(dir)):
        # Join with the platform separator so `dir` works with or without a
        # trailing slash (plain string concatenation silently built bad paths).
        audio_path = os.path.join(dir, audio_file)

        # Convert audio file into a time series numpy array
        X, sample_rate = librosa.load(audio_path, res_type='kaiser_fast')

        # Magnitude of the short-time Fourier transform, shared by the chroma
        # and spectral-contrast features below.
        stft = np.abs(librosa.stft(X))

        # Mel-frequency cepstral coefficients
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        # Chromagram
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        # Mel-scaled spectrogram; the audio must be passed as `y=` because
        # newer librosa releases reject it as a positional argument.
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        # Spectral contrast
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        # Tonal centroid features, computed on the harmonic component only
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)

        features.append(np.concatenate((mfccs, chroma, mel, contrast, tonnetz), axis=0))

    return np.array(features)

In [None]:
from sklearn.preprocessing import LabelEncoder

# `keras.utils.np_utils` is a legacy module path that newer Keras releases no
# longer provide. Prefer the public location and fall back to the legacy one
# so the notebook imports on both old and new installations.
try:
    from keras.utils import to_categorical
except ImportError:
    from keras.utils.np_utils import to_categorical

In [None]:
# X data for the model: the feature matrix built from the files in audio_dir.
X_data = feature_extraction(audio_dir)
# y data for the model. The empty list is a placeholder: fill it with the
# labels before running the cells below.
# NOTE(review): presumably one label per row of X_data, in the same file
# order that feature_extraction uses - confirm when populating.
y_data = []

In [None]:
# Learn the label -> integer id mapping first; the fitted encoder is kept so
# predicted ids can be mapped back to labels later on.
encoder = LabelEncoder().fit(y_data)
# One-hot encodes the y data from the integer ids
y_data = to_categorical(encoder.transform(y_data))

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out half of the samples for testing. The split is shuffled, so
# random_state pins the shuffle and a fresh Restart & Run All reproduces the
# same train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.5, shuffle=True, random_state=42
)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise the features. The scaling statistics are learned from the
# training set only and then re-used for the test set: re-fitting on the test
# set would scale the two sets differently and leak test statistics into the
# evaluation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D
from keras.callbacks import EarlyStopping

In [None]:
# Size the network from the data instead of hard-coding it, so the model
# still builds when the feature vector length or the number of distinct
# labels changes.
n_features = X_train.shape[1]  # length of one feature vector
n_classes = y_train.shape[1]   # width of the one-hot encoded labels

# Fully connected classifier: three ReLU hidden layers with increasing
# dropout, followed by a softmax over the classes.
model = Sequential()

model.add(Dense(193, input_shape=(n_features,), activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(n_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

# NOTE(review): `val_loss` is only reported when fit() is given validation
# data, and a patience of 100 epochs cannot trigger within a 100-epoch run -
# confirm the intended monitor/patience values.
early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=100, verbose=1, mode='auto')

In [None]:
# Training the model
# A slice of the training data is held out as validation data so that the
# `val_loss` metric monitored by `early_stop` actually exists; without it the
# callback has nothing to monitor. The returned History is kept for later
# inspection of the training curves.
history = model.fit(X_train, y_train, batch_size=256, epochs=100,
                    validation_split=0.1,
                    callbacks=[early_stop])

In [None]:
# Prediction
# `Sequential.predict_classes` has been removed from Keras. For a softmax
# output, the predicted class id is the index of the highest probability,
# which is what predict_classes returned.
preds = np.argmax(model.predict(X_test), axis=-1)

In [None]:
# Map the predicted integer class ids back to the original labels using the
# encoder that was fitted on y_data.
# Note: this overwrites `preds`, so re-run the prediction cell before running
# this cell a second time.
preds = encoder.inverse_transform(preds)