In [10]:
folder = '/home/daansan/David/MusicDetection/data2'

# Load libraries

import numpy as np
import scipy
from scipy.ndimage import convolve1d
from scipy.io.wavfile import read, write
import matplotlib.pyplot as plt
import os
import sys
import math

import librosa
import librosa.display

In [6]:
base_dir = '/tmp/music_and_voice'

train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')

# Directory with our training cat/dog pictures
train_music_dir = os.path.join(train_dir, 'music')
train_voice_dir = os.path.join(train_dir, 'voice')

# Directory with our validation cat/dog pictures
validation_music_dir = os.path.join(validation_dir, 'music')
validation_voice_dir = os.path.join(validation_dir, 'voice')

In [11]:
dataset_path = folder
json_path = "data.json"


def save_chroma(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=25, segment_length = 0.5):

    data = {
        "mapping": [],
        "labels": [],
        "chroma": []
    }

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(folder)):
            #print(dirpath)
            # ensure we're processing a genre sub-folder level
            #print(dataset_path)
            if dirpath != dataset_path:
                
                # save genre label (i.e., sub-folder name) in the mapping
                semantic_label = dirpath.split("/")[-1]
                if semantic_label[0] == '.':
                  continue
                
                data['mapping'].append(semantic_label)
                #print("\nProcessing: {}".format(semantic_label))

                # process all audio files in genre sub-dir
                for f in filenames:

                    # load audio file
                    file_path = os.path.join(dirpath, f)
                    sr, signal = read(file_path)
                    #signal, sr = librosa.load(file_path, sr=None)
                    #signal = signal[:,1]

                    y = signal[:,1].astype(np.float32)
                    signal = y/np.abs(y).max()/2

                    track_duration = signal.size/sr # measured in seconds
                    num_segments = int(track_duration / segment_length)
                    samples_per_track = sr * track_duration
                    samples_per_segment = int(samples_per_track / num_segments)
                    num_chroma_vectors_per_segment = math.ceil(samples_per_segment / hop_length)
                    #print(f, track_duration, num_mfcc_vectors_per_segment)

                    # process all segments of audio file
                    for d in range(num_segments):

                        # calculate start and finish sample for current segment
                        start = samples_per_segment * d
                        finish = start + samples_per_segment

                        # extract chroma
                        chroma = librosa.feature.chroma_cqt(y=signal[start:finish], sr=sr, hop_length=hop_length)
                        #print(signal[start:finish].shape)


                        ## extract mfcc
                        #mfcc = librosa.feature.mfcc(signal[start:finish], sr, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                        #mfcc = mfcc.T
                        
                        #print(chroma)
                        


                        data["chroma"].append(chroma.tolist())
                        data["labels"].append(i-1)
                        #print("{}, segment:{}".format(file_path, d+1))

    # save MFCCs to json file
    # with open(json_path, "w") as fp:
    #   json.dump(data, fp, indent=4)    
    return data           

In [12]:
data = save_chroma(dataset_path, json_path)

  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]


In [None]:
with open(json_path, "w") as fp:
       json.dump(np.array(data['chroma'][0]).tolist(), fp, indent=4)    

In [24]:
data['chroma'][0] = np.array(data['chroma'][0])

In [40]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt

DATA_PATH = "data.json"

def load_data_file(data):
    """Loads training dataset from json file.
        :param data_path (str): Path to json file containing data
        :return X (ndarray): Inputs
        :return y (ndarray): Targets
    """

    # with open(data_path, "r") as fp:
    #     data = json.load(fp)

    
    for i in range(len(data['chroma'])):
      data['chroma'][i] = np.array(data['chroma'][i])
    X = data["chroma"]
    y = np.array(data["labels"])
    return X, y


# def load_data(data_path):
#     """Loads training dataset from json file.
#         :param data_path (str): Path to json file containing data
#         :return X (ndarray): Inputs
#         :return y (ndarray): Targets
#     """

#     with open(data_path, "r") as fp:
#         data = json.load(fp)

#     X = np.array(data["chroma"])
#     y = np.array(data["labels"])
#     return X, y


def plot_history(history):
    """Plots accuracy/loss for training/validation set as a function of the epochs
        :param history: Training history of model
        :return:
    """

    fig, axs = plt.subplots(2)

    # create accuracy sublpot
    axs[0].plot(history.history["accuracy"], label="train accuracy")
    axs[0].plot(history.history["val_accuracy"], label="test accuracy")
    axs[0].set_ylabel("Accuracy")
    axs[0].legend(loc="lower right")
    axs[0].set_title("Accuracy eval")

    # create error sublpot
    axs[1].plot(history.history["loss"], label="train error")
    axs[1].plot(history.history["val_loss"], label="test error")
    axs[1].set_ylabel("Error")
    axs[1].set_xlabel("Epoch")
    axs[1].legend(loc="upper right")
    axs[1].set_title("Error eval")

    plt.show()


def prepare_datasets(test_size, validation_size, data):
    """Loads data and splits it into train, validation and test sets.
    :param test_size (float): Value in [0, 1] indicating percentage of data set to allocate to test split
    :param validation_size (float): Value in [0, 1] indicating percentage of train set to allocate to validation split
    :return X_train (ndarray): Input training set
    :return X_validation (ndarray): Input validation set
    :return X_test (ndarray): Input test set
    :return y_train (ndarray): Target training set
    :return y_validation (ndarray): Target validation set
    :return y_test (ndarray): Target test set
    """

    # load data
    X, y = load_data_file(data)

    # create train, validation and test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=validation_size)

    # add an axis to input sets
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape):
    """Generates CNN model
    :param input_shape (tuple): Shape of input set
    :return model: CNN model
    """

    # build network topology
    model = keras.Sequential()

    # 1st conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 2nd conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 3rd conv layer
    model.add(keras.layers.Conv2D(32, (2, 2), activation='relu'))
    model.add(keras.layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # flatten output and feed it into dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer
    model.add(keras.layers.Dense(10, activation='softmax'))

    return model


def predict(model, X, y):
    """Predict a single sample using the trained model
    :param model: Trained classifier
    :param X: Input data
    :param y (int): Target
    """

    # add a dimension to input data for sample - model.predict() expects a 4d array in this case
    X = X[np.newaxis, ...] # array shape (1, 130, 13, 1)

    # perform prediction
    prediction = model.predict(X)

    # get index with max value
    predicted_index = np.argmax(prediction, axis=1)

    print("Target: {}, Predicted label: {}".format(y, predicted_index))

In [None]:
# get train, validation, test splits
X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

# create network
input_shape = (X_train.shape[1], X_train.shape[2], 1)
model = build_model(input_shape)

# compile model
optimiser = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimiser,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# train model
history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=32, epochs=30)

# plot accuracy/error for training and validation
plot_history(history)

# evaluate model on test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)

# pick a sample to predict from the test set
X_to_predict = X_test[100]
y_to_predict = y_test[100]

# predict sample
predict(model, X_to_predict, y_to_predict)

NameError: ignored

In [53]:
X, y = load_data_file(data)

X[0] = np.hstack(X[0])

# create train, validation and test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.15)

# add an axis to input sets
# X_train = X_train[..., np.newaxis]
# X_validation = X_validation[..., np.newaxis]
# X_test = X_test[..., np.newaxis]

In [1]:
X.shape

NameError: ignored

In [41]:
X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2, data)

TypeError: ignored

In [39]:
len(data['chroma'])

232

In [35]:
np.array(data["chroma"][0])

array([[0.7288013 , 0.75724936, 0.83448064, 0.81660644, 0.76865371,
        0.62699085, 0.5883597 , 0.56742175, 0.58321921, 0.59538077,
        0.59700023, 0.60815506, 0.61369623, 0.54508944, 0.60310508,
        0.56123704, 0.49720041, 0.55792345, 0.63186636, 0.55324167,
        0.58548555, 0.65506838, 0.75310949, 0.8313863 , 0.82142502,
        0.82192932, 0.57785087, 0.65255588, 0.51218425, 0.63614095,
        0.47345992, 0.55523374, 0.44203873, 0.48525513, 0.40575816,
        0.55065949, 0.43363816, 0.4333998 , 0.47565911, 0.36319805,
        0.30794827, 0.40454989, 0.31559102, 0.30951695, 0.34457948,
        0.32261498, 0.32748413],
       [0.35089718, 0.41995519, 0.53942848, 0.51436999, 0.34981604,
        0.16274504, 0.10951279, 0.09213558, 0.1154954 , 0.2092654 ,
        0.24597213, 0.27875345, 0.24463225, 0.21947437, 0.237796  ,
        0.2444938 , 0.24799479, 0.2758819 , 0.27929768, 0.24682381,
        0.25737952, 0.31469437, 0.46132661, 0.62721023, 0.57127657,
        0.60028

In [34]:
data["chroma"][0]

[[0.7288013022389698,
  0.7572493567955018,
  0.834480636331949,
  0.8166064447882895,
  0.7686537063127805,
  0.6269908500166702,
  0.5883597001008098,
  0.5674217538980294,
  0.5832192064957951,
  0.5953807736285751,
  0.597000234069787,
  0.6081550612255971,
  0.6136962331968159,
  0.5450894366834673,
  0.6031050797302732,
  0.5612370426491379,
  0.49720040695923945,
  0.5579234510777252,
  0.6318663578176883,
  0.5532416735825664,
  0.5854855492988712,
  0.6550683827275349,
  0.7531094877424219,
  0.8313862994021818,
  0.8214250179752771,
  0.8219293152885315,
  0.5778508724149634,
  0.6525558827902895,
  0.5121842548483573,
  0.6361409534561175,
  0.4734599241973687,
  0.5552337411153029,
  0.4420387335310675,
  0.4852551315761424,
  0.4057581582932399,
  0.5506594900528115,
  0.43363815756208596,
  0.4333998031069389,
  0.475659114704717,
  0.36319805198229344,
  0.30794826504551037,
  0.4045498855986784,
  0.31559101952157786,
  0.30951695009468316,
  0.3445794800208758,
  0.322

In [None]:
X_train[0]

array([[list([1.0, 1.0, 0.921459132091177, 0.8425096004325365, 0.7818169595536819, 0.7557599637918447, 0.787966509468703, 0.8433780645275688, 0.9585288606699726, 0.9195850638424878, 0.7148726266539676, 0.4889599594745931, 0.3691376531544215, 0.2772072399916381, 0.2532487847401799, 0.26018992194895635, 0.21863379558174703, 0.17250024972372768, 0.14091192561580715, 0.13975586792258657, 0.12831543639491927, 0.12908232476477982, 0.11593384140353465, 0.11417479471416508, 0.11349497794515002, 0.119837864267342, 0.12409849080151195, 0.12233008270820675, 0.1220512860966902, 0.11208661079799202, 0.14714336075493023, 0.22214714358976428, 0.2897846101369739, 0.3212523841715654, 0.37551383407667155, 0.39007963418547276, 0.35778753020434995, 0.3081538897242404, 0.24908210761817048, 0.22701136888951903, 0.22423208266462558, 0.24011447346695886, 0.248208671780013, 0.26470507181851405, 0.2291580034188743])],
       [list([0.3998285841353844, 0.3855430102240854, 0.3278434701431348, 0.2758905435862441, 

In [None]:
input_shape = (X_train.shape[1], X_train.shape[2], 1)

In [None]:
input_shape

(12, 1, 1)

In [None]:
X_train[0]

array([[list([0.6234039599703074, 0.37661644085578255, 0.3168832563395006, 0.2866493357834577, 0.27497397868156964, 0.31080917022958426, 0.37008477731291994, 0.45613058497212944, 0.5550712329491228, 0.27396993219754584, 0.2237661429465293, 0.22881507273820695, 0.3005191231947147, 0.33773862129780147, 0.4122238045008005, 0.45708267093886684, 0.5551373985651784, 0.5730173510311065, 0.5858487519357544, 0.6497741377870693, 0.6799904377205382, 0.5907583018354723, 0.6180051059508019, 0.5788365141365021, 0.5919438801139771, 0.5798578608850993, 0.540347656280515, 0.5501907579850024, 0.4828329647671563, 0.41423602889967326, 0.40401073101168683, 0.43242404547096547, 0.47394704150825, 0.6088859271643858, 0.7450792991283116, 0.9090209816552306, 0.31257257888469514, 0.35591137744396617, 0.4937941694281021, 0.5586421815389166, 0.5624105937669147, 0.5464021457266203, 0.5498884808381371, 0.563428348720709, 0.688885223746597])],
       [list([0.13096763171262008, 0.09427673960543258, 0.0892936863325013