In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import codecs
import json
from sklearn.metrics import confusion_matrix
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, Conv2D, Flatten

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Folder names, one entry per genre (same genre order in every list)
ORIGINAL_FOLDERS = ["Dark_Forest/", "Full_On/", "Goa/", "Hi_Tech/"] # The folders with the original songs
CLIP_FOLDERS = ["Dark_Forest_Clips/", "Full_On_Clips/", "Goa_Clips/", "Hi_Tech_Clips/"] # The folders with the songs divided into clips
FEATURE_FOLDERS = ["Dark_Forest_Features/", "Full_On_Features/", "Goa_Features/", "Hi_Tech_Features/"] # The folders with the extracted features from the clips

# Variables for dividing the songs into clips
CLIP_TIME = 30                        # 30 second clips
SAMPLE_RATE = 44100                   # 44100 samples/sec
CLIP_SIZE = CLIP_TIME * SAMPLE_RATE   # = 1323000 samples/clip

# Number of songs per genre (= number of directory entries in each song folder)
n_dark_forest_songs = len(os.listdir(ORIGINAL_FOLDERS[0]))
n_full_on_songs = len(os.listdir(ORIGINAL_FOLDERS[1]))
n_goa_songs = len(os.listdir(ORIGINAL_FOLDERS[2]))
n_hi_tech_songs = len(os.listdir(ORIGINAL_FOLDERS[3]))

N_SONGS = n_dark_forest_songs + n_full_on_songs + n_goa_songs + n_hi_tech_songs

# Number of clips per genre (= number of directory entries in each clip folder)
n_dark_forest_clips = len(os.listdir(CLIP_FOLDERS[0]))
n_full_on_clips = len(os.listdir(CLIP_FOLDERS[1]))
n_goa_clips = len(os.listdir(CLIP_FOLDERS[2]))
n_hi_tech_clips = len(os.listdir(CLIP_FOLDERS[3]))

N_CLIPS = n_dark_forest_clips + n_full_on_clips + n_goa_clips + n_hi_tech_clips

# Number of feature files. Counted from the feature folders themselves because
# the feature matrices get exactly one row per file found in FEATURE_FOLDERS;
# reusing N_CLIPS would leave uninitialised rows (or overflow the matrices)
# whenever the clip and feature folders are out of sync.
N_FEATURES = sum(len(os.listdir(feature_folder)) for feature_folder in FEATURE_FOLDERS)

In [None]:
# Size the feature matrices from one example feature file of the first genre.
folder = FEATURE_FOLDERS[0]
filenames = os.listdir(folder)

# Parse the example JSON feature file into a dictionary.
# The context manager closes the file handle as soon as parsing is done.
with open(folder + filenames[0], 'r', encoding='utf-8') as feature_file:
    dictionary = json.load(feature_file)

# Allocate all matrices. np.empty leaves the contents uninitialised; every row
# is expected to be overwritten when the feature files are loaded.
# Librosa Tempo (one scalar per clip)
matrix_librosatempo = np.empty(N_FEATURES)

# MFCC
rows, cols = np.array(dictionary["mfcc"]).shape
matrix_mfcc = np.empty((N_FEATURES, rows, cols))

# Melspectrogram
rows, cols = np.array(dictionary["melspectrogram"]).shape
matrix_melspec = np.empty((N_FEATURES, rows, cols))

# Chroma STFT
rows, cols = np.array(dictionary["chroma_stft"]).shape
matrix_chroma = np.empty((N_FEATURES, rows, cols))

# Spectral Centroid
rows, cols = np.array(dictionary["spectral_centroid"]).shape
matrix_centroid = np.empty((N_FEATURES, rows, cols))

# Kalman Tempo (one scalar per clip)
matrix_kalmantempo = np.empty(N_FEATURES)

# Genre label (one scalar per clip)
matrix_genres = np.empty(N_FEATURES)

# Sanity check of the allocated shapes
print(matrix_librosatempo.shape)
print(matrix_kalmantempo.shape)
print(matrix_mfcc.shape)
print(matrix_melspec.shape)
print(matrix_chroma.shape)
print(matrix_centroid.shape)

In [None]:
count = 0         # Index of the next free row in the feature matrices

# Iterate over the genre folders in order, then over the feature files in each
for folder in FEATURE_FOLDERS:
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and everything derived from it) reproducible between runs.
    filenames = sorted(os.listdir(folder))
    for file in filenames:
        # Parse the JSON feature file; the context manager closes the handle
        # so that loading many files does not leak file descriptors.
        with open(folder + file, 'r', encoding='utf-8') as feature_file:
            dictionary = json.load(feature_file)

        # Insert features into row `count` of each matrix
        matrix_librosatempo[count] = dictionary["librosa_tempo"]
        matrix_mfcc[count] = dictionary["mfcc"]
        matrix_melspec[count] = dictionary["melspectrogram"]
        matrix_chroma[count] = dictionary["chroma_stft"]
        matrix_centroid[count] = dictionary["spectral_centroid"]
        matrix_kalmantempo[count] = dictionary["kalman_tempo"]
        matrix_genres[count] = dictionary["genre"]

        # Advance to the next row and report progress every 25 files
        count += 1
        if count % 25 == 0:
            print(count)


In [None]:
n_bins = 40

# Select each genre's tempos through the stored genre labels instead of
# hard-coded per-folder clip counts, so the split stays correct when clips are
# added or removed.
# Assumes the genre ids in the feature files are 0 = Dark Forest, 1 = Full On,
# 2 = Goa, 3 = Hi Tech (the mapping the sample-count cells of this notebook use).
matrix_librosatempo_df = matrix_librosatempo[matrix_genres == 0]
matrix_librosatempo_fo = matrix_librosatempo[matrix_genres == 1]
matrix_librosatempo_go = matrix_librosatempo[matrix_genres == 2]
matrix_librosatempo_ht = matrix_librosatempo[matrix_genres == 3]

# alpha < 1 keeps overlapping histograms visible instead of hiding earlier ones
plt.hist(matrix_librosatempo_df, bins=n_bins, alpha=0.6);
plt.hist(matrix_librosatempo_fo, bins=n_bins, alpha=0.6);
plt.hist(matrix_librosatempo_go, bins=n_bins, alpha=0.6);
plt.hist(matrix_librosatempo_ht, bins=n_bins, alpha=0.6);
plt.legend(["Dark Forest", "Full On", "Goa", "Hi Tech"]);
plt.title("Distribution of librosa BPM");
plt.xlabel("BPM");
plt.ylabel("Amount of Clips");

# CNN with Mel spectrogram

In [None]:
def shuffle_in_unison(a, b):
    """Shuffle two arrays in place along their first axis with one shared permutation.

    The state of NumPy's global RNG is captured once and restored before each
    shuffle, so both arrays consume the same random stream and row i of `a`
    still belongs to row i of `b` afterwards.

    Parameters:
        a, b: arrays with the same first-dimension length; both are modified
              in place.

    Returns:
        The tuple (a, b), i.e. the same (now shuffled) objects that were passed in.

    Raises:
        AssertionError: if the first dimensions differ.
    """
    assert a.shape[0] == b.shape[0], "First dimensions of matrix must be the same"

    snapshot = np.random.get_state()
    for array in (a, b):
        # Restoring the snapshot is a no-op for the first array and rewinds
        # the generator for the second one.
        np.random.set_state(snapshot)
        np.random.shuffle(array)
    return (a, b)

In [None]:
# One-hot encoding of the genre labels
Y_org = to_categorical(matrix_genres)

# Cut every clip into non-overlapping windows along the last axis: fewer data
# per classification, but more samples to train the network on.
window_size = 200
n_windows = matrix_melspec.shape[2] // window_size  # trailing columns that do not fill a window are dropped
if n_windows == 0:
    raise ValueError("window_size is larger than the last dimension of matrix_melspec")

# Collect all windows and concatenate once (repeated vstack re-copies the
# growing array every iteration). Window k of all clips forms one contiguous
# group of rows, so the labels are Y_org repeated once per window.
window_starts = range(0, n_windows * window_size, window_size)
X = np.concatenate(
    [matrix_melspec[:, :, start:start + window_size] for start in window_starts],
    axis=0,
)
Y = np.tile(Y_org, (n_windows, 1))

# Standardise every (row, column) position over all samples.
# Broadcasting applies the 2-D statistics to each sample without materialising
# repeated 3-D copies.
mean = np.mean(X, axis=0)
std = np.std(X, axis=0)
# Positions that are constant over all samples have std == 0; divide by 1
# there so they become 0 instead of NaN/inf.
std_safe = np.where(std > 0, std, 1.0)

# Subtract the mean and divide by the standard deviation
X = (X - mean) / std_safe

# Append the trailing channel axis expected by Conv2D: (samples, rows, window_size, 1)
X = X[..., np.newaxis]
print(X.shape)
print(Y.shape)

In [None]:
# Report how many windowed samples each genre contributes.
# The class id is the position of the largest entry in each one-hot label row.
genres_after_windowing = np.argmax(Y, axis=1)
print("After windowing/spliting")
genre_prefixes = (
    "Samples of Dark Forest ",
    "Samples of Full On ",
    "Samples of Goa ",
    "Samples of Hi Tech ",
)
for genre_id, prefix in enumerate(genre_prefixes):
    print(prefix + str(np.count_nonzero(genres_after_windowing == genre_id)))

In [None]:
# Shuffle features and labels with one shared permutation.
# Seeding NumPy's global RNG first makes the permutation, and therefore the
# split derived from the shuffled arrays, identical on every fresh run
# (for a given row order of X and Y).
# NOTE: shuffle_in_unison shuffles in place, so X_shuffle / Y_shuffle are the
# very same array objects as X / Y; re-running this cell shuffles them again.
# NOTE(review): windows cut from the same clip can end up in different splits,
# which may inflate the measured accuracy; consider splitting by clip.
SHUFFLE_SEED = 42
np.random.seed(SHUFFLE_SEED)
X_shuffle, Y_shuffle = shuffle_in_unison(X, Y)

In [None]:
# Cumulative split points over the shuffled samples:
# first 60 % -> training, next 20 % -> test (used as validation data during
# training), last 20 % -> final hold-out set.
n_split = int(Y.shape[0] * 0.6)   # boundary between train and test data
n_split2 = int(Y.shape[0] * 0.8)  # boundary between test and final data

# Cut features and labels at the same two boundaries
X_train, X_test, X_final = np.split(X_shuffle, [n_split, n_split2])
Y_train, Y_test, Y_final = np.split(Y_shuffle, [n_split, n_split2])

# Report the size of each data set
for subset, purpose in (
    (X_train, " samples to train on"),
    (X_test, " samples to test on"),
    (X_final, " samples for final validation"),
):
    print(str(subset.shape[0]) + purpose)

In [None]:
# Build the CNN: two 3x3 convolutions (64 then 32 filters, ReLU), flattened
# into a 4-way softmax over the genres. Input is one single-channel
# 128 x window_size mel-spectrogram window.
model = Sequential([
    Conv2D(64, kernel_size=3, activation="relu", input_shape=(128,window_size,1)),
    Conv2D(32, kernel_size=3, activation="relu"),
    Flatten(),
    Dense(4, activation="softmax"),
])

# Compile with Adam and categorical cross-entropy (labels are one-hot),
# tracking accuracy as the performance metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model for 3 epochs on the training split; the test split is
# passed to Keras as validation data, the final split is not used here.
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=3)

In [None]:
# Evaluate on the final hold-out split, which was not passed to model.fit
n_final = X_final.shape[0]

# Predicted genre = index of the highest class score per sample
prediction = model.predict(X_final)
prediction_genre = prediction.argmax(axis=1)

# True genre = index of the 1 in each one-hot label row
real_genre = Y_final.argmax(axis=1)

# Accuracy = share of samples whose predicted genre equals the true genre
n_correct = np.count_nonzero(real_genre == prediction_genre)
final_acc = n_correct / n_final
print("The Overall final Accuracy is: " + str(final_acc*100) + "%")

In [None]:
# Row-normalised confusion matrix (each row = one true genre, sums to 1).
# Passing `labels` keeps the matrix 4x4 even if a genre is missing from the
# hold-out set, so the tick labels below always line up with the cells.
labels = ["Dark Forest", "Full On", "Goa", "Hi Tech"]
cm = confusion_matrix(real_genre, prediction_genre,
                      labels=list(range(len(labels))), normalize="true")

# Display the confusion matrix
fig = plt.figure(figsize=(8, 6));
ax = fig.add_subplot(111);
cax = ax.matshow(cm);

# Write the value of every cell on top of it (column j = predicted, row i = true)
for (i, j), z in np.ndenumerate(cm):
    ax.text(j, i, '{:0.2f}'.format(z), ha='center', va='center',
            bbox=dict(boxstyle='round', facecolor='white', edgecolor='0.3'))

plt.title('Confusion matrix of the classifier');
# Pin the ticks to the cell centres before labelling them; relying on the
# default locator (and padding the label list) can shift or drop labels.
tick_positions = np.arange(len(labels))
ax.set_xticks(tick_positions);
ax.set_xticklabels(labels);
ax.set_yticks(tick_positions);
ax.set_yticklabels(labels);
plt.xlabel('Predicted');
plt.ylabel('True');
plt.show();

In [None]:
# Report how many samples of each genre the final hold-out set contains
print("Samples of genre in final validation Data")
final_prefixes = (
    "Samples of Dark Forest ",
    "Samples of Full On ",
    "Samples of Goa ",
    "Samples of Hi Tech ",
)
for genre_id, prefix in enumerate(final_prefixes):
    print(prefix + str(np.count_nonzero(real_genre == genre_id)))

In [None]:
# Persist the trained model to "CNN/melspec_200"
# (presumably named after the mel-spectrogram input and window_size = 200).
model.save("CNN/melspec_200")

In [None]:
# Reload the saved model from "CNN/melspec_200"; this rebinds `model`,
# replacing whatever model object the name referred to before.
model = load_model("CNN/melspec_200")