In [6]:

import gc
import os
import ast
import sys
import configparser

import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow.keras
from tensorflow.keras import backend as K

from audiomanip.audiostruct import AudioStruct
# from audiomanip.audiomodels import ModelZoo
from audiomanip.audioutils import AudioUtils
from audiomanip.audioutils import MusicDataGenerator
# import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense, Reshape
from tensorflow.keras.layers import Dropout, GRU, ELU, Permute
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import Conv1D, Conv2D
from tensorflow.keras.layers import MaxPooling1D, MaxPooling2D
from tensorflow.keras.layers import GlobalMaxPooling1D, GlobalMaxPooling2D
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
# from keras import backend as K
from tensorflow.keras.layers import LeakyReLU

In [7]:
# --- Data source -------------------------------------------------------------
# Dataset root; the cached songs.npy / genres.npy files are read from and
# written to this same folder by the loading cell.
GTZAN_FOLDER = 'Data/genres_original/'
# Loader switch read by the loading cell: 'AUDIO_FILES' or 'NPY'.
data_type = 'AUDIO_FILES'
# NOTE(review): TYPE takes the same two values (AUDIO_FILES, NPY) but no visible
# cell reads it -- confirm whether it can be removed in favour of data_type.
TYPE = 'NPY'
# When True, the loading cell saves the decoded arrays as .npy files.
SAVE_NPY = False
# NOTE(review): not read by any visible cell.
SAVE_SPEC_IMG = True
# SAVE_SPEC_IMG_DIR = ../out/

# --- Output locations --------------------------------------------------------
MODEL_PATH = 'modelkwonta.h5'
TENSORBOARD_LOG_DIR = 'tensorboard_log/'

# --- Model -------------------------------------------------------------------
# Architecture selector; the model-selection code knows '1D', '2D' and 'RNN'.
CNN_TYPE = 'RNN'
# Input shape without the trailing channel axis (the model cell appends it).
input_shape = (1280, 128)

# --- Training ----------------------------------------------------------------
# NOTE(review): EXEC_TIMES is not read by any visible cell.
EXEC_TIMES = 1
batch_size = 32
epochs = 100
# Optimizer selector: 'sgd' or 'adam'.
OPTIMIZER = 'adam'

In [9]:
# Report which loader is about to run.
print("data_type: {0}".format(data_type))

# Reject unknown loader names up front; nothing is loaded in that case.
if data_type not in ('AUDIO_FILES', 'NPY'):
    raise ValueError('Argument Invalid: The options are AUDIO_FILES or NPY for data_type')

# Cache files live next to the dataset.
songs_npy_path = GTZAN_FOLDER + 'songs.npy'
genres_npy_path = GTZAN_FOLDER + 'genres.npy'

if data_type == 'NPY':
    # Fast path: read the arrays cached by an earlier run.
    songs = np.load(songs_npy_path)
    genres = np.load(genres_npy_path)
else:
    # Read the audio files through the project loader.
    song_rep = AudioStruct()
    songs, genres = song_rep.getdata()

    # Optionally cache the arrays as npy files to read faster next time.
    if SAVE_NPY:
        np.save(songs_npy_path, songs)
        np.save(genres_npy_path, genres)

print("Original songs array shape: %s" % (songs.shape,))
print("Original genre array shape: %s" % (genres.shape,))

data_type: AUDIO_FILES
Data/genres_original/metal/metal.00000.wav
Data/genres_original/metal/metal.00001.wav
Data/genres_original/metal/metal.00002.wav
Data/genres_original/metal/metal.00003.wav
Data/genres_original/metal/metal.00004.wav
Data/genres_original/metal/metal.00005.wav
Data/genres_original/metal/metal.00006.wav
Data/genres_original/metal/metal.00007.wav
Data/genres_original/metal/metal.00008.wav
Data/genres_original/metal/metal.00009.wav
Data/genres_original/metal/metal.00010.wav
Data/genres_original/metal/metal.00011.wav
Data/genres_original/metal/metal.00012.wav
Data/genres_original/metal/metal.00013.wav
Data/genres_original/metal/metal.00014.wav
Data/genres_original/metal/metal.00015.wav
Data/genres_original/metal/metal.00016.wav
Data/genres_original/metal/metal.00017.wav
Data/genres_original/metal/metal.00018.wav
Data/genres_original/metal/metal.00019.wav
Data/genres_original/metal/metal.00020.wav
Data/genres_original/metal/metal.00021.wav
Data/genres_original/metal/meta

In [8]:
 val_acc = []
test_history = []
test_acc = []
test_acc_mvs = []

best_acc = 0
best_cnn = None


  # Tensorboard Callback Definition
K.set_learning_phase(1) #set learning phase


tensorflow.keras.backend.clear_session()
tbCallBack = tensorflow.keras.callbacks.TensorBoard(log_dir=TENSORBOARD_LOG_DIR,
 histogram_freq=3,
 write_grads=True,
 write_graph=True,
 write_images=True)

# Fixed seed so the train/validation/test partition is identical on every run;
# without it the reported accuracies are not comparable between runs.
SPLIT_SEED = 42

# Split the dataset into training and test (10% held out, stratified by genre).
X_train, X_test, y_train, y_test = train_test_split(
    songs, genres, test_size=0.1, stratify=genres, random_state=SPLIT_SEED)

# Split the remaining songs into training and validation
# (1/6 of the remaining 90% = 15% of all songs).
X_train, X_Val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=1/6, stratify=y_train, random_state=SPLIT_SEED)

# Convert every split with AudioUtils.splitsongs_melspect. Splitting into
# train/validation/test happens first, on the original arrays.
# NOTE(review): presumably this cuts each song into fixed-size mel-spectrogram
# windows -- confirm the window size matches `input_shape`.
X_Val, y_val = AudioUtils().splitsongs_melspect(X_Val, y_val, CNN_TYPE)
X_test, y_test = AudioUtils().splitsongs_melspect(X_test, y_test, CNN_TYPE)
X_train, y_train = AudioUtils().splitsongs_melspect(X_train, y_train, CNN_TYPE)

# Construct the model
# Only the CRNN that is assembled inline right after this check is available:
# the ModelZoo import is commented out at the top of the notebook, so the former
# '1D' / '2D' branches could only fail with a NameError, and the inline CRNN
# overwrites `cnn` in any case. Fail early with a clear message instead of
# training the wrong architecture or crashing on an undefined name.
if CNN_TYPE != 'RNN':
    raise ValueError(
        "Unsupported CNN_TYPE %r: only 'RNN' is available because ModelZoo "
        "(the '1D' and '2D' builders) is not imported" % (CNN_TYPE,))
    
num_classes = 10
drop_ratio = 0.1


def build_crnn(spectrogram_shape, n_classes, dropout):
    """Build the convolutional-recurrent genre classifier.

    Four Conv2D -> ELU -> MaxPooling2D -> Dropout blocks are followed by two
    GRU layers and a softmax output layer.

    Args:
        spectrogram_shape: input shape; only the first two entries are used and
            a single channel axis is appended, so both a 2-tuple and an
            already-extended 3-tuple are accepted.
        n_classes: number of output classes.
        dropout: dropout rate applied after every convolutional block.

    Returns:
        An uncompiled ``tensorflow.keras`` ``Model``.
    """
    # (filters, pooling window) of each convolutional block; all kernels are 3x3.
    conv_blocks = [
        (64, (2, 2)),
        (128, (2, 2)),
        (128, (3, 3)),
        (128, (4, 4)),
    ]

    # Build the channel-extended shape locally instead of overwriting the global
    # `input_shape`, so re-running the cell does not keep appending axes.
    model_input_shape = (*spectrogram_shape[:2], 1)
    print('input_shape: ', model_input_shape)

    inputs = Input(model_input_shape)
    x = inputs
    for idx, (filters, pool) in enumerate(conv_blocks, start=1):
        x = Conv2D(filters, kernel_size=(3, 3), name='conv%d' % idx)(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=pool, strides=pool, name='pool%d' % idx)(x)
        x = Dropout(dropout, name='dropout%d' % idx)(x)

    # Flatten the two spatial axes into one sequence axis for the GRUs. The
    # sequence length is inferred (-1) rather than hard-coded; the original
    # target was (25, 128) for the configured (1280, 128) input.
    last_filters = conv_blocks[-1][0]
    x = Reshape((-1, last_filters))(x)

    # GRU block 1, 2, output
    x = GRU(32, return_sequences=True, name='gru1')(x)
    x = GRU(32, return_sequences=False, name='gru2')(x)
    x = Dropout(0.3)(x)

    # The classes are mutually exclusive and the model is trained with
    # categorical cross-entropy, so the output must be a softmax distribution
    # (it used to be independent sigmoids).
    outputs = Dense(n_classes, activation='softmax', name='output')(x)
    return Model(inputs=[inputs], outputs=[outputs])


cnn = build_crnn(input_shape, num_classes, drop_ratio)
cnn.summary()



# Summarise the prepared splits and the number of model parameters.
print("\nTrain shape: %s" % (X_train.shape,))
print("Validation shape: %s" % (X_Val.shape,))
print("Test shape: %s\n" % (X_test.shape,))
print("Size of the CNN: {0}\n".format(cnn.count_params()))

# Optimizers
# `learning_rate` replaces the deprecated `lr` keyword. The deprecated `decay`
# keyword is expressed as an explicit schedule instead.
if OPTIMIZER == 'sgd':
    # lr0 / (1 + 1e-5 * step): the same per-step decay that `decay=1e-5` applied.
    sgd_schedule = tensorflow.keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.001, decay_steps=1, decay_rate=1e-5)
    opt = tensorflow.keras.optimizers.SGD(
        learning_rate=sgd_schedule, momentum=0.9, nesterov=True)
elif OPTIMIZER == 'adam':
    opt = tensorflow.keras.optimizers.Adam(learning_rate=5e-3)
else:
    # Without this branch `opt` would be undefined, or silently reused from an
    # earlier run of the cell.
    raise ValueError(
        "Unsupported OPTIMIZER %r: expected 'sgd' or 'adam'" % (OPTIMIZER,))

# Compile the model: categorical cross-entropy loss, the optimizer selected
# above, and accuracy as the only reported metric.
compile_options = dict(
    loss=tensorflow.keras.losses.categorical_crossentropy,
    optimizer=opt,
    metrics=['accuracy'],
)
cnn.compile(**compile_options)

# Early stop: end training once val_loss has not improved for 2 consecutive
# epochs. min_delta=0, verbose=0 and mode='auto' are the callback's defaults,
# so they are not spelled out.
earlystop = tensorflow.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)



NameError: name 'songs' is not defined

In [None]:
# Fit the model; `earlystop` ends training when val_loss stops improving.
history = cnn.fit(
    X_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_Val, y_val),
    callbacks=[earlystop])

# tf.keras 2.x stores the 'accuracy' metric under 'accuracy'; older releases
# used 'acc'. Look the key up instead of hard-coding 'acc', which raises
# KeyError on current versions.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
print('history: ', history.history[acc_key])

# evaluate() returns [loss, accuracy] for the compiled loss and metric.
score = cnn.evaluate(X_test, y_test, verbose=0)
score_val = cnn.evaluate(X_Val, y_val, verbose=0)




In [None]:
# Majority Voting System
# The whole cell is at top level: the indentation left over from a removed loop
# made it an IndentationError.
pred_org_values = cnn.predict(X_test)
pred_label_values = np.argmax(pred_org_values, axis=1)
mvs_truth, mvs_res = AudioUtils().voting(np.argmax(y_test, axis=1), pred_label_values)
acc_mvs = accuracy_score(mvs_truth, mvs_res)
# NOTE(review): this AUC is computed from the raw cnn.predict outputs, not from
# the voted labels, although the variable and the printed label mention MVS.
mvs_roc_auc = roc_auc_score(y_test, pred_org_values)

# Save metrics
val_acc.append(score_val[1])
test_acc.append(score[1])
test_history.append(history)
test_acc_mvs.append(acc_mvs)

# Print metrics
print('Test accuracy:', score[1])
print('Test accuracy for Majority Voting System:', acc_mvs)
print('Test auc_roc_score for Majority Voting System:', mvs_roc_auc)

# Print the confusion matrix for Voting System
cm = confusion_matrix(mvs_truth, mvs_res)
print(cm)

# tf.keras 2.x stores the accuracy curves under 'accuracy' / 'val_accuracy';
# older releases used 'acc' / 'val_acc'.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
val_acc_key = 'val_' + acc_key

# Records Best Model
# The `best_cnn is None` test guarantees best_history is defined even when the
# first run scores 0.
if best_cnn is None or best_acc < acc_mvs:
    best_acc = acc_mvs
    best_cnn = cnn
    best_history = history
    print('best_history:', best_history.history[acc_key])
    print('best_acc changed:', best_acc)

# Print the statistics
print("Validation accuracy - mean: %s, std: %s" % (np.mean(val_acc), np.std(val_acc)))
print("Test accuracy - mean: %s, std: %s" % (np.mean(test_acc), np.std(test_acc)))
print("Test accuracy MVS - mean: %s, std: %s" % (np.mean(test_acc_mvs), np.std(test_acc_mvs)))

# summarize history for accuracy
print('best_acc:', best_acc)
fig, ax = plt.subplots()
ax.plot(best_history.history[acc_key])
ax.plot(best_history.history[val_acc_key])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
# The second curve is the validation split, not the test split.
ax.legend(['train', 'validation'], loc='upper left')
plt.show()

# summarize history for loss
fig, ax = plt.subplots()
ax.plot(best_history.history['loss'])
ax.plot(best_history.history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'], loc='upper left')
plt.show()

# Save the model
best_cnn.save(MODEL_PATH)

# Free memory. After this the loading cell must be re-run before the dataset
# can be split again.
del songs
del genres
gc.collect()
    