In [1]:
import numpy as np
import random
import cv2
import os

from tensorflow import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import *

from keras.utils import np_utils
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, roc_auc_score
import tensorflow as tf

### 讀取頻譜圖路徑

In [2]:
# --- Experiment switches -----------------------------------------------------
# Model input: 0 = log-mel-scale spectrogram, 1 = MFCC.
INPUT_SELECT = 0
# CNN architecture: 0 = VGG16, 1 = custom CNN.
CNN_SELECT = 1
# Image source offset: 0 = original, 2 = augmentation, 4 = 40 ms frame length.
IMG_SOURCE = 0
# Whether to plot the training history (loss curves) after each fold.
HIS_SHOW = False

# --- Training hyper-parameters -----------------------------------------------
epochs = 20
batch_size = 8
fold_num = 5

# --- Lookup tables -----------------------------------------------------------
# Directory where trained models are stored, one entry per input type.
MODEL_DIR = (
    "Model/ms_model",
    "Model/mfcc_model",
)
# Model file-name prefixes; entries alternate spectrogram / MFCC per image source.
MODEL_NAME = (
    "MS", "MFCC",
    "MS_AUG", "MFCC_AUG",
    "MS_40", "MFCC_40",
)
# Image root directories, laid out in the same order as MODEL_NAME.
IMG_PATH = (
    "ImageData/original/spectrogram",
    "ImageData/original/mfcc",
    "ImageData/augmentation/spectrogram",
    "ImageData/augmentation/mfcc",
    "ImageData/frameLen_40ms/spectrogram",
    "ImageData/frameLen_40ms/mfcc",
)

In [3]:
# Resolve the image root, the model output directory and the model name from
# the switches set in the configuration cell.
root_path = IMG_PATH[INPUT_SELECT+IMG_SOURCE]
model_save_dir = MODEL_DIR[INPUT_SELECT]
model_name = MODEL_NAME[INPUT_SELECT+IMG_SOURCE]

# One sub-directory per class. os.listdir() returns entries in arbitrary order,
# and the position in this list later becomes the integer class label, so sort
# it to keep the class-index mapping stable across runs and machines. Stray
# non-directory entries (e.g. OS metadata files) are skipped because the next
# cell lists the contents of every entry as a directory.
folder_names = sorted(
    entry for entry in os.listdir(root_path)
    if os.path.isdir(os.path.join(root_path, entry))
)
print(root_path, folder_names)

ImageData/original/spectrogram ['Blue', 'Classical', 'Country', 'Disco', 'EDM', 'Hiphop', 'Jazz', 'Metal', 'Pop', 'Reggae']


In [4]:
# folders[i] holds the paths of all files found in the i-th class directory.
folders = []
for folder in folder_names:
    class_dir = root_path+"/"+folder
    # os.listdir() order is arbitrary; sort so the sample order (and therefore
    # any seeded split of the data) is reproducible.
    file_names = [class_dir+"/"+file_name for file_name in sorted(os.listdir(class_dir))]
    folders.append(file_names)

# Number of files per class, in class order (used later to build the labels).
class_count_list = [len(files) for files in folders]
print("class count: ", len(folders))
print("class file count: ", class_count_list)

class count:  10
class file count:  [20, 20, 20, 20, 20, 20, 20, 20, 20, 20]


### 讀取頻譜圖

In [5]:
# Load every image, class by class, and resize it to the 224x224 network input.
Data = []
for folder in folders:
    for name in folder:
        spect = cv2.imread(name)  # original author's note: shape=(220, 449, 3)
        if spect is None:
            # cv2.imread signals failure by returning None instead of raising.
            # Fail loudly: silently skipping the file would desynchronise Data
            # from the per-class counts that the labels are built from.
            raise ValueError("cv2.imread could not read image file: " + name)
        spect = cv2.resize(spect, (224, 224))
        Data.append(spect)
# Stack into a single array with one image per row.
Data = np.array(Data)

In [6]:
# Sanity check of the loaded array: one entry per image, each resized to 224x224.
Data.shape

(200, 224, 224, 3)

### 製作Label

In [7]:
# One label row per image: the class index (stored as a float) repeated once
# for every file counted in that class, following the order of class_count_list.
Label = np.array([
    [float(class_idx)]
    for class_idx, n_files in enumerate(class_count_list)
    for _ in range(n_files)
])

### CNN層定義

In [8]:
def VGG16(inputs):
    """Build a VGG16-style feature extractor on top of ``inputs``.

    Five convolutional blocks (2, 2, 3, 3, 3 conv layers with 64, 128, 256,
    512, 512 filters). Every 3x3 ReLU convolution is followed by a
    ``Dropout(0.03)`` and every block ends with 2x2 max pooling. The result is
    flattened and passed through two 512-unit ReLU dense layers.

    Args:
        inputs: Tensor the first convolution is applied to.

    Returns:
        The output tensor of the last dense layer (no classification head).
    """
    # (number of filters, number of conv layers) for each block, in order.
    block_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    features = inputs
    for n_filters, n_convs in block_plan:
        for _ in range(n_convs):
            features = Conv2D(n_filters, (3,3), activation='relu', padding='same')(features)
            features = Dropout(0.03)(features)
        features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(features)

    features = Flatten()(features)
    for _ in range(2):
        features = Dense(512, activation='relu')(features)
    return features

In [9]:
def CONV(inputs):
    """Build the custom (lighter) CNN feature extractor on top of ``inputs``.

    Four ReLU convolutions with 64, 128, 256 and 512 filters, each followed by
    2x2 max pooling. The first convolution uses a (7, 3) kernel, the others
    (3, 3). The result is flattened and passed through a 512-unit and a
    256-unit ReLU dense layer.

    Args:
        inputs: Tensor the first convolution is applied to.

    Returns:
        The output tensor of the last dense layer (no classification head).
    """
    # (number of filters, kernel size) for each conv + pooling stage, in order.
    conv_plan = ((64, (7,3)), (128, (3,3)), (256, (3,3)), (512, (3,3)))

    features = inputs
    for n_filters, kernel in conv_plan:
        features = Conv2D(n_filters, kernel, activation='relu', padding='same')(features)
        features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(features)

    features = Flatten()(features)
    for units in (512, 256):
        features = Dense(units, activation='relu')(features)
    return features

In [10]:
def create_new_model(data, cnn_selection):
    """Assemble a fresh, uncompiled classifier.

    Args:
        data: A single sample; only its ``shape`` is read and used as the
            model's input shape.
        cnn_selection: 0 selects the VGG16 backbone, any other value selects
            the custom CONV backbone.

    Returns:
        A ``Model`` mapping the input to a softmax over one unit per entry of
        the module-level ``folders`` list.
    """
    # The number of output classes comes from the notebook-level `folders` list.
    n_classes = len(folders)

    inputs = Input(shape=data.shape)
    backbone = VGG16 if cnn_selection == 0 else CONV
    features = backbone(inputs)
    outputs = Dense(n_classes, activation='softmax')(features)
    return Model(inputs, outputs)

In [11]:
import matplotlib.pyplot as plt
def show_train_history(train_history, train, validation):
    """Plot a training curve together with its validation counterpart.

    Args:
        train_history: Object exposing a ``history`` mapping from metric name
            to a sequence of values (presumably the History returned by
            ``model.fit`` -- confirm against the caller).
        train: Key of the training curve; also used as the y-axis label.
        validation: Key of the validation curve.
    """
    curves = train_history.history
    # Training curve first, then validation, matching the legend order below.
    for metric_key in (train, validation):
        plt.plot(curves[metric_key])

    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

### 資料切割(Train and Test)

In [12]:
# Fixed seed so the stratified train/test split is identical on every run.
# (Previously a random seed was drawn and never recorded, so a given split --
# and the accuracies reported for it -- could not be reproduced.)
seed = 42

# Hold out 30% of the images as the test set, keeping class proportions.
X_train, X_test, y_train, y_test = train_test_split(Data, Label, test_size=0.3,
                                                    random_state=seed, stratify=Label)

# Pass num_classes explicitly: otherwise the one-hot width is inferred from the
# largest label present in y_test and could be narrower than the model output.
y_test_onehot = np_utils.to_categorical(y_test, num_classes=len(folders))

### Stratified K-fold

In [13]:
# Per-fold results. Note the units differ: validation accuracy is stored as a
# fraction (as returned by model.evaluate), test accuracy as a percentage.
VALIDATION_ACCURACY = []
VALIDATION_LOSS = []
TEST_ACCURACY=[]

# Width of every one-hot target; must equal the model's softmax size, which
# create_new_model also derives from len(folders).
num_classes = len(folders)

# Make sure the checkpoint directory exists before the first save.
os.makedirs(model_save_dir, exist_ok=True)

skf = StratifiedKFold(n_splits=fold_num)
fold_var = 1

for train_index, valid_index in skf.split(X_train, y_train):
    # Split the training portion into this fold's train / validation subsets.
    # num_classes is passed explicitly so the one-hot width does not depend on
    # which labels happen to be present in a subset.
    X_train_fold = X_train[train_index]
    y_train_fold = y_train[train_index]
    y_train_fold_onehot = np_utils.to_categorical(y_train_fold, num_classes=num_classes)
    
    X_val_fold = X_train[valid_index]
    y_val_fold = y_train[valid_index]
    y_val_fold_onehot = np_utils.to_categorical(y_val_fold, num_classes=num_classes)
    
    # Fresh model per fold so no weights carry over between folds.
    model = create_new_model(X_train_fold[0], CNN_SELECT)
    opt = adam_v2.Adam(learning_rate=0.0001)
    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

    # Keep only the checkpoint with the best validation accuracy of this fold.
    checkpoint_path = model_save_dir+"/"+model_name+"_fold"+str(fold_var)+".h5"
    checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                    monitor='val_accuracy', 
                                    save_best_only=True)
    callbacks_list = [checkpoint]
    
    history = model.fit(x=X_train_fold, y=y_train_fold_onehot,
                    validation_data=(X_val_fold, y_val_fold_onehot),
                    epochs=epochs, batch_size=batch_size,
                    callbacks=callbacks_list,
                    verbose=1) # show per-epoch training progress
    
    # Evaluate the best checkpoint rather than the weights of the last epoch.
    model.load_weights(checkpoint_path)
    if HIS_SHOW:
        show_train_history(history, 'loss', 'val_loss')
    
    results = model.evaluate(X_val_fold, y_val_fold_onehot, batch_size=batch_size, verbose=0)
    results = dict(zip(model.metrics_names,results))
    
    VALIDATION_ACCURACY.append(results['accuracy'])
    VALIDATION_LOSS.append(results['loss'])
    
    # scores is [loss, accuracy], following the metrics given to compile().
    scores = model.evaluate(X_test, y_test_onehot, batch_size=batch_size, verbose=0)
    print(str(fold_var)+" Fold: "+"Accuracy of testing data = {:2.2f}%".format(scores[1]*100.0))
    print()
    TEST_ACCURACY.append(scores[1]*100.0)
    
    # Reset Keras' global state before the next fold builds a new model.
    tf.keras.backend.clear_session()
    fold_var+=1

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
1 Fold: Accuracy of testing data = 56.67%

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
2 Fold: Accuracy of testing data = 50.00%

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20


Epoch 19/20
Epoch 20/20
3 Fold: Accuracy of testing data = 66.67%

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
4 Fold: Accuracy of testing data = 70.00%

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
5 Fold: Accuracy of testing data = 58.33%



In [14]:
# Dump the raw per-fold metrics collected by the cross-validation loop.
for _caption, _values in (
    ("validation accuracy: ", VALIDATION_ACCURACY),
    ("validation loss: ", VALIDATION_LOSS),
    ("test accuracy: ", TEST_ACCURACY),
):
    print(_caption, _values)

validation accuracy:  [0.7142857313156128, 0.5714285969734192, 0.6071428656578064, 0.6428571343421936, 0.6785714030265808]
validation loss:  [1.1626501083374023, 1.9327161312103271, 1.3477848768234253, 1.5044485330581665, 1.2531265020370483]
test accuracy:  [56.66666626930237, 50.0, 66.66666865348816, 69.9999988079071, 58.33333134651184]


In [15]:
# Arithmetic mean over the folds for each accuracy list, printed as stored
# (no unit conversion is applied here).
for _caption, _scores in (
    ("average validation accuracy: ", VALIDATION_ACCURACY),
    ("average test accuracy: ", TEST_ACCURACY),
):
    print(_caption, sum(_scores)/len(_scores))

average validation accuracy:  0.6428571462631225
average test accuracy:  60.333333015441895
