In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import cv2
import numpy as np
import os



import keras
from keras.applications import DenseNet121
from keras.models import Model, Sequential
from keras import layers
from keras.callbacks import EarlyStopping
from keras.metrics import AUC
# from keras.metrics import f1_score
from keras.metrics import Precision
from sklearn.metrics import accuracy_score, recall_score
from sklearn.preprocessing import LabelEncoder


import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import F1Score, AUC, Precision, Recall
from tensorflow.keras.layers import MaxPooling2D, Dropout



from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, LabelBinarizer




from PIL import Image

# DenseNet121


In [3]:
import os
import librosa
import numpy as np
from skimage.transform import resize

def get_mel_spectrogram(file_directory, sr=22050, desired_shape=(244, 244, 3)):
    """
    Get mel-spectrogram (db) with desired shape
    Input: file_directory: str, sr: int, desired_shape: tuple
    Output: mel_spec: numpy array
    """
    src, sc = librosa.load(file_directory, sr=22050)
    mel_gram = librosa.feature.melspectrogram(y=src, sr=22050)

    # Convert to decibels
    mel_spec_db = librosa.power_to_db(mel_gram, ref=np.max)

    # Resize to desired shape
    mel_spec_resized = resize(mel_spec_db, desired_shape)

    return mel_spec_resized

def process_audio_files(audio_dir):
    ''' Process audio files in the specified directory and return log-mel spectrograms and labels.

    Parameters:
    ----------
    audio_dir: str
        Path to the directory containing audio files.

    Returns:
    -------
    tuple
        A tuple containing a list of log-mel spectrograms and corresponding labels.
    '''

    # List to store log-mel spectrograms and corresponding labels
    data = []
    labels = []

    # Iterate over the subdirectories (genres)
    for genre_folder in os.listdir(audio_dir):
        genre_path = os.path.join(audio_dir, genre_folder)
        if os.path.isdir(genre_path):  # Check if it's a directory
            # Iterate over all audio files in the genre directory
            for filename in os.listdir(genre_path):
                if filename.endswith('.wav'):  # Assuming all audio files are in .wav format
                    file_path = os.path.join(genre_path, filename)

                    # Compute log-mel spectrogram for the current audio file
                    log_mel_spectrogram = get_mel_spectrogram(file_path)

                    # Append log-mel spectrogram to data list
                    data.append(log_mel_spectrogram)

                    # Append label to labels list (genre folder name)
                    labels.append(genre_folder)

    return data, labels

# Directory containing audio files
audio_dir = '/content/drive/MyDrive/Music_Project/music_data/VNTM3'

# Process audio files and retrieve log-mel spectrograms and labels
data, labels = process_audio_files(audio_dir)


In [None]:
# def process_audio_files(audio_dir, desired_shape=(244, 244, 3)):
#     ''' Process precomputed log mel spectrogram PNG files in the specified directory and return them along with labels.

#     Parameters:
#     ----------
#     audio_dir: str
#         Path to the directory containing precomputed log mel spectrogram PNG files.
#     desired_shape: tuple
#         Desired shape for the log mel spectrograms.

#     Returns:
#     -------
#     tuple
#         A tuple containing a list of log mel spectrograms and corresponding labels.
#     '''

#     # List to store log mel spectrograms and corresponding labels
#     data = []
#     labels = []

#     # Iterate over the subdirectories (genres)
#     for genre_folder in os.listdir(audio_dir):
#         genre_path = os.path.join(audio_dir, genre_folder)
#         if os.path.isdir(genre_path):  # Check if it's a directory
#             # Iterate over all precomputed log mel spectrogram PNG files in the genre directory
#             for filename in os.listdir(genre_path):
#                  # Assuming all precomputed log mel spectrogram files are in .png format
#                   file_path = os.path.join(genre_path, filename)

#                   # Load precomputed log mel spectrogram PNG file
#                   log_mel_spectrogram = cv2.imread(file_path)

#                   # Convert image to numpy array and normalize
#                   log_mel_spectrogram_array = log_mel_spectrogram / 255.0

#                   # Resize the array to the desired shape
#                   log_mel_spectrogram_array_resized = cv2.resize(log_mel_spectrogram_array, (desired_shape[1], desired_shape[0]))

#                   # Append log mel spectrogram to data list
#                   data.append(log_mel_spectrogram_array_resized)

#                   # Append label to labels list (genre folder name)
#                   labels.append(genre_folder)
#     return data, labels


# audio_dir = r'/content/drive/MyDrive/Music_Project/music_data/mel_images'
# data, labels = process_audio_files(audio_dir)


In [4]:
# Split data into training and combined validation-evaluation set
x_train, x_combined, y_train, y_combined = train_test_split(data, labels, test_size=0.2, random_state=42, shuffle=True, stratify =labels)

# Split combined set into validation and evaluation sets
x_validate, x_test, y_validate, y_test = train_test_split(x_combined, y_combined, test_size=0.5, random_state=42, shuffle=True, stratify =y_combined)


In [5]:
label_encoder = LabelEncoder()
label_binarizer = LabelBinarizer()
# Fit and transform training labels
y_train_one_hot = label_binarizer.fit_transform(y_train)
y_validate_one_hot = label_binarizer.transform(y_validate)
y_test_one_hot = label_binarizer.transform(y_test)
y_test_encoded = label_encoder.fit_transform(y_test)
# Fit and transform evaluation labels
y_test_encoded = label_encoder.fit_transform(y_test)
y_test_one_hot = label_binarizer.transform(y_test)

In [None]:
# x_test = np.array(x_test, dtype=np.float32)
# x_train = np.array(x_train, dtype = np.float32)
# x_validate = np.array(x_validate, dtype=np.float32)

In [6]:
X_test = np.array(x_test, dtype=np.float32)
X_train = np.array(x_train, dtype = np.float32)
X_validate = np.array(x_validate, dtype=np.float32)

In [7]:
base_model = DenseNet121(weights='imagenet',
                         include_top=False,
                         input_shape=(244,244,3))

early = EarlyStopping(monitor='val_loss',
                      min_delta=0.0001,
                      mode='min',
                      verbose=0,
                      patience=5,
                      baseline= None,
                      restore_best_weights= True)

top_model = Sequential([
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    Dropout(0.2),
    layers.Dense(5, activation='softmax')
])


model = Model(inputs=base_model.input,
              outputs=top_model(base_model.output))

# Freeze base layers to prevent retraining
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=["accuracy"])
#model.summary()

model.fit(X_train, y_train_one_hot,
          epochs=100,
          validation_data=(X_validate, y_validate_one_hot),
          callbacks=[early],
          shuffle=True,
          verbose=1,
          batch_size=32)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


<keras.src.callbacks.History at 0x7cf7ac745fc0>

In [8]:
predict = model.predict(X_test)


# Convert one-hot encoded predicted labels back to original labels
test_predictions_labels = label_encoder.inverse_transform(np.argmax(predict, axis=1))
# Convert one-hot encoded predicted labels back to original labels
test_predictions_labels_encoded = label_encoder.inverse_transform(np.argmax(predict, axis=1))


# Calculate AUC_ROC
auc_roc = AUC()
auc_roc.update_state(y_test_one_hot, predict)
print("AUC ROC:", auc_roc.result().numpy())

# Calculate F1 Score
f1 = F1Score()
f1.update_state(y_test_one_hot, predict)
print("F1 Score:", np.mean(f1.result().numpy()))

# Calculate Precision
pre = Precision()
pre.update_state(y_test_one_hot, predict)
print("Precision Score", pre.result().numpy())

# Accuracy Score
acc = accuracy_score(y_test, test_predictions_labels)
print("Accuracy Score", acc)

print("Recall Score:", recall_score(y_test_encoded, np.argmax(predict, axis=1), average='macro'))

AUC ROC: 0.998442
F1 Score: 0.9605526
Precision Score 0.9676113
Accuracy Score 0.96
Recall Score: 0.96


In [9]:
test_predictions_labels_encoded_list = test_predictions_labels_encoded.tolist()

In [10]:
# Print results
print("Sample\t\tTrue Label\tPredicted Label")
print("-" * 40)
for i in range(len(y_test)):
    print(f"{i+1}\t\t{y_test[i]}\t\t{test_predictions_labels_encoded_list[i]}")


Sample		True Label	Predicted Label
----------------------------------------
1		cheo		cheo
2		cailuong		cailuong
3		cheo		cheo
4		chauvan		chauvan
5		catru		catru
6		hatxam		hatxam
7		cheo		cheo
8		cailuong		cailuong
9		hatxam		hatxam
10		cailuong		cailuong
11		cailuong		cailuong
12		hatxam		hatxam
13		chauvan		chauvan
14		cheo		cheo
15		cheo		cheo
16		catru		catru
17		cheo		cheo
18		hatxam		hatxam
19		hatxam		chauvan
20		chauvan		chauvan
21		catru		catru
22		chauvan		chauvan
23		hatxam		hatxam
24		hatxam		hatxam
25		hatxam		hatxam
26		chauvan		chauvan
27		catru		catru
28		chauvan		chauvan
29		cheo		hatxam
30		cheo		cheo
31		hatxam		hatxam
32		chauvan		chauvan
33		hatxam		hatxam
34		hatxam		chauvan
35		cailuong		cailuong
36		cailuong		cailuong
37		catru		catru
38		chauvan		chauvan
39		cheo		cheo
40		chauvan		chauvan
41		catru		catru
42		hatxam		hatxam
43		cheo		cheo
44		cheo		cheo
45		chauvan		chauvan
46		hatxam		chauvan
47		hatxam		hatxam
48		hatxam		hatxam
49		cheo		cheo
50		cailuong	

In [None]:
# # Define the checkpoint filepath
# checkpoint_dir = '/content/drive/MyDrive/Music_Project/music_data/checkpoint/DenseNet121'
# os.makedirs(checkpoint_dir, exist_ok=True)

# # Create a ModelCheckpoint callback
# checkpoint_filepath = os.path.join(checkpoint_dir, 'Densenet')
# checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_filepath,
#     save_weights_only=True,  # Save only the model weights
#     monitor='val_loss',  # Monitor validation loss
#     mode='min',  # Save the model when validation loss is minimized
#     save_best_only=True  # Save only the best model
# )

# # Assuming you have already trained and evaluated your model
# # model.fit(train_data, epochs=10, validation_data=val_data, callbacks=[checkpoint_callback])

# # After training, save the entire model
# model.save('/content/drive/MyDrive/Music_Project/music_data/checkpoint/DenseNet121/densenet121_model_10s.h5')

# # Also, save only the weights of the model
# model.save_weights('/content/drive/MyDrive/Music_Project/music_data/checkpoint/DenseNet121/densenet121_weights_10s.h5')


In [None]:

# # Tải checkpoint đã lưu
# checkpoint_path = "/content/drive/MyDrive/Music_Project/music_data/checkpoint/DenseNet121/densenet121_weights.h5"
# model_path = r"/content/drive/MyDrive/Music_Project/music_data/checkpoint/DenseNet121/densenet121_model.h5"
# model = tf.keras.models.load_model(model_path)
# model.load_weights(checkpoint_path)

# # Đánh giá mô hình sau khi tải lại checkpoint

# predict = model.predict(x_test)


# # Convert one-hot encoded predicted labels back to original labels
# test_predictions_labels = label_encoder.inverse_transform(np.argmax(predict, axis=1))
# # Convert one-hot encoded predicted labels back to original labels
# test_predictions_labels_encoded = label_encoder.inverse_transform(np.argmax(predict, axis=1))


# # Calculate AUC_ROC
# auc_roc = AUC()
# auc_roc.update_state(y_test_one_hot, predict)
# print("AUC ROC:", auc_roc.result().numpy())

# # Calculate F1 Score
# f1 = F1Score()
# f1.update_state(y_test_one_hot, predict)
# print("F1 Score:", np.mean(f1.result().numpy()))

# # Calculate Precision
# pre = Precision()
# pre.update_state(y_test_one_hot, predict)
# print("Precision Score", pre.result().numpy())

# # Accuracy Score
# acc = accuracy_score(y_test, test_predictions_labels)
# print("Accuracy Score", acc)

# print("Recall Score:", recall_score(y_test_encoded, np.argmax(predict, axis=1), average='macro'))

In [None]:
# checkpoint_path = "/content/drive/MyDrive/Music_Project/music_data/checkpoint/DenseNet121/densenet121_weights.h5"
# model.load_weights(checkpoint_path)

# Late Fusion CNN

In [4]:
import librosa
def compute_melgram(audio_path, new_shape=(256, 1296)):


    # Mel-spectrogram parameters
    SR = 12000
    N_FFT = 2048
    N_MELS = 256
    HOP_LEN = 256
    DURA = 27.64  # to make it 1296 frames

    # Load audio file
    src, sr = librosa.load(audio_path, sr=SR)

    # Ensure the audio is of desired duration
    n_sample = src.shape[0]
    n_sample_fit = int(DURA * SR)
    if n_sample < n_sample_fit:  # if too short
        src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
    elif n_sample > n_sample_fit:  # if too long
        src = src[(n_sample - n_sample_fit) // 2:(n_sample + n_sample_fit) // 2]
    melgram = librosa.feature.melspectrogram(y=src, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT, n_mels=N_MELS)

    return melgram
def compute_stft(audio_path,new_shape=(256, 1296)):


    # STFT parameters
    SR = 22115
    N_FFT = 510
    HOP_LEN = 512

    # Load audio file
    src, sr = librosa.load(audio_path, sr=SR)

    # Compute STFT
    stft = librosa.stft(src, n_fft=N_FFT, hop_length=HOP_LEN)

    return stft

def compute_mfcc(audio_path,new_shape=(256, 1296)):


    # MFCC parameters
    SR = 22115
    N_MFCC = 256
    N_FFT = 510
    HOP_LEN = 512

    # Load audio file
    src, sr = librosa.load(audio_path, sr=SR)

    # Compute MFCC
    mfcc = librosa.feature.mfcc(y=src, sr=SR, n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LEN)
    return mfcc


def process_audio_files(audio_dir):
    stft_data = []
    mel_data = []
    mfcc_data = []
    labels = []

    max_frames = 1296  # Set a maximum number of frames
    n_freq_bins = 256  # Number of frequency bins in the Mel-spectrogram

    for genre_folder in os.listdir(audio_dir):
        genre_path = os.path.join(audio_dir, genre_folder)
        if os.path.isdir(genre_path):
            for filename in os.listdir(genre_path):
                if filename.endswith('.wav'):
                    file_path = os.path.join(genre_path, filename)
                    stft = compute_stft(file_path)
                    mel = compute_melgram(file_path)
                    mfcc = compute_mfcc(file_path)

                    # Upsample MFCCs
                    mfcc_upsampled = upsample_mfcc(mfcc, n_freq_bins)

                    # Pad or crop arrays to ensure consistent shape
                    stft = pad_or_crop_array(stft, max_frames)
                    mel = pad_or_crop_array(mel, max_frames)
                    mfcc_upsampled = pad_or_crop_array(mfcc_upsampled, max_frames)

                    stft_data.append(stft)
                    mel_data.append(mel)
                    mfcc_data.append(mfcc_upsampled)
                    labels.append(genre_folder)

    stft_data = np.array(stft_data)
    mel_data = np.array(mel_data)
    mfcc_data = np.array(mfcc_data)
    labels = np.array(labels)

    return stft_data, mel_data, mfcc_data, labels

# Function to upsample MFCCs
def upsample_mfcc(mfcc_data, n_freq_bins):
    n_mfcc, n_frames = mfcc_data.shape
    mfcc_upsampled = np.zeros((n_freq_bins, n_frames))

    # Compute interpolation factor for each MFCC coefficient
    factor = n_freq_bins // n_mfcc

    # Upsample each MFCC coefficient
    for i in range(n_mfcc):
        mfcc_upsampled[i*factor:(i+1)*factor, :] = mfcc_data[i]

    return mfcc_upsampled


def pad_or_crop_array(array, target_length):
    if array.shape[1] < target_length:
        # Pad array with zeros along the time axis
        padding = target_length - array.shape[1]
        array = np.pad(array, ((0, 0), (0, padding)), mode='constant')
    elif array.shape[1] > target_length:
        # Crop array along the time axis
        array = array[:, :target_length]
    return array

audio_dir = r'/content/drive/MyDrive/Music_Project/music_data/VNTM3'
stft_data, mel_data, mfcc_data, labels = process_audio_files(audio_dir)

In [5]:
# Here you can continue with your splitting and reshaping steps as before
stft_data = stft_data.reshape(stft_data.shape[0], stft_data.shape[1], stft_data.shape[2], 1)
mel_data = mel_data.reshape(mel_data.shape[0], mel_data.shape[1], mel_data.shape[2], 1)
mfcc_data = mfcc_data.reshape(mfcc_data.shape[0], mfcc_data.shape[1], mfcc_data.shape[2], 1)


In [6]:
stft_data=stft_data.astype("float32")
mel_data =  mel_data.astype("float32")
mfcc_data =mfcc_data.astype("float32")

  stft_data=stft_data.astype("float32")


In [7]:

# Split the data into training and testing sets (80% train, 20% test)
stft_train, stft_test, mel_train, mel_test, mfcc_train, mfcc_test, labels_train, labels_test = train_test_split(
    stft_data, mel_data, mfcc_data, labels, test_size=0.2, random_state=42)

# Split the training data into training and validation sets (80% train, 20% validation)
stft_test, stft_val, mel_test, mel_val, mfcc_test, mfcc_val, labels_test, labels_val = train_test_split(
    stft_test, mel_test, mfcc_test, labels_test, test_size=0.5, random_state=42)


In [8]:
checkpoint_filepath = r'/content/drive/MyDrive/Music_Project/music_data/checkpoint/LateFusion'

saved_model_path = r'/content/drive/MyDrive/Music_Project/music_data/checkpoint/LateFusion'


In [9]:
from tensorflow.keras.layers import Dropout, Concatenate, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization

def late_fusion(model,stft_input_shape, melgram_input_shape,mfcc_input_shape):
    # Define Group1 Block
    input_stft = Input(shape=stft_input_shape)
    stft = Conv2D(20, (7, 7), strides=(2, 2), activation='relu', padding='same')(input_stft)
    stft = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(stft)
    stft = dropout_block(stft, units=42)
    stft = transition_layer(stft, filters=42)
    stft = dropout_block(stft, units=85)
    stft = transition_layer(stft, filters=85)
    stft = dropout_block(stft, units=170)
    stft = transition_layer(stft, filters=170)
    stft = dropout_block(stft, units=341)
    stft = GlobalAveragePooling2D()(stft)  # Adding Global Average Pooling


    # Define Group2 Block
    input_melgram = Input(shape=melgram_input_shape)
    melgram = Conv2D(20, (7, 7), strides=(2, 2), activation='relu', padding='same')(input_melgram)
    melgram = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(melgram)
    melgram = dropout_block(melgram, units=42)
    melgram = transition_layer(melgram, filters=42)
    melgram = dropout_block(melgram, units=85)
    melgram = transition_layer(melgram, filters=85)
    melgram = dropout_block(melgram, units=170)
    melgram = transition_layer(melgram, filters=170)
    melgram = dropout_block(melgram, units=341)
    melgram = GlobalAveragePooling2D()(melgram)
    # Define Group3 Block


    input_mfcc = Input(shape=mfcc_input_shape)
    mfcc = Conv2D(20, (7, 7), strides=(2, 2), activation='relu', padding='same')(input_mfcc)
    mfcc = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(mfcc)
    mfcc = dropout_block(mfcc, units=42)  # Using a function for dropout block
    mfcc = transition_layer(mfcc, filters=42)
    mfcc = dropout_block(mfcc, units=85)
    mfcc = transition_layer(mfcc, filters=85)
    mfcc = dropout_block(mfcc, units=170)
    mfcc = transition_layer(mfcc, filters=170)
    mfcc = dropout_block(mfcc, units=341)
    mfcc = GlobalAveragePooling2D()(mfcc)  # Adding Global Average Pooling

    # Concatenate outputs of all groups
    concatenated_output = Concatenate()([ stft, melgram,mfcc])

    # Global Average Pooling
    # Fully Connected Layer
    output = Dense(5, activation='softmax')(concatenated_output)

    # Define the model
    model = Model(inputs=[ input_stft, input_melgram,input_mfcc], outputs=output)
    if not os.path.exists(checkpoint_filepath + '/latefusion'):
        os.makedirs(checkpoint_filepath + '/latefusion')
    checkpoint1= tf.keras.callbacks.ModelCheckpoint(
    filepath= checkpoint_filepath + '/latefusion' + '/latefusion_{epoch:02d}_{val_accuracy:.4f}.weights.h5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
    )

    return model,checkpoint1

def dropout_block(input_layer, units):
    x = Dense(units, activation='relu')(input_layer)
    x = Dropout(0.1)(x)
    return x

def transition_layer(input_layer, filters):
    x = Conv2D(filters, (1, 1), activation='relu')(input_layer)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    return x


In [10]:
# Instantiate the late fusion model using provided input shapes
mfcc_input_shape = (256, 1296, 1)  # Example shape, adjust according to your actual data
stft_input_shape = (256, 1296, 1)  # Example shape, adjust according to your actual data
melgram_input_shape = (256, 1296, 1)  # Example shape, adjust according to your actual data
model1 = tf.keras.Sequential()
model1,checkpoint1 = late_fusion(model1,stft_input_shape ,melgram_input_shape, mfcc_input_shape,)

# Compile the model
model1.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
model1.summary()



Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 256, 1296, 1)]       0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 256, 1296, 1)]       0         []                            
                                                                                                  
 input_3 (InputLayer)        [(None, 256, 1296, 1)]       0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 128, 648, 20)         1000      ['input_1[0][0]']             
                                                                                              

In [11]:
from keras.callbacks import EarlyStopping

early = EarlyStopping(monitor='loss',
    patience= 5,
    verbose= 0,
    mode='auto',
    baseline= None,
    restore_best_weights= True)


In [12]:
from sklearn.preprocessing import LabelBinarizer

label_binarizer = LabelBinarizer()
y_train_one_hot = label_binarizer.fit_transform(labels_train)
y_val_one_hot = label_binarizer.transform(labels_val)
y_eval_one_hot = label_binarizer.transform(labels_test)

In [14]:
history = model1.fit([stft_train,  mel_train,mfcc_train],
                     y_train_one_hot,
                     batch_size=32,
                     epochs=100,
                     validation_data=([stft_val,mel_val,mfcc_val], y_val_one_hot),
                     callbacks=[checkpoint1, early])


Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.56800, saving model to /content/drive/MyDrive/Music_Project/music_data/checkpoint/LateFusion/latefusion/latefusion_01_0.5680.weights.h5
Epoch 2/100
Epoch 2: val_accuracy improved from 0.56800 to 0.58000, saving model to /content/drive/MyDrive/Music_Project/music_data/checkpoint/LateFusion/latefusion/latefusion_02_0.5800.weights.h5
Epoch 3/100
Epoch 3: val_accuracy improved from 0.58000 to 0.71200, saving model to /content/drive/MyDrive/Music_Project/music_data/checkpoint/LateFusion/latefusion/latefusion_03_0.7120.weights.h5
Epoch 4/100
Epoch 4: val_accuracy improved from 0.71200 to 0.82000, saving model to /content/drive/MyDrive/Music_Project/music_data/checkpoint/LateFusion/latefusion/latefusion_04_0.8200.weights.h5
Epoch 5/100
Epoch 5: val_accuracy did not improve from 0.82000
Epoch 6/100
Epoch 6: val_accuracy did not improve from 0.82000
Epoch 7/100
Epoch 7: val_accuracy improved from 0.82000 to 0.82800, saving model to /cont

In [15]:
from sklearn.metrics import accuracy_score
from keras.metrics import AUC, F1Score, Precision, Accuracy

# Evaluate the model
predict = model1.predict([stft_test,mel_test,mfcc_test])
predicted_labels = label_binarizer.inverse_transform(predict).tolist()

# loss_eval, accuracy_eval = model.evaluate(X_eval, y_eval_one_hot)

# print(f'Evaluation Loss: {loss_eval}, Evaluation Accuracy: {accuracy_eval}')
# Calculate AUC ROC
auc_roc = AUC()
auc_roc.update_state(y_eval_one_hot, predict)
print("AUC ROC:", auc_roc.result().numpy())

# Calculate F1 Score
f1 = F1Score()
f1.update_state(y_eval_one_hot, predict)
print("F1 Score:", np.mean(f1.result().numpy()))

# Calculate Precision
pre = Precision()
pre.update_state(y_eval_one_hot, predict)
print("Precision Score", pre.result().numpy())

# Accuracy Score
acc = accuracy_score(labels_test, predicted_labels)
print("Accuracy Score", acc)


AUC ROC: 0.99575007
F1 Score: 0.93935597
Precision Score 0.93877554
Accuracy Score 0.94


In [16]:
for i in range(len(labels_test)):
    print(labels_test[i], predicted_labels[i])

cailuong cailuong
catru catru
cailuong cailuong
catru catru
cailuong cailuong
cheo cheo
chauvan chauvan
cheo cheo
cailuong cailuong
cheo cheo
cheo cheo
cailuong cailuong
catru catru
cailuong cailuong
cailuong cailuong
cailuong hatxam
cailuong cailuong
hatxam hatxam
cailuong cailuong
cailuong cailuong
catru catru
cailuong cailuong
hatxam hatxam
cailuong cailuong
cailuong cailuong
cheo cheo
cailuong cailuong
hatxam hatxam
chauvan chauvan
cailuong cailuong
chauvan chauvan
hatxam hatxam
cailuong cailuong
cheo cheo
cailuong cailuong
cheo cheo
hatxam hatxam
cheo cheo
hatxam hatxam
chauvan catru
cailuong cailuong
hatxam hatxam
chauvan chauvan
hatxam hatxam
catru catru
hatxam hatxam
hatxam hatxam
chauvan hatxam
catru catru
cailuong cailuong
cailuong cailuong
hatxam hatxam
catru catru
hatxam hatxam
cheo cailuong
catru catru
cheo cheo
chauvan catru
cheo cheo
catru catru
catru catru
cheo cheo
catru catru
catru catru
chauvan chauvan
cheo cheo
cailuong cailuong
cailuong cailuong
catru catru
catru c