In [None]:
# Load the music files and pre-process them into mel spectrograms
import pandas as pd
import librosa as mimosa
import audioread
import os
import matplotlib as plt
import seaborn as sns
import numpy as np

# Every spectrogram is forced to this many time frames so they can be stacked
# into a single array.
# NOTE(review): assuming librosa's default sample rate and hop length, a 15 s
# excerpt yields far fewer frames than this, so a large part of each array is
# padding — confirm whether the target length should be lowered.
TARGET_FRAMES = 1293


def _fitFrames(spec, nFrames):
    """Crop or right-pad a 2-D (bands, frames) array to exactly ``nFrames`` columns.

    Only the time axis (axis 1) is touched, so every band keeps its own row.
    Missing frames are filled with the array's minimum value (its quietest level).
    """
    frames = spec.shape[1]
    if frames >= nFrames:
        return spec[:, :nFrames]
    return np.pad(
        spec,
        ((0, 0), (0, nFrames - frames)),
        mode="constant",
        constant_values=spec.min(),
    )


# One sub-directory of "musics" per genre. Sorted so the order is reproducible
# and alphabetical; stray non-directory entries are skipped.
musicGender = sorted(
    entry for entry in os.listdir("musics")
    if os.path.isdir(os.path.join("musics", entry))
)
genderlabel = []
musicData = []

for gender in musicGender:
    genderDir = os.path.join("musics", gender)
    genderMusics = sorted(os.listdir(genderDir))
    for music in genderMusics:
        # Analyse a 15 s excerpt that starts 15 s into the track.
        amplitude, sr = mimosa.load(os.path.join(genderDir, music), offset=15, duration=15)

        melSpec = mimosa.feature.melspectrogram(y=amplitude, sr=sr, n_mels=128, fmax=8000)
        # dB scale relative to the loudest bin of this excerpt.
        melSpec_to_db = mimosa.power_to_db(melSpec, ref=np.max)
        resized_melSpec = _fitFrames(melSpec_to_db, TARGET_FRAMES)

        # Label and data are appended together so both lists stay aligned.
        genderlabel.append(gender)
        musicData.append(resized_melSpec)

# Both names hold the same stacked (tracks, bands, frames) array; both are kept
# because later cells read each of them.
mfcc_specData = np.stack(musicData)
musicData = np.array(musicData)
genderlabel = np.array(genderlabel)


In [None]:
# Encode the genre categories as integer labels
import sklearn.preprocessing as preProSkL

# Fit an encoder on the genre names, then turn every name into its integer id.
labelEncoder = preProSkL.LabelEncoder()
labelEncoder.fit(genderlabel)
labelEncoded = labelEncoder  # the fitted encoder, kept under its second name
labelEncodedTrans = labelEncoded.transform(genderlabel)
# labelEncoded.inverse_transform(...) converts the ids back to genre names

In [None]:
# Min-max normalisation, computed separately for each block of consecutive tracks.

# The slicing below assumes the tracks are stored genre by genre with this many
# tracks per genre; trailing tracks that do not fill a whole block are left out.
TRACKS_PER_GENRE = 100

numCategories = len(labelEncodedTrans) // TRACKS_PER_GENRE

normalized_data = []
for genreIdx in range(numCategories):
    start = genreIdx * TRACKS_PER_GENRE
    stop = start + TRACKS_PER_GENRE

    # Scaling bounds are shared by every track of the block.
    # (Named blockMin/blockMax so the built-in min()/max() stay usable.)
    blockMin = mfcc_specData[start:stop].min()
    blockMax = mfcc_specData[start:stop].max()
    valueRange = blockMax - blockMin

    genreTracks = musicData[start:stop]
    if valueRange == 0:
        # A constant block has nothing to scale; avoid dividing by zero.
        scaledBlock = np.zeros_like(genreTracks)
    else:
        scaledBlock = (genreTracks - blockMin) / valueRange

    # extend() adds the tracks one by one, keeping one entry per track.
    normalized_data.extend(scaledBlock)

normalized_data = np.array(normalized_data)

print(len(normalized_data))


In [None]:
from tensorflow import keras

def theModel(filters, neurons, data):
    """Build and compile a small convolutional classifier.

    Args:
        filters: number of filters of the convolution layer; the same value is
            also used as the width of the hidden dense layer.
        neurons: number of units of the softmax output layer.
        data: array whose second and third dimensions give the input height and
            width; a single channel dimension is appended to form the input shape.

    Returns:
        The compiled ``keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    height, width = data.shape[1], data.shape[2]
    layers = keras.layers

    model = keras.Sequential()

    stack = [
        # convolutional input block
        layers.Conv2D(filters, (3, 3), activation="relu", input_shape=(height, width, 1)),
        layers.MaxPool2D((3, 3), strides=(2, 2), padding="same"),
        layers.BatchNormalization(),
        # flatten, then one hidden dense layer
        layers.Flatten(),
        layers.Dense(filters, activation="relu"),
        # class probabilities
        layers.Dense(neurons, activation="softmax"),
    ]
    for layer in stack:
        model.add(layer)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        loss="sparse_categorical_crossentropy",
        metrics=['accuracy'],
    )

    return model



In [None]:
from sklearn.model_selection import KFold
import matplotlib.pyplot as pyplot

def trainTestModel(splitNumb, filters, data, labels):
    """Train and evaluate the CNN with shuffled K-fold cross-validation.

    A fresh model is built for every fold, trained for 25 epochs on the training
    indices and evaluated on the held-out indices. Per-fold scores are printed,
    followed by a single summary of the averages.

    Args:
        splitNumb: number of folds. The same value is also handed to
            ``theModel`` as the size of its output layer, so callers are
            expected to pass the number of classes here.
        filters: forwarded to ``theModel``.
        data: samples, indexed along the first axis.
        labels: class ids aligned with ``data``.

    Returns:
        The mean test accuracy over all folds, in percent.
    """
    # Local import: only needed to release the previous fold's model below.
    from tensorflow import keras

    acc_per_fold = []
    loss_per_fold = []

    kf = KFold(n_splits=splitNumb, random_state=42, shuffle=True)
    for fold_no, (train, test) in enumerate(kf.split(data), start=1):

        print("index:{}".format(splitNumb))
        print("filter:{}".format(filters))

        # Many models are created across folds and sweeps; drop the state Keras
        # keeps for the previous ones so memory does not keep growing.
        keras.backend.clear_session()
        cnnModel = theModel(filters, splitNumb, data)

        print('------------------------------------------------------------------------')
        print(f'Training for fold {fold_no} ...')

        # The held-out fold doubles as validation data during training.
        cnnModel.fit(data[train], labels[train], epochs=25, validation_data=(data[test], labels[test]))

        testError, testAccuracy = cnnModel.evaluate(data[test], labels[test])
        print("Accuracy on test set is:{}".format(testAccuracy))

        print(f'Score for fold {fold_no}: {cnnModel.metrics_names[0]} of {testError}; {cnnModel.metrics_names[1]} of {testAccuracy*100}%')
        acc_per_fold.append(testAccuracy * 100)
        loss_per_fold.append(testError)

    print('------------------------------------------------------------------------')
    print('Score per fold')
    for i in range(0, len(acc_per_fold)):
        print('------------------------------------------------------------------------')
        print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')

    # The averages are reported once, after all per-fold lines.
    print('------------------------------------------------------------------------')
    print('Average scores for all folds:')
    print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
    print(f'> Loss: {np.mean(loss_per_fold)}')
    print('------------------------------------------------------------------------')

    return np.mean(acc_per_fold)

In [None]:
import math

# Sweep over the number of genres and the number of CNN filters, recording the
# cross-validated accuracy of every combination.

# Each genre is assumed to contribute this many consecutive tracks.
TRACKS_PER_GENRE = 100
# Number of filter settings to try, starting at FIRST_FILTER_COUNT.
filters = 5
FIRST_FILTER_COUNT = 4

mappingAccuracy = []

# Start at two genres: index is also used by trainTestModel as the number of
# cross-validation folds, and one genre alone is skipped.
for index in range(2, numCategories + 1):
    # The first `index` genres, taken as one contiguous slice.
    dataInterval = normalized_data[0:TRACKS_PER_GENRE * index]
    labels = labelEncodedTrans[0:TRACKS_PER_GENRE * index]

    # filterCount (not `filter`) so the built-in filter() stays usable.
    for filterCount in range(FIRST_FILTER_COUNT, FIRST_FILTER_COUNT + filters):
        accuracy = trainTestModel(index, filterCount, dataInterval, labels)

        # Accuracy is stored rounded up to a whole percent.
        mappingAccuracy.append({'acc': math.ceil(accuracy), 'numMusicas': index, 'filtrosCNN': filterCount})


In [None]:
df = pd.DataFrame(mappingAccuracy)

# For every value of 'numMusicas' keep the row with the highest 'acc' (the first
# one on ties). idxmax + loc selects whole rows, so the grouping column stays in
# the result regardless of how groupby.apply treats grouping columns.
bestRowLabels = df.groupby('numMusicas')['acc'].idxmax()
elementos_maximos = df.loc[bestRowLabels].reset_index(drop=True)

for _, elemento in elementos_maximos.iterrows():
    print(elemento.to_dict())

In [None]:
import matplotlib.pyplot as plt

# Bar chart of the best accuracy reached for each number of genres.
fig = plt.figure(figsize=(10, 5))

genreCounts = elementos_maximos['numMusicas']
bestAccuracy = elementos_maximos['acc']
plt.bar(genreCounts, bestAccuracy, width=0.4, color='blue')

plt.ylabel('Accuracy')
plt.xlabel('Numero de Generos')
plt.show()


In [None]:
# 3-D view of the best runs: filters (x), number of genres (y), accuracy (z).
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# The connecting line and the scattered points use the same three columns.
xline = xdata = elementos_maximos['filtrosCNN']
yline = ydata = elementos_maximos['numMusicas']
zline = zdata = elementos_maximos['acc']

ax.plot3D(xline, yline, zline, 'gray')
ax.scatter3D(xdata, ydata, zdata)

ax.set_xlabel('filtrosCNN')
ax.set_ylabel('Numero de Generos')
ax.set_zlabel('Accuracy')

# Camera position for the rendered view.
ax.view_init(elev=20., azim=-45, roll=0)

In [None]:
# Create the plots/images from the normalized data

import matplotlib.pyplot as pyplot

# Render every normalized spectrogram to ./musicsPlts/<genre>/<n>.png, where n
# counts the images already written for that genre.
path = './musicsPlts'

# Output root plus one folder per genre; existing folders are reused.
os.makedirs(path, exist_ok=True)
for gender in musicGender:
    os.makedirs(os.path.join(path, gender), exist_ok=True)

# genre name -> number of images saved so far (gives the next file number)
savedPerGenre = {}

for index in range(len(normalized_data)):
    genre = genderlabel[index]
    musicCounter = savedPerGenre.get(genre, 0)

    fig, ax = pyplot.subplots()
    try:
        # `sr` is the sample rate left over from the loading cell.
        # NOTE(review): the data plotted here is min-max scaled, while the
        # colorbar is formatted as dB — confirm the intended label.
        img = mimosa.display.specshow(normalized_data[index], x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
        fig.colorbar(img, ax=ax, format='%+2.0f dB')

        ax.set(title='Mel-frequency spectrogram')

        fig.savefig(os.path.join(path, str(genre), '{}.png'.format(musicCounter)))
    finally:
        # Release the figure; otherwise every figure of the loop stays in memory.
        pyplot.close(fig)

    savedPerGenre[genre] = musicCounter + 1


