In [7]:
# abrir músicas e pré-processar o mel spectrogram
import pandas as pd
import librosa as mimosa
import audioread
import os
import matplotlib as plt
import seaborn as sns
import numpy as np

# Load every track under ./musics/<genre>/, convert it to a dB-scaled mel
# spectrogram and bring all spectrograms to the same number of frames so they
# can be stacked into a single array.

# Number of spectrogram frames (columns) kept per track.
# NOTE(review): 1293 frames looks like ~30 s of audio at librosa's default
# sr=22050 / hop_length=512 - confirm against the dataset.
TARGET_FRAMES = 1293


def fitToFrames(spectrogram, n_frames=TARGET_FRAMES):
    """Crop or pad a 2-D spectrogram to exactly ``n_frames`` columns.

    ``np.resize`` must not be used for this: it flattens the array and refills
    the new shape, so every row after the first is shifted whenever the
    original width differs from ``n_frames``. Here only the time axis
    (axis 1) is touched and rows stay aligned.

    Parameters
    ----------
    spectrogram : np.ndarray
        Array of shape (n_mels, frames).
    n_frames : int
        Desired number of columns.

    Returns
    -------
    np.ndarray
        Array of shape (n_mels, n_frames). Longer inputs are truncated at the
        end; shorter inputs are right-padded with the input's minimum value
        (the quietest level present in that spectrogram).
    """
    current_frames = spectrogram.shape[1]
    if current_frames >= n_frames:
        return spectrogram[:, :n_frames]

    pad_value = spectrogram.min() if spectrogram.size else 0.0
    return np.pad(
        spectrogram,
        ((0, 0), (0, n_frames - current_frames)),
        mode="constant",
        constant_values=pad_value,
    )


# Sorted so the sample order (and therefore every later split) is the same on
# every machine; os.listdir() returns entries in arbitrary order. Only
# directories are genres - stray files such as .DS_Store are skipped.
musicGender = [
    entry for entry in sorted(os.listdir("musics"))
    if os.path.isdir(os.path.join("musics", entry))
]
genderlabel = []
musicData = []

for gender in musicGender:
    genderMusics = sorted(os.listdir(os.path.join("musics", gender)))
    for music in genderMusics:
        genderlabel.append(gender)

        # `sr` is intentionally left in the notebook namespace: the plotting
        # cell further down reads it.
        amplitude, sr = mimosa.load(os.path.join("musics", gender, music))

        melSpec = mimosa.feature.melspectrogram(y=amplitude, sr=sr, n_mels=128, fmax=8000)
        melSpec_to_db = mimosa.power_to_db(melSpec, ref=np.max)
        resized_melSpec = fitToFrames(melSpec_to_db, TARGET_FRAMES)

        musicData.append(resized_melSpec)

# Both names are read by later cells; they hold the same
# (tracks, n_mels, frames) data.
mfcc_specData = np.stack(musicData)
musicData = np.array(musicData)
genderlabel = np.array(genderlabel)


In [8]:
# fazer encoding das categorias
import sklearn.preprocessing as preProSkL

# Turn the genre names into integer class ids for the classifier.
labelEncoder = preProSkL.LabelEncoder()
labelEncoder.fit(genderlabel)
# fit() hands back the encoder itself, so both names refer to the same object.
labelEncoded = labelEncoder
labelEncodedTrans = labelEncoder.transform(genderlabel)
# Use labelEncoder.inverse_transform(...) to map ids back to genre names.

In [None]:
# normalizar dados por media
# não usado, mas pode ser descomentado

# from sklearn.preprocessing import StandardScaler

# sc_X=StandardScaler()
# mfcc_specData = sc_X.fit_transform(mfcc_specData)

# mfcc_specData.head()


# mean = np.mean(musicData, axis=-1, keepdims=True)
# std = np.std(musicData, axis=-1, keepdims=True)


# Normalize the array by subtracting the mean and dividing by the standard deviation
# normalized_data = (musicData - mean) / std

# print(len(normalized_data))


In [9]:
# Normalização Min Max

# Min-max scale the spectrograms to [0, 1], one block of consecutive tracks at
# a time, using the minimum and maximum over the whole block.
#
# The block statistics are deliberately NOT stored in variables called
# `min` / `max`: that would replace the Python builtins for every cell that
# runs afterwards in the same kernel.

# Tracks per block.
# NOTE(review): this assumes the loader stores each genre as a contiguous run
# of exactly 100 tracks - confirm against the dataset.
SAMPLES_PER_CATEGORY = 100

# Integer division: tracks beyond the last full block are not normalized and
# are therefore absent from `normalized_data`.
numCategories = len(labelEncodedTrans) // SAMPLES_PER_CATEGORY

normalized_data = []

for indexMin in range(0, numCategories * SAMPLES_PER_CATEGORY, SAMPLES_PER_CATEGORY):
    indexMax = indexMin + SAMPLES_PER_CATEGORY
    categoryBlock = mfcc_specData[indexMin:indexMax]

    blockMin = categoryBlock.min()
    blockMax = categoryBlock.max()
    valueRange = blockMax - blockMin

    if valueRange == 0:
        # A constant block has nothing to scale; avoid dividing by zero.
        scaledBlock = np.zeros_like(categoryBlock)
    else:
        scaledBlock = (categoryBlock - blockMin) / valueRange

    # Iterating a 3-D array yields one 2-D spectrogram per track.
    normalized_data.extend(scaledBlock)

normalized_data = np.array(normalized_data)


200
[[[0.5783409  0.5788618  0.5487855  ... 0.61045194 0.62712353 0.7378836 ]
  [0.62866545 0.66257346 0.66708696 ... 0.6991476  0.73972845 0.7818091 ]
  [0.5650602  0.56712735 0.65379363 ... 0.66821975 0.77540904 0.7718417 ]
  ...
  [0.17659025 0.22293349 0.2625003  ... 0.22600517 0.25689825 0.26358634]
  [0.12187986 0.18058339 0.24473663 ... 0.16336517 0.2247715  0.24890284]
  [0.08055706 0.15793267 0.23598161 ... 0.1612997  0.23033647 0.23180266]]

 [[0.5609849  0.60876155 0.7363566  ... 0.6960176  0.63633746 0.6941823 ]
  [0.58704716 0.72307044 0.88060904 ... 0.8538593  0.8184128  0.8121853 ]
  [0.53404224 0.760989   0.89426595 ... 0.82095623 0.81130344 0.80466497]
  ...
  [0.         0.12093868 0.24444851 ... 0.34721032 0.32010967 0.32215595]
  [0.         0.11132898 0.23897576 ... 0.33503366 0.3226734  0.321418  ]
  [0.         0.14814854 0.25449848 ... 0.35689202 0.33806244 0.34973305]]

 [[0.5630524  0.5390273  0.48941955 ... 0.6870567  0.65241706 0.5791148 ]
  [0.6623803  0.62

In [None]:
# modelo usado, através da separação dos dados com k-folds

from sklearn.model_selection import KFold
from tensorflow import keras
import matplotlib.pyplot as pyplot

# Train and evaluate a small CNN with 4-fold cross-validation, collecting the
# loss and accuracy of every fold and printing a summary at the end.

acc_per_fold = []
loss_per_fold = []

# Conv2D consumes (height, width, channels) inputs. Add the single channel
# axis explicitly so the data matches `input_shape` instead of relying on
# Keras to reshape it implicitly.
cnn_data = np.expand_dims(normalized_data, axis=-1)
input_shape = cnn_data.shape[1:]

# Size the output layer from the labels rather than hard-coding it, so the
# cell keeps working when more genres are added.
num_classes = len(np.unique(labelEncodedTrans))


def buildModel(input_shape, num_classes):
    """Create and compile a fresh, untrained CNN for one fold.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample, (n_mels, frames, 1).
    num_classes : int
        Number of units in the softmax output layer.

    Returns
    -------
    keras.Sequential
        Model compiled with Adam (learning rate 1e-4), sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential()

    # 1st block: 3x3 convolution, overlapping 3x3 pooling with stride 2.
    model.add(keras.layers.Conv2D(8, (3, 3), activation="relu", input_shape=input_shape))
    model.add(keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding="same"))
    model.add(keras.layers.BatchNormalization())

    # A Conv2D(16, (3, 3)) block with the same pooling was tried here and is
    # currently disabled.

    # Last convolutional block: 1x1 convolution followed by a 1x1 / stride-1
    # pooling layer.
    model.add(keras.layers.Conv2D(8, (1, 1), activation="relu"))
    model.add(keras.layers.MaxPool2D((1, 1), strides=(1, 1), padding="same"))
    model.add(keras.layers.BatchNormalization())

    # Classifier head (a Dropout(0.2) after the dense layer is disabled).
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(4, activation="relu"))
    model.add(keras.layers.Dense(num_classes, activation="softmax"))

    optimizer = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    return model


kf = KFold(n_splits=4, random_state=42, shuffle=True)
for fold_no, (train, test) in enumerate(kf.split(cnn_data), start=1):
    # A new model per fold, so no weights leak from one fold to the next.
    model = buildModel(input_shape, num_classes)

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # The held-out fold is used both for per-epoch validation and for the
    # final evaluation below.
    history = model.fit(cnn_data[train], labelEncodedTrans[train], epochs=25,
                        validation_data=(cnn_data[test], labelEncodedTrans[test]))

    testError, testAccuracy = model.evaluate(cnn_data[test], labelEncodedTrans[test])
    print("Accuracy on test set is:{}".format(testAccuracy))

    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {testError}; {model.metrics_names[1]} of {testAccuracy*100}%')
    acc_per_fold.append(testAccuracy * 100)
    loss_per_fold.append(testError)

# == Provide average scores ==
print('------------------------------------------------------------------------')
print('Score per fold')
for i in range(len(acc_per_fold)):
    print('------------------------------------------------------------------------')
    print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')

# The averages are printed once, after every fold has been listed.
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')
    
    # print("%s %s" % (train, test))
    # print( len(normalized_data[train]))
    # print( len(normalized_data[test]))

# print(normalized_data[train])
# X_train, X_test, y_train, y_test = normalized_data[train], normalized_data[test], labelEncodedTrans[train], labelEncodedTrans[test]

In [None]:
# separação antiga dos dados de treino e teste

# import sklearn.model_selection as modelSelec

# X_train, X_test, y_train, y_test = modelSelec.train_test_split(normalized_data, labelEncodedTrans, test_size=0.25, random_state=42)

# # X_train, X_test, y_val, y_val = modelSelec.train_test_split(mfcc_specData, labelEncodedTrans, test_size=0.5, random_state=42)
# X_test,X_validation,y_test,y_validation=modelSelec.train_test_split(X_test,y_test,test_size=0.2, random_state=42)


In [None]:
# plot do mel spectrogram

import matplotlib.pyplot as pyplot



# Draw one min-max normalized mel spectrogram and save it as 0.png.

# Older librosa releases do not load the display submodule on a plain
# `import librosa`; importing it explicitly is harmless on newer ones.
import librosa.display

# Index of the track to draw.
PLOT_INDEX = 100

fig, ax = pyplot.subplots()
img = mimosa.display.specshow(normalized_data[PLOT_INDEX], x_axis='time', y_axis='mel',
                              sr=sr, fmax=8000, ax=ax)
# The plotted values are min-max scaled to [0, 1], so the colorbar must not be
# labelled in dB.
fig.colorbar(img, ax=ax, format='%.2f', label='Normalized level')

ax.set(title='Mel-frequency spectrogram')

fig.savefig('0.png')

  

In [None]:
# criação dos plots através dos dados normalizados

# import matplotlib.pyplot as pyplot

# musicCounter = 0

# path = './musicsPlts'
# if not os.path.exists(path):
#     os.mkdir(path)

# for gender in musicGender:
#     genderPath = '{}'.format(path) + '/{}'.format(gender)
#     if not os.path.exists(genderPath):
#         os.mkdir(genderPath)

# for index in range(len(mfcc_specData)):
#     fig, ax = pyplot.subplots()
#     img = mimosa.display.specshow(mfcc_specData[index], x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
#     fig.colorbar(img, ax=ax, format='%+2.0f dB')

#     ax.set(title='Mel-frequency spectrogram')

#     if index % 100 == 0:
#         musicCounter = 0
    
#     pyplot.savefig('{}'.format(path) + '/{}'.format(genderlabel[index]) + '/{}'.format(musicCounter) + '.png')

#     musicCounter +=1




In [None]:
# formato dos dados para modelo antigo

# Hold-out split for the legacy (single train/test) model.
# The split used to live in a cell that is now commented out, which left
# X_train / X_test / y_train / y_test undefined on a fresh kernel; it is
# restored here so this cell and the legacy cells below run under
# Restart & Run All.
import sklearn.model_selection as modelSelec

X_train, X_test, y_train, y_test = modelSelec.train_test_split(
    normalized_data, labelEncodedTrans, test_size=0.25, random_state=42)

# Shape of one sample as fed to the first Conv2D layer: a trailing
# single-channel axis is appended to the two spectrogram axes.
input_shape = (X_train.shape[1], X_train.shape[2], 1)
print(X_train.shape)
print(input_shape)


In [None]:
# modelo antigo testado com variantes

from tensorflow import keras

# Legacy CNN: three Conv2D -> MaxPool2D -> BatchNormalization stages with 24
# filters each, then a small dense head with dropout and a 10-way softmax.
model = keras.Sequential([
    # Stage 1: 3x3 convolution; this layer also declares the input shape.
    keras.layers.Conv2D(24, (3, 3), activation="relu", input_shape=input_shape),
    keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding="same"),
    keras.layers.BatchNormalization(),

    # Stage 2: same layout as stage 1.
    keras.layers.Conv2D(24, (3, 3), activation="relu"),
    keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding="same"),
    keras.layers.BatchNormalization(),

    # Stage 3: 2x2 kernels for both the convolution and the pooling.
    keras.layers.Conv2D(24, (2, 2), activation="relu"),
    keras.layers.MaxPool2D((2, 2), strides=(2, 2), padding="same"),
    keras.layers.BatchNormalization(),

    # Classifier head.
    keras.layers.Flatten(),
    keras.layers.Dense(20, activation="relu"),
    keras.layers.Dropout(0.2),

    # Output layer.
    keras.layers.Dense(10, activation="softmax"),
])

optimizer = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=['accuracy'],
)


In [None]:
#  treino do modelo antigo 

# Append a single-channel axis to the training data, report how many training
# samples there are, then fit the legacy model for 35 epochs.
_height, _width = X_train.shape[1], X_train.shape[2]
X_train = X_train.reshape((-1, _height, _width, 1))
print(len(X_train))

# The test split doubles as validation data here; a dedicated
# (X_validation, y_validation) split was the earlier alternative.
history = model.fit(
    X_train,
    y_train,
    epochs=35,
    validation_data=(X_test, y_test),
)

In [None]:
# validação do modelo antigo

import matplotlib.pyplot as pyplot

# Dump the recorded training history, score the legacy model on the test
# split and plot training vs. validation accuracy per epoch.
print(history.history)

testError, testAccuracy = model.evaluate(X_test, y_test)
print("Accuracy on test set is:{}".format(testAccuracy))

# (history key, legend label) for each curve, drawn in this order.
_curves = (
    ('accuracy', 'train_accuracy'),
    ('val_accuracy', 'val_accuracy'),
)
for _history_key, _legend_label in _curves:
    pyplot.plot(history.history[_history_key], label=_legend_label)

pyplot.xlabel('Epoch')
pyplot.ylabel('Accuracy')
pyplot.ylim([0.1, 1.1])
pyplot.legend(loc='lower right')