In [1]:
# Import Libraries

import os, sys, cv2
import seaborn as sn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import keras
from keras.models import Sequential,Model, load_model
from keras.layers import Dense, Dropout, Flatten, LSTM
from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D, MaxPool1D, GaussianNoise, GlobalMaxPooling1D
from keras.layers import BatchNormalization
from keras.layers import Input
from keras.callbacks import ModelCheckpoint
from keras.layers import LeakyReLU
import tensorflow
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

2024-06-13 01:28:00.232818: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-13 01:28:00.237286: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-13 01:28:00.299617: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Directory containing the spectrograms as PNG files.
# NOTE(review): absolute, machine-specific path - consider moving it to a
# configurable constant so the notebook runs elsewhere.
diretorio_espectrogramas = '/home/missantroop/SongAnalysis/spectrograms'

# Spectrograms loaded as grayscale numpy matrices, in file-name order.
espectrogramas_np = []

# os.listdir returns entries in arbitrary order; sorting makes the resulting
# list reproducible across runs and machines.
for arquivo in sorted(os.listdir(diretorio_espectrogramas)):
    if not arquivo.endswith('.png'):
        continue
    caminho = os.path.join(diretorio_espectrogramas, arquivo)
    # Load the spectrogram with OpenCV as a single-channel image.
    espectrograma = cv2.imread(caminho, cv2.IMREAD_GRAYSCALE)
    if espectrograma is None:
        # cv2.imread does not raise on unreadable/corrupt files, it returns
        # None; skip those instead of storing None in the list.
        print(f"Aviso: não foi possível ler '{caminho}'; arquivo ignorado.")
        continue
    espectrogramas_np.append(espectrograma)

# Placeholder feature arrays: five consecutive integers per feature
# (spec: 1-5, mfcc: 6-10, mel: 11-15, chroma: 16-20, target: 21-25).
# NOTE(review): these look like stand-in values - the spectrograms loaded
# into `espectrogramas_np` are not what is written to the archive. Confirm
# whether real features/labels should be saved here instead.
spec, mfcc, mel, chroma, target = (
    np.arange(inicio, inicio + 5) for inicio in range(1, 22, 5)
)

# Bundle every dataset into a single .npz archive, one key per feature.
np.savez(
    'espectrogramas.npz',
    spec=spec,
    mfcc=mfcc,
    mel=mel,
    chroma=chroma,
    target=target,
)


In [3]:
# Load the .npz file of features
f = np.load(os.getcwd() + "/espectrogramas.npz")
S = f['spec']
mfcc = f['mfcc']
mel = f['mel']
chroma = f['chroma']
y = f['target']

# Split train-test data
S_train, S_test, mfcc_train, mfcc_test, mel_train, mel_test, chroma_train, chroma_test, y_train, y_test = train_test_split(S, mfcc, mel, chroma, y, test_size=0.2)


In [4]:
# Spectrogram
# Peak value of the training split; the test split is scaled by the same
# constant so both splits share one scale.
maximum1 = np.max(S_train)

# Scale by the training peak, append a trailing channel axis and cast to
# float32 - one pipeline per split.
S_train = (S_train / maximum1)[..., np.newaxis].astype(np.float32)
S_test = (S_test / maximum1)[..., np.newaxis].astype(np.float32)


In [5]:
# Target size of every MFCC frame after resizing (rows x columns).
MFCC_HEIGHT = 120
MFCC_WIDTH = 600


def _resize_mfcc_batch(batch, height=MFCC_HEIGHT, width=MFCC_WIDTH):
    """Resize every sample of `batch` to (height, width) with cv2.resize.

    Parameters
    ----------
    batch : np.ndarray
        Array whose first axis indexes the samples.
    height, width : int
        Output number of rows and columns of each frame.

    Returns
    -------
    np.ndarray
        float32 array of shape (batch.shape[0], height, width).
    """
    # Allocate float32 directly: cv2.resize returns float32 here and the
    # result is used as float32, so a float64 scratch buffer is wasted memory.
    resized = np.empty((batch.shape[0], height, width), dtype=np.float32)
    for index in range(batch.shape[0]):
        frame = np.asarray(batch[index], dtype=np.float32)
        # cv2.resize takes the target size as (width, height).
        resized[index] = cv2.resize(frame, (width, height))
    return resized


# Same resizing for both splits through the shared helper.
mfcc_train = _resize_mfcc_batch(mfcc_train)
mfcc_test = _resize_mfcc_batch(mfcc_test)

# Append a trailing channel axis: (N, row, col) -> (N, row, col, 1).
N, row, col = mfcc_train.shape
mfcc_train = mfcc_train.reshape((N, row, col, 1))

N, row, col = mfcc_test.shape
mfcc_test = mfcc_test.reshape((N, row, col, 1))

# Standardise with statistics of the training split only, and apply the same
# mean/std to the test split.
mean_data = np.mean(mfcc_train)
std_data = np.std(mfcc_train)

mfcc_train = (mfcc_train - mean_data)/ std_data
mfcc_test = (mfcc_test - mean_data)/ std_data

print("Shape de mfcc_train:", mfcc_train.shape)
print("Shape de mfcc_test:", mfcc_test.shape)

Shape de mfcc_train: (4, 120, 600, 1)
Shape de mfcc_test: (1, 120, 600, 1)


In [6]:
# Desired height and width of every mel-spectrogram frame.
altura = 120
largura = 600


def _resize_mel_batch(batch, height, width):
    """Resize every sample of `batch` to (height, width) by interpolation.

    np.resize must not be used for this: it does not interpolate, it fills
    the requested shape from the flattened input (repeating or truncating
    it), which scrambles the frames and mixes data from different samples.

    Parameters
    ----------
    batch : np.ndarray
        Array whose first axis indexes the samples.
    height, width : int
        Output number of rows and columns of each frame.

    Returns
    -------
    np.ndarray
        float32 array of shape (batch.shape[0], height, width, 1); the last
        axis is the channel axis.
    """
    resized = np.empty((batch.shape[0], height, width, 1), dtype=np.float32)
    for index in range(batch.shape[0]):
        frame = np.asarray(batch[index], dtype=np.float32)
        # cv2.resize takes the target size as (width, height).
        resized[index, :, :, 0] = cv2.resize(frame, (width, height))
    return resized


# Resize each sample on its own, then expose the result as float32 with a
# trailing channel axis.
mel_train = _resize_mel_batch(mel_train, altura, largura)
mel_test = _resize_mel_batch(mel_test, altura, largura)

# Report the resulting shapes of mel_train and mel_test.
print("Shape de mel_train:", mel_train.shape)
print("Shape de mel_test:", mel_test.shape)

Shape de mel_train: (4, 120, 600, 1)
Shape de mel_test: (1, 120, 600, 1)


In [7]:
# Mel-Spectrogram

# Peak of the training split; both splits are divided by this same value.
maximum = np.max(mel_train)
mel_train = np.divide(mel_train, maximum)
mel_test = np.divide(mel_test, maximum)

# Dimension check: the 4-way unpacking fails unless each array is 4-D
# (N, height, width, channels); the reshape restates that same shape.
N, height, width, channels = mel_train.shape
mel_train = np.reshape(mel_train, (N, height, width, channels))

N, height, width, channels = mel_test.shape
mel_test = np.reshape(mel_test, (N, height, width, channels))

# Report the final shapes.
for rotulo, matriz in (
    ("Shape final de mel_train:", mel_train),
    ("Shape final de mel_test:", mel_test),
):
    print(rotulo, matriz.shape)

Shape final de mel_train: (4, 120, 600, 1)
Shape final de mel_test: (1, 120, 600, 1)


In [8]:
# Save Spectrogram train-test
np.savez_compressed(os.getcwd()+"/new_spectrogram_train_test.npz", S_train= S_train, S_test= S_test, y_train = y_train, y_test= y_test)

# Save MFCC train-test
np.savez_compressed(os.getcwd()+"/new_mfcc_train_test.npz", mfcc_train= mfcc_train, mfcc_test= mfcc_test, y_train = y_train, y_test= y_test)

# Save Mel-Spectrogram train-test
np.savez_compressed(os.getcwd()+"/new_mel_train_test.npz", mel_train= mel_train, mel_test= mel_test, y_train = y_train, y_test= y_test)
