<a href="https://colab.research.google.com/github/Ayrsz/SignalAndSistemyProject/blob/main/FeatureExtract.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Model CNN
import torch
import torch.nn as nn
import torchaudio

#Scientific computation
import numpy as np
import jax
import jax.numpy as jnp
import cupy as cp

#Plot and view
from matplotlib import pyplot as plt
import IPython as ipy
from IPython import display
from IPython.display import Audio

#Data manipulation
import scipy as sci
import os
import gc

#Audio manipulation
import librosa
import soundfile as sf


In [4]:
from google.colab import drive
from zipfile import ZipFile

# Mount Google Drive under /content/drive; force_remount=True asks for a fresh mount.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [5]:
# Unzip the GTZAN archive next to itself on Drive, unless it was already extracted.
path_zip = "/content/drive/MyDrive/Datasets/DatasetAudios/GTZAN.zip"
path_datasets = os.path.dirname(path_zip)
path_extracted = os.path.join(path_datasets, "GTZAN")

# Check for the extracted folder BEFORE opening the archive: this avoids reading
# the zip when there is nothing to do, and keeps the cell working even if the
# archive was deleted after a previous extraction.
if os.path.isdir(path_extracted):
    print("Arquivo já unzipado, sem necessidade de ações.")
else:
    with ZipFile(path_zip, 'r') as zip_archive:
        zip_archive.extractall(path_datasets)

Arquivo já unzipado, sem necessidade de ações.


In [6]:
def dir_genres(path_all_genres : str):
    """Return the path of every genre folder inside `path_all_genres`.

    Only real sub-directories are returned; hidden entries (for example the
    `.ipynb_checkpoints` folder that Jupyter/Colab may create) and plain files
    are skipped so they are never mistaken for a genre. The result is sorted
    so that the processing order is the same on every run.

    Args:
        path_all_genres: Directory that contains one sub-directory per genre.

    Returns:
        List of strings of the form ``path_all_genres + '/' + folder_name``.
    """
    return [
        path_all_genres + '/' + pasta
        for pasta in sorted(os.listdir(path_all_genres))
        if not pasta.startswith('.')
        and os.path.isdir(os.path.join(path_all_genres, pasta))
    ]

# Return all .wav files of a single genre folder
def dir_files_wav(path_genre : str):
    """Return the paths of the .wav files stored directly inside `path_genre`.

    Entries whose name does not end in ``.wav`` (case-insensitive) are
    skipped, so hidden folders such as `.ipynb_checkpoints` or stray
    non-audio files are never handed to the audio reader. The result is
    sorted so that files are processed in a deterministic order.

    Args:
        path_genre: Directory holding the audio clips of one genre.

    Returns:
        List of strings of the form ``path_genre + '/' + file_name``.
    """
    return [
        path_genre + '/' + arquivo
        for arquivo in sorted(os.listdir(path_genre))
        if arquivo.lower().endswith('.wav')
    ]


## Generate Spectrogram

In [None]:
def is_dir_empty(path):
    """Tell whether `path` holds nothing besides an optional `.ipynb_checkpoints` entry.

    Returns True when the directory has no entries, or when its only entry is
    named `.ipynb_checkpoints`; returns False as soon as any other file or
    folder is found.
    """
    with os.scandir(path) as entries:
        # all() stops at the first entry that is not the checkpoints folder,
        # and is True for a directory with no entries at all.
        return all(item.name == '.ipynb_checkpoints' for item in entries)

def create_batch(files, size, genre, shape = 661794):
    """Load up to `size` audio files from the front of `files` into one fixed-length batch.

    Every attempted file is removed from `files` (in place), whether it could
    be read or not. Unreadable files are reported and skipped; leaving them in
    the list would make a caller that loops "until `files` is empty" retry
    them forever. Only successfully loaded clips get a row in the batch, so
    row `k` of the batch always corresponds to `nums[k]`.

    Args:
        files: Mutable list of audio file paths; consumed from the front.
        size: Maximum number of files to load in this batch.
        genre: Unused; kept so existing callers keep working.
        shape: Number of samples per clip. Shorter clips are zero-padded at
            the end, longer clips are truncated.

    Returns:
        Tuple ``(batch, nums, sample_rate, files)`` where
        - ``batch`` is a float32 JAX array of shape ``(len(nums), shape)``,
        - ``nums`` holds, per loaded file, the second-to-last dot-separated
          field of its file name (e.g. ``"00001"`` for ``blues.00001.wav``),
        - ``sample_rate`` is the rate reported for the last loaded file, or
          ``None`` when no file could be loaded,
        - ``files`` is the same list object that was passed in, now without
          the attempted files.
    """
    batch_size = min(size, len(files))
    attempted = files[:batch_size]

    rows = []
    nums = []
    sample_rate = None  # stays None when nothing in this batch could be read

    for path in attempted:
        try:
            audio, file_rate = sf.read(path)  # keeps the file's original sample rate
            audio = np.asarray(audio, dtype=np.float32)

            if audio.ndim > 1:
                # Multi-channel file (frames, channels): down-mix to mono.
                audio = audio.mean(axis=1)

            if len(audio) < shape:
                # Zero-pad clips that are shorter than the expected length.
                audio = np.pad(audio, (0, shape - len(audio)))
            else:
                # Truncate clips that are longer (no-op when already equal).
                audio = audio[:shape]

            num = path.split("/")[-1].split(".")[-2]
        except Exception as e:
            print(f"Erro ao processar {path}: {e}")
            continue

        # Append only after every step succeeded so rows and nums stay aligned.
        rows.append(audio)
        nums.append(num)
        sample_rate = file_rate

    # Drop everything that was attempted (including failures) from the caller's list.
    del files[:batch_size]

    # Build the device array once instead of copying the whole batch per file.
    if rows:
        batch = jnp.asarray(np.stack(rows))
    else:
        batch = jnp.zeros((0, shape))
    return batch, nums, sample_rate, files


def spectro_feat(audio, sample_rate, fft_size = 512, hop_size = 256):
    """Compute a power spectrogram in decibels for a single 1-D audio signal.

    The signal is cut into frames of `fft_size` samples taken every
    `hop_size` samples, each frame is multiplied by a Hamming window and
    transformed with a real FFT. The squared magnitude (power) is converted
    to decibels with ``10 * log10(power / 1e-10)``. Power is floored at
    ``1e-10`` first, so silent (e.g. zero-padded) frames give 0 dB instead
    of ``-inf``.

    Args:
        audio: 1-D array of samples; must hold at least `fft_size` samples.
        sample_rate: Unused here; kept so existing callers keep working.
        fft_size: Frame length and FFT size in samples.
        hop_size: Step between the starts of consecutive frames, in samples.

    Returns:
        Array of shape ``(fft_size // 2 + 1, num_frames)`` with values in dB.

    Raises:
        ValueError: If `audio` is shorter than one frame.
    """
    POWER_FLOOR = 1e-10  # also used as the dB reference, so the floor maps to 0 dB

    audio = jax.device_put(audio)
    if len(audio) < fft_size:
        raise ValueError(
            f"audio has {len(audio)} samples, need at least fft_size={fft_size}"
        )

    window = jnp.hamming(fft_size)
    num_frames = (len(audio) - fft_size) // hop_size + 1

    def compute_fft(i):
        start = i * hop_size
        frame = jax.lax.dynamic_slice(audio, (start,), (fft_size,))
        return jnp.fft.rfft(frame * window, n = fft_size)

    stft = jax.vmap(compute_fft)(jnp.arange(num_frames))

    power = jnp.abs(stft) ** 2
    # Power quantities use 10*log10 (20*log10 is for amplitudes).
    dB_format = 10 * jnp.log10(jnp.maximum(power, POWER_FLOOR) / POWER_FLOOR)
    return dB_format.T

def spectro_feat_batch(batch, sample_rate):
    """Apply `spectro_feat` to every clip along the leading axis of `batch`.

    `sample_rate` is shared by all clips and passed through unchanged.
    """
    def _one_clip(audio):
        return spectro_feat(audio, sample_rate)

    per_clip = jax.vmap(_one_clip)
    return per_clip(batch)

def plt_spectogram(batch, sample_rate, y_axis_type= "linear"):
    """Plot the spectrogram of every clip in `batch` as a grid of subplots.

    The grid has 3 columns and at least 3 rows (the original fixed 3x3
    layout); extra rows are added when the batch holds more than 9 clips,
    which previously raised an error from `plt.subplot`.

    Args:
        batch: Audio clips, one per row, as accepted by `spectro_feat_batch`.
        sample_rate: Sample rate of the clips, used for the axis scales.
        y_axis_type: Frequency-axis type forwarded to `librosa.display.specshow`.
    """
    N_COLS = 3
    # Must match the hop used by spectro_feat (256 samples by default);
    # specshow otherwise assumes 512 and the time axis is off by a factor of 2.
    HOP_SIZE = 256

    escala_Y = np.array(spectro_feat_batch(batch, sample_rate))

    n_plots = len(escala_Y)
    n_rows = max(3, -(-n_plots // N_COLS))  # ceil division, never fewer than 3 rows

    for (i, audio) in enumerate(escala_Y):
        plt.subplot(n_rows, N_COLS, i + 1)
        librosa.display.specshow(
            audio,
            sr = sample_rate,
            hop_length = HOP_SIZE,
            x_axis = "time",
            y_axis = y_axis_type,
        )
        plt.set_cmap("magma")
        plt.colorbar()

def write_spectrogram(audios, sample_rate, paths_write):
    """Render one spectrogram image per audio clip and save it to disk.

    Args:
        audios: Batch of audio clips, one per row, as accepted by
            `spectro_feat_batch`.
        sample_rate: Sample rate forwarded to the spectrogram and plot calls.
        paths_write: Output image path for each clip, in the same order as
            the rows of `audios`.

    Raises:
        ValueError: If the number of clips and the number of paths differ.
    """
    if len(audios) == 0 and len(paths_write) == 0:
        return  # nothing to render

    escala_Y = np.array(spectro_feat_batch(audios, sample_rate))

    if len(escala_Y) != len(paths_write):
        raise ValueError(
            f"got {len(escala_Y)} spectrograms but {len(paths_write)} output paths"
        )

    for (audio, path_write) in zip(escala_Y, paths_write):
        fig = plt.figure(figsize=(6,6))
        try:
            librosa.display.specshow(audio, sr = sample_rate)
            plt.set_cmap("magma")
            plt.savefig(path_write, dpi = 100, bbox_inches = "tight", pad_inches = 0)
        finally:
            # Always release the figure, even when plotting or saving fails;
            # otherwise figures pile up across the whole dataset run.
            plt.close(fig)
            gc.collect()

def write_spectrogram_from_genre(genre_path):
    """Write a spectrogram image for every .wav file of one genre folder.

    Images go to ``<two levels above genre_path>/images/<genre>``, e.g.
    ``.../Data/genres_original/blues`` -> ``.../Data/images/blues``. The
    output folder is created when missing. If it already contains images,
    nothing is written and a message asks for it to be emptied.

    Args:
        genre_path: Path of the genre folder; its last component is the
            genre name.
    """
    BATCH_SIZE = 2        # clips loaded and rendered per iteration
    REPORT_EVERY = 25     # print progress each time this many more files are done

    genre_path = genre_path.rstrip("/")  # a trailing slash would yield an empty genre name
    parts = genre_path.split("/")
    genre = parts[-1]
    genre_images_path = '/'.join(parts[:-2]) + "/images/" + genre

    files = dir_files_wav(genre_path)
    total_files_start = len(files)

    print(f"Writing on: {genre_images_path}")

    # is_dir_empty fails on a missing folder, so make sure it exists first.
    os.makedirs(genre_images_path, exist_ok=True)

    if not is_dir_empty(genre_images_path):
        print(f"Diretorio com imagens, porfavor esvazie: {genre}")
        return

    reported_steps = 0
    while files:
        remaining_before = len(files)
        try:
            audios, nums, sample_rate, files = create_batch(files, BATCH_SIZE, genre)

            if nums:  # skip rendering when no clip of this batch could be loaded
                paths_write = [genre_images_path + "/" + genre + "." + num + ".png" for num in nums]
                write_spectrogram(audios, sample_rate, paths_write)

        except Exception as e:
            print(f"Erro {e}")
            if files:  # the list may already be empty on the last batch
                print(f"Erro em {files[0]}")

        if len(files) >= remaining_before:
            # No file was consumed this round: drop the head of the list so a
            # file that can never be processed does not loop forever.
            stuck = files[:BATCH_SIZE]
            del files[:BATCH_SIZE]
            print(f"Ignorando arquivos sem progresso: {stuck}")

        done = total_files_start - len(files)
        if done // REPORT_EVERY > reported_steps:
            reported_steps = done // REPORT_EVERY
            print(f"Carregando imagens, {genre} : {(done/total_files_start)*100:.2f}%")

# Write the spectrogram images for every genre
def write_all_spectrograms(path_genres : str):
    """Run `write_spectrogram_from_genre` on each genre folder found under `path_genres`."""
    for one_genre_dir in dir_genres(path_genres):
        write_spectrogram_from_genre(one_genre_dir)


# Generate the spectrogram images for every genre folder of the GTZAN dataset.
genres_root = "/content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/genres_original"
write_all_spectrograms(genres_root)

Writing on: /content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/images/blues
Diretorio com imagens, porfavor esvazie: blues
Writing on: /content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/images/classical
Diretorio com imagens, porfavor esvazie: classical
Writing on: /content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/images/country
Diretorio com imagens, porfavor esvazie: country
Writing on: /content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/images/disco
Diretorio com imagens, porfavor esvazie: disco
Writing on: /content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/images/hiphop
Diretorio com imagens, porfavor esvazie: hiphop
Writing on: /content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/images/jazz
Diretorio com imagens, porfavor esvazie: jazz
Writing on: /content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/images/metal
Carregando imagens, metal : 8.33%
Carregando imagens, metal : 16.67%
Carregando imagens, metal : 25.00%


In [15]:
def spectro_audio(audio, fft_size = 1024, hop_size = 1290):
    """Compute a power spectrogram in decibels for a single 1-D audio signal.

    Frames of `fft_size` samples are taken every `hop_size` samples,
    multiplied by a Hamming window and transformed with a real FFT. The
    squared magnitude (power) is converted to decibels with
    ``10 * log10(power / 1e-10)``. Power is floored at ``1e-10`` first, so
    silent frames give 0 dB instead of ``-inf``.

    With the defaults the hop is larger than the frame, so the samples
    between consecutive frames are skipped. NOTE(review): the defaults look
    chosen so that a 661794-sample clip yields a square 513 x 513 output;
    confirm that this is the intent.

    The input length and the STFT shape are printed as a quick sanity check.

    Args:
        audio: 1-D array of samples; must hold at least `fft_size` samples.
        fft_size: Frame length and FFT size in samples.
        hop_size: Step between the starts of consecutive frames, in samples.

    Returns:
        Array of shape ``(fft_size // 2 + 1, num_frames)`` with values in dB.

    Raises:
        ValueError: If `audio` is shorter than one frame.
    """
    POWER_FLOOR = 1e-10  # also used as the dB reference, so the floor maps to 0 dB

    audio = jax.device_put(audio)
    if len(audio) < fft_size:
        raise ValueError(
            f"audio has {len(audio)} samples, need at least fft_size={fft_size}"
        )

    window = jnp.hamming(fft_size)
    num_frames = (len(audio) - fft_size) // hop_size + 1
    print(len(audio))

    def compute_fft(i):
        start = i * hop_size
        frame = jax.lax.dynamic_slice(audio, (start,), (fft_size,))
        return jnp.fft.rfft(frame * window, n = fft_size)

    stft = jax.vmap(compute_fft)(jnp.arange(num_frames))
    print(stft.shape)

    power = jnp.abs(stft) ** 2
    # Power quantities use 10*log10 (20*log10 is for amplitudes).
    dB_format = 10 * jnp.log10(jnp.maximum(power, POWER_FLOOR) / POWER_FLOOR)

    return dB_format.T

# Try spectro_audio on a single GTZAN clip.
sample_clip_path = "/content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/genres_original/blues/blues.00001.wav"
audio, sr = librosa.load(sample_clip_path)

audio_resp = spectro_audio(audio)

661794
(513, 513)


## Features in the time domain

## Features in the frequency domain