<a href="https://colab.research.google.com/github/Ayrsz/SignalAndSistemyProject/blob/main/FeatureExtract.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Scientific computation
import numpy as np
import jax
import jax.numpy as jnp

#Plot and view
from matplotlib import pyplot as plt
import IPython as ipy
from IPython import display
from IPython.display import Audio

#Data manipulation
import os
import gc

#Audio manipulation
import librosa
import soundfile as sf
import cv2 as cv


In [3]:
from zipfile import ZipFile

try:
    from google.colab import drive

    drive.mount("/content/drive", force_remount=True)
except Exception as e:
    print(e)
    print("Rodando localmente")

No module named 'google.colab'
Rodando localmente


In [4]:
# Unzip in a specific folder
path_root = "/home/mas11/Documents/Datasets"
path_zip = path_root + "/GTZAN.zip"
with ZipFile(path_zip, "r") as zip_archive:
    if not os.path.exists(path_root + "/Data"):
        print(f"Unzipando em : {path_root}")
        zip_archive.extractall(path_root)
        print("Unzipado!")
    else:
        print("Arquivo já unzipado, sem necessidade de ações.")

Arquivo já unzipado, sem necessidade de ações.


In [5]:
def dir_genres(path_all_genres : str) -> list[str]:
    return [path_all_genres + '/' + pasta for pasta in os.listdir(path_all_genres)]

#Return all files.wav separated per genres
def dir_files_wav(path_genre : str) -> list[str]:
  return [path_genre + '/' + arquivo for arquivo in os.listdir(path_genre)]


## Generate Escpectogram

In [6]:
def is_dir_empty(path : str) -> bool:
    with os.scandir(path) as iterator:
        for entry in iterator:
            if entry.name == '.ipynb_checkpoints':
                continue
            return False  # Se encontrar qualquer outro arquivo/pasta, não está vazio
        return True  # Se só havia .ipynb_checkpoints (ou nada), está vazio
def spectro_feat(audio : jnp.ndarray, sample_rate : int) -> jnp.ndarray:

    audio = jax.device_put(audio)
    FFT_SIZE = 512
    HOP_SIZE = 256

    hamming = jnp.hamming(FFT_SIZE)
    num_frames = (len(audio) - FFT_SIZE) // HOP_SIZE + 1

    def compute_fft(i):
        start = i * HOP_SIZE
        signal = jax.lax.dynamic_slice(audio, (start,), (FFT_SIZE,))
        signal = signal * hamming
        return jnp.fft.rfft(signal, n = FFT_SIZE)


    sfft = jax.vmap(compute_fft)(jnp.arange(num_frames))

    ssft = jnp.abs(sfft)**2 #Spectro
    dB_format =  20 * jnp.log10( (ssft+1e-10) / 1e-10) #Spectro
    return dB_format.T

def spectro_feat_batch(batch : jnp.ndarray, sample_rate : int) -> jnp.ndarray:
    return jax.vmap(lambda audio: spectro_feat(audio, sample_rate))(batch)

def plt_spectogram(batch : jnp.ndarray, sample_rate : int, y_axis_type= "linear"):
  escala_Y = spectro_feat_batch(batch, sample_rate)
  escala_Y = np.array(escala_Y)
  #fig = plt.figure(figsize=(10,10))
  #plt.subplot(3,3,1)
  for (i,audio) in enumerate(escala_Y):
    plt.subplot(3,3,i+1)
    librosa.display.specshow(audio, sr = sample_rate, x_axis = "time", y_axis = y_axis_type, )
    plt.set_cmap("magma")
    plt.colorbar()


def create_batch(files : list[str], size : int, genre : str, shape = 661794):
    batch_size = min(size, len(files))
    batch = jnp.zeros((batch_size, shape))  # Inicializa batch zerado

    nums = []
    new_files = []  # Lista para armazenar arquivos que foram processados corretamente

    for i in range(batch_size):
        try:

            audio, sample_rate = sf.read(files[i])  # Mantém o SR original

            if len(audio) < shape:
                # Preenche com zeros se o áudio for menor que o tamanho esperado
                audio = jnp.pad(audio, (0, shape - len(audio)))
            elif len(audio) > shape:
                # Se for maior, corta
                audio = audio[:shape]

            batch = batch.at[i].set(audio)

            num = files[i].split("/")[-1].split(".")[-2]

            nums.append(num)
            new_files.append(files[i])  # Adiciona à lista de arquivos processados

        except Exception as e:
            print(f"Erro ao processar {files[i]}: {e}")

    # Remove os arquivos processados da lista original
    for file in new_files:
        files.remove(file)
    batch = jax.device_put(batch)
    return batch, nums, sample_rate, files


def write_spectrogram(audios : jnp.ndarray, sample_rate: int, paths_write : list[str]):
  escala_Y = spectro_feat_batch(audios, sample_rate)

  del audios
  gc.collect()

  escala_Y = np.array(escala_Y)

  assert len(escala_Y) == len(paths_write)

  for (spec, path_write) in zip(escala_Y, paths_write):
    spec = 255*(spec - np.max(spec))/(np.max(spec) - np.min(spec))
    spec = spec.astype(np.uint8)
    spec = 255 - spec
    img = cv.applyColorMap(spec, cv.COLORMAP_INFERNO)

    img = cv.resize(img, dsize = (500, 500), interpolation=cv.INTER_AREA)
    cv.imwrite(path_write, img)


def write_spectrogram_from_genre(genre_path : str):
    files = dir_files_wav(genre_path)
    total_files_start = len(files)
    paths = np.array(genre_path.split("/"))
    genre = paths[-1]
    size_path = len(paths)
    genre_images_path = '/'.join(paths[0:size_path-2]) + "/images/" + genre
    batch_size = 3

    print(f"Writing on: {genre_images_path}")

    if is_dir_empty(genre_images_path):
        while(len(files) != 0):
            try:
                audios, nums, sample_rate, files = create_batch(files, batch_size, genre)

                paths_write = [genre_images_path + "/" + genre + "." + num + ".png" for num in nums]
                write_spectrogram(audios, sample_rate, paths_write)#, path_write)

                if(len(files) % 100 == 0):
                    print(f"Carregando imagens, {genre} : {((total_files_start - len(files))/total_files_start)*100:.2f}%")
                
            except Exception as e:
                print(f"Erro {e}")
                print(f"Erro em {files[0]}")
                

    else:
        print(f"Diretorio com imagens, porfavor esvazie: {genre}")

#Writting new images spectogram
def write_all_spectrograms(path_genres : str):
  genres_path = dir_genres(path_genres)

  for genre_path in genres_path:
    write_spectrogram_from_genre(genre_path)
    


write_all_spectrograms(path_root + "/Data/genres_original")

Writing on: /home/mas11/Documents/Datasets/Data/images/classical
Carregando imagens, classical : 50.00%
Carregando imagens, classical : 100.00%
Writing on: /home/mas11/Documents/Datasets/Data/images/country
Carregando imagens, country : 50.00%
Carregando imagens, country : 100.00%
Writing on: /home/mas11/Documents/Datasets/Data/images/metal
Carregando imagens, metal : 50.00%
Carregando imagens, metal : 100.00%
Writing on: /home/mas11/Documents/Datasets/Data/images/jazz
Carregando imagens, jazz : 49.49%
Carregando imagens, jazz : 100.00%
Writing on: /home/mas11/Documents/Datasets/Data/images/hiphop
Carregando imagens, hiphop : 50.00%
Carregando imagens, hiphop : 100.00%
Writing on: /home/mas11/Documents/Datasets/Data/images/blues
Carregando imagens, blues : 50.00%
Carregando imagens, blues : 100.00%
Writing on: /home/mas11/Documents/Datasets/Data/images/rock
Carregando imagens, rock : 50.00%
Carregando imagens, rock : 100.00%
Writing on: /home/mas11/Documents/Datasets/Data/images/pop
Ca

# Features on time domain

In [9]:
print(jax.devices())

[CudaDevice(id=0)]


In [10]:
audio_example, sr = librosa.load(path_root + "/Data/genres_original/metal/metal.00041.wav")

In [11]:
def round_float_decimal(number):
    if not isinstance(number, np.ndarray):
        return float(f"{number:.4f}")
    else:
        return np.array([float(f"{d:.4f}") for d in number])

def get_statistics_feature(audio : np.typing.NDArray, name) -> dict:
    mean = round_float_decimal(np.mean(audio))
    std = round_float_decimal(np.std(audio))
    max = round_float_decimal(np.max(audio))
    min = round_float_decimal(np.min(audio))
    quartis = round_float_decimal(np.quantile(audio, [0.25, 0.5, 0.75]))
    return {name + "_mean": mean, name + "_std":std,  name + "_max": max, name + "_min": min, name + "_25p":quartis[0], name + "_50p": quartis[1],  name+ "_75p": quartis[2]}

## Low level features

### Energy

In [12]:
def get_rms(audio, FRAME_SIZE = 1024):
    rms = librosa.feature.rms(y = audio, frame_length= FRAME_SIZE)
    feats = get_statistics_feature(rms, "rms")
    return feats
rms = get_rms(audio_example)
print(rms)


{'rms_mean': 0.0968, 'rms_std': 0.0227, 'rms_max': 0.1687, 'rms_min': 0.0383, 'rms_25p': np.float64(0.0797), 'rms_50p': np.float64(0.0946), 'rms_75p': np.float64(0.1082)}


### Zero-Cross-Rate

In [13]:
def get_zero_cross_rate(audio, FRAME_SIZE = 1024):
    zero_cross = librosa.feature.zero_crossing_rate(audio, frame_length= FRAME_SIZE)
    feats = get_statistics_feature(zero_cross, "zero_cross")
    return feats


rate = get_zero_cross_rate(audio_example)


### Amplitude Envelope

In [14]:
def get_amplitude_envelope(audio, sample_rate, frame_size = 1024):
    AEm = []
    number_of_frames = len(audio) // frame_size
    for i in range(number_of_frames):
        start = i * frame_size
        stop = start + frame_size
        max_amp = np.max(audio[start:stop])
        AEm.append(max_amp)
    AEm = np.array(AEm)

    feats = get_statistics_feature(AEm, "amplitude_envelope")
    return feats




audio, sr = librosa.load("/content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/genres_original/blues/blues.00001.wav")
rms_info = get_amplitude_envelope(audio_example, sr)
print(rms_info)

  audio, sr = librosa.load("/content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/genres_original/blues/blues.00001.wav")
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Datasets/DatasetAudios/GTZAN/Data/genres_original/blues/blues.00001.wav'

## Medium level Features

### Kurtosis

In [18]:
#Analise do formato da curva da distribuição dos valores do sinal
# Kurtosis -> O quão intenso é o pico em comparação ao resto
# K = E([SINAL - MEDIA]^4)/DESVIO_PADRÃO ^4
# K < 3 -> Mais suave que uma distribuição gaussiana
# K = 3 -> Parecido com uma distribuição gaussiana
# K > 3 -> Mais "pontudo" que a distribuicação gaussiana
def get_kurtosis(audio):
    audio_normalizado = (audio - np.mean(audio))/np.max(audio)

    mean = np.mean(audio_normalizado)
    std = np.std(audio_normalizado)
    dif_audio_media = audio - mean

    valor_esperado = np.mean(dif_audio_media**4)

    kurtosis = np.float64(valor_esperado/std**4)
    return round_float_decimal(kurtosis)

print(get_kurtosis(audio_example))

0.8641


## High level features

### Tempo

In [19]:
def get_tempo(audio, sample_rate):
    tempo, beats = librosa.beat.beat_track(y = audio, sr = sample_rate)
    tempo = tempo #É um vetor de apenas um unico valor
    return np.int32(tempo)


print(get_tempo(audio_example, sr))

[117]


# Features on frequency domain

### Low level feature

In [21]:
#Gera um "centro de massa"
#Média das frequencias, ponderadas pela amplitude daquela sample para cada frame (usa sft)
def get_spectral_centroid(audio, sample_rate):
    spectral_centroid = librosa.feature.spectral_centroid(y = audio, sr = sample_rate)
    feats = get_statistics_feature(spectral_centroid, "spectral_centroid")
    return feats

centroid = get_spectral_centroid(audio_example, sr)
print(centroid)

{'spectral_centroid_mean': 2269.1619, 'spectral_centroid_std': 368.4678, 'spectral_centroid_max': 3496.8095, 'spectral_centroid_min': 1306.5515, 'spectral_centroid_25p': np.float64(2005.6192), 'spectral_centroid_50p': np.float64(2233.7791), 'spectral_centroid_75p': np.float64(2532.513)}


### High level feature

In [22]:
def get_mel_coef(audio, sample_rate):
    mfcc = librosa.feature.mfcc(y = audio, sr = sample_rate, n_mfcc = 13)
    coefs_estatistics = {}
    for (i,coefs) in enumerate(mfcc):
        coefs_estatistics["mfcc#" + f"{i:02d}_mean"] = round_float_decimal(np.mean(coefs))
        coefs_estatistics["mfcc#" + f"{i:02d}_std"] = round_float_decimal(np.std(coefs))


    return coefs_estatistics

mfcc = get_mel_coef(audio_example, sr)
print(mfcc)

{'mfcc#00_mean': -100.4234, 'mfcc#00_std': 36.4089, 'mfcc#01_mean': 104.6932, 'mfcc#01_std': 19.8516, 'mfcc#02_mean': -57.254, 'mfcc#02_std': 18.1442, 'mfcc#03_mean': 56.5769, 'mfcc#03_std': 10.628, 'mfcc#04_mean': -5.5556, 'mfcc#04_std': 9.6729, 'mfcc#05_mean': 22.8333, 'mfcc#05_std': 10.0632, 'mfcc#06_mean': -6.1715, 'mfcc#06_std': 10.5025, 'mfcc#07_mean': 23.0971, 'mfcc#07_std': 8.8752, 'mfcc#08_mean': -19.072, 'mfcc#08_std': 7.0803, 'mfcc#09_mean': 17.7954, 'mfcc#09_std': 7.2162, 'mfcc#10_mean': -13.783, 'mfcc#10_std': 6.9338, 'mfcc#11_mean': 9.9628, 'mfcc#11_std': 7.0728, 'mfcc#12_mean': -14.8587, 'mfcc#12_std': 6.9912}


# Create CSV

In [None]:
import csv



def get_features_dict(audio, sample_rate, name, dir_image):
    features_dict = {"name": name + ".wav", "dir_image": dir_image}

    rms = (get_rms(audio, sample_rate))
    zero_cross = (get_zero_cross_rate(audio))
    amplitude_env = (get_amplitude_envelope(audio, sample_rate))
    spectral_centroid = (get_spectral_centroid(audio, sample_rate))
    mfcc_coef = (get_mel_coef(audio, sample_rate))

    features_dict = features_dict | rms | zero_cross | amplitude_env | spectral_centroid | mfcc_coef

    kurtosis = get_kurtosis(audio)
    tempo = get_tempo(audio, sample_rate)
    label = name.split(".")[0]

    features_dict.update({"tempo":tempo, "kurtosis":kurtosis, "label":label})

    return features_dict



def write_csv_genre(genre_path, path_write):
    files = dir_files_wav(genre_path)
    features_vector = []

   

    for (i, file) in enumerate(files):
        path = file.split("/")
        name = path[-1].replace(".wav", "")
        genre = name.split(".")[0]
        idx_data = [i for i in range(len(path)) if path[i] == "Data" ][0]
        dir_image = "/".join(path[:idx_data]) + "/Data/images/" + genre + "/" + name + ".png"
        audio, sample_rate = librosa.load(file)
        features_dict = get_features_dict(audio, sample_rate, name, dir_image)
        features_vector.append(features_dict)
            
    return features_vector


    


def write_csv(root_dir, path_write):
    genres_dir = dir_genres(root_dir)
    columns = ['name', 'dir_image', 'rms_mean', 'rms_std', 'rms_max', 'rms_min', 'rms_25p', 'rms_50p', 'rms_75p', 
               'zero_cross_mean', 'zero_cross_std', 'zero_cross_max', 'zero_cross_min', 'zero_cross_25p', 'zero_cross_50p', 'zero_cross_75p', 
               'amplitude_envelope_mean', 'amplitude_envelope_std', 'amplitude_envelope_max', 'amplitude_envelope_min', 'amplitude_envelope_25p',
               'amplitude_envelope_50p', 'amplitude_envelope_75p', 'spectral_centroid_mean', 'spectral_centroid_std', 'spectral_centroid_max', 
               'spectral_centroid_min', 'spectral_centroid_25p', 'spectral_centroid_50p', 'spectral_centroid_75p', 'mfcc#00_mean', 'mfcc#00_std', 
               'mfcc#01_mean', 'mfcc#01_std', 'mfcc#02_mean', 'mfcc#02_std', 'mfcc#03_mean', 'mfcc#03_std', 'mfcc#04_mean', 'mfcc#04_std', 'mfcc#05_mean', 
               'mfcc#05_std', 'mfcc#06_mean', 'mfcc#06_std', 'mfcc#07_mean', 'mfcc#07_std', 'mfcc#08_mean', 'mfcc#08_std', 'mfcc#09_mean', 'mfcc#09_std', 
               'mfcc#10_mean', 'mfcc#10_std', 'mfcc#11_mean', 'mfcc#11_std', 'mfcc#12_mean', 'mfcc#12_std', 'tempo', 'kurtosis', 'label']
    
    with open(path_write + "/feats.csv", "w") as f:
        writer = csv.DictWriter(f, columns)
        writer.writeheader()
        for genre in genres_dir:
            print("Acessing: ", genre)
            lines = write_csv_genre(genre, path_write)
            writer.writerows(lines)

        f.close()




write_csv(path_root + "/Data/genres_original",  path_root + "/Data")

Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/classical
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/country
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/metal
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/jazz
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/hiphop
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/blues
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/rock
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/pop
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/reggae
Acessing:  /home/mas11/Documents/Datasets/Data/genres_original/disco
