# Transformación de audio a espectrograma

Este notebook tiene como objetivo transformar los archivos de audio del dataset GTZAN Genre Collection en espectrogramas Mel de 3 segundos.

## Librerías necesarias

In [1]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
from glob import glob
from PIL import Image
import random

## Parámetros

In [None]:
# Audio segmentation constants
SAMPLE_RATE = 22_050        # sampling rate (Hz) requested from librosa.load
SEGMENT_DURATION = 3        # segment duration in seconds; times SAMPLE_RATE gives samples per segment
NUMBER_OF_SEGMENTS = 10     # number of consecutive segments attempted per audio file

### Configuración de carpetas

In [None]:
# Root folder scanned for one sub-folder per genre, each holding the .wav files
AUDIO_INPUT_FOLDER = "Data/genresWav"
# Root folder under which spectrogram PNGs are written as <split>/<genre>/<name>.png
SPECTROGRAM_OUTPUT_FOLDER = "Data/Spectrograms"

Función para crear espectrogramas Mel. Parámetros:

- Sample rate: 22050 Hz
- n_mels: 128
- Duración del segmento: 3 segundos


In [None]:
def generate_mel_spectrogram(audio_segment, sr, n_mels=128):
    """Compute the mel spectrogram of an audio segment on a decibel scale.

    Args:
        audio_segment: Audio samples, forwarded to librosa as ``y``.
        sr: Sampling rate of ``audio_segment``.
        n_mels: Number of mel bands. Defaults to 128, the value that was
            previously hard-coded, so existing two-argument calls are unchanged.

    Returns:
        The mel power spectrogram converted to dB by ``librosa.power_to_db``
        with ``ref=np.max``, i.e. relative to the maximum of this segment's
        own spectrogram.
    """
    mel_power = librosa.feature.melspectrogram(y=audio_segment, sr=sr, n_mels=n_mels)
    return librosa.power_to_db(mel_power, ref=np.max)

In [None]:
def _split_train_val_test(files, rng):
    """Shuffle a copy of `files` with `rng` and split it 60/20/20 into train/val/test."""
    shuffled = list(files)
    rng.shuffle(shuffled)
    train_end = int(0.6 * len(shuffled))
    val_end = int(0.8 * len(shuffled))
    return {
        'train': shuffled[:train_end],
        'val': shuffled[train_end:val_end],
        'test': shuffled[val_end:]
    }


def process_audio_files(seed=42):
    """Convert every genre's .wav files into per-segment mel spectrogram PNGs.

    For each sub-folder (genre) of AUDIO_INPUT_FOLDER, the .wav files are
    split 60/20/20 into train/val/test. Each file is then cut into up to
    NUMBER_OF_SEGMENTS consecutive segments of SEGMENT_DURATION * SAMPLE_RATE
    samples, and one PNG per full-length segment is written to
    SPECTROGRAM_OUTPUT_FOLDER/<split>/<genre>/<file>_seg_<k>.png.

    The split is made per file (before segmenting), so all segments of one
    track land in the same split.

    Args:
        seed: Seed for the private random generator that shuffles the files.
            With a fixed seed and sorted file lists, re-running the cell
            assigns every track to the same split, so a second run overwrites
            the same PNGs instead of leaving copies of one track in several
            split folders. Pass None for a different split on every call.

    Returns:
        None. Progress and errors are printed; a failure on one file is
        reported and the remaining files are still processed.
    """
    try:
        segment_length = int(SEGMENT_DURATION * SAMPLE_RATE)
        total_images = 0
        # Private generator: does not touch (or depend on) the global random state.
        rng = random.Random(seed)

        # Sorted so the order in which the generator is consumed does not
        # depend on the file system's listing order.
        genres = sorted(
            d for d in os.listdir(AUDIO_INPUT_FOLDER)
            if os.path.isdir(os.path.join(AUDIO_INPUT_FOLDER, d))
        )

        for genre in genres:

            # Collect this genre's audio files in a stable order
            genre_path = os.path.join(AUDIO_INPUT_FOLDER, genre)
            audio_files = sorted(glob(os.path.join(genre_path, '*.wav')))

            if not audio_files:
                continue

            print(f"\nGenre '{genre}': {len(audio_files)} files found")

            # Shuffle the files and split them into train / validation / test sets
            sets = _split_train_val_test(audio_files, rng)

            for split, files in sets.items():
                print(f"  {split}: {len(files)} files")

                for path in files:
                    try:
                        # Load the audio (resampled to SAMPLE_RATE) and prepare the output folder
                        audio, sr = librosa.load(path, sr=SAMPLE_RATE)
                        file_stem = os.path.splitext(os.path.basename(path))[0]
                        output_dir = os.path.join(SPECTROGRAM_OUTPUT_FOLDER, split, genre)
                        os.makedirs(output_dir, exist_ok=True)

                        # One spectrogram per full-length segment
                        for i in range(NUMBER_OF_SEGMENTS):
                            start = i * segment_length
                            end = start + segment_length
                            segment = audio[start:end]

                            if len(segment) != segment_length:
                                # The audio ran out: every later slice would be
                                # empty, so there is nothing left to save.
                                break

                            spec = generate_mel_spectrogram(segment, sr)

                            # Save the spectrogram as an image.
                            # NOTE(review): np.flipud and origin='lower' both reverse
                            # the row order, so they appear to cancel out and mel
                            # bin 0 ends up on the top row — confirm this orientation
                            # is intended before changing it (existing images and
                            # any model trained on them use it).
                            img_path = os.path.join(output_dir, f"{file_stem}_seg_{i+1}.png")
                            plt.imsave(img_path, np.flipud(spec), cmap='viridis', origin='lower')
                            total_images += 1

                    except Exception as e:
                        # Best effort: report the failing file and keep going.
                        print(f"Error in {path}: {e}")

        print(f"\nDone! Total spectrograms saved: {total_images}")

    except Exception as e:
        # Failures outside the per-file loop (e.g. listing the input folder)
        # are reported instead of raised.
        print(f"Error: {e}")

process_audio_files()


Genre 'pop': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'rock': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'disco': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'blues': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'country': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'hiphop': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'metal': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'reggae': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'jazz': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

Genre 'classical': 100 files found
  TRAIN: 60 files
  VAL: 20 files
  TEST: 20 files

✅ Done! Total spectrograms saved: 9991
