<a href="https://colab.research.google.com/github/DawidStepniewski/MusicGenreRecognition/blob/development/spectograms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Create standard, mel and MFCC spectrograms

In [None]:
# Mount Google Drive at /content/drive/ so the dataset and output folders
# referenced by the paths in this notebook are reachable from the runtime.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import os
import cv2
import numpy as np
import librosa
import librosa.display

## Create spectrograms and save them as image files

In [None]:
# Input: one sub-folder per genre, each holding the audio files to convert.
dataset_path = '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_original'
# Outputs: one root folder per spectrogram type. The part of the folder name
# before the first '_' ('standard', 'mel', 'mfcc') is parsed later in the
# notebook to select the spectrogram type, so keep that naming scheme.
standard_spectrograms_path = '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/spectrograms/standard_spectrograms'
mel_spectrograms_path = '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/spectrograms/mel_spectrograms'
mfcc_spectrograms_path = '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/spectrograms/mfcc_spectrograms'

## Create sub-directories for spectrograms

In [None]:
# Mirror the dataset's genre folders under each spectrogram output root.
# The genre list does not depend on the output root, so read it once and
# ignore stray non-directory entries in the dataset folder.
genre_names = [
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
]

for path in [standard_spectrograms_path, mel_spectrograms_path, mfcc_spectrograms_path]:
    # Create the root explicitly so it exists even when no genre is found.
    os.makedirs(path, exist_ok=True)

    for genre in genre_names:
        genre_path = os.path.join(path, genre)
        os.makedirs(genre_path, exist_ok=True)

## Generate spectrograms

In [None]:
def generate_spectrogram(file_path, save_path, spectrogram_type):
    """Render one audio file as a borderless spectrogram image.

    The audio is loaded with librosa, converted to the requested
    representation, drawn with ``librosa.display.specshow`` with the axes
    hidden, cropped to the bounding box of its non-white pixels and written
    to ``save_path`` with OpenCV.

    Args:
        file_path: Path of the audio file to load.
        save_path: Destination path of the cropped image.
        spectrogram_type: ``'standard'`` (dB-scaled STFT magnitude),
            ``'mel'`` (dB-scaled mel power spectrogram) or ``'mfcc'``.

    Raises:
        ValueError: If ``spectrogram_type`` is not one of the values above.
        IOError: If the rendered figure cannot be read back or the final
            image cannot be written.
    """
    # `plt` was used below without matplotlib ever being imported in the
    # notebook; bind it here so the function works on its own.
    import matplotlib.pyplot as plt

    HOP_LENGTH = 256
    N_MELS = 128
    temp_save_path = "temp_spectrogram.png"

    y, sr = librosa.load(file_path)

    if spectrogram_type == 'standard':
        # stft() returns complex values; take the magnitude explicitly
        # instead of letting amplitude_to_db discard the phase with a warning.
        # NOTE(review): the STFT uses librosa's default n_fft/hop_length
        # (an unused N_FFT = 1024 local was removed) - confirm that is intended.
        spectrogram = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    elif spectrogram_type == 'mel':
        mel_spec = librosa.feature.melspectrogram(
            y=y, sr=sr, hop_length=HOP_LENGTH, n_mels=N_MELS
        )
        # melspectrogram() yields a power spectrogram by default, so the
        # matching dB conversion is power_to_db, not amplitude_to_db.
        spectrogram = librosa.power_to_db(mel_spec, ref=np.max)
    elif spectrogram_type == 'mfcc':
        spectrogram = librosa.feature.mfcc(y=y, sr=sr)
    else:
        raise ValueError(
            f"Unsupported spectrogram type: {spectrogram_type!r} "
            "(expected 'standard', 'mel' or 'mfcc')"
        )

    fig = plt.figure(figsize=(6, 4))
    try:
        # NOTE(review): y_axis='mel' is applied to every spectrogram type;
        # kept as in the original so the image layout is unchanged.
        librosa.display.specshow(
            spectrogram, x_axis='time', y_axis='mel', sr=sr, hop_length=HOP_LENGTH
        )
        plt.axis('off')
        # Render to a temporary file; the final image is written after cropping.
        plt.savefig(temp_save_path, bbox_inches='tight', pad_inches=0)
        img = cv2.imread(temp_save_path, cv2.IMREAD_UNCHANGED)
    finally:
        # Always release the figure and the temporary file, even on failure.
        plt.close(fig)
        if os.path.exists(temp_save_path):
            os.remove(temp_save_path)

    if img is None:
        raise IOError(f"Could not read rendered spectrogram for {file_path}")

    # Crop away the white margin that remains around the plotted area.
    non_white_pixels = np.any(img[:, :, :3] < 255, axis=-1)
    coords = np.argwhere(non_white_pixels)  # (row, column) pairs
    if coords.size == 0:
        # Entirely white image: there is nothing to crop against.
        cropped_img = img
    else:
        row0, col0 = coords.min(axis=0)
        row1, col1 = coords.max(axis=0) + 1
        cropped_img = img[row0:row1, col0:col1]

    # imwrite reports failure through its return value instead of raising.
    if not cv2.imwrite(save_path, cropped_img):
        raise IOError(f"Could not write spectrogram image to {save_path}")

## Save spectrograms as .png

In [None]:
def save_spectrogram(path, skip_files=('jazz.00054.wav',)):
    """Generate a spectrogram image for every audio file in the dataset.

    The spectrogram type is taken from the part of the last component of
    ``path`` before the first underscore (e.g. ``mel_spectrograms`` ->
    ``mel``). Images are written to ``<path>/<genre>/<file stem>.png``.

    Args:
        path: Output root folder for one spectrogram type.
        skip_files: File names that must not be processed. The default keeps
            the original exclusion of ``jazz.00054.wav``.
            NOTE(review): presumably that file cannot be decoded - confirm.
    """
    spectrogram_type = os.path.basename(os.path.normpath(path)).split('_')[0]
    print(f"Current spectogram type: {spectrogram_type}")

    # Take the genres from the dataset itself rather than from the output
    # folder, so this does not depend on the output sub-folders existing.
    for genre in sorted(os.listdir(dataset_path)):
        genre_path = os.path.join(dataset_path, genre)
        if not os.path.isdir(genre_path):
            continue  # ignore stray files next to the genre folders
        print(f"Current genre: {genre}")

        output_dir = os.path.join(path, genre)
        os.makedirs(output_dir, exist_ok=True)

        for filename in sorted(os.listdir(genre_path)):
            if filename in skip_files:
                continue
            # splitext swaps only the real extension, unlike str.replace,
            # which would also rewrite '.wav' elsewhere in the name.
            stem, _ = os.path.splitext(filename)
            file_path = os.path.join(genre_path, filename)
            save_file_path = os.path.join(output_dir, stem + '.png')
            generate_spectrogram(file_path, save_file_path, spectrogram_type)

In [None]:
# Generate the images for each spectrogram type in turn; the type is derived
# from the name of each output folder.
for path in [standard_spectrograms_path, mel_spectrograms_path, mfcc_spectrograms_path]:
    save_spectrogram(path)