In [None]:
# Mount Google Drive into the Colab filesystem; the dataset paths used below
# all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import tensorflow
import os
import cv2
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import shutil
import soundfile as sf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, LeakyReLU, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from skimage.transform import resize
from keras.applications.resnet50 import preprocess_input

In [None]:
# Google Drive locations of the GTZAN data and of the artefacts derived from it.
# Original tracks, one sub-folder per genre.
dataset_path = '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_original'
# Copy of the tracks arranged as <genre>/<train|test>.
split_path = '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_split'
# 1-second clips cut from the split tracks.
one_second_path = '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_1s'
# NOTE(review): presumably the Drive folder for spectrograms of the 1-second clips;
# it is not referenced by the cells visible here - confirm before relying on it.
one_second_spectrogram_path ='/content/drive/MyDrive/GoogleCollab/Data/GTZAN/spectrograms_1s'

In [1]:
# Processing from local storage is much faster than reading from Google Drive, so unpack the clips into /content first
!unzip '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_1s.zip' -d "/content"

unzip:  cannot find or open /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_1s.zip, /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_1s.zip.zip or /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_1s.zip.ZIP.


# Preprocess data

## Split data into test and train subsets
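Splitting whole tracks first means that, once the tracks are segmented into clips, all clips from one track stay in the same subset, so no track leaks between train and test.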

In [None]:
# Copy every genre's tracks into <split_path>/<genre>/train and .../test (80/20).
# The split is done on whole tracks so that later segmentation cannot place
# clips of one track in both subsets.
os.makedirs(split_path, exist_ok=True)

# Only directories are genres; stray files in the dataset folder are ignored
# instead of crashing os.listdir(genre_path) further down.
genres = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)

for genre in genres:
    genre_path = os.path.join(dataset_path, genre)

    genre_output_path = os.path.join(split_path, genre)
    os.makedirs(genre_output_path, exist_ok=True)

    # os.listdir returns entries in arbitrary order. Sorting makes the seeded
    # split below pick the same tracks on every run; otherwise a re-run could
    # copy a track into the other subset as well and leak it across the split.
    tracks = sorted(track for track in os.listdir(genre_path) if track.endswith('.wav'))
    if not tracks:
        # train_test_split raises on an empty list; nothing to copy anyway.
        print(f"Skipping {genre}: no .wav tracks found")
        continue

    train_tracks, test_tracks = train_test_split(tracks, test_size=0.2, random_state=42)

    train_output_path = os.path.join(genre_output_path, "train")
    os.makedirs(train_output_path, exist_ok=True)
    for track in train_tracks:
        shutil.copy(os.path.join(genre_path, track), os.path.join(train_output_path, track))

    test_output_path = os.path.join(genre_output_path, "test")
    os.makedirs(test_output_path, exist_ok=True)
    for track in test_tracks:
        shutil.copy(os.path.join(genre_path, track), os.path.join(test_output_path, track))


## Track segmentation

In [None]:
def split_audio(input_path, output_path):
    """Cut every track below ``input_path`` into consecutive 1-second clips.

    Reads ``input_path/<genre>/<train|test>/<track>.wav`` and writes
    ``output_path/<genre>/<train|test>/<track>_clip<i>.wav``. A trailing
    remainder shorter than one second is dropped.

    Args:
        input_path: Root folder holding one sub-folder per genre, each with
            ``train`` and/or ``test`` sub-folders of ``.wav`` files.
        output_path: Root folder for the clips; created when missing.
    """
    os.makedirs(output_path, exist_ok=True)

    for genre_dir in os.listdir(input_path):
        genre_dir_path = os.path.join(input_path, genre_dir)
        if not os.path.isdir(genre_dir_path):
            continue

        output_genre_dir_path = os.path.join(output_path, genre_dir)
        os.makedirs(output_genre_dir_path, exist_ok=True)

        for split_dir in os.listdir(genre_dir_path):
            split_dir_path = os.path.join(genre_dir_path, split_dir)
            # Only the train/test folders are processed. The file loop used to
            # sit outside this check, so any other entry either crashed
            # os.listdir or wrote clips into the previous subset's folder.
            if split_dir not in ('train', 'test') or not os.path.isdir(split_dir_path):
                continue

            output_split_dir_path = os.path.join(output_genre_dir_path, split_dir)
            os.makedirs(output_split_dir_path, exist_ok=True)

            for file in os.listdir(split_dir_path):
                if not file.endswith('.wav'):
                    continue

                audio_path = os.path.join(split_dir_path, file)
                y, sr = sf.read(audio_path)

                # One second of audio is exactly `sr` samples.
                clip_length = sr
                clip_stem = os.path.splitext(file)[0]

                for i in range(len(y) // clip_length):
                    clip = y[i * clip_length:(i + 1) * clip_length]
                    output_file_path = os.path.join(output_split_dir_path, f'{clip_stem}_clip{i}.wav')
                    sf.write(output_file_path, clip, sr)
                    print(f"Saved {output_file_path}")

In [None]:
# Cut every track of the train/test split into 1-second clips stored under one_second_path.
split_audio(split_path, one_second_path)

## Generate MEL spectrograms

In [None]:
def generate_spectrogram(file_path, save_path):
    """Render the mel spectrogram of one audio file as a tightly cropped PNG.

    The audio is loaded with librosa, converted to a mel power spectrogram in
    dB (relative to its maximum), drawn without axes and written to
    ``save_path`` after any all-white border has been cropped away.

    The function is called from worker threads, so it avoids shared state: the
    figure is created without pyplot's global "current figure" and the
    intermediate PNG is kept in memory instead of in a fixed-name temp file.

    Note that two corrections change the rendered pixels compared with earlier
    output: ``N_FFT`` is now actually passed to the mel transform, and the dB
    conversion uses ``power_to_db`` because ``melspectrogram`` returns power.

    Args:
        file_path: Path of the audio file to read.
        save_path: Path of the PNG to write; its directory must already exist.

    Raises:
        OSError: If the rendered image cannot be decoded or written.
    """
    import io
    from matplotlib.figure import Figure

    HOP_LENGTH = 1024
    N_FFT = 4096
    N_MELS = 384
    y, sr = librosa.load(file_path)

    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS
    )
    # melspectrogram yields a power spectrogram by default, hence power_to_db.
    spectrogram = librosa.power_to_db(mel_spec, ref=np.max)

    # A stand-alone Figure is not registered with pyplot, so concurrent calls
    # cannot draw into or close each other's figure.
    fig = Figure(figsize=(6, 4))
    ax = fig.add_subplot(1, 1, 1)
    librosa.display.specshow(
        spectrogram, x_axis='time', y_axis='mel', sr=sr, hop_length=HOP_LENGTH, ax=ax
    )
    ax.axis('off')

    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format='png', bbox_inches='tight', pad_inches=0)

    img = cv2.imdecode(np.frombuffer(png_buffer.getvalue(), dtype=np.uint8), cv2.IMREAD_UNCHANGED)
    if img is None:
        raise OSError(f"Could not decode the rendered spectrogram for {file_path}")

    # Bounding box of every pixel that is not pure white in the first three channels.
    non_white_pixels = np.any(img[:, :, :3] < 255, axis=-1)
    coords = np.argwhere(non_white_pixels)
    if coords.size == 0:
        # Entirely white image: there is nothing to crop against.
        cropped_img = img
    else:
        row0, col0 = coords.min(axis=0)
        row1, col1 = coords.max(axis=0) + 1
        cropped_img = img[row0:row1, col0:col1]

    # cv2.imwrite reports failure through its return value instead of raising.
    if not cv2.imwrite(save_path, cropped_img):
        raise OSError(f"Could not write spectrogram to {save_path}")

## Create spectrogram subdirectories based on audio files directory structure

In [None]:
def create_directories(audio_path, spectrogram_path):
    """Mirror the ``<genre>/<train|test>`` folder layout of ``audio_path``.

    Creates ``spectrogram_path`` plus one folder per genre directory found in
    ``audio_path`` and, inside it, a ``train`` and/or ``test`` folder for each
    of those that exists as a directory in the source genre. No files are
    copied.
    """
    def ensure_folder(path):
        # Create the folder only when nothing exists at that path yet.
        if not os.path.exists(path):
            os.makedirs(path)

    os.makedirs(spectrogram_path, exist_ok=True)

    for genre_name in os.listdir(audio_path):
        genre_source = os.path.join(audio_path, genre_name)
        if not os.path.isdir(genre_source):
            continue

        genre_target = os.path.join(spectrogram_path, genre_name)
        ensure_folder(genre_target)

        for subset_name in os.listdir(genre_source):
            subset_source = os.path.join(genre_source, subset_name)
            if not os.path.isdir(subset_source):
                continue
            if subset_name not in ['train', 'test']:
                continue
            ensure_folder(os.path.join(genre_target, subset_name))

## Save spectrograms as .png

In [None]:
import os
from concurrent.futures import ThreadPoolExecutor

def save_spectrogram(audio_path, spectrogram_path):
    """Render a PNG spectrogram for every ``.wav`` clip below ``audio_path``.

    Reads ``audio_path/<genre>/<subset>/<clip>.wav`` and writes
    ``spectrogram_path/<genre>/<subset>/<clip>.png`` through
    ``generate_spectrogram``. Genres are processed in parallel, one worker
    thread per genre at a time.

    Args:
        audio_path: Root folder with one sub-folder per genre.
        spectrogram_path: Root folder for the PNGs; the genre/subset folders
            are created when missing.

    Raises:
        Exception: Whatever a worker raised while processing a genre; the
            error is re-raised here instead of being silently discarded.
    """
    def process_genre(genre):
        genre_dir_path = os.path.join(audio_path, genre)
        if not os.path.isdir(genre_dir_path):
            return

        for split_subset in os.listdir(genre_dir_path):
            split_dir_path = os.path.join(genre_dir_path, split_subset)
            if not os.path.isdir(split_dir_path):
                continue

            # Build the target from the path components rather than by
            # splitting on a hard-coded folder name, so any input root works.
            save_dir_path = os.path.join(spectrogram_path, genre, split_subset)
            os.makedirs(save_dir_path, exist_ok=True)

            clip_names = [name for name in os.listdir(split_dir_path) if name.endswith('.wav')]
            for filename in clip_names:
                file_path = os.path.join(split_dir_path, filename)
                save_file_path = os.path.join(save_dir_path, os.path.splitext(filename)[0] + '.png')
                generate_spectrogram(file_path, save_file_path)

            print(f"Current genre: {genre}, current subset: {split_subset}, track count: {len(clip_names)}")

    # Use multi-threading to speed up processing.
    with ThreadPoolExecutor() as executor:
        # executor.map only re-raises a worker's exception when its result is
        # consumed, so drain the iterator to make failures visible.
        for _ in executor.map(process_genre, os.listdir(audio_path)):
            pass

In [None]:
# Recreate the <genre>/<train|test> folder layout of the local clips for the spectrogram output.
create_directories('/content/genres_1s', '/content/spectrograms_1s')

In [None]:
# Render one PNG spectrogram per local 1-second clip under /content/spectrograms_1s.
save_spectrogram('/content/genres_1s', '/content/spectrograms_1s')

Current genre: rock, current subset: train, track count: 2399
Current genre: rock, current subset: test, track count: 600


## Zip and download due to Colab speed restrictions

In [None]:
!zip -r /content/spectrograms_1s_rock.zip /content/spectrograms_1s/

In [None]:
# Trigger a browser download of the spectrogram archive from the Colab runtime.
from google.colab import files
files.download("/content/spectrograms_1s_rock.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>