In [1]:
# Mount Google Drive at /content/drive/ so the Drive-hosted paths used below are reachable.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
import tensorflow
import os
import cv2
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import shutil
import soundfile as sf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, LeakyReLU, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from skimage.transform import resize
from keras.applications.resnet50 import preprocess_input

# Preprocess data

In [None]:
# Every GTZAN artefact lives under this single Drive folder; all paths derive from it.
gtzan_root = '/content/drive/MyDrive/GoogleCollab/Data/GTZAN'

# Source tracks and their per-genre train/test split.
dataset_path = f'{gtzan_root}/genres_original'
split_path = f'{gtzan_root}/genres_split'

# 1-second clips and the spectrogram images rendered from them.
one_second_path = f'{gtzan_root}/genres_1s'
one_second_spectrogram_path = f'{gtzan_root}/spectrograms_1s'

# 3-second clips and the spectrogram images rendered from them.
three_second_path = f'{gtzan_root}/genres_3s'
three_second_spectrogram_path = f'{gtzan_root}/spectrograms_3s'

In [None]:
# Processing through local storage is way faster than GDrive,
# so extract the clip archive from Drive onto the runtime's local disk.
!unzip '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s.zip' -d "/content"

[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
  inflating: /content/genres_3s/jazz/test/jazz.00000_clip2.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00000_clip3.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00000_clip4.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00000_clip5.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00000_clip6.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00000_clip7.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00000_clip8.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00000_clip9.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00001_clip0.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00001_clip1.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00001_clip2.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00001_clip3.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00001_clip4.wav  
  inflating: /content/genres_3s/jazz/test/jazz.00001_clip5.wav  
  inflating

## Split data into test and train subsets
Splitting whole tracks first means that clips cut from a single track never end up in both the train and the test subset (no data leakage between subsets).

In [None]:
# Build a per-genre train/test split of the source tracks:
#   <split_path>/<genre>/train/*.wav  and  <split_path>/<genre>/test/*.wav
os.makedirs(split_path, exist_ok=True)

# Only sub-directories are genres; stray files next to them would make
# os.listdir() below fail, so they are filtered out here.
genres = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)

for genre in genres:
  genre_path = os.path.join(dataset_path, genre)
  genre_output_path = os.path.join(split_path, genre)

  # os.listdir() returns entries in arbitrary order. Sorting first makes the
  # split depend only on the file names and the fixed random_state.
  tracks = sorted(track for track in os.listdir(genre_path) if track.endswith('.wav'))

  # train_test_split cannot split fewer than two samples.
  if len(tracks) < 2:
      print(f"Skipping genre '{genre}': found {len(tracks)} .wav track(s)")
      continue

  train_tracks, test_tracks = train_test_split(tracks, test_size=0.2, random_state=42)

  # Same copy routine for both subsets.
  for subset_name, subset_tracks in (("train", train_tracks), ("test", test_tracks)):
      subset_output_path = os.path.join(genre_output_path, subset_name)
      os.makedirs(subset_output_path, exist_ok=True)
      for track in subset_tracks:
          shutil.copy(os.path.join(genre_path, track), os.path.join(subset_output_path, track))


## Track segmentation

In [None]:
def split_audio(input_path, output_path, split_length_seconds, verbose=True):
    """Cut every .wav track of a ``<genre>/<train|test>/`` tree into fixed-length clips.

    The directory layout of `input_path` is mirrored under `output_path`; each
    track ``name.wav`` produces ``name_clip0.wav``, ``name_clip1.wav``, ...
    Trailing samples that do not fill a whole clip are dropped.

    Args:
        input_path: Root folder containing one sub-folder per genre, each with
            ``train`` and/or ``test`` sub-folders holding .wav files.
        output_path: Root folder the clips are written to (created if missing).
        split_length_seconds: Clip duration in seconds; must be positive.
        verbose: When True (default), print the path of every saved clip.

    Raises:
        ValueError: If `split_length_seconds` is not positive, or if it is so
            small that a clip would contain zero samples.
    """
    if split_length_seconds <= 0:
        raise ValueError("split_length_seconds must be positive")

    os.makedirs(output_path, exist_ok=True)

    for genre_dir in os.listdir(input_path):
        genre_dir_path = os.path.join(input_path, genre_dir)
        if not os.path.isdir(genre_dir_path):
            continue

        output_genre_dir_path = os.path.join(output_path, genre_dir)
        os.makedirs(output_genre_dir_path, exist_ok=True)

        # Loop through subdirectories
        for split_dir in os.listdir(genre_dir_path):
            split_dir_path = os.path.join(genre_dir_path, split_dir)
            # Anything that is not a train/test folder is skipped entirely, so
            # the file loop below never runs on a stray file or with an output
            # folder left over from a previous iteration.
            if split_dir not in ('train', 'test') or not os.path.isdir(split_dir_path):
                continue

            output_split_dir_path = os.path.join(output_genre_dir_path, split_dir)
            os.makedirs(output_split_dir_path, exist_ok=True)

            # Loop through audio files
            for file in os.listdir(split_dir_path):
                if not file.endswith('.wav'):
                    continue

                audio_path = os.path.join(split_dir_path, file)
                y, sr = sf.read(audio_path)

                # Number of samples in one clip
                clip_length = int(sr * split_length_seconds)
                if clip_length <= 0:
                    raise ValueError(
                        f"split_length_seconds={split_length_seconds} yields an empty clip "
                        f"at sample rate {sr}"
                    )

                file_stem = os.path.splitext(file)[0]

                # Only whole clips are written; the remainder is discarded
                for i in range(len(y) // clip_length):
                    clip = y[i * clip_length:(i + 1) * clip_length]
                    output_file_path = os.path.join(output_split_dir_path, f'{file_stem}_clip{i}.wav')
                    sf.write(output_file_path, clip, sr)
                    if verbose:
                        print(f"Saved {output_file_path}")

In [None]:
# Cut every track of the train/test split into 3-second clips, written to three_second_path.
split_audio(split_path, three_second_path, 3)

[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00015_clip5.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00015_clip6.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00015_clip7.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00015_clip8.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00015_clip9.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00024_clip0.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00024_clip1.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00024_clip2.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/rock.00024_clip3.wav
Saved /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/rock/test/ro

In [None]:
# Pack the 3-second clips stored on Drive into one archive on the runtime's local disk.
!zip -r /content/genres_3s.zip /content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s

[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
  adding: content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/hiphop/train/ (stored 0%)
  adding: content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/hiphop/train/hiphop.00061_clip0.wav (deflated 5%)
  adding: content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/hiphop/train/hiphop.00061_clip1.wav (deflated 5%)
  adding: content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/hiphop/train/hiphop.00061_clip2.wav (deflated 5%)
  adding: content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/hiphop/train/hiphop.00061_clip3.wav (deflated 4%)
  adding: content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/hiphop/train/hiphop.00061_clip4.wav (deflated 5%)
  adding: content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/hiphop/train/hiphop.00061_clip5.wav (deflated 5%)
  adding: content/drive/MyDrive/GoogleCollab/Data/GTZAN/genres_3s/hiphop/train/hiphop.00061_clip6.wav (deflated 5%)
  adding: content

# Generate MEL spectrograms

In [None]:
import cv2

# Target size handed to cv2.resize, which takes (width, height).
image_dimensions = (465, 308)

def crop_and_resize_white_area(image_path):
    """Crop the white border off an image and resize the result.

    Pixels whose grayscale value is above 240 are treated as white background.
    The image is cropped to the bounding box of *all* remaining pixels and then
    resized to `image_dimensions`.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The cropped and resized image as returned by ``cv2.resize``. If the
        image contains no non-white pixel, the whole image is resized.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {image_path}")

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Mask of the white background (gray value > 240), then its inverse:
    # non-zero exactly where the image is NOT white.
    _, binary = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
    inverted_binary = cv2.bitwise_not(binary)

    # Use every non-white pixel for the bounding box. Taking only the first
    # contour would crop to one arbitrary region when the content is split
    # into several connected components.
    non_white_points = cv2.findNonZero(inverted_binary)
    if non_white_points is None or len(non_white_points) == 0:
        # Nothing to crop away: fall back to the full frame.
        cropped_image = image
    else:
        x, y, w, h = cv2.boundingRect(non_white_points)
        cropped_image = image[y:y+h, x:x+w]

    return cv2.resize(cropped_image, image_dimensions)

In [None]:
def generate_spectrogram(file_path, save_path):
    """Render the mel spectrogram of an audio file and save it as an image.

    The spectrogram is plotted without axes, written to a temporary PNG,
    cropped and resized by ``crop_and_resize_white_area`` and finally written
    to `save_path`.

    Args:
        file_path: Audio file to load with ``librosa.load``.
        save_path: Destination image path.

    Returns:
        The result of ``cv2.imwrite`` for `save_path`.
    """
    import tempfile  # local import: only needed for the intermediate PNG

    HOP_LENGTH = 256
    N_FFT = 256
    N_MELS = 64
    y, sr = librosa.load(file_path)

    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS)
    # melspectrogram() is called with its default settings and therefore yields
    # a power spectrogram; power_to_db (10*log10) is the matching conversion.
    # amplitude_to_db (20*log10) would double every dB value.
    spectrogram = librosa.power_to_db(mel_spec, ref=np.max)

    # A unique temp file instead of a fixed name in the working directory, so
    # nothing is left behind and parallel runs cannot overwrite each other.
    temp_fd, temp_save_path = tempfile.mkstemp(suffix=".png")
    os.close(temp_fd)
    try:
        fig = plt.figure(figsize=(6, 3))
        try:
            librosa.display.specshow(spectrogram, x_axis='time', y_axis='mel', sr=sr, hop_length=HOP_LENGTH)
            plt.axis('off')
            fig.savefig(temp_save_path)
        finally:
            # Always release the figure, even if plotting fails; this function
            # is called once per clip and open figures accumulate in memory.
            plt.close(fig)

        cropped_img = crop_and_resize_white_area(temp_save_path)
    finally:
        os.remove(temp_save_path)

    return cv2.imwrite(save_path, cropped_img)

## Create spectrogram subdirectories based on audio files directory structure

In [None]:
def create_directories(audio_path, spectrogram_path):
    """Mirror the ``<genre>/<train|test>`` folder layout of `audio_path` under `spectrogram_path`.

    Only folders are created, no files are copied. Genre entries that are not
    directories, and sub-entries other than ``train``/``test`` directories,
    are ignored.
    """
    os.makedirs(spectrogram_path, exist_ok=True)

    for genre_name in os.listdir(audio_path):
        source_genre = os.path.join(audio_path, genre_name)
        if not os.path.isdir(source_genre):
            continue

        target_genre = os.path.join(spectrogram_path, genre_name)
        if not os.path.exists(target_genre):
            os.makedirs(target_genre)

        for subset_name in os.listdir(source_genre):
            # keep only the two known subsets, and only when they are folders
            if subset_name not in ('train', 'test'):
                continue
            if not os.path.isdir(os.path.join(source_genre, subset_name)):
                continue

            target_subset = os.path.join(target_genre, subset_name)
            if not os.path.exists(target_subset):
                os.makedirs(target_subset)

## Save spectrograms as .png

In [None]:
import os
import shutil

def save_spectrogram(audio_path, spectrogram_path):
    """Generate a spectrogram image for every .wav file below `audio_path`.

    `audio_path` is expected to contain ``<genre>/<subset>/<file>.wav``. Each
    file is rendered with ``generate_spectrogram`` to
    ``<spectrogram_path>/<genre>/<subset>/<file>.png``; the output folder is
    created when missing. Entries that are not directories and files that do
    not end in ``.wav`` are skipped.

    Args:
        audio_path: Root folder of the audio clips (any folder name works).
        spectrogram_path: Root folder the images are written to.
    """
    for genre in os.listdir(audio_path):
        genre_dir_path = os.path.join(audio_path, genre)
        if not os.path.isdir(genre_dir_path):
            continue

        print(f"Starting generation of genre: {genre}")
        for split_subset in os.listdir(genre_dir_path):
            split_dir_path = os.path.join(genre_dir_path, split_subset)
            if not os.path.isdir(split_dir_path):
                continue

            # Path of this subset relative to the audio root, e.g. "jazz/test".
            # Derived from the arguments rather than from a hard-coded folder
            # name, so any input root works.
            relative_dir = os.path.relpath(split_dir_path, audio_path)
            save_dir = os.path.join(spectrogram_path, relative_dir)
            # cv2.imwrite does not create folders; make sure the target exists.
            os.makedirs(save_dir, exist_ok=True)

            wav_filenames = [name for name in os.listdir(split_dir_path) if name.endswith('.wav')]
            for filename in wav_filenames:
                file_path = os.path.join(split_dir_path, filename)
                save_file_path = os.path.join(save_dir, os.path.splitext(filename)[0] + '.png')
                generate_spectrogram(file_path, save_file_path)
            print(f"Done generating: genre: {genre}, subset: {split_subset}, track count: {len(wav_filenames)}")

In [None]:
# Prepare the output folder tree on local disk before any image is written.
create_directories('/content/genres_3s', '/content/spectrograms_3s')

In [None]:
# Render one spectrogram image per 3-second clip (local disk in, local disk out).
save_spectrogram('/content/genres_3s', '/content/spectrograms_3s')

Starting generation of genre: disco
Done generating: genre: disco, subset: train, track count: 799
Done generating: genre: disco, subset: test, track count: 200
Starting generation of genre: metal
Done generating: genre: metal, subset: train, track count: 800
Done generating: genre: metal, subset: test, track count: 200
Starting generation of genre: rock
Done generating: genre: rock, subset: train, track count: 799
Done generating: genre: rock, subset: test, track count: 200
Starting generation of genre: country
Done generating: genre: country, subset: train, track count: 797
Done generating: genre: country, subset: test, track count: 200


## Zip and download due to Colab speed restrictions

In [None]:
# Archive the generated spectrograms. Entries are stored with a leading
# `content/` folder (see the listing below), so extracting this archive with
# `-d /content` yields /content/content/spectrograms_3s.
!zip -r /content/spectrograms_3s.zip /content/spectrograms_3s/

[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
updating: content/spectrograms_3s/pop/train/pop.00037_clip8.png (deflated 14%)
updating: content/spectrograms_3s/pop/train/pop.00049_clip0.png (deflated 11%)
updating: content/spectrograms_3s/pop/train/pop.00050_clip4.png (deflated 13%)
updating: content/spectrograms_3s/pop/train/pop.00031_clip0.png (deflated 16%)
updating: content/spectrograms_3s/pop/train/pop.00088_clip3.png (deflated 11%)
updating: content/spectrograms_3s/pop/train/pop.00000_clip6.png (deflated 9%)
updating: content/spectrograms_3s/pop/train/pop.00050_clip2.png (deflated 11%)
updating: content/spectrograms_3s/pop/train/pop.00098_clip2.png (deflated 14%)
updating: content/spectrograms_3s/pop/train/pop.00058_clip0.png (deflated 14%)
updating: content/spectrograms_3s/pop/train/pop.00078_clip1.png (deflated 9%)
updating: content/spectrograms_3s/pop/train/pop.00083_clip0.png (deflated 13%)
updating: content/spectrograms_3s/pop/train/pop.00046_

In [None]:
# Hand the spectrogram archive to the browser for download.
from google.colab import files
files.download("/content/spectrograms_3s.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Reshape directory trees and download

In [3]:
def copy_files_to_new_directories(main_directory, new_test_dir, new_train_dir):
    """Regroup a ``<genre>/<test|train>/<file>`` tree into ``<test|train>/<genre>/<file>``.

    For every genre folder inside `main_directory`, a folder of the same name
    is created under both `new_test_dir` and `new_train_dir`. Regular files
    from the genre's ``test`` and ``train`` folders are then copied (with
    ``shutil.copy2``) into the matching destination. Sub-folders inside
    ``test``/``train`` and non-directory entries of `main_directory` are ignored.
    """
    for genre_name in os.listdir(main_directory):
        genre_root = os.path.join(main_directory, genre_name)
        if not os.path.isdir(genre_root):
            continue

        # (source subset folder, destination genre folder): test first, then train
        transfers = (
            (os.path.join(genre_root, 'test'), os.path.join(new_test_dir, genre_name)),
            (os.path.join(genre_root, 'train'), os.path.join(new_train_dir, genre_name)),
        )

        # Both destination folders are created up front, even when the
        # corresponding source subset does not exist.
        for _, destination in transfers:
            if not os.path.exists(destination):
                os.makedirs(destination)

        for source, destination in transfers:
            if not os.path.isdir(source):
                continue
            for entry in os.listdir(source):
                candidate = os.path.join(source, entry)
                if os.path.isfile(candidate):
                    shutil.copy2(candidate, os.path.join(destination, entry))

In [4]:
# Extract the spectrogram archive from Drive to local disk. Its entries start
# with `content/`, so the images end up under /content/content/spectrograms_3s.
!unzip '/content/drive/MyDrive/GoogleCollab/Data/GTZAN/spectrograms_3s.zip' -d "/content"

[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
  inflating: /content/content/spectrograms_3s/pop/train/pop.00037_clip8.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00049_clip0.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00050_clip4.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00031_clip0.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00088_clip3.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00000_clip6.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00050_clip2.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00098_clip2.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00058_clip0.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00078_clip1.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.00083_clip0.png  
  inflating: /content/content/spectrograms_3s/pop/train/pop.000

In [6]:
# Location of the extracted spectrograms; the doubled `content` comes from the
# folder prefix stored inside the archive.
main_directory = '/content/content/spectrograms_3s'
# Destination roots for the regrouped <subset>/<genre>/ layout.
new_test_dir = '/content/test'
new_train_dir = '/content/train'

copy_files_to_new_directories(main_directory, new_test_dir, new_train_dir)

In [8]:
# One archive per subset, created on the runtime's local disk.
!zip -r /content/test.zip /content/test/
!zip -r /content/train.zip /content/train/

[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
updating: content/train/classical/classical.00013_clip8.png (deflated 15%)
updating: content/train/classical/classical.00009_clip8.png (deflated 10%)
updating: content/train/classical/classical.00082_clip0.png (deflated 8%)
updating: content/train/classical/classical.00096_clip9.png (deflated 11%)
updating: content/train/classical/classical.00029_clip0.png (deflated 14%)
updating: content/train/classical/classical.00061_clip6.png (deflated 8%)
updating: content/train/classical/classical.00059_clip8.png (deflated 9%)
updating: content/train/classical/classical.00002_clip6.png (deflated 14%)
updating: content/train/classical/classical.00022_clip0.png (deflated 10%)
updating: content/train/classical/classical.00098_clip3.png (deflated 9%)
updating: content/train/classical/classical.00082_clip3.png (deflated 11%)
updating: content/train/classical/classical.00007_clip0.png (deflated 13%)
updating: content/train/c

In [9]:
# Hand both subset archives to the browser for download.
from google.colab import files

files.download("/content/test.zip")
files.download("/content/train.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>