<a href="https://colab.research.google.com/github/BrMrtn/GoogleColab/blob/main/Saving_spectrograms_to_disk_not_final_v0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os

# Mount Google Drive under /content/drive (the google.colab module is provided by the Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

# Base folder on the mounted Drive; the audio and spectrogram folders used below are sub-folders of it
path = '/content/drive/MyDrive/DeepLearning'

Mounted at /content/drive


We could randomly choose 5-second-long parts in every iteration (epoch), but that'd most likely skip most of the important data from the recordings.

Function for converting the input audio to Fourier-transformed spectrograms, then padding/clipping its first dimension to 2048

In [None]:
def save_spectrogram_to_disk(path_to_audio, path_to_spectrogram_ogg, sample_duration_sec=5.0, hop_length_sec=2.5):
  """Cut an audio file into fixed-length windows and save each one as a mel-spectrogram PNG.

  The recording is loaded with librosa, zero-padded if it is shorter than one
  window, and then sliced into windows of ``sample_duration_sec`` seconds whose
  starts are ``hop_length_sec`` seconds apart. Only full-length windows are
  converted; a shorter remainder at the end of the recording is dropped.

  Args:
    path_to_audio: Path of the audio file to read.
    path_to_spectrogram_ogg: Output path template. Its file extension is
      replaced by ``_<window index>.png`` for every saved window.
    sample_duration_sec: Length of one window, in seconds.
    hop_length_sec: Distance between the starts of consecutive windows, in seconds.

  Raises:
    ValueError: If the window length or the hop is shorter than one audio frame.
  """
  audio_data, audio_rate = librosa.load(path_to_audio)

  sample_duration_frames = int(sample_duration_sec * audio_rate)
  hop_length_frames = int(hop_length_sec * audio_rate)
  if sample_duration_frames <= 0 or hop_length_frames <= 0:
    raise ValueError("sample_duration_sec and hop_length_sec must each cover at least one audio frame")

  n_fft = 2048
  hop_length = 512
  n_mels = 250
  # win_length is automatically equal to n_fft

  # Pad audio with zeros if smaller than sample_duration_sec
  if len(audio_data) < sample_duration_frames:
    audio_data = librosa.util.fix_length(audio_data, size=sample_duration_frames)

  # Strip only the real file extension; splitting on the first '.' would truncate
  # the path whenever a directory or file name contains another dot.
  output_base = os.path.splitext(path_to_spectrogram_ogg)[0]

  # Start offsets of every window that fits completely into the audio. The "+ 1"
  # keeps a window that ends exactly on the last frame (this includes the single
  # window of a zero-padded short recording). A shorter remainder at the end is
  # dropped: the end of the audio containing crucial data is highly unlikely.
  window_starts = range(0, len(audio_data) - sample_duration_frames + 1, hop_length_frames)

  for i, sample_start in tqdm(enumerate(window_starts), total=len(window_starts)):
    sample = audio_data[sample_start:sample_start + sample_duration_frames]

    # Generate mel spectrogram
    S = librosa.feature.melspectrogram(
        y=sample,
        sr=audio_rate,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels
    )

    # Convert to decibel scale
    S_db = librosa.power_to_db(S, ref=np.max)

    spectrogram_file_path = output_base + f'_{i}.png'

    # Render the spectrogram as a borderless image
    fig = plt.figure(figsize=(4, 4))
    try:
      plt.axis('off')
      plt.imshow(S_db, aspect='auto', origin='lower', cmap='viridis')
      plt.savefig(spectrogram_file_path, bbox_inches='tight', pad_inches=0)
    finally:
      # Always release the figure, even if saving fails, so figures do not pile up in memory
      plt.close(fig)

Save 5-second-long spectrograms to disk (Google Drive) as pictures

In [None]:
# For now only a subset is converted: the first MAX_FOLDERS folders and
# at most MAX_FILES_PER_FOLDER files from each of them.
MAX_FOLDERS = 5
MAX_FILES_PER_FOLDER = 20

# os.listdir returns entries in arbitrary order; sorting makes "the first N" reproducible between runs
folders = sorted(os.listdir(path+'/sample_train_audio'))

for folder in folders[:MAX_FOLDERS]:
  print("current folder:", folder)
  folder_path = path+'/sample_train_audio/'+folder+'/'

  # Mirror the audio folder structure for the spectrograms; exist_ok makes re-runs harmless
  spectrogram_folder_path = path+'/sample_train_spectrograms/'+folder+'/'
  os.makedirs(spectrogram_folder_path, exist_ok=True)

  files = sorted(os.listdir(folder_path))

  for file in files[:MAX_FILES_PER_FOLDER]:
    # The audio file name is reused as the output name; the function swaps the extension for _<index>.png
    save_spectrogram_to_disk(folder_path+file, spectrogram_folder_path+file)

current folder: blrwar1


57it [00:09,  5.89it/s]
51it [00:07,  6.52it/s]
64it [00:11,  5.60it/s]
58it [00:10,  5.47it/s]
19it [00:02,  7.50it/s]
57it [00:10,  5.33it/s]
56it [00:10,  5.41it/s]
47it [00:06,  6.84it/s]
14it [00:01,  7.66it/s]
20it [00:03,  5.61it/s]
10it [00:01,  5.38it/s]
16it [00:02,  6.50it/s]
57it [00:08,  6.92it/s]
104it [00:18,  5.74it/s]
84it [00:14,  5.97it/s]
38it [00:05,  6.95it/s]
213it [00:38,  5.59it/s]
11it [00:01,  8.26it/s]
71it [00:11,  5.93it/s]
40it [00:05,  7.17it/s]


current folder: categr


5it [00:00, 10.84it/s]
2it [00:00, 3327.49it/s]
25it [00:07,  3.37it/s]
30it [00:04,  7.04it/s]
7it [00:00, 10.32it/s]
17it [00:02,  6.64it/s]
41it [00:07,  5.72it/s]
30it [00:03,  7.60it/s]
4it [00:00, 13.20it/s]
5it [00:00, 11.14it/s]
9it [00:01,  8.82it/s]
22it [00:04,  4.93it/s]
9it [00:01,  7.19it/s]
8it [00:00,  8.97it/s]
10it [00:01,  8.30it/s]
12it [00:01,  7.93it/s]
17it [00:02,  7.88it/s]
48it [00:08,  5.50it/s]
5it [00:00, 12.24it/s]
11it [00:01,  8.44it/s]


current folder: forwag1


17it [00:02,  7.59it/s]
25it [00:05,  4.85it/s]
5it [00:00,  6.59it/s]
11it [00:01,  8.18it/s]
63it [00:10,  5.88it/s]
6it [00:00, 10.12it/s]
12it [00:01,  8.14it/s]
16it [00:02,  7.50it/s]
29it [00:05,  5.37it/s]
16it [00:02,  7.62it/s]
23it [00:03,  7.56it/s]
9it [00:01,  8.81it/s]
6it [00:00,  6.77it/s]
63it [00:09,  6.79it/s]
4it [00:00,  7.89it/s]
12it [00:01,  7.95it/s]
4it [00:00, 12.17it/s]
2it [00:00, 3587.94it/s]
10it [00:01,  8.70it/s]
5it [00:00, 11.08it/s]


current folder: grenig1


29it [00:05,  5.43it/s]
9it [00:01,  8.35it/s]
11it [00:01,  7.87it/s]
20it [00:02,  7.64it/s]
9it [00:00,  9.08it/s]
5it [00:00,  7.69it/s]
15it [00:02,  6.60it/s]
21it [00:04,  5.19it/s]
4it [00:00, 13.93it/s]
10it [00:01,  8.29it/s]
64it [00:10,  6.04it/s]
19it [00:03,  6.23it/s]
11it [00:01,  6.12it/s]
8it [00:00,  8.80it/s]
15it [00:01,  7.72it/s]
20it [00:02,  7.32it/s]
16it [00:03,  4.73it/s]
10it [00:01,  8.34it/s]
10it [00:01,  8.18it/s]
8it [00:00,  8.42it/s]


current folder: jerbus2


27it [00:04,  6.39it/s]
50it [00:07,  6.69it/s]
10it [00:01,  8.89it/s]
14it [00:02,  5.02it/s]
8it [00:00,  8.87it/s]
8it [00:00,  8.87it/s]
8it [00:00,  8.87it/s]
7it [00:00,  9.27it/s]
11it [00:01,  7.85it/s]
36it [00:06,  5.37it/s]
17it [00:02,  7.81it/s]
6it [00:00, 10.40it/s]
11it [00:01,  8.05it/s]
8it [00:01,  6.06it/s]
2it [00:00, 3865.72it/s]
5it [00:00, 11.42it/s]
20it [00:02,  7.34it/s]
14it [00:01,  8.08it/s]
20it [00:03,  5.92it/s]
