<a href="https://colab.research.google.com/github/BrMrtn/GoogleColab/blob/main/Saving_spectrograms_to_disk_final_faster.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
from multiprocessing.pool import ThreadPool

from google.colab import drive
drive.mount('/content/drive')

path = '/content/drive/MyDrive/DeepLearning'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Turns audio to spectrograms and saves them to the given folder
def save_spectrogram_to_disk(path_to_audio, path_to_spectrogram_png, sample_duration_sec=5.0):
  audio_data, _ = librosa.load(path_to_audio)
  audio_rate = 32000 # given by the competition rules

  sample_duration_frames = int(sample_duration_sec * audio_rate) # length of the samples in frames

  n_fft = 2048 # [2024 2nd place]
  hop_length = 512 # [2024 2nd place]
  n_mels = 128 # [2024 2nd place]
  # win_length is automatically equal to n_fft

  # Pad audio with zeros if it's smaller than sample_duration_sec - original data is in the center
  if len(audio_data) < sample_duration_frames:
    padding_length = sample_duration_frames - len(audio_data)
    pad_start = padding_length // 2
    pad_end = padding_length - pad_start
    audio_data = np.pad(audio_data, (pad_start, pad_end), mode='constant', constant_values=0)

  # Cut first 5 seconds from audio_data
  sample = audio_data[:sample_duration_frames]

  # Generate mel spectrogram
  S = librosa.feature.melspectrogram(
      y=sample,
      sr=audio_rate,
      n_fft=n_fft,
      hop_length=hop_length,
      n_mels=n_mels,
      fmin=40, # [2024 1st place]
      fmax=16000 # Bird calls are usually high pitched, we don't want to remove those frequencies
  )

  # Convert to decibel scale
  S_db = librosa.power_to_db(S, ref=np.max)

  img_data = (255 * (S_db - S_db.min()) / (S_db.max() - S_db.min())).astype(np.uint8)

  img = Image.fromarray(img_data)
  img = img.transpose(Image.FLIP_TOP_BOTTOM)
  img.save(path_to_spectrogram_png)

In [15]:
def process_file(args):
    folder_path, file, spectrogram_folder_path = args
    save_spectrogram_to_disk(folder_path + file, spectrogram_folder_path + file.replace(".ogg", ".png"))

def process_folder(folder):
    folder_path = path + '/sample_train_audio/' + folder + '/'
    spectrogram_folder_path = path + '/ugyanaz?/' + folder + '/'
    os.makedirs(spectrogram_folder_path, exist_ok=True)
    files = os.listdir(folder_path)
    args = [(folder_path, file, spectrogram_folder_path) for file in files]
    with Pool(processes=os.cpu_count()) as pool:
        pool.map(process_file, args)

In [16]:
folders = os.listdir(path+'/sample_train_audio')

with ThreadPool(processes=os.cpu_count()) as pool:
    pool.map(process_folder, folders)

Process ForkPoolWorker-704:
Process ForkPoolWorker-1099:
Process ForkPoolWorker-1394:
Process ForkPoolWorker-1865:
Process ForkPoolWorker-1887:
Process ForkPoolWorker-1942:
Process ForkPoolWorker-1879:
Process ForkPoolWorker-1912:
Process ForkPoolWorker-632:
Process ForkPoolWorker-1922:
Process ForkPoolWorker-1886:
Process ForkPoolWorker-1920:
Process ForkPoolWorker-1904:
Process ForkPoolWorker-1892:
Process ForkPoolWorker-1855:
Process ForkPoolWorker-1560:
Process ForkPoolWorker-1881:
Process ForkPoolWorker-1632:
Process ForkPoolWorker-1907:


KeyboardInterrupt: 

Process ForkPoolWorker-1916:
Process ForkPoolWorker-1856:
Process ForkPoolWorker-1537:
Process ForkPoolWorker-290:
Process ForkPoolWorker-207:
Process ForkPoolWorker-1867:
Process ForkPoolWorker-1873:
Process ForkPoolWorker-880:
Process ForkPoolWorker-1937:
Process ForkPoolWorker-1936:
Process ForkPoolWorker-200:
Process ForkPoolWorker-952:
Process ForkPoolWorker-873:
Process ForkPoolWorker-1827:
Process ForkPoolWorker-1686:
Process ForkPoolWorker-1852:
Process ForkPoolWorker-1700:
Process ForkPoolWorker-1899:
Process ForkPoolWorker-1784:
Process ForkPoolWorker-1549:
Process ForkPoolWorker-1769:
Process ForkPoolWorker-1447:
Process ForkPoolWorker-275:
Process ForkPoolWorker-195:
Process ForkPoolWorker-1584:
Process ForkPoolWorker-1846:
Process ForkPoolWorker-1542:
Process ForkPoolWorker-1613:
Process ForkPoolWorker-1877:
Process ForkPoolWorker-1473:
Process ForkPoolWorker-1534:
Process ForkPoolWorker-1833:
Process ForkPoolWorker-1889:
Process ForkPoolWorker-98:
Process ForkPoolWorker-6