# Saving Spectrograms as Images

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import librosa as lr
import os
import cv2 as cv

import tensorflow as tf

2024-11-04 18:01:09.967993: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Paths are relative, so they resolve against the kernel's current working directory.
# Root for generated features; save_spectogram writes PNGs under DEST_DIR/spect_images/<split_type>/.
DEST_DIR = "../../extracted_features"
# Root of the input audio; save_spectogram reads the .wav files in AUDIO_DIR/<split_type>/.
AUDIO_DIR = "../../extracted_audio"

In [4]:
def _log_mel_image(y, sample_rate, n_mels, fmax, image_size):
    """Turn a mono waveform into a resized 8-bit log-mel spectrogram image.

    Args:
        y: 1-D audio samples, already at ``sample_rate``.
        sample_rate: Sampling rate of ``y`` in Hz.
        n_mels: Number of mel bands.
        fmax: Highest frequency (Hz) covered by the mel filter bank.
        image_size: ``(width, height)`` passed to ``cv.resize``.

    Returns:
        A 2-D ``uint8`` array of shape ``(height, width)``.
    """
    mel = lr.feature.melspectrogram(y=y, sr=sample_rate, n_mels=n_mels, fmax=fmax)
    # Small epsilon keeps log() finite for zero-energy bins; transpose as the
    # original pipeline did before scaling and resizing.
    log_spect = np.log(mel + 1e-9).T

    # Min-max scale to [0, 255]. Same operation order as before so the
    # truncating uint8 cast yields identical pixel values.
    log_spect = log_spect - log_spect.min()
    peak = log_spect.max()
    if np.isfinite(peak) and peak > 0:
        log_spect /= peak
        log_spect *= 255
    else:
        # Flat (e.g. digital silence) or non-finite spectrogram: dividing by
        # the peak would give NaN/inf, so emit an all-black image instead.
        log_spect = np.zeros_like(log_spect)
    pixels = log_spect.astype(np.uint8)

    return cv.resize(pixels, image_size, interpolation=cv.INTER_CUBIC)


def save_spectogram(split_type, sample_rate=16000, n_mels=128, fmax=8000, image_size=(512, 512)):
    """Render every ``.wav`` file of a dataset split as a log-mel spectrogram PNG.

    Reads ``{AUDIO_DIR}/{split_type}/*.wav`` and writes one grayscale image per
    file to ``{DEST_DIR}/spect_images/{split_type}/<name>.png``. A file that
    fails to load, convert or save is reported and skipped so one bad
    recording does not abort the whole split.

    Args:
        split_type: Name of the sub-directory (e.g. ``"test"``) under both
            ``AUDIO_DIR`` and the output directory.
        sample_rate: Rate in Hz the audio is resampled to while loading.
        n_mels: Number of mel bands in the spectrogram.
        fmax: Highest frequency (Hz) covered by the mel filter bank.
        image_size: ``(width, height)`` of the saved image.

    Returns:
        None. Progress and errors are printed.
    """
    src_dir = f"{AUDIO_DIR}/{split_type}"
    out_dir = f"{DEST_DIR}/spect_images/{split_type}"
    # Creates intermediate directories too and is a no-op if they already exist.
    os.makedirs(out_dir, exist_ok=True)

    failed = []
    for file in sorted(os.listdir(src_dir)):
        if not file.endswith(".wav"):
            continue

        try:
            # Resample once, straight to the target rate. Without sr= librosa
            # first resamples to its 22050 Hz default, forcing a second pass.
            y, _ = lr.load(f"{src_dir}/{file}", sr=sample_rate)
            image = _log_mel_image(y, sample_rate, n_mels, fmax, image_size)

            # splitext swaps only the trailing extension; str.replace would
            # also rewrite any ".wav" appearing earlier in the name.
            stem = os.path.splitext(file)[0]
            out_path = f"{out_dir}/{stem}.png"
            # cv.imwrite signals most failures by returning False, not raising.
            if not cv.imwrite(out_path, image):
                raise OSError(f"cv.imwrite could not write {out_path}")
        except Exception as e:
            print(f"Error processing {file}: {e}")
            failed.append(file)

    print(f"Saved {split_type} spectograms")
    if failed:
        print(f"{len(failed)} file(s) failed for {split_type}: {failed}")

In [5]:
# Generate spectrogram PNGs for the "test" split (reads AUDIO_DIR/test, writes DEST_DIR/spect_images/test).
save_spectogram("test")

Saved test spectograms
