# In [1]:
import torchaudio
import torch
import matplotlib.pyplot as plt
import numpy as np
import os

# In [2]:
# Sampling rate (Hz) that every clip is resampled to before its spectrogram
# is drawn; also passed to the spectrogram plot as the sampling frequency.
new_sample_rate = 16000

# Root folder that is walked for audio files, and the root folder under which
# the spectrogram images are written.
original_dataset_path = 'Dataset'
new_dataset_path = 'Dataset_Spec'

def create_dir(path):
    """Create the directory ``path``, including any missing parents.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the same directory in between.
    os.makedirs(path, exist_ok=True)

def save_spectrogram(waveform, sample_rate, save_path):
    """Resample ``waveform`` and save a spectrogram of its first row as an image.

    The audio is resampled from ``sample_rate`` to the module-level
    ``new_sample_rate``, the entry at index 0 along the first axis is plotted
    with matplotlib's ``specgram`` (axes hidden), and the figure is written to
    ``save_path``.

    Args:
        waveform: Audio tensor, e.g. as returned by ``torchaudio.load``.
            Only ``waveform[0]`` is plotted (presumably the first channel of
            a channels-first tensor; confirm if the loader options change).
        sample_rate: Sampling rate of ``waveform`` in Hz.
        save_path: Destination path of the image file.
    """
    # Resample the waveform to the common target rate.
    resample = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=new_sample_rate)
    resampled = resample(waveform)

    # Explicit tensor -> NumPy conversion of the row that is plotted; detach()
    # and cpu() keep this working for grad-tracking or non-CPU tensors.
    first_row = resampled[0].detach().cpu().numpy()

    # figsize is in inches. The original note gave the saved image as roughly
    # 790 x 490 pixels; the real size depends on the active DPI and the tight
    # bounding-box crop below (TODO confirm).
    fig = plt.figure(figsize=(4, 3))
    try:
        ax = fig.add_subplot(1, 1, 1)
        ax.specgram(first_row, Fs=new_sample_rate)
        ax.axis('off')
        # Save the spectrogram as an image with no surrounding padding.
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if plotting or saving fails, so a
        # long batch run does not accumulate open figures.
        plt.close(fig)

# Traverse the original dataset directory and mirror its folder layout under
# new_dataset_path, writing one spectrogram PNG per audio file.
audio_extensions = ('.mp3', '.wav')
for root, dirs, files in os.walk(original_dataset_path):
    # Compare extensions case-insensitively so files such as "CLIP.WAV" or
    # "song.MP3" are not silently skipped.
    audio_files = [name for name in files if name.lower().endswith(audio_extensions)]
    if not audio_files:
        # Only folders that contain audio get an output folder.
        continue

    # The output folder depends only on `root`, so create it once per folder
    # rather than once per file.
    relative_path = os.path.relpath(root, original_dataset_path)
    new_dir = os.path.join(new_dataset_path, relative_path)
    create_dir(new_dir)

    for file in audio_files:
        file_path = os.path.join(root, file)

        # Load the audio file
        waveform, sample_rate = torchaudio.load(file_path)

        # Save the spectrogram
        # NOTE: the image name is the audio name with its extension replaced,
        # so "a.mp3" and "a.wav" in the same folder write the same "a.png".
        save_path = os.path.join(new_dir, os.path.splitext(file)[0] + '.png')
        save_spectrogram(waveform, sample_rate, save_path)