In [1]:
# Mount Google Drive at /content/drive so files stored on Drive can be read
# and written from this runtime. This relies on the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Switch the working directory to the project folder on the mounted Drive.
# The processing cells visible in this notebook use absolute paths, so this
# only affects code that relies on relative paths.
%cd '/content/drive/MyDrive/Amy'

/content/drive/MyDrive/Amy


In [3]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

In [4]:
# Render every recording in the DC folder as a PCEN-processed mel-spectrogram
# PNG under MS_PCEN/DC, using the audio file's base name for the image.
# NOTE(review): the NoDC cell further down repeats this logic with different
# paths; a shared helper function would remove the duplication.
audio_dir = r'/content/drive/MyDrive/Amy/DC'
spectrogram_dir = r'/content/drive/MyDrive/Amy/MS_PCEN/DC'

os.makedirs(spectrogram_dir, exist_ok=True)

# List the folder once; re-listing it on every iteration only to print the
# total is loop-invariant work (and a Drive round-trip each time).
audio_files = os.listdir(audio_dir)
total = len(audio_files)
count = 0  # stays 0 when the folder is empty

for count, audio_file in enumerate(audio_files, start=1):
    print(f"The data sample {audio_file} is processed: {count}/{total}")
    audio_path = os.path.join(audio_dir, audio_file)
    # Every file is resampled to 6 kHz on load.
    audio, sr = librosa.load(audio_path, sr=6000)

    # Mel spectrogram with power=1 (magnitude rather than power);
    # n_mels=128 matches librosa's default.
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=2048,
                                                 hop_length=512, win_length=2048,
                                                 n_mels=128, power=1)
    # NOTE(review): time_constant=0.0004 s is far shorter than one hop
    # (512 / 6000 ~= 0.085 s), so the PCEN temporal smoothing is presumably
    # close to disabled -- confirm this is intended.
    mel_pcen = librosa.pcen(spectrogram, sr=sr, hop_length=512,
                            gain=0.98, bias=0.2, power=0.9,
                            time_constant=0.0004, eps=1e-6)

    file_name = os.path.splitext(audio_file)[0]
    save_path = os.path.join(spectrogram_dir, f'{file_name}.png')

    fig = plt.figure(figsize=(8, 4))
    try:
        librosa.display.specshow(mel_pcen, sr=sr, x_axis='time', y_axis='mel')
        # Order is kept as in the original (layout first, then hide the axes)
        # so the saved image keeps the same geometry.
        plt.tight_layout()
        plt.axis('off')
        plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    # Re-open the saved PNG, force it to 3-channel RGB and stretch the pixel
    # values to the full 0-255 range. The file handle is closed before the
    # image is written back to the same path.
    with Image.open(save_path) as spec_image:
        rgb_array = np.array(spec_image.convert('RGB'))

    pixel_min = rgb_array.min()
    pixel_max = rgb_array.max()
    if pixel_max > pixel_min:
        rgb_array_norm = 255*((rgb_array-pixel_min)/(pixel_max-pixel_min))
    else:
        # Constant image: min-max scaling would divide by zero and produce
        # NaNs, so the pixels are left as they are.
        rgb_array_norm = rgb_array
    rgb_image_spec = Image.fromarray(rgb_array_norm.astype(np.uint8))
    rgb_image_spec.save(save_path)

The data sample 377_24_EI_DEPLOYMENT_20240311_125039_479_300.wav is processed: 1/122
The data sample 374_24_EI_DEPLOYMENT_20240311_070820_614_1000.wav is processed: 2/122
The data sample 374_24_EI_DEPLOYMENT_20240314_005256_559_900.wav is processed: 3/122
The data sample 374_24_EI_DEPLOYMENT_20240318_032950_484_1500.wav is processed: 4/122
The data sample 374_24_EI_DEPLOYMENT_20240311_080621_126_1010.wav is processed: 5/122
The data sample 377_24_EI_DEPLOYMENT_20240316_163539_736_420.wav is processed: 6/122
The data sample 374_24_EI_DEPLOYMENT_20240314_125803_148_660.wav is processed: 7/122
The data sample 374_24_EI_DEPLOYMENT_20240313_112049_149_1190.wav is processed: 8/122
The data sample 377_24_EI_DEPLOYMENT_20240315_182128_904_1500.wav is processed: 9/122
The data sample 374_24_EI_DEPLOYMENT_20240312_133537_252_1360.wav is processed: 10/122
The data sample 377_24_EI_DEPLOYMENT_20240317_125349_621_490.wav is processed: 11/122
The data sample 377_24_EI_DEPLOYMENT_20240315_123326_084_

In [5]:
# Render every recording in the NoDC folder as a PCEN-processed mel-spectrogram
# PNG under MS_PCEN/NoDC, using the audio file's base name for the image.
# NOTE(review): this repeats the DC cell earlier in the notebook with different
# paths; a shared helper function would remove the duplication.
audio_dir = r'/content/drive/MyDrive/Amy/NoDC'
spectrogram_dir = r'/content/drive/MyDrive/Amy/MS_PCEN/NoDC'

os.makedirs(spectrogram_dir, exist_ok=True)

# List the folder once; re-listing it on every iteration only to print the
# total is loop-invariant work (and a Drive round-trip each time).
audio_files = os.listdir(audio_dir)
total = len(audio_files)
count = 0  # stays 0 when the folder is empty

for count, audio_file in enumerate(audio_files, start=1):
    print(f"The data sample {audio_file} is processed: {count}/{total}")
    audio_path = os.path.join(audio_dir, audio_file)
    # Every file is resampled to 6 kHz on load.
    audio, sr = librosa.load(audio_path, sr=6000)

    # Mel spectrogram with power=1 (magnitude rather than power);
    # n_mels=128 matches librosa's default.
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=2048,
                                                 hop_length=512, win_length=2048,
                                                 n_mels=128, power=1)
    # NOTE(review): time_constant=0.0004 s is far shorter than one hop
    # (512 / 6000 ~= 0.085 s), so the PCEN temporal smoothing is presumably
    # close to disabled -- confirm this is intended.
    mel_pcen = librosa.pcen(spectrogram, sr=sr, hop_length=512,
                            gain=0.98, bias=0.2, power=0.9,
                            time_constant=0.0004, eps=1e-6)

    file_name = os.path.splitext(audio_file)[0]
    save_path = os.path.join(spectrogram_dir, f'{file_name}.png')

    fig = plt.figure(figsize=(8, 4))
    try:
        librosa.display.specshow(mel_pcen, sr=sr, x_axis='time', y_axis='mel')
        # Order is kept as in the original (layout first, then hide the axes)
        # so the saved image keeps the same geometry.
        plt.tight_layout()
        plt.axis('off')
        plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    # Re-open the saved PNG, force it to 3-channel RGB and stretch the pixel
    # values to the full 0-255 range. The file handle is closed before the
    # image is written back to the same path.
    with Image.open(save_path) as spec_image:
        rgb_array = np.array(spec_image.convert('RGB'))

    pixel_min = rgb_array.min()
    pixel_max = rgb_array.max()
    if pixel_max > pixel_min:
        rgb_array_norm = 255*((rgb_array-pixel_min)/(pixel_max-pixel_min))
    else:
        # Constant image: min-max scaling would divide by zero and produce
        # NaNs, so the pixels are left as they are.
        rgb_array_norm = rgb_array
    rgb_image_spec = Image.fromarray(rgb_array_norm.astype(np.uint8))
    rgb_image_spec.save(save_path)

The data sample 374_24_EI_DEPLOYMENT_20240311_152125_095_1460.wav is processed: 1/122
The data sample 377_24_EI_DEPLOYMENT_20240316_192941_153_1180.wav is processed: 2/122
The data sample 377_24_EI_DEPLOYMENT_20240317_001943_517_270.wav is processed: 3/122
The data sample 377_24_EI_DEPLOYMENT_20240312_051647_489_1080.wav is processed: 4/122
The data sample 377_24_EI_DEPLOYMENT_20240316_205641_851_1140.wav is processed: 5/122
The data sample 374_24_EI_DEPLOYMENT_20240317_005435_951_1050.wav is processed: 6/122
The data sample 377_24_EI_DEPLOYMENT_20240317_053846_108_1310.wav is processed: 7/122
The data sample 374_24_EI_DEPLOYMENT_20240316_004422_725_60.wav is processed: 8/122
The data sample 374_24_EI_DEPLOYMENT_20240313_072847_035_1510.wav is processed: 9/122
The data sample 377_24_EI_DEPLOYMENT_20240311_125039_479_1680.wav is processed: 10/122
The data sample 377_24_EI_DEPLOYMENT_20240313_195706_304_800.wav is processed: 11/122
The data sample 374_24_EI_DEPLOYMENT_20240315_101414_783