In [None]:
# Mount Google Drive at /content/drive; the dataset and output folders used
# by the following cell are located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Required libraries (pandas, numpy, librosa, tqdm, matplotlib) are imported
# below; this cell does not run any install step.

# Mount Google Drive. The mount is repeated here so this cell can be run on
# its own; per the recorded output, the call only reports "already mounted"
# when the drive is already available.
from google.colab import drive
drive.mount('/content/drive')

import os
import pandas as pd
import numpy as np
import librosa
import librosa.display
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# Configuration
class Config:
    """Constants shared by the preprocessing code in this notebook."""
    # Sample rate passed to librosa.load when reading audio.
    SR = 32000
    # Number of Mel bands passed to librosa.feature.melspectrogram.
    N_MELS = 128
    # Not referenced by the code visible in this cell -- presumably consumed
    # by a later modelling step; TODO confirm.
    MAX_SEQ_LEN = 200
    # Dataset root; the relative entries of the CSV 'path' column are joined
    # onto this folder.
    ROOT_FOLDER = '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/open/'
    # Root of the generated outputs; test PNGs go into its 'test' subfolder.
    PREPROCESSED_FOLDER = '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/'
    # NOTE(review): not referenced by the code visible in this cell, so it
    # does not currently limit how many files are processed.
    SUBSET_SIZE = 50000  # You can adjust this for faster testing

# Single shared settings object read by the functions below.
CONFIG = Config()

# Ensure output directory for test data exists
# (exist_ok=True makes this safe to re-run; os.makedirs also creates any
# missing parent folders).
os.makedirs(os.path.join(CONFIG.PREPROCESSED_FOLDER, 'test'), exist_ok=True)

# Function to load file paths from CSV
def load_test_file_paths(csv_path, root_folder=None):
    """Return the audio file paths listed in a CSV file.

    Each entry of the CSV's ``path`` column is joined onto ``root_folder``
    with ``os.path.join``.

    Args:
        csv_path: Path of a CSV file that contains a ``path`` column.
        root_folder: Folder the listed paths are relative to. Defaults to
            ``CONFIG.ROOT_FOLDER`` so existing one-argument calls behave as
            before; passing it explicitly lets the same loader serve other
            dataset roots.

    Returns:
        A list of joined paths, in CSV row order.

    Raises:
        KeyError: If the CSV has no ``path`` column.
    """
    if root_folder is None:
        # Resolved at call time so later changes to CONFIG are honoured.
        root_folder = CONFIG.ROOT_FOLDER

    df = pd.read_csv(csv_path)
    return [os.path.join(root_folder, relative_path) for relative_path in df['path']]

# Function to save Mel-spectrogram as PNG
def save_mel_spectrogram(file_path, output_folder):
    """Render the Mel-spectrogram of one audio file and save it as a PNG.

    The image is written into ``output_folder`` under the audio file's base
    name with its final extension swapped for ``.png``. If that PNG already
    exists, the file is skipped, so interrupted runs can be resumed.

    Args:
        file_path: Path of the audio file to read.
        output_folder: Directory that receives the PNG.

    Returns:
        None. Skips, load failures and empty recordings are reported with
        ``print`` and the function returns without writing anything.
    """
    # Swap only the final extension. A plain str.replace('.ogg', '.png') would
    # also rewrite '.ogg' occurring elsewhere in the name and would leave
    # non-.ogg inputs without a '.png' suffix, so the existence check and
    # savefig would target the wrong file.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    filename = stem + '.png'
    output_path = os.path.join(output_folder, filename)

    # Check if file already exists in output folder
    if os.path.exists(output_path):
        print(f"Skipping {filename} (already exists)")
        return  # Skip if file already exists

    try:
        y, sr = librosa.load(file_path, sr=CONFIG.SR)
    except Exception as e:  # Catch any loading errors
        print(f"Error loading {file_path}: {e}")
        return

    # A recording with no samples cannot be turned into a spectrogram; report
    # it like a load failure instead of letting one file abort a long batch.
    if y.size == 0:
        print(f"Error loading {file_path}: no audio samples")
        return

    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=CONFIG.N_MELS)
    # ref=np.max scales the dB values relative to the loudest bin of this file.
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Use an explicit figure/axes pair so the figure can always be closed,
    # even when drawing or saving raises; otherwise figures accumulate across
    # the batch loop.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(
            mel_spectrogram_db, sr=sr, x_axis='time', y_axis='mel', ax=ax
        )
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel-spectrogram')
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        plt.close(fig)

# Preprocess Test Data Only
# Load test file paths from CSV. The CSV path is built from CONFIG.ROOT_FOLDER
# so the dataset location is defined in one place only.
test_files = load_test_file_paths(os.path.join(CONFIG.ROOT_FOLDER, 'test.csv'))

# Get existing preprocessed test filenames (one directory listing up front is
# cheaper than an os.path.exists call per file).
preprocessed_test_folder = os.path.join(CONFIG.PREPROCESSED_FOLDER, 'test')
existing_test_files = set(os.listdir(preprocessed_test_folder))

# Find files that need preprocessing. The expected PNG name swaps only the
# final extension of the audio file name.
remaining_files_to_preprocess = []
for file_path in tqdm(test_files, desc="Checking existing files"):
    filename = os.path.splitext(os.path.basename(file_path))[0] + '.png'
    if filename not in existing_test_files:
        remaining_files_to_preprocess.append(file_path)

print(f"Found {len(remaining_files_to_preprocess)} files that need preprocessing.")

# Preprocess the remaining files
for file_path in tqdm(remaining_files_to_preprocess, desc="Preprocessing remaining files"):
    save_mel_spectrogram(file_path, preprocessed_test_folder)

# save_mel_spectrogram reports load errors by printing and returning, so the
# loop finishing does not prove every image was written. Re-list the output
# folder and report files that still have no PNG instead of always claiming
# success.
saved_test_files = set(os.listdir(preprocessed_test_folder))
failed_files = [
    path for path in remaining_files_to_preprocess
    if os.path.splitext(os.path.basename(path))[0] + '.png' not in saved_test_files
]

if failed_files:
    print(f"{len(failed_files)} of {len(remaining_files_to_preprocess)} files could not be preprocessed.")
else:
    print('Mel-spectrogram images saved successfully.')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Checking existing files:   0%|          | 0/50000 [00:00<?, ?it/s]

Found 25356 files that need preprocessing.


Preprocessing remaining files:   0%|          | 0/25356 [00:00<?, ?it/s]

Mel-spectrogram images saved successfully.
