In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import librosa
import soundfile as sf
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

In [3]:
dataset = "/content/drive/MyDrive/IOT/SoundClasification/processed_sounds"
metadata_file = "/content/drive/MyDrive/IOT/SoundClasification/metadata.csv"
output_dir = "/content/drive/MyDrive/IOT/SoundClasification/Final_dataset"
os.makedirs(output_dir, exist_ok=True)

# Load metadata
df = pd.read_csv(metadata_file)
SR = 16000

# Determine maximum audio length
max_length = 0
for index, row in tqdm(df.iterrows(), total=len(df), desc="Calculating max length"):
    file_path = os.path.join(dataset, row["file_path"])
    audio, sr = librosa.load(file_path, sr=SR)
    max_length = max(max_length, len(audio))

print(f"Max length of audio found: {max_length} samples")

# Process and standardize all audio files
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing files"):
    file_path = os.path.join(dataset, row["file_path"])
    audio, sr = librosa.load(file_path, sr=SR)

    # Trim or pad to max_length
    if len(audio) > max_length:
        audio = audio[:max_length]  # Trim
    else:
        audio = np.pad(audio, (0, max_length - len(audio)), mode="constant")  # Pad with silence

    # Save processed file
    output_path = os.path.join(output_dir, os.path.basename(file_path))
    sf.write(output_path, audio, SR)

print("All files have been standardized to the longest audio duration!")

Calculating max length: 100%|██████████| 14520/14520 [1:24:12<00:00,  2.87it/s]


Max length of audio found: 156917 samples


Processing files: 100%|██████████| 14520/14520 [08:14<00:00, 29.38it/s]

All files have been standardized to the longest audio duration!



