In [14]:
import os
import zipfile
import pandas as pd
import librosa
import random
import soundfile as sf
import shutil

In [15]:
inner_folder_path = 'dataset2'
audio_files = [f for f in os.listdir(inner_folder_path) if not f.startswith("._") and f.endswith(".wav")]
audio_labels = [os.path.splitext(f)[0] for f in audio_files]

audio_labels

['children',
 'nothing2',
 'drilling',
 'engine',
 'siren',
 'gunshot',
 'aircon',
 'jackhammer',
 'carhorn',
 'glass',
 'nock',
 'street_music',
 'dog_bark',
 'nothing1']

In [16]:
# 2. Parameters for audio processing
duration = 2
sampling_rate = 44100
num_clips_per_label = 1000
csv_entries = []

In [17]:
from tqdm import tqdm

# 3. Process each audio file, extract clips, and save details to CSV
parent_output_folder = '/Volumes/검은 상자/@CODE'
for audio_label, audio_file in (zip(audio_labels, audio_files)):
    print(audio_label)
    file_path = os.path.join(inner_folder_path, audio_file)
    y, sr = librosa.load(file_path, sr=sampling_rate)
    num_samples = int(duration * sr)
    if len(y) < num_samples:
        continue
    output_folder = os.path.join(parent_output_folder, audio_label)  # Place in the organized parent folder
    os.makedirs(output_folder, exist_ok=True)
    for i in tqdm(range(num_clips_per_label)):
        start_sample = random.randint(0, len(y) - num_samples)
        end_sample = start_sample + num_samples
        clip = y[start_sample:end_sample]
        clip_filename = f"{audio_label}_cut_{i:04d}.wav"
        clip_filepath = os.path.join(output_folder, clip_filename)
        sf.write(clip_filepath, clip, sr)
        entry = {
            "slice_file_name": clip_filename,
            "fsID": random.randint(100000, 999999),
            "start": start_sample / sr,
            "end": end_sample / sr,
            "salience": 1,
            "fold": audio_label,  # Use label name for fold
            "classID": audio_labels.index(audio_label),
            "class": audio_label
        }
        csv_entries.append(entry)

children


KeyboardInterrupt: 

In [None]:
# 4. Save the CSV entries to a file
csv_output_path = parent_output_folder
csv_filename = "data2.csv"  
csv_output_path = os.path.join(parent_output_folder, csv_filename)
new_df = pd.DataFrame(csv_entries)
new_df.to_csv(csv_output_path, index=False)