In [1]:
import json
import pandas as pd
from pathlib import Path
from tqdm import tqdm

from python import converter

In [2]:
# Raw dataset location: the metadata CSV and the fold folders are read from here.
INPUT_DIR = Path("data/raw")
INPUT_CSV_FILE = INPUT_DIR.joinpath("UrbanSound8K.csv")

# Root for the files this notebook produces (class list and per-split images).
OUTPUT_DIR = Path("data/processed")
OUTPUT_CLASSES_FILE = OUTPUT_DIR.joinpath("classes.json")

In [3]:
# Keep only what later cells use: the slice file name and its label
# (numeric id plus text name).
metadata_columns = ['slice_file_name', 'classID', 'class']
df = pd.read_csv(INPUT_CSV_FILE).loc[:, metadata_columns]

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8732 entries, 0 to 8731
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   slice_file_name  8732 non-null   object
 1   classID          8732 non-null   int64 
 2   class            8732 non-null   object
dtypes: int64(1), object(2)
memory usage: 204.8+ KB


In [4]:
# Distinct (classID, class) pairs, ordered by classID, reduced to the names.
unique_pairs = df[['classID', 'class']].drop_duplicates()
ordered_pairs = unique_pairs.sort_values(by='classID')
class_names = ordered_pairs['class'].to_list()

# Persist the ordered names as a JSON array next to the processed data.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
with OUTPUT_CLASSES_FILE.open('w') as handle:
    json.dump(class_names, handle, indent=4)

In [5]:
def get_file_class_name(file_name):
    """Return the class label that the metadata table assigns to an audio slice.

    The label is read from the notebook-level ``df`` (columns
    ``slice_file_name`` and ``class``). A file-name -> label dictionary is
    built on first use and rebuilt whenever ``df`` is rebound to a different
    object, so repeated calls do not re-scan the whole frame each time.
    In-place edits to the same ``df`` object are NOT detected; re-assign
    ``df`` (e.g. re-run the loading cell) to refresh the lookup.

    Args:
        file_name: Value to match against ``df['slice_file_name']``.

    Returns:
        The ``class`` value of the first row whose ``slice_file_name`` equals
        ``file_name``.

    Raises:
        IndexError: If no row matches. This is the same exception type the
            earlier positional lookup raised; the message now names the file.
    """
    cache = get_file_class_name.__dict__
    if cache.get("source") is not df:
        # keep="first" mirrors the earlier `.iloc[0]` choice for duplicated names.
        first_rows = df.drop_duplicates(subset="slice_file_name", keep="first")
        cache["lookup"] = dict(
            zip(first_rows["slice_file_name"], first_rows["class"])
        )
        cache["source"] = df

    try:
        return cache["lookup"][file_name]
    except KeyError:
        raise IndexError(f"No metadata row for file {file_name!r}") from None

In [6]:
# Queue of (source wav path, target png path) pairs for the conversion step.
tasks = []

for i in range(1, 11, 2):
    # Input folds are merged pairwise: fold{i} and fold{i+1} feed one cv folder.
    source_folders = (INPUT_DIR / f"fold{i}", INPUT_DIR / f"fold{i+1}")
    cv_root = OUTPUT_DIR / f"cv{(i+1)//2}"

    # Create one sub-directory per class up front, even if it ends up empty.
    for label in class_names:
        (cv_root / label).mkdir(parents=True, exist_ok=True)

    for source in source_folders:
        for file in source.iterdir():
            if file.suffix != ".wav":
                # Anything that is not a .wav entry is reported and skipped.
                print(f"Unexpected file type: {file}")
                continue
            label = get_file_class_name(file.name)
            tasks.append((file, cv_root / label / (file.stem + ".png")))

In [7]:
# Run the converter over every queued (wav path, png path) pair, in list
# order, with a progress bar.
progress = tqdm(tasks, desc="Converting to spectrograms")
for task in progress:
    converter.wav_to_spectrogram(*task)

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
Converting to spectrograms: 100%|██████████| 8732/8732 [15:53<00:00,  9.16it/s]  
