# Loader for Space Divers Mini sound lib

In [1]:
# Identifier of this sound library; it names the per-source folders below.
SOURCE_NAME = "space_divers_mini"
# Root of the original (input) data for this source.
ORIGINAL_PATH = f"./data/original/{SOURCE_NAME}/"
# Root of the intermediate data for this source.
TARGET_PATH = f"./data/intermediate/{SOURCE_NAME}/"

## Get the filenames to parse

In [2]:
import os

In [7]:
def load_filenames(path: str) -> list[str]:
    """Return the names of the directory entries in ``path`` ending in ``.wav``.

    Only the bare entry names are returned (they are not joined with
    ``path``), in the order ``os.listdir`` yields them. The suffix check is
    case-sensitive.
    """
    return [entry for entry in os.listdir(path) if entry.endswith(".wav")]

In [4]:
# Collect the .wav names found in the library's "Samples/" folder.
filenames = load_filenames(f"{ORIGINAL_PATH}Samples/")
# Preview the first five names.
filenames[:5]

['99S LT Ambience Underwater - Coral Sea.wav',
 '99S LT Manipulate - Distant.wav',
 '99S LT Impact - Crash Distant A.wav',
 '99S LT Underwater - Move D.wav',
 '99S LT Vehicle - Resonation Slow.wav']

## Parse the files into labels

The titles follow one of two patterns:  
`<space separated categories> - <specific label>.wav`  
`<specific label>.wav`  

The space-separated categories can themselves also be split by `-`.  
The specific label can end with single-letter variants (`A`, `B`, `C`, ...), which are ignored.

In [66]:
import string

In [73]:
def parse_filename(filename: str) -> list[str]:
    """Parse a sample filename into its list of labels.

    The ``"99S LT "`` prefix and ``".wav"`` extension are removed, then the
    title is split on ``-``. Every part before the last ``-`` is a group of
    space-separated category words, each of which becomes its own label. The
    part after the last ``-`` (or the whole title when there is no ``-``) is
    the specific label and is kept as a single, possibly multi-word, label.

    Trailing single-letter variant markers (``A``, ``B``, ``C``, ...) are
    removed from the specific label. Only whole one-letter tokens are
    removed, so a label ending in a capitalised word or acronym (``"UFO"``,
    ``"Engine FX"``) is left intact. Runs of whitespace inside the specific
    label collapse to a single space, and empty labels are omitted.

    Args:
        filename: Bare filename, e.g. ``"99S LT Impact - Crash Distant A.wav"``.

    Returns:
        The category labels in order, followed by the specific label, e.g.
        ``["Impact", "Crash Distant"]``.
    """
    # Remove the prefix and extension
    title = filename.removeprefix("99S LT ").removesuffix(".wav")
    *category_groups, specific_label = title.split("-")
    categories = [word for group in category_groups for word in group.split()]

    # Remove the variant letter(s) at the end. Work on whole tokens rather
    # than rstrip(): rstrip() takes a character *set* and would also eat the
    # capital letters that end a real word.
    words = specific_label.split()
    while words and len(words[-1]) == 1 and words[-1] in string.ascii_uppercase:
        words.pop()
    specific_label = " ".join(words)

    # Skip labels that ended up empty (e.g. nothing left after the variant).
    return [label for label in [*categories, specific_label] if label]

In [74]:
# Parse every sample filename into its list of labels.
labels = list(map(parse_filename, filenames))
# Preview the labels of the first five files.
labels[:5]

[['Ambience', 'Underwater', 'Coral Sea'],
 ['Manipulate', 'Distant'],
 ['Impact', 'Crash Distant'],
 ['Underwater', 'Move'],
 ['Vehicle', 'Resonation Slow']]

## Copy the files to the intermediate folder

## Export the CSV rows