# Loader for Edward sound lib

This library includes both footstep sounds and the sounds of moving in clothes made of different materials.

In [1]:
# Category markers that appear inside the library's filenames.
CLOTHES_PREFIX = "Clothes_Movement"
FOOTSTEPS_PREFIX = "Footsteps"

# Name of the source library; used to build the paths below.
SOURCE_NAME = "edward_v1.1"

# Input locations of the original audio files.
ORIGINAL_PATH = f"./data/original/{SOURCE_NAME}/"
FOOTSTEPS_PATH = f"{ORIGINAL_PATH}WAV/Footsteps/"
CLOTHES_PATH = f"{ORIGINAL_PATH}WAV/Clothes_Movement/"

# Output location for the renamed files and the metadata CSV.
TARGET_PATH = f"./data/intermediate/{SOURCE_NAME}/"

## Get the filenames to parse

In [2]:
import os

In [3]:
# Lower-case suffixes recognised as audio files.
audio_extensions = (".wav", ".mp3", ".flac", ".aac", ".m4a", ".ogg", ".opus")

def load_filenames(path: str) -> list[tuple[str, str]]:
    """Recursively collect the audio files found under a directory.

    Args:
        path: Directory to walk, including all of its subdirectories.

    Returns:
        A list of ``(directory, filename)`` pairs, one per audio file, where
        ``directory`` is the folder that directly contains ``filename``.
        The list is empty when nothing matches or ``path`` does not exist.
    """
    # Compare on the lower-cased name so that e.g. "X.WAV" is not skipped;
    # the filename itself is returned with its original casing.
    return [(root, filename) for root, _, files in os.walk(path)
            for filename in files
            if filename.lower().endswith(audio_extensions)]

In [4]:
# Unzip the (directory, filename) pairs of both categories into two
# parallel tuples: clothes files first, then footsteps.
paths, filenames = zip(*load_filenames(CLOTHES_PATH),
                       *load_filenames(FOOTSTEPS_PATH))
filenames[:5]

('589_Foley_Clothes_Movement_Medium_Normal_Walk_Close.wav',
 '563_Foley_Clothes_Movement_Jeans_Jump_Land_Special_Close.wav',
 '562_Foley_Clothes_Movement_Jeans_Fast_Walk_Run_Distance.wav',
 '571_Foley_Clothes_Movement_Jeans_Very_Slow_Walk_Creep_Close.wav',
 '573_Foley_Clothes_Movement_Light_Fast_Walk_Run_Close.wav')

In [5]:
# The extension is whatever follows the last dot of each filename.
extensions = [name.rsplit(".", maxsplit=1)[-1] for name in filenames]
assert len(extensions) == len(filenames)
extensions[:5]

['wav', 'wav', 'wav', 'wav', 'wav']

## Parse the files into labels

In [6]:
import string
import stringcase

In [7]:
def normalise_label(label: str) -> str:
    """Normalise a label: lower-case, strip, snake-case, then capital-case.

    Args:
        label: Raw label text.

    Returns:
        The label after passing through ``stringcase.snakecase`` and
        ``stringcase.capitalcase``.
    """
    # We have to lower() first otherwise stringcase.snakecase will prepend
    # multiple underscores
    cleaned = label.lower().strip()
    snake = stringcase.snakecase(cleaned)
    return stringcase.capitalcase(snake)

In [8]:
def get_category(filename: str) -> str:
    """Return the category prefix contained in a filename.

    The clothes prefix is checked before the footsteps prefix.

    Args:
        filename: Name of the audio file.

    Returns:
        ``CLOTHES_PREFIX`` or ``FOOTSTEPS_PREFIX``, whichever is found first.

    Raises:
        ValueError: If the filename contains neither prefix.
    """
    for prefix in (CLOTHES_PREFIX, FOOTSTEPS_PREFIX):
        if prefix in filename:
            return prefix
    raise ValueError("Unknown category for filename: " + filename)

In [9]:
def get_step_type(filename: str) -> "str | None":
    """Return the step type mentioned in a filename, if any.

    Args:
        filename: Filename (or filename fragment) to search.

    Returns:
        The matching step type, or ``None`` when the filename contains none
        of the known step types. If several step types occur, the one listed
        last in ``step_types`` wins.
    """
    step_types = ("Walk", "Scuffs", "Stomps", "Squishes", "Wade", "Scrape")
    # Search from the end of the tuple so that the first hit is the same
    # "last match wins" result a full forward scan would produce.
    for step in reversed(step_types):
        if step in filename:
            return step
    return None

In [10]:
def get_labels(filename: str, category: str) -> list[str]:
    """Parse a filename into its list of labels.

    Args:
        filename: Name of the audio file, containing ``category + "_"``.
        category: Category prefix found in the filename.

    Returns:
        The labels in order: the material (footsteps only), the step type
        (when one is recognised), and finally the remaining filename stem.
    """
    # Remove the prefix and extension
    stem = filename.split(category + "_", maxsplit=1)[1]
    stem = stem.rsplit(".", maxsplit=1)[0]

    parsed = []
    if category == FOOTSTEPS_PREFIX:
        # Footsteps start with the type of material
        material, stem = stem.split("_", maxsplit=1)
        parsed.append(material)

    step_label = get_step_type(stem)
    if step_label is not None:
        parsed.append(step_label)

    parsed.append(stem)
    return parsed

In [11]:
# One list of labels per file, in the same order as `filenames`.
labels = [
    get_labels(name, get_category(name))
    for name in filenames
]
assert len(labels) == len(filenames)
labels

[['Walk', 'Medium_Normal_Walk_Close'],
 ['Jeans_Jump_Land_Special_Close'],
 ['Walk', 'Jeans_Fast_Walk_Run_Distance'],
 ['Walk', 'Jeans_Very_Slow_Walk_Creep_Close'],
 ['Walk', 'Light_Fast_Walk_Run_Close'],
 ['Walk', 'Medium_Fast_Walk_Run_Distance'],
 ['Walk', 'Light_Normal_Walk_Distance'],
 ['Jeans_Stop_Small_Special_Close'],
 ['Walk', 'Medium_Very_Slow_Walk_Creep_Close'],
 ['Walk', 'Medium_Normal_Walk_Distance'],
 ['Light_Stop_Small_Special_Distance'],
 ['Walk', 'Medium_Slow_Walk_Distance'],
 ['Walk', 'Light_Fast_Walk_Run_Distance'],
 ['Medium_Jump_Land_Special_Close'],
 ['Jeans_Jump_Land_Special_Distance'],
 ['Walk', 'Light_Slow_Walk_Close'],
 ['Jeans_Stop_Small_Special_Distance'],
 ['Walk', 'Light_Slow_Walk_Distance'],
 ['Light_Stop_Small_Special_Close'],
 ['Walk', 'Jeans_Very_Slow_Walk_Creep_Distance'],
 ['Walk', 'Jeans_Normal_Walk_Distance'],
 ['Walk', 'Medium_Slow_Walk_Close'],
 ['Medium_Stop_Small_Special_Close'],
 ['Light_Jump_Land_Special_Close'],
 ['Walk', 'Medium_Very_Slow_Wa

## Copy the files to the intermediate folder

In [12]:
import xxhash
import shutil

In [13]:
def get_hash(filename: str) -> str:
    """Return the xxHash64 hex digest of a file's contents.

    Args:
        filename: Path of the file to hash; normalised with
            ``os.path.normpath`` before opening.

    Returns:
        The hexadecimal xxh64 digest of the file's bytes.
    """
    hasher = xxhash.xxh64()
    with open(os.path.normpath(filename), "rb") as f:
        # Feed the file in 1 MiB chunks so large audio files are never held
        # in memory all at once; the digest equals the one-shot result.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            hasher.update(chunk)
    return hasher.hexdigest()

In [14]:
# Name every file after the hash of its contents, keeping its extension.
hashed_filename = [
    f"{get_hash(os.path.join(directory, name))}.{ext}"
    for directory, name, ext in zip(paths, filenames, extensions)
]
assert len(hashed_filename) == len(filenames)
hashed_filename[:5]

['2fe56997f8de5b02.wav',
 '5e0fc337b16bc512.wav',
 'ad7eb7674ef0c3a6.wav',
 '289b7703cab05b0a.wav',
 '55241b0f36af1bb7.wav']

In [15]:
# Create the target directory (and any missing parents). exist_ok avoids the
# race between checking for the directory and creating it.
os.makedirs(TARGET_PATH, exist_ok=True)

In [16]:
# Copy only the files that are not yet in the target directory. Each file is
# stored under its hashed name, so a file that already exists is skipped.
for path, filename, hashed_name in zip(paths, filenames, hashed_filename):
    target_file_path = os.path.join(TARGET_PATH, hashed_name)
    if not os.path.exists(target_file_path):
        # copy2 also copies the file's metadata where possible.
        shutil.copy2(os.path.join(path, filename), target_file_path)

## Export the CSV rows

In [17]:
import csv

In [18]:
# The "version" file in the working directory holds a single integer.
with open("version") as f:
    version = int(f.read())
print("Version:", version)

Version: 10


In [19]:
# Create a CSV file with the labels: one row per copied file.
category_rows = [normalise_label(get_category(filename)) for filename in filenames]
# All labels of a file share one CSV field, separated by commas.
label_rows = [",".join(l) for l in labels]
extra_rows = [""] * len(labels)
source_rows = [SOURCE_NAME] * len(labels)
version_rows = [version] * len(labels)

# newline="" lets the csv module control line endings itself; without it,
# platforms that translate "\n" (e.g. Windows) produce blank lines between rows.
with open(TARGET_PATH + "metadata.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["filename", "category", "label", "extra", "source", "version"])
    writer.writerows(zip(hashed_filename, category_rows, label_rows, extra_rows, source_rows, version_rows))