In [None]:
#default_exp sources.footsteps_one_ppsfx

# Source / Footsteps One PPSFX 004

In [None]:
#|export

from DataImporters.sources.core import *

import os
import string
import re
import pandas as pd

## Filename Parsing

* All file names start with `Footsteps`, which we use as the category.  
* Next comes the material/ground the footsteps were recorded on, which we extract as a label.  
* Some names then include the step type (e.g. Walk / Stomps), which we extract as a separate label.  
* Some include a variation number (e.g. 01 / 02...), which we ignore.  
* Some include the step length (e.g. Short / Medium), which we keep.  

In [None]:
#exports

class FootstepsOnePpsfx(Source):
    """Source definition for the "Footsteps One" (PPSFX 004) collection.

    Audio files are listed from the ``Audio`` directory under the root
    directory, and per-file descriptions are read from the metadata
    spreadsheet stored in the ``Documents`` directory.
    """

    def preload(self, root_dir: str):
        """Read the metadata spreadsheet into ``self.extra_metadata``.

        Args:
            root_dir: Root directory of the collection; the spreadsheet is
                expected at ``Documents/PPSFX 004 - Footsteps One Metadata.xls``
                below it.
        """
        metadata_path = os.path.join(root_dir,
            "Documents", "PPSFX 004 - Footsteps One Metadata.xls")
        self.extra_metadata = pd.read_excel(metadata_path)

    @property
    def name(self) -> str:
        """Identifier of this source."""
        return "footsteps_one_ppsfx_004"

    def get_files(self, root_dir: str) -> list[tuple[str, str]]:
        """Return the files found in the ``Audio`` directory under ``root_dir``."""
        samples_dir = os.path.join(root_dir, "Audio")
        return get_filenames(samples_dir)

    def get_category(self, path: str, filename: str) -> str:
        """Return the category, which is ``"footsteps"`` for every file."""
        return "footsteps"

    def get_labels(self, path: str, filename: str) -> list[str]:
        """Derive labels from a file name.

        The extension, the leading ``Footsteps`` prefix and a trailing
        variation number are removed. If ``get_footstep_type`` recognises a
        step type, it is removed from the remaining text and added as its own
        (normalised) label. Whatever text is left is appended as the last
        label.

        Args:
            path: Path of the file (unused here).
            filename: File name to parse.

        Returns:
            The step-type label (when one is found) followed by the remaining
            text of the file name.
        """
        # Remove the extension and the prefix
        filename = remove_extension(filename).removeprefix("Footsteps")

        # Remove the number at the end of the filename. Only a trailing run of
        # digits is dropped: splitting on the last "0" would leave a stray
        # digit for numbers such as "10" or "001", would not remove numbers
        # without a zero such as "12", and would truncate names that contain
        # a "0" anywhere else.
        filename = re.sub(r"\d+$", "", filename)

        # If a step type is found, remove it from the filename and add it to
        # the labels
        labels = []

        step_label = get_footstep_type(filename)
        if step_label is not None:
            # NOTE(review): the match is done on the lowercased filename, so
            # this presumably relies on step_label being lowercase - confirm
            # against get_footstep_type.
            filename = filename.lower().replace(" " + step_label, "")
            labels.append(normalise_label(step_label))

        labels.append(filename)

        return labels

    def get_extra(self, path: str, filename: str) -> str:
        """Return the ``BWDescription`` value of the metadata row for a file.

        Args:
            path: Path of the file (unused here).
            filename: Value to look up in the ``Filename`` column.

        Returns:
            The ``BWDescription`` cell of the first matching row.

        Raises:
            IndexError: If no row in the metadata matches ``filename``.
        """
        row = self.extra_metadata.loc[self.extra_metadata["Filename"] == filename]
        if row.empty:
            # Same exception type as indexing an empty result, but with a
            # message that names the offending file.
            raise IndexError(f"No metadata row found for file {filename!r}")
        return row["BWDescription"].values[0]