In [None]:
#default_exp sources.custom_fsd

# Source / Custom FSD Downloads

> We downloaded the data using a custom FSD scraper, available at https://github.com/DiogoNeves/freesound-scraper/tree/custom  

This scraper takes a list of queries and downloads the metadata and audio it can find for each of them.  
The output includes a `metadata.csv` with the output audio details and a `samples` folder with all the audio.  

In [None]:
#|export

from DataImporters.sources.core import *

import os
import pandas as pd

## Data Import Instructions

1. Clone https://github.com/DiogoNeves/freesound-scraper/tree/custom  
1. Follow installation instructions  
1. Run command

```bash
python freesounds_scraper.py --query <this_project>/data/original/custom_fsd/query.csv \
	--target <this_project>/data/original/custom_fsd/samples/ \
	--data-file-name <this_project>/data/original/custom_fsd/metadata \
	--download true --append-to-csv true
```

### credentials.json

```json
{
	"client_id": "<your_freesound_client_id>",
	"client_secret": "<your_freesound_client_secret>"
}
```

## Format conversion

The `metadata.csv` includes the tags and no further title parsing is necessary.  

We also assume the category is the query and simply copy it over, with a small amount of processing to unescape the strings (e.g. `"test+query&s=Rating+(highest+first)" -> "test query"`).

In [None]:
#exporti

def _clean_category_name(category_name):
    return category_name.replace("+", " ").split("&")[0]

In [None]:
#hide

# Sanity checks: "+" becomes a space and everything from the first "&" on is dropped.
for raw_query, expected_name in [
    ("Test+Category", "Test Category"),
    ("test", "test"),
    ("test&category", "test"),
]:
    assert _clean_category_name(raw_query) == expected_name

In [None]:
#exports

class CustomFsd(Source):
    """Source that reads the output of the custom Freesound scraper.

    Per-file details come from ``metadata.csv``, which `preload` reads into
    ``self.metadata``. `preload` must therefore be called before
    `get_category` or `get_labels`.
    """

    def preload(self, root_dir: str):
        """Read ``<root_dir>/metadata.csv`` into ``self.metadata``.

        The file is read without a header row; its four columns are named
        ``id``, ``filename``, ``category`` and ``tags``.
        """
        metadata_path = os.path.join(root_dir, "metadata.csv")
        columns = ["id", "filename", "category", "tags"]
        self.metadata = pd.read_csv(metadata_path, header=None, names=columns)

    @property
    def name(self) -> str:
        """Identifier of this source: ``"custom_fsd"``."""
        return "custom_fsd"

    def get_files(self, root_dir: str) -> list[tuple[str, str]]:
        """Return the files under `root_dir`, as produced by `get_filenames`.

        NOTE(review): `get_filenames` is not defined in this notebook; it is
        presumably provided by the star import from the sources core module.
        """
        return get_filenames(root_dir)

    def _metadata_value(self, filename: str, column: str):
        """Return the `column` value of the first metadata row for `filename`.

        Raises:
            IndexError: if no row of ``self.metadata`` has this filename.
                The exception type matches what the previous ``.values[0]``
                lookup raised; only the message is more descriptive.
        """
        matches = self.metadata.loc[self.metadata["filename"] == filename, column]
        if matches.empty:
            raise IndexError(f"No metadata entry found for file {filename!r}")
        return matches.iloc[0]

    def get_category(self, path: str, filename: str) -> str:
        """Return the cleaned-up category recorded for `filename`.

        `path` is accepted for interface compatibility and is not used.

        Raises:
            IndexError: if `filename` is not present in the metadata.
        """
        return _clean_category_name(self._metadata_value(filename, "category"))

    def get_labels(self, path: str, filename: str) -> list[str]:
        """Return the tags recorded for `filename`, split on commas.

        `path` is accepted for interface compatibility and is not used.
        A file whose ``tags`` field is missing yields an empty list.

        Raises:
            IndexError: if `filename` is not present in the metadata.
        """
        tags = self._metadata_value(filename, "tags")
        # An empty CSV field is read by pandas as NaN (a float), which has no
        # `.split`; treat it as "no labels" rather than crashing.
        if pd.isna(tags):
            return []
        return tags.split(",")