# Marc Siquier - Dataset Creation

In [1]:
import os
import random

from freesound import freesound
from IPython.core.display import display, HTML

import utils
from utils.run_entity_linking import spotlight

# Get the API key from http://www.freesound.org/apiv2/apply/ (you'll need Freesound user account)
# and expose it to the notebook through the FREESOUND_API_KEY environment variable.
# The key is a credential, so it is read from the environment instead of being hardcoded here,
# where it would be shared with everybody who can see the notebook.
API_KEY = os.environ.get('FREESOUND_API_KEY')
if not API_KEY:
    # Fail right away with an actionable message instead of sending unauthenticated requests later on
    raise RuntimeError(
        'Freesound API key not found. Set the FREESOUND_API_KEY environment variable '
        'before starting the notebook (get a key at http://www.freesound.org/apiv2/apply/).'
    )

# Client used by the following cells to query Freesound, authenticated with the API key
c = freesound.FreesoundClient()
c.set_token(API_KEY, "token")

### 1) Define a number of audio categories and find audio examples from Freesound for each category

In [2]:
# Configure dataset parameters and audio categories
DATASET_NAME = 'broad' # Dataset will be saved in a .json file with this name
DATASET_CLASSES = {  
    # Must be dictionary with structure like {'class name': 'query terms', 'class name 2': 'query terms 2',... }
    'Acoustic Guitar': 'acoustic guitar',
    'Electric Guitar': 'electric guitar',
    'Distorted Guitar' : 'distorted guitar'
}
N = 100 # Number of sounds per class
N_SOUNDS_PER_USER = 3  # Do not get more than 3 sounds per user

# Get sound examples from Freesound
dataset = dict()
for name, target_query in DATASET_CLASSES.items():
    print 'Getting sounds for class %s...' % name,
    
    # Get first page of results
    PAGE_SIZE = 150 # Page size for fs requests
    N_PAGES = int((N * 1) / PAGE_SIZE)  # Number of pages to retrieve
    fields = "id,tags,description,username"
    results_pager = c.text_search(
        query=target_query,
        page_size=PAGE_SIZE,
        group_by_pack=1,
        fields = "id,tags,description,username,analysis",
        #descriptors = "lowlevel.mfcc.mean,lowlevel.barkbands.mean",
        )
    all_results = results_pager.results

    # TIP ON AUDIO FEATURES: you can get also audio features extracted in freesound by passing a 'descriptors' 
    # parameter in the text_search function and including 'analysis' in the fields list 
    # (see http://www.freesound.org/docs/api/resources_apiv2.html#response-sound-list):
    #
    # fields = "id,tags,description,username,analysis"
    # descriptors = "lowlevel.spectral_centroid,lowlevel.barkbands.mean"
    #
    # e.g.: results_page = c.text_search(query=target_query, ..., fields=fields, descriptors=descriptors)
    # ...
    
    # Get extra pages
    for i in range(0, N_PAGES):
        if results_pager.count > (i+1) * PAGE_SIZE:
            results_pager = results_pager.next_page()
            all_results += results_pager.results
    print len(all_results)
    # Get only N sounds max per user
    user_sounds_count = dict()
    filtered_results = list()
    random.shuffle(all_results)  # Shuffle list of sounds (randomise order)
    for result in all_results:
        if result["username"] in user_sounds_count:
            user_sounds_count[result["username"]] += 1
        else:
            user_sounds_count[result["username"]] = 1
        if user_sounds_count[result["username"]] <= N_SOUNDS_PER_USER:
            filtered_results.append(result)

    # Randomly select N sounds from al results obtained
    if len(filtered_results) >= N:
        selected_sounds = random.sample(filtered_results, N)
        dataset[name] = selected_sounds
        print 'selected %i sounds out of %i!' % (len(selected_sounds), len(filtered_results))
    else:
        print 'not enough sounds were found for current class (%i sounds found).' % len(filtered_results)

    # TIP ON KEYWORD EXTRACTION: we could extract some keywords from the textual descriptions using functions
    # provided in ELVIS (see https://github.com/sergiooramas/elvis and run_entity_linking.py file in utils folder)
    # You can uncomment the example code below to get keywords for each sound (dataset creation will take longer)
    
    
    for class_name, sounds in dataset.items():
        for sound in sounds:
            sound_textual_description = sound['description']
            results = spotlight(sound_textual_description.split('\n'))
            keywords = list()
            for element in results:
                for entity in element['entities']:
                    keywords.append(entity['label'])
            sound['keywords'] = keywords
    

# Save dataset to file so we can work with it later on
utils.save_to_json('%s.json' % DATASET_NAME, dataset)

Getting sounds for class Distorted Guitar... 150
selected 100 sounds out of 106!
Getting sounds for class Electric Guitar... 150
selected 100 sounds out of 103!
Getting sounds for class Acoustic Guitar... 150
selected 100 sounds out of 137!


### 2) Explore the dataset (know your data!)

In [3]:
# Load dataset from saved file
DATASET_NAME = 'broad'
dataset = utils.load_from_json('%s.json' % DATASET_NAME)
N = len(dataset[dataset.keys()[0]]) # Number of sounds per class
print 'Loaded dataet "%s" (%i classes, %i sounds per class)' % (DATASET_NAME, len(dataset.keys()), N)

Loaded dataet "broad" (3 classes, 100 sounds per class)


In [4]:
# For every category: embed Freesound players for its first sounds, then show one tagcloud
# built from the class tags and another one built from the class keywords
tagcloud_options = dict(N=100, max_px=50, min_px=10, pow_scale=1.2)  # Shared by both tagclouds
for class_name, sounds in dataset.items():
    sound_ids = [sound['id'] for sound in sounds]
    html_parts = [
        "<h3>%s</h3>" % class_name,
        "<h4>Example sounds:</h4>",
        utils.generate_html_with_sound_examples(sound_ids[:6]),  # Players for the first 6 sounds
    ]
    class_tags = utils.get_all_tags_from_class(class_name, dataset)
    class_keywords = utils.get_all_keywords_from_class(class_name, dataset)
    html_parts.append("<h4>Most commons tags tagcloud:</h4>")
    html_parts.append(utils.generate_html_tagcloud(class_tags, **tagcloud_options))
    html_parts.append("<h4>Most commons keywords tagcloud:</h4>")
    html_parts.append(utils.generate_html_tagcloud(class_keywords, **tagcloud_options))
    html_parts.append("<br>")
    html = "".join(html_parts)
    # Render the assembled HTML in the output of the cell
    display(HTML(html))