# You can use this notebook to:

* Filter the dataset by specific metadata (certain ragas, talas, etc.)
* Download the filtered dataset
* Analyse the filtered data (for instance, counts of other related information)

In [1]:
import os
import sys
import pdb
import pandas as pd
import numpy as np
# NOTE: '__file__' is a quoted string literal here, not the __file__ variable,
# so os.path.dirname() returns '' and the entry appended to sys.path is '../',
# i.e. the parent of the current working directory (presumably where the
# `utilities` package imported below lives -- confirm against the repo layout).
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))
from utilities.dataset_filtering_util import DatasetFilteringUtil

# Setting API token to access the data

You can get an API token by registering at https://dunya.compmusic.upf.edu/

NOTE: Tradition slug (i.e. a machine readable unique identifier for the tradition) for hindustani is ```dunya-hindustani-cc```

In [2]:
# Read the Dunya API token from the DUNYA_API_TOKEN environment variable so the
# secret never has to be saved inside the notebook. If the variable is not set
# the token is an empty string, exactly as before, and you can still assign
# your token by hand on the line below.
api_token = os.environ.get("DUNYA_API_TOKEN", "")  # or set your token here
# Machine-readable unique identifier of the tradition to query (Hindustani).
tradition_slug = 'dunya-hindustani-cc'

# Fetching information and setting up filters

### Note: 
Fetching the information from the servers every time is time-consuming (~5 min). To make this quicker, we store the information fetched from the server on the local machine.

```use_cached = True``` : uses the information saved on the local machine

```use_cached = False``` : pulls the information from the server

If you are using this notebook after a long time, you might want to use ```use_cached = False``` to make sure the information stored locally is in sync with the server.

In [9]:
# Build the helper object that will do the filtering for us.
# use_cached=True reads the information previously saved on this machine;
# pass use_cached=False to pull it from the server again.
obj = DatasetFilteringUtil(
    api_token,
    tradition_slug,
    use_cached=True,
)

In [10]:
# Let's fetch all the information and set up the filters.
# This renders one multi-select widget per filter (Album Artists, Raga, Tala,
# Form, File slugs); make your selections in them before running the next cell.
obj.prepare_filters()

Output()

SelectMultiple(description='Album Artists', options=('Ajoy Chakrabarty', 'Anol Chatterjee', 'Brajeshwar Mukher…

Output()

SelectMultiple(description='Raga', options=('Ahira bhairav', 'Bahār', 'Bairāgi', 'Basantī kēdār', 'Bhairav', '…

Output()

SelectMultiple(description='Tala', options=('Dādrā', 'Jaṭ', 'Jhaptāl', 'Jhūmrā', 'Kēharvā', 'Rūpak', 'Tilavāḍā…

Output()

SelectMultiple(description='Form', options=('Bhajan', 'Dādrā', 'Khyāl', 'Tarānā', 'ṭhumri'), value=())

Output()

SelectMultiple(description='File slugs', options=('bpm-manual', 'tempo-manual', 'pitch', 'ctonic', 'sama-manua…

In [11]:
# Run this once you have selected items in the widgets:
# it applies the selection and gives back the filtered dataset.
filtered_dataset = obj.perform_filtering()
print(
    "There are %d number of mbids (recordings) selected after filtering"
    % (filtered_dataset.index.size,)
)

There are 108 number of mbids (recordings) selected after filtering


In [12]:
# This is a "preview" of the filtered dataset (top 5 rows).
# Each row is one recording, identified by its mbid; being the last expression
# of the cell, the preview is rendered as the cell output.
filtered_dataset.head()

Unnamed: 0,mbid,title,release,works,raags,taals,forms,layas,album_artists,mp3,pitch,ctonic,sama-manual,bpm-manual,tempo-manual,sections-manual-p,mphrases-manual,is_selected
0,450a6fcc-3c0a-483d-a31b-dde91413dcdd,Shrutinandan Concept - an Introduction by Pand...,[Geetinandan : Part-3],[],[],[],[],[],[Ajoy Chakrabarty],1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,True
1,b71c2774-2532-4692-8761-5452e2a83118,Bairagi,[Geetinandan : Part-3],"[Bar Bar Har Gai, Mere Maname Baso Ram Abhiram...",[Bairāgi],[ēktāl],[Khyāl],[Vilaṁbit],[Ajoy Chakrabarty],1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True
2,3124479b-5118-4cf3-823f-8fefad45e586,Bilaskhani Todi,[Geetinandan : Part-3],"[Aankhia Bhar Aayee, Basia Bajee Mohana Shyama...",[Bilāsakhānī tōḍī],[Tīntāl],[Khyāl],[Vilaṁbit],[Ajoy Chakrabarty],1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True
3,6a2c841d-5a0e-4886-a5c0-f856fccbb938,Nat Bhairon,[Geetinandan : Part-3],"[Dim Tom Tana Tadim Tanana, Gada Gada Tiharee ...",[Naṭ bhairav],"[Jhūmrā, Tīntāl, ēktāl]","[Khyāl, Tarānā]",[Vilaṁbit],[Ajoy Chakrabarty],1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True
4,51656b20-295c-40f9-8dab-005b9b90fa98,Aahir Bhairon,[Geetinandan : Part-3],"[Bhor Hi Aheerin, Kaise Ke Kara Aaun]",[Ahira bhairav],"[Jhaptāl, Tīntāl]",[Khyāl],[Madhya],[Ajoy Chakrabarty],1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True


In [13]:
# Optionally save the filtered dataset to a CSV file so it can be analysed later.
file_path = 'filtered_dataset.csv'
filtered_dataset.to_csv(path_or_buf=file_path)

In [15]:
# If you want to download the selected dataset you can do it here.
# Files go into a "saraga_downloads" folder under the current working directory
# rather than an absolute path that only exists on one machine; point root_dir
# elsewhere if you prefer.
# The trailing '' keeps a trailing path separator on root_dir, in case
# download_files builds paths by plain string concatenation (not visible here).
root_dir = os.path.join(os.getcwd(), 'saraga_downloads', '')  # directory where files will be downloaded
os.makedirs(root_dir, exist_ok=True)  # make sure the target directory exists
obj.obj_dataset.download_files(root_dir, mbids=filtered_dataset.mbid.values)

In [16]:
# In case you want to download only specific files for the selected dataset,
# for example only the mp3 and not the other files, you can do it as well.

# Files go into a "saraga_downloads" folder under the current working directory
# rather than an absolute path that only exists on one machine; point root_dir
# elsewhere if you prefer.
# The trailing '' keeps a trailing path separator on root_dir, in case
# download_files builds paths by plain string concatenation (not visible here).
root_dir = os.path.join(os.getcwd(), 'saraga_downloads', '')  # directory where files will be downloaded
os.makedirs(root_dir, exist_ok=True)  # make sure the target directory exists
file_slug = ['mp3']  # only files with these slugs are downloaded
obj.obj_dataset.download_files(root_dir, mbids=filtered_dataset.mbid.values, slug=file_slug)

# Tip:

If you want to perform further analysis on the filtered dataset (counting concepts, files, etc.), you can follow the same procedure (a few lines of code) shown in the ```concept_statistics.ipynb``` notebook.

The pandas dataframe that holds the filtered dataset has the same format in all scripts.

