# You can use this script to:

* Filter the dataset by specific metadata (certain ragas, talas, etc.)
* Download the filtered dataset
* Do some analysis on the filtered data (for instance, counts of other related information)

In [None]:
import os
import sys
import pdb
import pandas as pd
import numpy as np
# Extend the module search path so the `utilities` import below can be resolved.
# Note that '__file__' is a quoted string literal here, not the __file__ variable
# (presumably because notebooks do not define it), so dirname() returns '' and
# the entry actually appended is '../', i.e. relative to the current working directory.
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))
from utilities.dataset_filtering_util import DatasetFilteringUtil

# Enable the widgets notebook extension needed for this script.
# The leading `!` is IPython/Jupyter syntax for running a shell command, so this
# line only works when executed inside a notebook (it is not valid plain Python).
!jupyter nbextension enable --py widgetsnbextension

# Setting API token to access the data

You can get an API token by registering at https://dunya.compmusic.upf.edu/

NOTE: The tradition slug (i.e. a machine-readable unique identifier for the tradition) for Hindustani is ```dunya-hindustani-cc```

In [None]:
# Dunya API token used to access the data. Prefer exporting it as the
# DUNYA_API_TOKEN environment variable so the secret is never saved inside the
# notebook; if the variable is not set, the value falls back to "" and you can
# replace that fallback with your token instead.
api_token = os.environ.get("DUNYA_API_TOKEN", "")  # or set your token here
# Machine-readable identifier of the tradition to work with (Hindustani here).
tradition_slug = 'dunya-hindustani-cc'

# Fetching information and setting up filters

### Note: 
Fetching the information from the server every time is a time-consuming process (~5 min). To make this quicker, we store the information fetched from the server on the local machine.

```use_cached = True``` : uses the information saved on the local machine

```use_cached = False``` : pulls the information from the server

If you are returning to this notebook after a long time, you may want to use ```use_cached = False``` to make sure the information stored locally is in sync with that on the server.

In [None]:
# Build the helper object that will perform the filtering for us.
# With use_cached=True the information saved on the local machine is used
# instead of being pulled from the server again.
obj = DatasetFilteringUtil(
    api_token,
    tradition_slug,
    use_cached=True,
)

In [None]:
# Let's fetch all the information and set up the filters.
# NOTE(review): presumably this is the step that shows the selection widgets;
# make your selections before running the filtering cell that follows.
obj.prepare_filters()

In [None]:
# Run this once you have selected the items: it applies the chosen filters
# and returns the filtered dataset.
filtered_dataset = obj.perform_filtering()

# Report how many rows (mbids / recordings) are left after filtering.
num_selected = filtered_dataset.index.size
summary = "There are %d number of mbids (recordings) selected after filtering"
print(summary % num_selected)

In [None]:
# Preview of the filtered dataset (top 5 rows). The bare expression is left as
# the last statement so that the notebook displays the resulting table.
filtered_dataset.head()

In [None]:
# Optionally dump the filtered dataset to a CSV file so it can be analysed elsewhere.
# The path is relative, so the file is written to the current working directory.
file_path = 'filtered_dataset.csv'
filtered_dataset.to_csv(file_path)

In [None]:
# Optionally download the files of the recordings selected by the filters.
# NOTE: the path below is specific to one machine; point it to a directory of your own.
root_dir = '/Users/naad/Work/Personal/saraga/testing/'  # directory where files will be downloaded
selected_mbids = filtered_dataset.mbid.values
obj.obj_dataset.download_files(root_dir, mbids=selected_mbids)

In [None]:
# You can also restrict the download to specific file types of the selected
# recordings, for example fetching only the mp3 and none of the other files.
# NOTE: the path below is specific to one machine; point it to a directory of your own.
root_dir = '/Users/naad/Work/Personal/saraga/testing/'  # directory where files will be downloaded
file_slug = ['mp3']  # only files with these slugs are requested
selected_mbids = filtered_dataset.mbid.values
obj.obj_dataset.download_files(
    root_dir,
    mbids=selected_mbids,
    slug=file_slug,
)

# Tip:

If you want to perform further analysis on the filtered dataset (counting concepts, files, etc.), you can follow the same procedure (a few lines of code) that is shown in the ```concept_statistics.ipynb``` notebook.

The pandas dataframe that holds the filtered dataset has the same format in all scripts.

