# You can use this script for

* Counting the number of recordings (i.e. mbids) for each concept in the dataset (such as raga, tala, artist, etc.)

In [3]:
import os
import sys
import pdb
import pandas as pd
import numpy as np
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))
from utilities.dataset import Dataset

# Let's start by setting our api_token and tradition slug

In [None]:
# Access token used to authenticate against the API; fill it in before running.
api_token = ''
# Slug identifying the tradition (collection) on which the analysis is run.
tradition_slug = "dunya-hindustani-cc"

In [None]:
# Create a dataset object for the selected tradition.
# It is constructed from the tradition slug and the API token set in the cell above.
# NOTE(review): presumably an empty api_token makes later API calls fail - confirm.
obj = Dataset(tradition_slug, api_token)

In [None]:
# Let's get all the information associated with the dataset (all the metadata and related annotation files).
# In the case of Carnatic music this also fetches linked multi-track recordings.
# NOTE(review): the counting cell further down iterates `obj.dataset_info` rather than
# this return value - presumably both refer to the same table; confirm.
dataset_info = obj.consolidate_dataset_info()

# Let's get the count of mbids (recordings) for each type of metadata

### Note
The metadata slugs (machine-readable identifiers of concepts like Raga and Tala) are different in the Hindustani and Carnatic traditions. Please use the appropriate one for the tradition you are analyzing.

Here we list the slugs for the different metadata types in the Hindustani and Carnatic traditions:

| Metadata | Hindustani | Carnatic |
| --- | --- | --- |
| Raga | raags | raaga |
| Tala | taals | taala |
| Form | forms | form |
| Laya | layas | laya |
| Work | works | work |
| Release | release | concert |
| Album artist | album_artists | album_artists |

In [None]:
# Pick the metadata slug to analyse; it must be one of the slugs listed
# for the tradition selected above (Hindustani and Carnatic slugs differ).
meta_slug = "taals"

In [None]:
# Let's count the number of mbids (recordings) which are linked with this concept.
# We build one (mbid, metadata) row per recording/value pair: a recording that
# carries several values for the chosen slug contributes one row per value.
output = []
for _, row in obj.dataset_info.iterrows():
    values = row[meta_slug]
    if values is None:
        continue
    if isinstance(values, (np.ndarray, list)):
        # Multi-valued field: one row per value (an empty list adds nothing).
        output.extend(dict(mbid=row.mbid, metadata=val) for val in values)
        continue
    if hasattr(values, '__len__'):
        # Sized scalar such as a string: skip it when empty.
        if len(values) == 0:
            continue
    elif pd.isna(values):
        # Unsized missing marker (e.g. NaN): nothing to count, and calling
        # len() on it would raise TypeError.
        continue
    # Single-valued field. It must be stored under the same 'metadata' key as
    # the multi-valued case, otherwise it lands in a separate column and is
    # left out of the counts computed from the 'metadata' column.
    output.append(dict(mbid=row.mbid, metadata=values))
# Fix the column names explicitly so that the 'metadata' column exists even
# when no recording is linked with the chosen slug.
output_df = pd.DataFrame(output, columns=['mbid', 'metadata'])

In [None]:
# Tally how many rows (recordings) carry each metadata value.
metadata_counts = output_df.metadata.value_counts()
# Raise the pandas row limit to just above the number of distinct values;
# otherwise the printed table would be truncated.
n_distinct = len(metadata_counts)
pd.set_option('display.max_rows', n_distinct + 1)
print(metadata_counts)

In [None]:
# Optionally persist the full table of counts as a CSV file.
# Adjust the destination path below as needed.
file_path = "metadata_counts.csv"
metadata_counts.to_csv(file_path)