# Saraga annotations samples

This notebook aims at demonstrating some of the contents of Saraga database annotations: sections, typical phrases, sama and tempo annotations. It is assumed that the notebook downloadAllSARAGAContent.ipynb has been run and data is downloaded in two folders: 'hindustani' and 'carnatic'.

The particular file for which we demonstrate the annotations is set in cell 2. To pick another file, simply change the file name (`file_base`) there.

In [None]:
import collections
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import IPython

from essentia.standard import *
from essentia import array

# Sampling rate in Hz: audio is loaded at this rate, and it is used to convert
# annotation times (seconds) into sample indices.
fs = 44100

Execute **one** of the following cells to configure the notebook

In [None]:
# Carnatic samples: swap the commented line in to analyse the other recording
file_base = 'Cherthala Ranganatha Sharma - Bhuvini Dasudane'
#file_base = 'Akkarai Sisters - Koti Janmani'
music_tradition = 'carnatic'  # folder that holds the downloaded files

In [None]:
# Hindustani samples: swap the commented line in to analyse the other recording
file_base = 'Omkar Dadarkar - Bhairavi Dadra'
#file_base = 'Ajoy Chakrabarty - Bilaskhani Todi'
music_tradition = 'hindustani'  # folder that holds the downloaded files

In [None]:
# All files of a recording share the same base name inside the tradition folder
base_name = os.path.join(music_tradition, file_base)

# Audio, metadata and annotation files
audio_file = '{}.mp3'.format(base_name)
metadata_file = '{}.json'.format(base_name)
phrase_annot_file = '{}.mphrases-manual.txt'.format(base_name)
sama_annot_file = '{}.sama-manual.txt'.format(base_name)
bpm_annot_file = '{}.bpm-manual.txt'.format(base_name)
section_annot_file = '{}.sections-manual.txt'.format(base_name)
tonic_file = '{}.ctonic.txt'.format(base_name)

# Reading files (audio and annotations)
sound_sig = MonoLoader(filename=audio_file, sampleRate=fs)()
with open(metadata_file) as json_data:
    metadata = json.load(json_data)

# ndmin=1 keeps the result a 1-D array even when a file holds a single row,
# so the later iteration / indexing over annotations keeps working.
phrase_annotations = np.loadtxt(
    phrase_annot_file,
    dtype={'names': ('start', 'dummy', 'duration', 'phrase'),
           'formats': ('f4', 'i4', 'f4', 'S32')},
    ndmin=1)
sama_annotations = np.loadtxt(sama_annot_file, ndmin=1)
tonic = np.loadtxt(tonic_file)

# Each bpm row becomes a tuple of floats
bpm_annotations = []
with open(bpm_annot_file) as fin:
    for line in fin:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines
        # tempo-free sections have '-' label, better to set it to -1 for simplicity of conversion
        line = line.replace('-,', '-1,')
        bpm_annotations.append(tuple(float(field) for field in line.split(',')))

# Each section row becomes (float, float, float, label)
section_annotations = []
with open(section_annot_file) as fin:
    for line in fin:
        # hindustani uses comma separator, carnatic tab separator, this line unifies
        line = line.strip().replace('\t', ',')
        if not line:
            continue  # tolerate blank / trailing empty lines
        pList = line.split(',')
        section_annotations.append((float(pList[0]), float(pList[1]), float(pList[2]), pList[3]))

In [None]:
# Print short info on the recording
#print('Raaga: ',metadata['raaga'][0]['name'],',\tTaala: ',metadata['taala'][0]['name'])
print('MusicBrainz link: https://musicbrainz.org/recording/{}'.format(metadata['mbid']))
print('Tonic: {}Hz'.format(tonic))
print()

# Section table: the stop time is derived from each row's start and duration
print('\033[1m{:<15}{:<10}{:<10}\033[0m'.format('Section', 'Start', 'Stop'))
for start, _, duration, section in section_annotations:
    print('{section:<15}{start:<10}{stop:<10}'.format(section=section, start=round(start, 2), stop=round(start+duration, 2)))

# BPM table: each row is unpacked as (bpm, start, stop), so the row's own stop
# value is printed (previously a stale `duration` left over from the section
# loop above was added to start).
print()
print('\033[1m{:<7}{:<7}{:<7}\033[0m'.format('BPM', 'Start', 'Stop'))
for bpm, start, stop in bpm_annotations:
    print('{bpm:<7}{start:<7}{stop:<7}'.format(bpm=bpm, start=round(start, 2), stop=round(stop, 2)))

### Typical phrases
Let's extract some phrases available in the phrase level annotations 

In [None]:
# Collect, per phrase label, the (start, stop) sample ranges of all its occurrences
phrase_dict = collections.defaultdict(list)
for onset_sec, _, length_sec, label in phrase_annotations:
    first_sample = int(fs*onset_sec)
    last_sample = first_sample + int(fs*length_sec)
    phrase_dict[label].append((first_sample, last_sample))

# Print each label (stored as bytes, hence the decode) followed by one audio
# player per occurrence of that phrase
for label in phrase_dict:
    print(label.decode())
    for first_sample, last_sample in phrase_dict[label]:
        excerpt = sound_sig[first_sample:last_sample]
        IPython.display.display(IPython.display.Audio(excerpt, rate=fs))

## Rhythmic cycles
Sama annotations mark the beginning of each rhythmic cycle. Let's view some cycles, automatically extract beats as well, and visualize everything together.
For a simple introduction to tala with examples, refer to: http://compmusic.upf.edu/examples-taala-carnatic

To also demonstrate accessing section information, we will be using the cycles in the second section, also comparing the annotated tempo with the tempo estimated using the Rhythm Extractor implementation in Essentia.

In [None]:
# Plotting theoretical descriptions of the taala
folder_name = 'http://compmusic.upf.edu/system/files/static_files/'
# Default image, shown when no illustration is listed for the tala
imageUrl = 'http://compmusic.upf.edu/sites/all/themes/litejazz/images/logotoweb.png'

# Illustration file name for each tala name, per tradition
carnatic_illustrations = {
    'Miśra chāpu': 'mChapu.png',
    'Ādi': 'Aditaala_illustrated.png',
    'Rūpaka': 'rupakam_annotations.png',
    'Khaṇḍa chāpu': 'kChapu_annotations.png',
}
# See http://compmusic.upf.edu/examples-taal-hindustani
hindustani_illustrations = {
    'ēktāl': 'ektaal_vilambit_maatras.png',
    'Tīntāl': 'teentaal_matras.png',
    'Jhaptāl': 'Jhaptaal_matras.png',
    'Rūpak': 'rupak_maatras.png',
    'Dādrā': 'dadra.png',
}

if music_tradition == 'carnatic':
    taala_name = metadata['taala'][0]['name']
    print('Taala: %s' % taala_name)
    if taala_name in carnatic_illustrations:
        imageUrl = folder_name + carnatic_illustrations[taala_name]
elif music_tradition == 'hindustani':
    taals_name = metadata['taals'][0]['name']
    print('Taal: %s' % taals_name)
    if taals_name in hindustani_illustrations:
        imageUrl = folder_name + hindustani_illustrations[taals_name]

# Last expression of the cell: rendered as the cell output
IPython.display.Image(imageUrl, width = 300, height = 100)

In [None]:
# Section selection is performed in this cell
# We will display second section content, id=1
selected_section_ind = 1
(section_start, dummy, section_duration, section_tag) = section_annotations[selected_section_ind]
section_stop = section_start + section_duration
# NOTE(review): this assumes the bpm annotations are listed in the same order
# as the sections -- confirm against the annotation files.
(section_bpm, bpm_start, bpm_stop) = bpm_annotations[selected_section_ind]
print('Section: {}, annotated bpm: {}'.format(section_tag, section_bpm))
print('Location in audio: (start,stop in secs): {}, {}'.format(round(section_start, 2), round(section_stop, 2)))

# Extracting corresponding sama annotations in the section
in_section = np.logical_and(sama_annotations >= section_start, sama_annotations < section_stop)
sama_annotations_seg = sama_annotations[in_section]
if sama_annotations_seg.size == 0:
    # Fail with an explicit message instead of an opaque IndexError below
    raise ValueError('No sama annotations found in section {!r} ({} - {} secs)'.format(
        section_tag, round(section_start, 2), round(section_stop, 2)))

# The analysed segment spans from the first to the last sama of the section;
# sama times are then expressed relative to the start of that segment.
start_seg = sama_annotations_seg[0]
stop_seg = sama_annotations_seg[-1]
sama_annotations_seg = sama_annotations_seg - start_seg

### Comparing annotations and estimation
Below, we use rhythm extractor in Essentia to estimate beats for the selected section. 
Sama annotations and beats are sonified together with the original audio.
We also plot annotated tempo versus estimated tempo

In [None]:
# Cut the selected segment (first to last sama of the section) out of the audio
sound_sig_seg = sound_sig[int(start_seg * fs):int(stop_seg * fs)]
t = np.arange(sound_sig_seg.size)/float(fs)
zero_array = t * 0 #used only for plotting purposes

# Plot 1: waveform with the annotated sama positions (blue)
f, axarr = plt.subplots(3,1,figsize=(13, 6))
axarr[0].plot(t, sound_sig_seg);
axarr[0].set_title(audio_file);axarr[0].axis('off')
axarr[0].vlines(sama_annotations_seg, -1, 1.2, color = 'b')

# Beat detection using Essentia
rhythm_extractor = RhythmExtractor2013(method = "multifeature")
bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(sound_sig_seg)

# Sonification: estimated beats as noise bursts, annotated samas as beeps
onsetMarker = AudioOnsetsMarker(onsets = beats, type = 'noise')
samaMarker = AudioOnsetsMarker(onsets = array(sama_annotations_seg), type = 'beep')
marked_sound_sig = samaMarker(onsetMarker(sound_sig_seg))

# Plot 2: estimated beats (red) together with annotated samas (blue)
axarr[1].plot(t, zero_array);
axarr[1].set_title('Beats estimated');
axarr[1].axis('off')
axarr[1].vlines(beats, -1, 1, color='r')
axarr[1].vlines(sama_annotations_seg, -1, 1, color='b')

# Plot 3: tempo derived from the intervals between consecutive estimated beats
axarr[2].plot(60/beats_intervals);
axarr[2].set_title('Estimated BPM');
if section_bpm > 0:
    # Tempo-free sections carry bpm = -1; drawing the references for them would
    # put lines at negative values and invert the y axis, so they are skipped.
    axarr[2].hlines([section_bpm/2, section_bpm, section_bpm * 2], 0, len(beats_intervals), color = 'k', label = 'Annotated BPM * (0.5, 1, 2)')
    axarr[2].legend()
    axarr[2].set_ylim(0, section_bpm*2.5)

print('Audio with sonified sama annotations(beeps) and estimated beats(clicks)')
# Last expression of the cell: rendered as an audio player
IPython.display.Audio(marked_sound_sig, rate=fs)

### Annotations of the IEMP data

In this part, we present a sample from the Interpersonal Entrainment in Music Performance data by Clayton, Eerola, Jakubowski, Tarsitani and Leante available on https://osf.io/ks325/

In [None]:
instrument = 'TABLA'
#instrument = 'SAROD'

# (audio file, section annotation file, onset annotation file) per instrument
iemp_sample_files = {
    'TABLA': (
        '../data/IEMP_north_indian_raga/Sample/Media/NIR_PrB_Jhinjhoti_2Gats_Tabla_sample.mp3',
        '../data/IEMP_north_indian_raga/Sample/Annotations/NIR_PrB_Jhinjhoti_2Gats_Annotation_Sample.csv',
        '../data/IEMP_north_indian_raga/Sample/Annotations/NIR_PrB_Jhinjhoti_2Gats_Onsets_Raw_Tabla_Sample.csv',
    ),
    'SAROD': (
        '../data/IEMP_north_indian_raga/Sample/Media/NIR_PrB_Jhinjhoti_2Gats_Sarod_sample.mp3',
        '../data/IEMP_north_indian_raga/Sample/Annotations/NIR_PrB_Jhinjhoti_2Gats_Annotation_Sample.csv',
        '../data/IEMP_north_indian_raga/Sample/Annotations/NIR_PrB_Jhinjhoti_2Gats_Onsets_Raw_Sarod_Sample.csv',
    ),
}
if instrument in iemp_sample_files:
    wav_file, section_annot_file, onset_annot_file = iemp_sample_files[instrument]

# Reading the audio of the selected instrument
sound_sig = MonoLoader(filename=wav_file, sampleRate=fs)()

In [None]:
# Reading section annotations: rows mentioning the instrument give the section
# limits in their third and fourth columns (the last such row wins)
with open(section_annot_file) as fin:
    instrument_rows = [row.split(',') for row in fin if instrument in row]
for fields in instrument_rows:
    start_seg = float(fields[2])
    stop_seg = float(fields[3])

# For zooming purposes, restrict the analysis to the 40-50 sec range of the section
start_seg = start_seg + 40
stop_seg = start_seg + 10

# Reading onsets that fall strictly inside the analysis range, stored relative
# to its start
onset_annots = []
with open(onset_annot_file) as fin:
    for row in fin:
        if 'Time' in row:
            continue  # skip row containing column labels
        onset = float(row.split(',')[0])
        if start_seg < onset < stop_seg:
            onset_annots.append(onset - start_seg)


In [None]:
# In this cell, we demonstrate computation of an onset strength function
# Essentia includes many onset strength functions and onset detection methods
# Please refer to the documentation and try out on your own different options for analysis

sound_sig_seg = sound_sig[int(start_seg * fs):int(stop_seg * fs)]
t = np.arange(sound_sig_seg.size)/float(fs)

# Sonifying the annotated onsets as noise bursts on top of the audio
onsetMarker = AudioOnsetsMarker(onsets=array(onset_annots), type='noise')
marked_sound_sig = onsetMarker(sound_sig_seg)

# Building blocks of the onset strength function (high-frequency content)
w = Windowing(type='hann')
fft = FFT() # this gives us a complex FFT
c2p = CartesianToPolar() # and this transforms it into a pair (magnitude, phase)


def frame_hfc(frame):
    """Return the squared spectral magnitudes of `frame`, weighted by bin index and summed."""
    mag, _phase = c2p(fft(w(frame)))
    return np.dot(np.power(mag, 2), np.arange(mag.size))


hfc = np.array([frame_hfc(frame)
                for frame in FrameGenerator(sound_sig_seg, frameSize = 1024, hopSize = 512)])

# Plots: waveform with annotated onsets on top, onset strength function below
f, axarr = plt.subplots(2,1,figsize=(13, 6))
wave_ax, hfc_ax = axarr[0], axarr[1]
wave_ax.plot(t, sound_sig_seg);
wave_ax.set_title(wav_file)
wave_ax.axis('off')
wave_ax.vlines(onset_annots, -1, 1.2, color = 'b')

# Plotting the high-frequency content function
hfc_ax.plot(hfc,label='High-freq.content')
hfc_ax.legend(loc=1)
hfc_ax.axis('off')

#Little exercise for you: implement a method to detect the onset locations from the high-frequency content function

# Last expression of the cell: rendered as an audio player
IPython.display.Audio(marked_sound_sig, rate=fs)