## Labeling notes in the Scherbaum-Mshavanadze Dataset

### Description of the Data

The dataset consists of five songs, each with three voices. There are:
- two mics recording the whole song
- a headset mic for each singer (which picks up the other singers a little)
- a larynx (throat) mic for each singer, which picks up only the fundamental frequency of the singer (through the vibrations of the vocal cords)
- video at this [Web site](https://www.audiolabs-erlangen.de/resources/MIR/2018-ISMIR-LBD-ThroatMics)

The songs are in `/Akamai/voice/data/Scherbaum Mshavanadze/Song_name/`.
Each song folder contains an HTML file, `page.txt`, that describes the data:

In [7]:
from html.parser import HTMLParser
import os
import re
import scipy.signal
import scipy.fft
import librosa
import numpy as np
import matplotlib.pyplot as plt

# Module-level flag kept so any other cell that references it keeps working.
# The parser below neither reads nor updates it.
# NOTE(review): confirm whether anything still uses this flag.
new_track = False


class MyHTMLParser(HTMLParser):
    """Collect track titles and media file names from a song's HTML page.

    While the page is fed to the parser, ``self.data`` is filled, in document
    order, with:

    * the ``title`` attribute of every ``<track-wrap>`` tag,
    * the literal string ``"video"`` for every ``<video>`` tag,
    * the base name of the ``src`` attribute of every ``<source>`` tag.

    End tags and text content are ignored (the ``HTMLParser`` defaults for
    ``handle_endtag`` and ``handle_data`` already do nothing).
    """

    def __init__(self):
        super().__init__()
        # Flat list of titles / "video" markers / file names, in page order.
        self.data = []

    def handle_starttag(self, tag, attrs):
        """Record the interesting attribute of a start tag, if any.

        Args:
            tag: Tag name as reported by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag == "track-wrap":
            for key, val in attrs:
                if key == "title":
                    self.data.append(val)
        elif tag == "video":
            self.data.append("video")
        elif tag == "source":
            for key, val in attrs:
                if key == "src":
                    # Keep only the file name, not the directory/URL part.
                    self.data.append(os.path.basename(val))

    def get_data(self):
        """Pair every recorded item with the item that follows it.

        The last item is paired with the first one (the list wraps around).
        When a ``<track-wrap>`` title is directly followed by its
        ``<source>`` file, the pair is ``(title, file_name)``.

        Returns:
            A fresh ``zip`` iterator of ``(item, next_item)`` tuples; it can
            be consumed only once, so call this method again for a new pass.
        """
        following = self.data[1:] + self.data[:1]
        return zip(self.data, following)

# Read the HTML page that describes one song and feed it to the parser.
# The context manager closes the file handle as soon as the text is read
# (the previous bare open(...).read() left it open).
with open('/Akamai/voice/data/Scherbaum Mshavanadze/GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713/page.txt') as page_file:
    html = page_file.read()
parser = MyHTMLParser()
parser.feed(html)


### The Labeling Task

Use the short-time Fourier transform (STFT) of each throat-microphone recording to record the fundamental frequency in each time window.

In [8]:
# Directory from which the audio files are meant to be loaded.
# NOTE(review): this points at the GVM009 song, while the page parsed earlier
# (and the file names it yields) belong to GVM017 -- confirm which song is
# intended before loading files from here.
data_dir = '/Akamai/voice/data/Scherbaum Mshavanadze/GVM009_BatonebisNanina_Tbilisi_Mzetamze_20160919/'
# Keep only the (item, next item) pairs whose first element is a numbered
# "Throat microphone" title; the second element is then the entry recorded
# right after that title (presumably the track's file name).
all_files = [
    pair
    for pair in parser.get_data()
    if re.search("Throat microphone [0-9]+", pair[0])
]


In [9]:
def generate_stft(cy, csr, title, n_freq_bins=15):
    """Plot the magnitude of the lowest STFT frequency bins of a signal.

    Args:
        cy: Time-domain samples, passed straight to ``scipy.signal.stft``
            (assumed to be a mono, 1-D signal -- TODO confirm).
        csr: Sampling frequency of ``cy``, passed as ``fs``.
        title: Title shown above the plot.
        n_freq_bins: How many of the lowest frequency bins to display.
            Defaults to 15, the value that used to be hard-coded.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    freqs, times, spectrum = scipy.signal.stft(cy, csr)
    # Only the lowest bins are drawn; the rest of the spectrum is discarded.
    freqs = freqs[:n_freq_bins]
    magnitude = np.abs(spectrum[:n_freq_bins, :])
    plt.figure(figsize=(10, 5))
    plt.pcolormesh(times, freqs, magnitude, shading='gouraud')
    plt.title(title)
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.show()
    
# Print the file name of every selected throat-microphone track.
# Loading the audio and plotting its STFT are currently disabled; the
# commented-out lines are kept for when they are switched back on.
for index, (_title, filename) in enumerate(all_files):
#     y, sr = librosa.load(data_dir + filename)
    print(filename)
#     generate_stft(y, sr, "Throat " + str(index + 1))

GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_ALRX1M.mp3
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_ALRX2M.mp3
GVM017_ChvenMshvidobaTake2_Ozurgeti_ShalvaChemo2016_20160713_ALRX3M.mp3
