# Notebook content 
* Loading GMD dataset
* Playing example audio
* Dataset statistics before and after discarding samples
* Extracting MIDI onset times (observations)
* Tuning Kalman Filter parameters on training dataset

### Import packages

In [1]:
import IPython.display
import os
import sys

import matplotlib.pyplot as plt
import numpy as np

sys.path.append(os.path.join("..", "src"))

from data_loader import GmdDataLoader
from switching_kalman_filter_tracker import SwitchingKalmanFilterTracker

### Define paths

In [2]:
data_root_path = os.path.join("..", "data")
dataset_root_path = os.path.join(data_root_path, "groove")

### Define constants 

In [3]:
min_duration = 30.0

### Create generator for audio files

In [4]:
groove_data_loader = GmdDataLoader(dataset_root_path)

In [5]:
train_data_generator = groove_data_loader.get_data(split=GmdDataLoader.TRAIN_SPLIT, min_duration=min_duration, 
                                                  get_midi_onsets=True)
val_data_generator = groove_data_loader.get_data(split=GmdDataLoader.VALIDATION_SPLIT, min_duration=min_duration, 
                                                  get_midi_onsets=True)
test_data_generator = groove_data_loader.get_data(split=GmdDataLoader.TEST_SPLIT, min_duration=min_duration, 
                                                  get_midi_onsets=True)

### Get dataset statistics

In [6]:
original_n = groove_data_loader.get_dataset_size()
original_train_n = groove_data_loader.get_dataset_size(split="train")
original_validation_n = groove_data_loader.get_dataset_size(split="validation")
original_test_n = groove_data_loader.get_dataset_size(split="test")
print(("Number of samples before discarding stage - Total samples: {} - Training samples: {} - Validation samples: {} - " + 
      "Test samples: {}").format(original_n, original_train_n, original_validation_n, original_test_n))
n = groove_data_loader.get_dataset_size(min_duration = min_duration)
train_n = groove_data_loader.get_dataset_size(split="train", min_duration = min_duration)
validation_n = groove_data_loader.get_dataset_size(split="validation", min_duration = min_duration)
test_n = groove_data_loader.get_dataset_size(split="test", min_duration = min_duration)
print(("Number of samples after discarding stage - Total samples: {} - Training samples: {} - Validation samples: {} - " + 
      "Test samples: {}").format(n, train_n, validation_n, test_n))

Number of samples before discarding stage - Total samples: 1150 - Training samples: 897 - Validation samples: 124 - Test samples: 129
Number of samples after discarding stage - Total samples: 346 - Training samples: 250 - Validation samples: 41 - Test samples: 55


### Extract all training onset times from MIDI file

In [7]:
X_train = []
meta_data_rows = []
start_times = []
for x, meta_data_row, start_time in train_data_generator:
    X_train.append(x)
    meta_data_rows.append(meta_data_row)
    start_times.append(start_time)
for x, meta_data_row, start_time in val_data_generator:
    X_train.append(x)
    meta_data_rows.append(meta_data_row)
    start_times.append(start_time)    
X_train = np.array(X_train)

  X_train = np.array(X_train)


### Extract all test onset times from MIDI file

In [8]:
X_test = []
meta_data_rows_test = []
start_times_test = []
for x, meta_data_row, start_time in train_data_generator:
    X_test.append(x)
    meta_data_rows_test.append(meta_data_row)
    start_times_test.append(start_time)
X_test = np.array(X_train)    

### For each training file, run particle filter and store average of tracked tempos 

In [13]:
for i, x in enumerate(X_train):
    tempo_tracker = SwitchingKalmanFilterTracker()
    print("Track BPM: {}".format(meta_data_rows[i]["bpm"]))
    for onset in x:
        tempo_tracker.run(onset)
    tempo_estimates = tempo_tracker.get_tempo_estimates()
    print(tempo_tracker.tempo_period_to_bpm(tempo_estimates.mean()))
    

Track BPM: 80
79.80459751131639
Track BPM: 94
47.528508835986145
Track BPM: 120
60.354724414436426
Track BPM: 95
95.13716927534841
Track BPM: 95
48.02733780097722
Track BPM: 105
53.04655418378334
Track BPM: 105
104.7882772485624
Track BPM: 78
116.71682417513495
Track BPM: 64
64.05327540977477
Track BPM: 140
70.09724388101591
Track BPM: 80
80.19363131203795
Track BPM: 102
76.51947934085936
Track BPM: 93
69.82447093894041
Track BPM: 125
93.41440514093941
Track BPM: 116
58.708693544996386
Track BPM: 125
62.67040302645446
Track BPM: 125
93.57707457989287
Track BPM: 125
62.830421573575975
Track BPM: 125
95.9741368070507
Track BPM: 180
67.89477860587034
Track BPM: 290
73.2120782944107
Track BPM: 90
89.91467695604038
Track BPM: 170
85.16896340233434
Track BPM: 170
168.69030734845168
Track BPM: 84
42.36328170051841
Track BPM: 114
113.9935435503384
Track BPM: 110
76.61754873879575
Track BPM: 122
91.58314344558693
Track BPM: 110
110.1894239405459
Track BPM: 50
74.99496074615335
Track BPM: 115
86

52.807996970714875
Track BPM: 92
46.461695455480104
Track BPM: 100
99.73187851963013
Track BPM: 158
77.29147864351152
Track BPM: 184
91.9947790593188
Track BPM: 96
96.1190771943958
Track BPM: 89
91.5788141438785
Track BPM: 80
119.8496416260563
Track BPM: 110
56.514628320524295
Track BPM: 133
133.21959685998937
Track BPM: 80
79.91325055102166
Track BPM: 90
32.230973181485666
Track BPM: 127
63.53933534079425
Track BPM: 180
46.14189994331459
Track BPM: 102
102.21268079625169
Track BPM: 100
48.26225479297731
Track BPM: 96
95.11074677414472
Track BPM: 95
47.932334317884305
Track BPM: 118
59.35053246681677
Track BPM: 75
75.14631565227117
Track BPM: 135
67.78590291189742
Track BPM: 95
95.30743755466972
Track BPM: 142
71.11205045712944
Track BPM: 83
41.58942881861365
Track BPM: 83
62.48084258228814
Track BPM: 90
48.72536450472433
Track BPM: 108
54.43017730156449
Track BPM: 81
81.06974056332794
Track BPM: 85
42.762002223272425
Track BPM: 112
84.2034085834992


### For each training file results, compare tracked tempos to global tempo of each file

### Report accuracy metric