In [53]:
import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import medfilt

from numpy.fft import fft, fftshift, fftfreq
from matplotlib import pyplot as plt
from pathlib import Path
import importlib

In [54]:
import helpers
importlib.reload(helpers)

# Driver
class Ex:
    """Per-exercise settings used by the onset/sustain detection experiments.

    Args:
        name: File name of the recording; callers load it from ``exercises/{name}``.
        num_notes: Expected number of notes, compared against the detected onset count.
        spec_thresh: Spectral-flux threshold passed to the detectors as the onset threshold.
        bpm: Tempo of the exercise.
        max_num_notes_per_beat: Largest number of notes that occur within a single beat.
    """

    def __init__(self, name: str, num_notes: int, spec_thresh: float, bpm: int, max_num_notes_per_beat: int):
        self.name = name
        self.num_notes = num_notes
        self.spec_thresh = spec_thresh
        self.bpm = bpm
        self.max_num_notes_per_beat = max_num_notes_per_beat
        # 60 divided by (bpm * notes per beat): the nominal spacing between
        # two consecutive notes at the densest subdivision.
        self.min_time_between = 60 / (self.bpm * max_num_notes_per_beat)
        # Multiplied by spec_thresh to obtain the sustain threshold; stays 0
        # until an optimization pass assigns a value.
        self.sustain_thresh_coeff = 0

    def __repr__(self) -> str:
        # Without this, printing a list of Ex objects only shows memory addresses.
        return (
            f"Ex(name={self.name!r}, num_notes={self.num_notes!r}, "
            f"spec_thresh={self.spec_thresh!r}, bpm={self.bpm!r}, "
            f"max_num_notes_per_beat={self.max_num_notes_per_beat!r}, "
            f"sustain_thresh_coeff={self.sustain_thresh_coeff!r})"
        )

def getExercises():
    """Return a freshly built list of Ex settings, one per exercise recording.

    Each row of the table is
    (file name, note count, spectral threshold, bpm, max notes per beat).
    A new list of new Ex objects is created on every call.
    """
    catalogue = (
        ("ex1WholeMod.mp4", 90, .15, 80, 2),
        ("ex1WholeModF.mp4", 90, .15, 120, 2),
        ("ex2WholeMod.mp4", 49, .15, 80, 1),
        ("ex3WholeMod.mp4", 145, .15, 100, 2),
        ("ex3WholeModF.mp4", 145, .15, 130, 2),
        ("ex4WholeMod.mp4", 102, .15, 90, 4),
        ("ex4WholeModF.mp4", 102, .05, 120, 2),
        ("ex5WholeMod.mp4", 133, .15, 64, 2),
        ("ex5WholeModF.mp4", 133, .15, 86, 2),
        ("ex6WholeMod.mp4", 118, .15, 70, 4),
        ("ex6WholeModF.mp4", 118, .15, 90, 4),
        ("ex7WholeMod.mp4", 86, .15, 70, 2),
        ("ex8WholeMod.mp4", 112, .15, 55, 4),
        ("ex8WholeModF.mp4", 112, .15, 80, 4),
        ("ex9WholeMod.mp4", 121, .15, 100, 4),
        ("ex10WholeMod.mp4", 102, .15, 120, 3),
    )
    return [
        Ex(file_name, num_notes=count, spec_thresh=thresh, bpm=tempo, max_num_notes_per_beat=per_beat)
        for file_name, count, thresh, tempo, per_beat in catalogue
    ]

def observe_accuracy():
    """Run onset/sustain detection on every exercise and report the results.

    For each exercise returned by getExercises() this function:
      1. loads the audio and computes spectral flux and spectral centroid
         through the ``helpers`` module,
      2. saves a spectral-flux plot under ``./spec_flux_graphs/``,
      3. detects onsets and sustains with helpers.detect_onsets_and_release,
      4. prints the expected vs. detected note count and an accuracy ratio
         (the smaller of expected/detected and detected/expected; 0.0 when
         either count is zero),
      5. prints the mean articulation length (sustain time minus onset time,
         pairing onsets and sustains by position) and saves an
         onset/sustain/centroid plot under ``./centroids/``.

    Finally prints the articulation length averaged over the exercises that
    produced at least one onset/sustain pair.

    Returns:
        None. Everything is printed or written to PNG files.
    """
    # Reporting switches; they do not vary per exercise, so set them once.
    print_num_onset_detection_accuracy = True
    graph_onsets_sustains = True

    avg_artic_leng = 0
    num_artic_exercises = 0  # exercises that contributed to avg_artic_leng
    exerciseNoteCounts = getExercises()

    for ex in exerciseNoteCounts:
        exercise = ex.name
        note_count = ex.num_notes
        spec_thresh = ex.spec_thresh
        # The detector is given 80% of the nominal minimum note spacing.
        min_time_between = ex.min_time_between * .8
        sustain_thresh = ex.sustain_thresh_coeff * spec_thresh

        ys, ts, sr = helpers.get_audio_data(f"exercises/{exercise}")
        ts_fr, fr_freq_amps, freq_bins = helpers.magnitude_spectrogram(ys, ts, sr)
        ts_fr, spec_flux = helpers.compute_spectral_flux(ts_fr, fr_freq_amps, sr)

        centroids = helpers.compute_spectral_centroid(time_frames=ts_fr, freq_bins=freq_bins, frame_freq_amps=fr_freq_amps)

        plt.title(exercise)
        plt.xlabel("Time (s)"); plt.ylabel("Spectral Flux")
        plt.plot(ts_fr, spec_flux, c="orange")
        img_path = Path.cwd() / "spec_flux_graphs" / f"{exercise} Spectral Flux.png"
        # savefig does not create missing directories.
        img_path.parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(img_path)
        plt.close()

        onsets, sustains = helpers.detect_onsets_and_release(spec_flux=spec_flux, times=ts_fr, sr=sr, sustain_thresh=sustain_thresh, onset_thresh=spec_thresh, min_time_between=min_time_between)

        if print_num_onset_detection_accuracy:
            num_detected = len(onsets)
            # Guard against a detector run that finds nothing (or an exercise
            # configured with zero notes): the ratio would divide by zero.
            if note_count and num_detected:
                accuracy = min(note_count / num_detected, num_detected / note_count)
            else:
                accuracy = 0.0
            print(f"For exercise {exercise}, Note count Actual:\t{note_count}, Note count Detected: \t{num_detected} | Accuracy: \t{accuracy}")

        if graph_onsets_sustains:
            # zip stops at the shorter sequence, so unpaired trailing onsets
            # or sustains are ignored.
            artic_lens = [sustain - onset for onset, sustain in zip(onsets, sustains)]
            if artic_lens:
                mean_artic_len = np.average(artic_lens)
                avg_artic_leng += mean_artic_len
                num_artic_exercises += 1
            else:
                # No pairs: report NaN for this exercise but keep it out of
                # the running total so it cannot poison the overall average.
                mean_artic_len = float("nan")
            print(f"For {exercise}, mean articulation length: {mean_artic_len}")

            onset_indic = np.zeros_like(ts_fr)
            sustain_indic = np.zeros_like(ts_fr)

            stem_top = np.max(centroids)

            # Raise a stem as tall as the largest centroid value at the frame
            # closest to each onset / sustain time, so the markers span the
            # centroid curve drawn on the same axes.
            for onset_time in onsets:
                idx = np.argmin(np.abs(ts_fr - onset_time))
                onset_indic[idx] = stem_top

            for sustain_time in sustains:
                idx = np.argmin(np.abs(ts_fr - sustain_time))
                sustain_indic[idx] = stem_top

            plt.title(f"{exercise} Onset + Sustain + Spectral Centroid")
            plt.xlim(10, 14)  # only the 10-14 window of the x axis is shown
            plt.xlabel("Times (s)")
            plt.stem(ts_fr, onset_indic, linefmt='--', markerfmt='pink', label='Onsets')
            plt.stem(ts_fr, sustain_indic, linefmt='--', markerfmt='red', label='Sustains')
            plt.legend()
            img_path = Path.cwd() / "centroids" / f"{exercise} centroid.png"
            img_path.parent.mkdir(parents=True, exist_ok=True)
            plt.plot(ts_fr, centroids)
            plt.savefig(img_path)
            plt.close()

    # Average only over exercises that actually produced articulation pairs.
    overall_artic_len = avg_artic_leng / num_artic_exercises if num_artic_exercises else float("nan")
    print(f"Average articulation window size is {overall_artic_len}")

# observe_accuracy()

In [55]:
def optimize_spec_thresh():
    """Sweep the onset spectral-flux threshold and pick the best one per exercise.

    For every exercise from getExercises(), 100 thresholds evenly spaced in
    [0.01, 0.25] are passed to helpers.detect_onsets_only. Each threshold is
    scored by min(expected/detected, detected/expected); a threshold that
    detects no onsets scores 0.0 instead of raising ZeroDivisionError.

    Side effects per exercise:
      * saves a threshold-vs-onset-count plot to
        ``./spec_thresh_opt/graphs/{name}.png`` (directory created on demand),
      * appends the best-scoring thresholds to
        ``spec_thresh_opt/optimized_values.txt`` (append mode, so repeated
        runs accumulate entries).

    Returns:
        The list of Ex objects with ``spec_thresh`` replaced by the middle
        value among the best-scoring thresholds, floored at 0.08.
    """
    lowest, highest, num_tests = .01, .25, 100
    spec_thres_vals = np.linspace(lowest, highest, num_tests)
    opt_spec_thresh = [] # the optimized thresholds for each exercise

    exerciseNoteCounts = getExercises()

    for ex in exerciseNoteCounts:
        exercise = ex.name
        note_count = ex.num_notes
        # The detector is given 80% of the nominal minimum note spacing.
        min_time_between = ex.min_time_between * .8

        num_onsets_detected = []
        accuracies = []

        ys, ts, sr = helpers.get_audio_data(f"exercises/{exercise}")
        ts_fr, fr_freq_amps, _ = helpers.magnitude_spectrogram(ys, ts, sr)
        ts_fr, spec_flux = helpers.compute_spectral_flux(ts_fr, fr_freq_amps, sr)

        for spec_thresh in spec_thres_vals:
            onsets = helpers.detect_onsets_only(spec_flux=spec_flux, times=ts_fr, sr=sr, onset_thresh=spec_thresh, min_time_between=min_time_between)
            num_detected = len(onsets)
            # A threshold in the sweep may detect nothing at all; score it as
            # a total miss rather than dividing by zero.
            if note_count and num_detected:
                accuracy = min(note_count / num_detected, num_detected / note_count)
            else:
                accuracy = 0.0

            num_onsets_detected.append(num_detected)
            accuracies.append(accuracy)

        plt.title(f"{exercise} Spectral Flux Threshold vs Number of Onsets Detected")
        plt.xlabel("Spectral Flux Threshold"); plt.ylabel("Number of Onsets Detected")
        plt.plot(spec_thres_vals, num_onsets_detected, label="Threshold vs Onsets")
        plt.axhline(y=note_count, color='limegreen', linestyle='--', linewidth=2, label="Correct # Onsets")
        plt.legend()
        img_path = Path.cwd() / "spec_thresh_opt" / "graphs" / f"{exercise}.png"
        # Creates both spec_thresh_opt/ and spec_thresh_opt/graphs/ if missing,
        # which also covers the text file opened below.
        img_path.parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(img_path)
        plt.close()

        opt_perf = max(accuracies)
        opt_perf_spec_thresh = [spec_thres_vals[i] for i, val in enumerate(accuracies) if val == opt_perf]
        with open("spec_thresh_opt/optimized_values.txt", "a") as f:
            f.write(f"{'=' * 30}\n")
            f.write(f"Best performing spectral thresholds at {opt_perf * 100}% accuracy for {exercise}:\n")
            f.write(f"{opt_perf_spec_thresh}\n\n")

        opt_spec_thresh.append(opt_perf_spec_thresh[len(opt_perf_spec_thresh) // 2]) # choose the middle most successful output to maximize applicability

    minimum_thresh = .08 # optimizing for the best threshold for this case hurts sustain onset detection, so this ensures a decent middle ground.
    for i, ex in enumerate(exerciseNoteCounts):
        ex.spec_thresh = max(opt_spec_thresh[i], minimum_thresh)

    return exerciseNoteCounts

# Run the spectral-threshold sweep for every exercise (this also saves the
# plots and appends to spec_thresh_opt/optimized_values.txt), then list the
# threshold that was assigned to each exercise.
opt_exercises = optimize_spec_thresh()
for ex in opt_exercises:
    print(f"for exercise {ex.name}, note onset spectral threshold is {ex.spec_thresh}")
# Also print the list of Ex objects itself, in addition to the lines above.
print(opt_exercises)

for exercise ex1WholeMod.mp4, note onset spectral threshold is 0.08030303030303029
for exercise ex1WholeModF.mp4, note onset spectral threshold is 0.15545454545454546
for exercise ex2WholeMod.mp4, note onset spectral threshold is 0.11424242424242424
for exercise ex3WholeMod.mp4, note onset spectral threshold is 0.08
for exercise ex3WholeModF.mp4, note onset spectral threshold is 0.08
for exercise ex4WholeMod.mp4, note onset spectral threshold is 0.18696969696969698
for exercise ex4WholeModF.mp4, note onset spectral threshold is 0.08
for exercise ex5WholeMod.mp4, note onset spectral threshold is 0.10212121212121211
for exercise ex5WholeModF.mp4, note onset spectral threshold is 0.10454545454545454
for exercise ex6WholeMod.mp4, note onset spectral threshold is 0.10212121212121211
for exercise ex6WholeModF.mp4, note onset spectral threshold is 0.11424242424242424
for exercise ex7WholeMod.mp4, note onset spectral threshold is 0.1215151515151515
for exercise ex8WholeMod.mp4, note onset spec

In [56]:
def optimize_sustain_thresh_coeff(exercises):
    """Sweep the sustain-threshold coefficient and store the best one per exercise.

    For every exercise, 100 coefficients evenly spaced in [0, 1] are tried.
    Each coefficient is multiplied by the exercise's ``spec_thresh`` to form
    the sustain threshold passed to helpers.detect_onsets_and_release, and is
    scored by ``len(sustains) / num_notes``.

    Side effects per exercise:
      * saves a coefficient-vs-score plot to
        ``./sustain_thresh_opt/graphs/{name}.png`` (directory created on demand),
      * appends the smallest best-scoring coefficient to
        ``sustain_thresh_opt/optimized_values.txt`` (append mode, so repeated
        runs accumulate entries).

    Args:
        exercises: Iterable sequence of objects exposing ``name``,
            ``num_notes``, ``spec_thresh`` and ``min_time_between``
            (the Ex objects of this notebook).

    Returns:
        The same ``exercises`` object; each element's ``sustain_thresh_coeff``
        is set to 1.1 times its smallest best-scoring coefficient.
    """
    lowest, highest, num_tests = 0, 1, 100
    sustain_thresh_coeffs = np.linspace(lowest, highest, num_tests)
    opt_coeffs = [] # the optimized coefficients for each exercise

    for ex in exercises:
        exercise = ex.name
        note_count = ex.num_notes
        spec_thresh = ex.spec_thresh
        # The detector is given 80% of the nominal minimum note spacing.
        min_time_between = ex.min_time_between * .8

        sustain_saturations = []

        ys, ts, sr = helpers.get_audio_data(f"exercises/{exercise}")
        ts_fr, fr_freq_amps, _ = helpers.magnitude_spectrogram(ys, ts, sr)
        ts_fr, spec_flux = helpers.compute_spectral_flux(ts_fr, fr_freq_amps, sr)

        for coeff in sustain_thresh_coeffs:
            sustain_thresh = spec_thresh * coeff
            _, sustains = helpers.detect_onsets_and_release(spec_flux=spec_flux, times=ts_fr, sr=sr, onset_thresh=spec_thresh, sustain_thresh=sustain_thresh, min_time_between=min_time_between)

            sustain_saturations.append(len(sustains) / note_count)

        plt.title(f"{exercise} Coefficient for Sustain vs Onset Pairing Completeness")
        plt.xlabel("Coefficient for Sustain"); plt.ylabel("Onset Pairing Completeness")
        plt.plot(sustain_thresh_coeffs, sustain_saturations)
        img_path = Path.cwd() / "sustain_thresh_opt" / "graphs" / f"{exercise}.png"
        # Creates both sustain_thresh_opt/ and sustain_thresh_opt/graphs/ if
        # missing, which also covers the text file opened below.
        img_path.parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(img_path)
        plt.close()

        # Compute the best score once instead of once per element.
        best_saturation = max(sustain_saturations)
        opt_perf_sustain_coeff = [sustain_thresh_coeffs[i] for i, val in enumerate(sustain_saturations) if val == best_saturation]
        min_best_coeff = np.min(opt_perf_sustain_coeff)

        with open("sustain_thresh_opt/optimized_values.txt", "a") as f:
            f.write(f"{'=' * 30}\n")
            f.write(f"Minimum coefficient for complete onset + sustain pairing for {exercise}:\n")
            f.write(f"{min_best_coeff}\n\n")

        # Keep the smallest best-scoring coefficient, scaled up by 10%.
        opt_coeffs.append(min_best_coeff * 1.1)

    for i, ex in enumerate(exercises):
        ex.sustain_thresh_coeff = opt_coeffs[i]

    return exercises

# Feed the threshold-optimized exercises into the sustain sweep; the function
# sets sustain_thresh_coeff on those same Ex objects and returns the same list.
opt_exercises = optimize_sustain_thresh_coeff(opt_exercises)

In [None]:
# Write the final per-exercise parameters to opt_results.txt, overwriting any
# previous report. Each exercise gets a divider line followed by one
# tab-separated "label: value" line per field and a trailing blank line.
with open("opt_results.txt", "w") as f:
    f.write("FINAL OPTIMIZED PARAMETERS FOR EACH EXERCISE\n\n")
    for ex in opt_exercises:
        f.writelines([
            f"{'=' * 50}\n",
            f"Name: \t {ex.name}\n",
            f"Note Count: \t {ex.num_notes}\n",
            f"Spectral Threshold: \t {ex.spec_thresh:.4f}\n",
            f"BPM: \t {ex.bpm}\n",
            f"Max Notes Per Beat: \t {ex.max_num_notes_per_beat}\n",
            f"Min Time Between Notes: \t {ex.min_time_between:.4f} seconds\n",
            f"Sustain Threshold Coefficient: \t {ex.sustain_thresh_coeff:.4f}\n",
            # The effective sustain threshold is the coefficient applied to spec_thresh.
            f"Sustain Threshold Value: \t {ex.spec_thresh * ex.sustain_thresh_coeff:.4f}\n\n",
        ])

[0.         0.01010101 0.02020202 0.03030303 0.04040404 0.05050505
 0.06060606 0.07070707 0.08080808 0.09090909 0.1010101  0.11111111
 0.12121212 0.13131313 0.14141414 0.15151515 0.16161616 0.17171717
 0.18181818 0.19191919 0.2020202  0.21212121 0.22222222 0.23232323
 0.24242424 0.25252525 0.26262626 0.27272727 0.28282828 0.29292929
 0.3030303  0.31313131 0.32323232 0.33333333 0.34343434 0.35353535
 0.36363636 0.37373737 0.38383838 0.39393939 0.4040404  0.41414141
 0.42424242 0.43434343 0.44444444 0.45454545 0.46464646 0.47474747
 0.48484848 0.49494949 0.50505051 0.51515152 0.52525253 0.53535354
 0.54545455 0.55555556 0.56565657 0.57575758 0.58585859 0.5959596
 0.60606061 0.61616162 0.62626263 0.63636364 0.64646465 0.65656566
 0.66666667 0.67676768 0.68686869 0.6969697  0.70707071 0.71717172
 0.72727273 0.73737374 0.74747475 0.75757576 0.76767677 0.77777778
 0.78787879 0.7979798  0.80808081 0.81818182 0.82828283 0.83838384
 0.84848485 0.85858586 0.86868687 0.87878788 0.88888889 0.89898