In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import os
import glob
import seaborn as sns
import pickle
import music21
import matplotlib.pyplot as plt

from fractions import Fraction
from collections import defaultdict, Counter
from iteration_utilities import deepflatten #flatten nested lists

from music21 import midi, note, stream, instrument, meter, key
import itertools
import random

import string
import pretty_midi

import phrase_segmentation
from phrase_segmentation import *

from pathlib import Path

# improve quality of plots
%matplotlib inline
%config InlineBackend.figure_format='retina'

# add grid
import seaborn as sns
sns.set_style("whitegrid")

from phrase_tikz import ROOT_PATH

In [None]:
# Locate the ASAP dataset: use <ROOT_PATH>/asap-dataset when it exists,
# otherwise fall back to <ROOT_PATH>/data/asap-dataset.
DATA_PATH = (
    ROOT_PATH / "asap-dataset"
    if (ROOT_PATH / "asap-dataset").exists()
    else ROOT_PATH / "data" / "asap-dataset"
)

In [None]:
# Piece under study and the `sig` value handed to the plotting/segmentation helpers.
piece_path = DATA_PATH / "Schubert" / "Impromptu_op.90_D.899" / "3"
sig = 4

# Collect the per-performance annotation and MIDI files, skipping the
# score-derived ones. Both lists are sorted by file name.
annotations_files = sorted(
    f
    for f in glob.glob(os.path.join(piece_path, "*annotations.txt"))
    if not f.endswith("midi_score_annotations.txt")
)
midi_files = sorted(
    f
    for f in glob.glob(os.path.join(piece_path, "*.mid"))
    if not f.endswith("midi_score.mid")
)

# Timing data read from the annotation files, converted to arrays.
times_list, diff_list = read_diff_timings(annotations_files)
times, diff = np.array(times_list), np.array(diff_list)

# Parse every performance MIDI and derive the smoothed velocity curves.
midis = [pretty_midi.PrettyMIDI(midi_path) for midi_path in midi_files]
velocity_curve = compute_all_smooth_velocity_curves(midis, times, 2)

In [None]:
# Run the timing differences through the moving-average helper with a window of n=1.
# NOTE(review): n=1 presumably leaves the series unsmoothed — confirm against moving_average.
avg_diff = moving_average(diff, n=1)

# Visual check of the (averaged) timing differences before segmenting.
plot_avg_diff(avg_diff, sig)

In [None]:
# Segment the performance data; `break_idx` is used below to select boundary beats.
# `breaks` is not used in the cells visible here.
break_idx, breaks = performance_segmentation(avg_diff, times, velocity_curve, sig=sig, plot=True)

In [None]:
# Turn the boundary selection into beat positions, then into measure numbers.
beat_indices = np.arange(break_idx.shape[0])[break_idx]
# True division: a boundary that is not on a multiple of `sig` yields a
# fractional measure number. The +1 makes the numbering start at 1.
measure_counts = beat_indices / sig + 1
# Report the number of boundaries (the shape tuple, e.g. "(12,)", was printed before).
print(f"Found {len(measure_counts)} phrase boundaries!")
print(measure_counts)

# Same positions with numbering starting at 0, as wanted for the Tikz output.
zero_starting = measure_counts - 1
print("Zero starting for Tikz:")
print(zero_starting)

# Corpus Version


In [None]:
def run(piece_path, sig):
    """Segment the performances found in one piece directory.

    Mirrors the single-piece cells above: gathers the performance annotation
    and MIDI files in ``piece_path`` (score-derived files are skipped), reads
    the timing data, computes the smoothed velocity curves, runs
    ``performance_segmentation`` and converts the resulting boundary beats
    into measure positions.

    Parameters
    ----------
    piece_path : str or os.PathLike
        Directory holding ``*annotations.txt`` and ``*.mid`` files.
    sig : int
        Passed to ``performance_segmentation`` and used as the divisor that
        converts a beat index into a measure position.

    Returns
    -------
    numpy.ndarray
        Zero-based measure positions of the detected phrase boundaries
        (``beat_index / sig``). The one-based values are printed as a side
        effect.
    """
    # File names are kept as plain strings and sorted by name.
    # NOTE(review): sorting presumably keeps annotations and MIDI files in
    # matching performance order — confirm against the dataset naming scheme.
    annotations_files = sorted(
        f
        for f in glob.glob(os.path.join(piece_path, "*annotations.txt"))
        if not f.endswith("midi_score_annotations.txt")
    )
    midi_files = sorted(
        f
        for f in glob.glob(os.path.join(piece_path, "*.mid"))
        if not f.endswith("midi_score.mid")
    )

    times_list, diff_list = read_diff_timings(annotations_files)
    times = np.array(times_list)
    diff = np.array(diff_list)

    midis = [pretty_midi.PrettyMIDI(midi_path) for midi_path in midi_files]
    velocity_curve = compute_all_smooth_velocity_curves(midis, times, 2)

    avg_diff = moving_average(diff, n=1)
    # Only the boundary selection is needed; the second return value is unused.
    break_idx, _ = performance_segmentation(avg_diff, times, velocity_curve, sig=sig)

    beat_indices = np.arange(break_idx.shape[0])[break_idx]
    measure_counts = beat_indices / sig + 1
    # Report the count itself rather than the shape tuple, e.g. "(12,)".
    print(f"Found {len(measure_counts)} phrase boundaries!")
    print(measure_counts)

    return measure_counts - 1

In [None]:
# Sanity check: run the function on sibling directory "3" of `piece_path` with sig=4.
run(piece_path.parent / "3", sig=4)

In [None]:
# Run the segmentation for sibling directories "1", "2" and "4",
# each paired with the `sig` value to use for it.
r1, r2, r4 = (
    run(piece_path.parent / movement, sig=sig_value)
    for movement, sig_value in (("1", 4), ("2", 3), ("4", 3))
)

In [None]:
# Number of phrase boundaries found for directories "1", "2" and "4".
tuple(len(result) for result in (r1, r2, r4))