# Markov Chains for Rhythm Generation

Here we generate a few mock licks from the beat-upbeat ratio (BUR) data using simple Markov chains.

## Load imports, set constants

In [22]:
import random
import numpy as np
from joblib import Parallel, delayed
from collections import defaultdict
from pretty_midi import note_name_to_number, Note, Instrument, PrettyMIDI
import soundfile as sf

from src import utils
from src.detect.onset_utils import OnsetMaker
from src.features.rhythm_features import BeatUpbeatRatio

In [2]:
# Upper and lower bounds to use when thresholding BURs: any n-gram containing
# a BUR outside the inclusive range [BUR_LOWER, BUR_UPPER] is discarded when
# the transition tables are built in `bur_extract`
BUR_UPPER = 4.0
BUR_LOWER = 0.25

In [3]:
# N in our Ngram: values are grouped into chunks of N, where the first N - 1
# values of a chunk form the Markov state and the last value is the observed
# continuation of that state
NGRAM_SIZE = 3

In [4]:
# Duration of one beat, passed to `convert_to_midi` when rendering the piano
# examples. Elsewhere in this notebook 60 / beat duration is used as the tempo
# in BPM, so this is presumably in seconds (0.3 -> 200 BPM) -- TODO confirm
BEAT_DUR = 0.3

In [5]:
# Musicians to model: one Markov transition table (and one generated chain)
# is built per pianist and per drummer listed here
DESIRED_PIANISTS = ['Bill Evans', 'Oscar Peterson']
DESIRED_DRUMMERS = ['Jack DeJohnette', 'Ed Thigpen']

## Load in data

In [6]:
# Load every track in the corpus. The cells below read each element's `.item`
# metadata, its `.ons` onsets and its `.summary_dict` beat data
allonsets: list[OnsetMaker] = utils.load_corpus_from_files(f'{utils.get_project_root()}/data/cambridge-jazz-trio-database-v02')

## Extract BURs

In [7]:
def bur_extract(track, instr: str = 'piano', precision: int = 1):
    """Build a BUR transition table for one instrument on one track.

    The BURs computed by `BeatUpbeatRatio` are rounded to `precision` decimal
    places and cut into consecutive, non-overlapping groups of `NGRAM_SIZE`
    values. Within each group the first `NGRAM_SIZE - 1` values act as the
    state and the final value as its continuation.

    :param track: object exposing `ons` and `summary_dict`, both indexed by
        instrument name
    :param instr: instrument whose onsets and beats are used
    :param precision: number of decimal places each BUR is rounded to
    :return: dict mapping each state tuple to the list of continuations
        observed after it (duplicates are kept)
    """
    extractor = BeatUpbeatRatio(
        my_onsets=track.ons[instr],
        my_beats=track.summary_dict[instr],
        clean_outliers=False,
    )
    # Round every non-missing BUR so that similar values share a state
    rounded = []
    for raw in extractor.bur['burs'].dropna().values:
        rounded.append(raw if np.isnan(raw) else round(raw, precision))
    transitions = {}
    # Step through the BURs one whole n-gram at a time (no overlap)
    for start in range(0, len(rounded), NGRAM_SIZE):
        ngram = rounded[start: start + NGRAM_SIZE]
        # A short trailing group cannot provide both a state and a continuation
        if len(ngram) < NGRAM_SIZE:
            continue
        # Discard the whole n-gram if any value falls outside the thresholds
        if not all(BUR_LOWER <= value <= BUR_UPPER for value in ngram):
            continue
        # Everything but the last value is the state; the last is what followed
        *context, following = ngram
        transitions.setdefault(tuple(context), []).append(following)
    return transitions

In [8]:
def combine_dicts(lod: list[dict]) -> dict:
    """Merge several ``{key: list}`` dictionaries into a single dictionary.

    Lists stored under the same key are concatenated in the order the
    dictionaries appear in `lod`. The input dictionaries and their lists are
    not modified.

    :param lod: list of dictionaries whose values are lists
    :return: a plain `dict` mapping every key found in `lod` to the
        concatenation of its lists; empty when `lod` is empty
    """
    merged = defaultdict(list)
    for d in lod:
        for key, value in d.items():
            merged[key].extend(value)
    # Return a plain dict so that looking up an unseen key raises KeyError
    # rather than silently inserting an empty list
    return dict(merged)

In [9]:
# Pooled BUR transition table for each pianist, keyed by pianist name
allres = {}
for pianist in DESIRED_PIANISTS:
    # Keep only the tracks on which this pianist plays
    proc = [trk for trk in allonsets if trk.item['pianist'] == pianist]
    # Extract one transition table per track
    with Parallel(n_jobs=1, verbose=10) as par:
        temp = par(delayed(bur_extract)(trk) for trk in proc)
    # Merge the per-track tables into a single table for the pianist
    allres[pianist] = combine_dicts(temp)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 254 out of 254 | elapsed:    2.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elap

## Create the Markov Chain

In [31]:
# Note names of the fixed melody that the generated rhythms are applied to
# (NOTE(review): the meaning of the `dl_` prefix is not stated in this notebook)
dl_notes = ['C5', 'B4', 'C5', 'C#5', 'D5', 'C#5', 'D5', 'Eb5', 'Fb5', 'Eb5', 'Fb5', 'Eb5', 'D5', 'Db5', 'C5', 'Bb4', 'Ab4', 'Bb4', 'G4', 'Ab4', 'Bb4', 'Ab4', 'G4', 'F4', 'B3', 'D4', 'F4', 'Ab4', 'B4', 'G5', 'F5', 'E5', 'Eb5', 'Db5', 'C5', 'Bb4', 'A4', 'Gb4', 'F4', 'Eb4', 'Db4', 'F4', 'Ab4', 'C5', 'Bb4', 'Ab4', 'G4', 'F4', 'Ab4']
# The same melody as MIDI note numbers, the form `convert_to_midi` consumes
dl_midi = [note_name_to_number(n) for n in dl_notes]

In [12]:
def get_random_bur(bl, chain_val):
    """Draw a random continuation of `chain_val` and return the next state.

    Despite the name, the return value is the whole next state, not just the
    sampled value.

    :param bl: transition table mapping a state tuple to the list of values
        observed after it
    :param chain_val: current state; must be a key of `bl`
    :return: tuple formed by dropping the first element of `chain_val` and
        appending the sampled continuation
    """
    # Values that occur several times in the list are proportionally more
    # likely to be picked
    (drawn,) = random.sample(bl[chain_val], 1)
    # Slide the window forward by one value
    return (*chain_val[1:], drawn)


def create_chain(bur_list, chain_len: int):
    """Generate a sequence of values by walking a Markov transition table.

    The walk starts from the state with the most recorded continuations (the
    first such state in the table's iteration order when several tie). At
    every step a continuation is sampled with `get_random_bur`; if the state
    this produces was never observed, the walk jumps back to the starting
    state instead.

    :param bur_list: transition table mapping a state tuple to the list of
        values observed after it
    :param chain_len: number of transitions to make
    :return: list of `chain_len + 1` floats, the last element of every state
        visited, including the starting state
    :raises ValueError: if `bur_list` is empty
    """
    if not bur_list:
        raise ValueError('cannot create a chain from an empty transition table')
    # A single pass finds the most frequently observed state; max() keeps the
    # first maximal key, so ties resolve in the table's iteration order
    starting_bur = max(bur_list, key=lambda state: len(bur_list[state]))
    chai = [starting_bur]
    state = starting_bur
    # Iterate through the number of transitions we want in our chain
    for _ in range(chain_len):
        candidate = get_random_bur(bur_list, state)
        # An unseen state has no continuations to sample from on the next
        # step, so restart from the modal state instead
        state = candidate if candidate in bur_list else starting_bur
        chai.append(state)
    # Only the newest value of each state belongs to the output sequence
    return [float(visited[-1]) for visited in chai]

In [None]:
# One generated rhythm per pianist; `convert_to_midi` consumes the melody two
# pitches at a time, so half as many transitions as pitches are requested
chains = {
    pianist_name: create_chain(table, chain_len=len(dl_midi) // 2)
    for pianist_name, table in allres.items()
}

## Render the chains to audio

In [55]:
def convert_to_midi(chain, pitches, beat_dur: float):
    """Turn a chain of BURs and a list of pitches into `Note` objects.

    Pitches are consumed in pairs: within each beat the first pitch of the
    pair sounds for `bur / (bur + 1)` of the beat and the second pitch for the
    remaining `1 / (bur + 1)`. Generation stops as soon as either the pitch
    pairs or the chain run out, and an unpaired final pitch is ignored.

    :param chain: sequence of BUR values, one per beat
    :param pitches: sequence of pitch values passed straight to `Note`
    :param beat_dur: duration of one beat, in the time unit used by `Note`
    :return: flat list of notes, two per rendered beat, in time order
    """
    notes = []
    pitch_pairs = zip(pitches[::2], pitches[1::2])
    for beat_idx, (pair, bur) in enumerate(zip(pitch_pairs, chain)):
        first_pitch, second_pitch = pair
        beat_start = beat_idx * beat_dur
        # Divide the beat into (bur + 1) equal units: `bur` of them go to the
        # first note and the remaining one to the second
        unit = beat_dur / (bur + 1)
        split = beat_start + bur * unit
        notes.append(Note(velocity=50, pitch=first_pitch, start=beat_start, end=split))
        notes.append(Note(velocity=50, pitch=second_pitch, start=split, end=split + 1 * unit))
    return notes

In [15]:
# Apply each pianist's generated rhythm to the fixed melody
note = {
    pianist_name: convert_to_midi(rhythm, dl_midi, BEAT_DUR)
    for pianist_name, rhythm in chains.items()
}

In [81]:
def synthesize(nts):
    """Render a list of notes to audio samples using a sine wave.

    :param nts: notes to assign to a fresh `Instrument` (program 0)
    :return: whatever `Instrument.synthesize` returns for these notes at
        `utils.SAMPLE_RATE`
    """
    instrument = Instrument(program=0)
    instrument.notes = nts
    waveform = instrument.synthesize(utils.SAMPLE_RATE, wave=np.sin)
    return waveform

In [42]:
# Write one WAV file per pianist, named after the second word of their name
# and the n-gram size used to build the chain
for pia, pianist_notes in note.items():
    f = f'{utils.get_project_root()}/reports/generation_examples/{pia.split(" ")[1].lower()}_scale_{NGRAM_SIZE}gram.wav'
    audio = synthesize(pianist_notes)
    with open(f, 'wb') as fp:
        sf.write(fp, audio, utils.SAMPLE_RATE)

## Drummers

In [58]:
# Pooled BUR transition table for each drummer, keyed by drummer name
drumres = {}
for drummer in DESIRED_DRUMMERS:
    # Keep only the tracks on which this drummer plays
    proc = [trk for trk in allonsets if trk.item['musicians']['drummer'] == drummer]
    # Drum BURs are rounded to two decimal places rather than the default one
    with Parallel(n_jobs=1, verbose=10) as par:
        temp = par(delayed(bur_extract)(trk, 'drums', 2) for trk in proc)
    # Merge the per-track tables into a single table for the drummer
    drumres[drummer] = combine_dicts(temp)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed:    1.8s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elap

In [59]:
# One generated chain of 32 * 4 transitions per drummer
drumchain = {
    drummer_name: create_chain(table, chain_len=32 * 4)
    for drummer_name, table in drumres.items()
}

In [60]:
# Beat durations to render at; 60 / duration is used as the BPM in the filename
tempo = [0.24, 0.3, 0.4]
for temp in tempo:
    for dru, ch in drumchain.items():
        # Every note uses pitch 51.
        # NOTE(review): `convert_to_midi` consumes two pitches per beat, so a
        # pitch list of len(ch) renders only the first half of the chain --
        # confirm this is intended
        midi = convert_to_midi(ch, [51] * len(ch), temp)
        # Drop the second note out of every group of four, i.e. the upbeat of
        # every other beat
        newvals = [hit for pos, hit in enumerate(midi) if pos % 4 != 1]

        f = f'{utils.get_project_root()}/reports/generation_examples/{dru.split(" ")[1].lower()}_ride_{round(60 / temp)}bpm.mid'
        # NOTE(review): the instrument is not flagged as a drum track; confirm
        # that whatever plays this file maps pitch 51 to the intended ride sound
        ins = Instrument(program=0)
        ins.notes = newvals
        pm = PrettyMIDI()
        pm.instruments = [ins]
        pm.write(f)

## Experimenting with Pitch

In [21]:
from src.detect.midi_utils import MelodyMaker

In [96]:
# Use 6-grams for the pitch experiment. NOTE: this rebinds the module-level
# NGRAM_SIZE that `bur_extract` reads when it is called, so BURs extracted
# after this point are grouped into 6-grams as well
NGRAM_SIZE = 6

def extract_midi_ngs(track):
    """Build a transition table of melodic intervals for one track.

    The melody is extracted from the track's `piano_midi.mid` file with
    `MelodyMaker`, converted to intervals, and cut into consecutive,
    non-overlapping groups of `NGRAM_SIZE` intervals. Within each group the
    first `NGRAM_SIZE - 1` intervals act as the state and the last one as its
    continuation.

    :param track: object whose `item["fname"]` names the track's data folder
    :return: dict mapping each state tuple to the list of intervals observed
        after it (duplicates are kept)
    """
    midi_path = f'{utils.get_project_root()}/data/cambridge-jazz-trio-database-v02/{track.item["fname"]}/piano_midi.mid'
    mm = MelodyMaker(midi_path, track)
    melody = list(mm.extract_melody())
    intervals = mm.extract_intervals(melody)
    # Keep only intervals with absolute size below 12 (presumably semitones,
    # i.e. less than an octave -- TODO confirm)
    accepts = [step.interval for step in intervals if abs(step.interval) < 12]
    transitions = {}
    # Step through the intervals one whole n-gram at a time (no overlap)
    for start in range(0, len(accepts), NGRAM_SIZE):
        ngram = accepts[start: start + NGRAM_SIZE]
        # A short trailing group cannot provide both a state and a continuation
        if len(ngram) < NGRAM_SIZE:
            continue
        # Everything but the last interval is the state; the last is what followed
        *context, following = ngram
        transitions.setdefault(tuple(context), []).append(following)
    return transitions

In [97]:
# Tracks led by Bill Evans
be = [trk for trk in allonsets if trk.item['bandleader'] == 'Bill Evans']
# Reuse one worker pool (all available cores) for both extraction passes
with Parallel(n_jobs=-1, verbose=10) as par:
    # Interval transition tables, one per track
    allmidi = par(delayed(extract_midi_ngs)(trk) for trk in be)
    # BUR transition tables for the same tracks (piano, default precision)
    allburs = par(delayed(bur_extract)(trk) for trk in be)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   34.9s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   36.0s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   36.6s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:   37.3s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   38.1s
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:   38.6s
[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed:   39.0s
[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed:   39.8s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:   41.4s
[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed:   42.0s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:   42.9s
[Parallel(n_jobs=-1)]: Done 157 tasks      | elapsed:   44.1s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:   45.2s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:  

In [124]:
# Pool the per-track interval transition tables into a single table
cds_mid = combine_dicts(allmidi)
# Generate a chain of melodic intervals from the pooled table
ch_mid = create_chain(cds_mid, chain_len=12)

In [125]:
# Candidate note names for octaves 3 to 7, lowest octave first
possible = [str(pc) + str(octave) for octave in range(3, 8) for pc in utils.ALL_PITCHES]

# Walk through `possible` by adding each generated interval to the previous
# index, starting from index 24 (the first pitch of octave 5 if
# `utils.ALL_PITCHES` holds 12 pitch classes -- TODO confirm)
max_index = len(possible) - 1
allpitches = [24]
for i, val in enumerate(ch_mid):
    # Clamp to the valid index range: a negative index would silently wrap
    # round to the top octave, and one past the end would raise IndexError
    allpitches.append(min(max(int(allpitches[i] + val), 0), max_index))
pitch_chain = [possible[i] for i in allpitches]

In [126]:
# Pool the per-track BUR transition tables into a single table
cds_bur = combine_dicts(allburs)
# Generate a chain of BURs to supply the rhythm for the generated pitches
ch_bur = create_chain(cds_bur, chain_len=12)

In [127]:
# Combine the generated pitches with the generated rhythm and save the result
f = f'{utils.get_project_root()}/reports/generation_examples/generate_3.wav'
midi_pitches = [note_name_to_number(n) for n in pitch_chain]
audio = synthesize(convert_to_midi(ch_bur, midi_pitches, BEAT_DUR))
with open(f, 'wb') as fp:
    sf.write(fp, audio, utils.SAMPLE_RATE)