**Credit:** significant portions of this code are adapted from https://github.com/subpath/Markov_chain_for_music_generation/blob/master/Markov_chain_for_chords_generation.ipynb

In [2]:
import pandas as pd
import numpy as np
from collections import Counter

# Seed NumPy's legacy global RNG so that later np.random.* draws in this
# notebook are repeatable from a fresh kernel.
np.random.seed(42)
# Load the chorale table; the path is relative to the notebook's working directory.
data = pd.read_csv('bach_choral_set_dataset.csv')

In [5]:
# Show the loaded table as this cell's output.
data

Unnamed: 0,choral_ID,event_number,pitch_1,pitch_2,pitch_3,pitch_4,pitch_5,pitch_6,pitch_7,pitch_8,pitch_9,pitch_10,pitch_11,pitch_12,bass,meter,chord_label
0,000106b_,1,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,3,F_M
1,000106b_,2,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,E,5,C_M
2,000106b_,3,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,E,2,C_M
3,000106b_,4,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,3,F_M
4,000106b_,5,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,2,F_M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5660,015505b_,105,NO,NO,YES,NO,NO,NO,NO,YES,NO,NO,YES,NO,G,4,G_m
5661,015505b_,106,NO,NO,YES,NO,NO,NO,NO,YES,NO,YES,NO,NO,G,3,G_m
5662,015505b_,107,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,C,5,C_M
5663,015505b_,108,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,YES,NO,C,3,C_M


In [42]:
# Chorale identifiers in order of first appearance, plus a per-chorale grouping
# of the rows so each chorale's events can be fetched by ID.
choral_sets = data.choral_ID.unique()
grouped = data.groupby(data.choral_ID)

# Collect chord bigrams ("<current> <next>") one chorale at a time, so that no
# pair straddles the boundary between two different chorales.
bigrams = []
for chorals in choral_sets:
    chords = grouped.get_group(chorals)['chord_label'].values
    # Pair every chord with its immediate successor.
    ngrams = zip(chords[:-1], chords[1:])
    bigrams += [" ".join(ngram) for ngram in ngrams]

bigrams[3:]

['F_M F_M',
 'F_M D_m',
 'D_m D_m',
 'D_m F_M',
 'F_M F_M',
 'F_M BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM F_M',
 'F_M F_M',
 'F_M BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM C_M7',
 'C_M7 C_M7',
 'C_M7 C_M7',
 'C_M7 F_M',
 'F_M F_M',
 'F_M C_M',
 'C_M C_M',
 'C_M D_m7',
 'D_m7 D_m7',
 'D_m7 D_m7',
 'D_m7 G_M',
 'G_M C_M',
 'C_M F_M',
 'F_M F_M',
 'F_M BbM',
 'BbM BbM',
 'BbM A_m',
 'A_m A_m',
 'A_m C_M7',
 'C_M7 C_M7',
 'C_M7 F_M',
 'F_M F_M',
 'F_M C_M4',
 'C_M4 C_M4',
 'C_M4 C_M4',
 'C_M4 C_M7',
 'C_M7 C_M7',
 'C_M7 C_M7',
 'C_M7 F_M',
 'F_M F_M',
 'F_M C_M',
 'C_M C_M',
 'C_M F_M',
 'F_M F_M',
 'F_M D_m',
 'D_m D_m',
 'D_m F_M',
 'F_M F_M',
 'F_M BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM F_M',
 'F_M F_M',
 'F_M BbM',
 'BbM BbM',
 'BbM BbM',
 'BbM C_M7',
 'C_M7 C_M7',
 'C_M7 C_M7',
 'C_M7 F_M',
 'F_M F_M',
 'F_M C_M',
 'C_M C_M',
 'C_M D_m7',
 'D_m7 D_m7',
 'D_m7 D_m7',
 'D_m7 G_M',
 'G_M C_M',
 'C_M F_M',
 'F_M F_M',
 'F_M BbM',


In [38]:
def predict_next_state(chord:str, data:list=None):
    """Sample the next chord from the transitions observed after ``chord``.

    Parameters
    ----------
    chord : str
        Current chord label, i.e. the first token of a bigram.
    data : list of str, optional
        Bigrams formatted as ``"<current> <next>"``. When omitted, the
        notebook-level ``bigrams`` list is looked up at call time, so this
        cell can be defined before the bigrams cell has run.

    Returns
    -------
    str
        A successor chord drawn with ``np.random.choice``, weighted by how
        often it follows ``chord`` in ``data``.

    Raises
    ------
    ValueError
        If no bigram in ``data`` starts with ``chord``.
    """
    if data is None:
        data = bigrams
    # keep only the transitions that leave the current chord
    bigrams_with_current_chord = [bigram for bigram in data if bigram.split(' ')[0] == chord]
    if not bigrams_with_current_chord:
        # np.random.choice would fail on an empty option list with an obscure
        # message; report the actual problem instead.
        raise ValueError(f"no bigram starts with chord {chord!r}")
    # count how often each successor chord follows the current chord
    successor_counts = Counter(bigram.split(' ')[1] for bigram in bigrams_with_current_chord)
    total = len(bigrams_with_current_chord)
    # successor chords and their empirical transition probabilities, in matching order
    options = list(successor_counts)
    probabilities = [count / total for count in successor_counts.values()]
    # return random prediction
    return np.random.choice(options, p=probabilities)

In [39]:
def generate_sequence(chord:str=None, data:list=None, length:int=30):
    """Generate a chord sequence by repeatedly sampling the next chord.

    Parameters
    ----------
    chord : str
        Starting chord. It seeds the walk but is not itself included in the
        returned sequence. Required whenever ``length`` is positive.
    data : list of str, optional
        Bigrams formatted as ``"<current> <next>"``. When omitted, the
        notebook-level ``bigrams`` list is looked up at call time.
    length : int
        Number of chords to generate.

    Returns
    -------
    list
        ``length`` chords, each sampled by ``predict_next_state`` from the
        chord before it.

    Raises
    ------
    ValueError
        If ``chord`` is None while ``length`` is positive, or if the walk
        reaches a chord that has no observed successor.
    """
    if data is None:
        data = bigrams
    if chord is None and length > 0:
        # Without a start chord no bigram can match; fail with a clear message.
        raise ValueError("generate_sequence requires a starting chord")
    # create list to store future chords
    chords = []
    for _ in range(length):
        # sample the successor from the supplied data and make it the new current chord
        chord = predict_next_state(chord, data)
        chords.append(chord)
    return chords

In [40]:
generate_sequence('C_M')

['C_M',
 'F_m',
 'F_m7',
 'Bbm6',
 'Bbm6',
 'Bbm6',
 'Bbm6',
 'AbM',
 'DbM',
 'Bbm6',
 'Bbm6',
 'AbM',
 'C#m',
 'C#m',
 'C#m',
 'C#m',
 'F#M7',
 'F#M7',
 'F#M7',
 'B_m',
 'F#M',
 'F#M',
 'F#M',
 'F#M',
 'B_m',
 'E_M7',
 'A_M',
 'D_M',
 'G_M',
 'C_M']