### Import all required libraries

In [None]:
import itertools
import os
import time
from collections import Counter
from itertools import tee

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from music21 import converter
from scipy.ndimage import gaussian_filter

from mpl_toolkits.mplot3d import Axes3D


# IPython magic: select matplotlib's 'widget' backend for this notebook
# (presumably the interactive ipympl backend — TODO confirm it is installed).
%matplotlib widget
# Use matplotlib's 'default' style sheet for every figure drawn below.
plt.style.use('default')

In [None]:
# The twelve pitch classes, spelled with sharps, in ascending semitone order.
a = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Every ordered pair of pitch classes ("bigram") as a space-separated string,
# e.g. 'C C#'. There are 12 * 12 = 144 of them, each listed once, in the
# order produced by itertools.product.
c = [f'{first} {second}' for first, second in itertools.product(a, a)]

# Running bigram counts, starting at zero for every bigram.
c_dt = dict.fromkeys(c, 0)

# Long-format table with one row per bigram: its label, relative frequency
# and absolute count.
df = pd.DataFrame(columns=['Bigram', 'Frequency', 'Count'])

### correctNote()
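Normalise a note name by applying its accidentals: each sharp (`#`) raises and each flat (`-`) lowers the note by one semitone, so enharmonic spellings (e.g. `D-` and `C#`) map to the same sharp-spelled note.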

In [None]:
def correctNote(note):
    """Resolve a note name with accidentals to one of the names in `a`.

    The first character of `note` is looked up in the module-level list `a`;
    every following '#' moves one step up and every following '-' one step
    down, wrapping around at both ends of the twelve-entry list. Any other
    trailing character is ignored.

    Raises ValueError (from list.index) when the first character is not an
    entry of `a`.
    """
    # Semitone shift contributed by each accidental character.
    semitone_shift = {'-': -1, '#': 1}

    position = a.index(note[:1])
    total_shift = sum(semitone_shift.get(symbol, 0) for symbol in note[1:])

    # Modulo 12 gives the same wrap-around as stepping one semitone at a time.
    return a[(position + total_shift) % 12]

In [None]:
def pairwise(iterable):
    """Return an iterator over consecutive, overlapping pairs of `iterable`.

    For items s0, s1, s2, ... the result yields (s0, s1), (s1, s2), ...
    An input with fewer than two items yields nothing.
    """
    current, upcoming = tee(iterable, 2)
    # Move the second copy one item ahead; the None default keeps an empty
    # input from raising StopIteration.
    next(upcoming, None)
    return zip(current, upcoming)

### plotMatrix()
Plot a given matrix

In [None]:
def _thinTickLabels(labels):
    """Blank out all but roughly every tenth label, keeping the first one."""
    # The step must be at least 1: with fewer than ten labels the integer
    # division gives 0, and a step of 0 is not a valid stride.
    step = max(1, len(labels) // 10)
    return [label if position % step == 0 else ''
            for position, label in enumerate(labels)]


def plotMatrix(data, annotate=False, smooth=0, color='viridis', save=False, index='0', show=True):
    """Draw `data` as a heatmap, optionally smoothed, saved and/or shown.

    Parameters:
        data: table whose `.index` and `.columns` supply the tick labels
            (a pandas DataFrame in this notebook).
        annotate: passed to seaborn as `annot`.
        smooth: sigma of the Gaussian filter applied to the values before
            plotting.
        color: colormap passed to seaborn as `cmap`.
        save: when True, write the figure to 'matrix{index}.png' at 300 dpi.
        index: suffix used in the saved file name.
        show: unless False, display the figure with plt.show().
    """
    yticks = _thinTickLabels(data.index)
    xticks = _thinTickLabels(data.columns)

    fig, ax1 = plt.subplots(figsize=(8, 6), dpi=100)

    data = gaussian_filter(data, sigma=smooth)
    sns.heatmap(data, ax=ax1, linewidth=0, yticklabels=yticks,
                xticklabels=xticks, cmap=color, annot=annotate)

    # This sets the yticks "upright" with 0, as opposed to sideways with 90.
    plt.yticks(rotation=0)

    # The explicit comparisons are kept on purpose so that non-bool arguments
    # (e.g. None) behave exactly as they did before.
    if save == True:
        plt.savefig(f'matrix{index}.png', dpi=300)
    if show != False:
        plt.show()

### getSong()
Count the note bigrams (pairs of consecutive notes) in a single piece and add them to the running totals.

In [None]:
def getSong(filepath, index):
    """Count the note bigrams of one piece and add them to the global totals.

    The file 'data/midi/<filepath>' is parsed with music21's converter. Every
    element of `recurse().notes` contributes its normalised note name; an
    element whose string form contains 'Chord' contributes one name per pitch
    instead. Each pair of consecutive names is then counted as one bigram.

    Side effects:
        * the module-level `c_dt` is replaced by the sum of itself and this
          piece's counts (Counter addition keeps only positive counts);
        * the piece's bigram total is written to the 'Piece Pair Count'
          column of `repetoire_df`, in the row at position `index`.

    Returns:
        (c_dt, song_dict, pair_count, start_time): the updated totals, this
        piece's count per bigram, its number of bigrams, and the time.time()
        value taken when the function started.
    """
    global c_dt

    start_time = time.time()

    score = converter.parse('data/midi/' + filepath)

    # Flatten the score into one sequence of normalised note names.
    chopin_notes = []
    for element in score.recurse().notes:
        if 'Chord' in str(element):
            chopin_notes.extend(
                correctNote(pitch.name) for pitch in element.pitches)
        else:
            chopin_notes.append(correctNote(element.name))

    # Start from zero for every possible bigram so absent ones are listed too.
    song_dict = dict.fromkeys(c, 0)
    pair_count = 0
    for first, second in pairwise(chopin_notes):
        song_dict[f'{first} {second}'] += 1
        pair_count += 1

    c_dt = dict(Counter(c_dt) + Counter(song_dict))

    target_row = repetoire_df.index[index]
    repetoire_df.loc[target_row, 'Piece Pair Count'] = pair_count

    return c_dt, song_dict, pair_count, start_time

### Function to save a matrix

In [None]:
def saveMatrix(piece_dict, name):
    """Write one piece's bigram counts to 'data/pieces/<stem>.csv'.

    Parameters:
        piece_dict: mapping of bigram label ('<note> <note>') to its count
            in the piece.
        name: file name of the piece; the part before the first '.' becomes
            the CSV file name.

    Side effects:
        The module-level `df` (bigram / frequency / count rows) and `df2`
        (note-by-note count matrix) are cleared and refilled with this
        piece's data. `df2`, with missing cells filled with 0, is what gets
        written to disk.
    """
    # Resolved before the loop so a piece without any bigram (all counts
    # zero) still produces a CSV instead of hitting an unbound variable.
    file_name = name.split('.')[0]

    # Loop-invariant total, computed once rather than once per bigram.
    pair_count = sum(piece_dict.values())

    # Both frames are shared across calls. Clear them in place so counts
    # from previously saved pieces cannot leak into this piece's output.
    df.drop(df.index, inplace=True)
    df2.loc[:, :] = np.nan

    row = 0
    for key, value in piece_dict.items():
        if value <= 0:
            continue
        df.loc[row] = [key, value / pair_count, value]
        row += 1

        note_1, note_2 = key.split(' ')
        df2.loc[note_1, note_2] = value

    dataframe_1 = df2.fillna(0)
    dataframe_1.to_csv(f'data/pieces/{file_name}.csv')

In [None]:
# Show at most 50 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 50)

# Table loaded from data/midi.csv; getSong() writes each piece's bigram
# total into its 'Piece Pair Count' column.
repetoire_df = pd.read_csv('data/midi.csv')

# Empty note-by-note matrix (rows: first note of a bigram, columns: second
# note) that the analysis fills with counts.
df2 = pd.DataFrame(index=a, columns=a)

In [None]:
def runAnalysis(data, file=None):
    """Build, or reload, the bigram frequency matrix as percentages.

    Parameters:
        data: 'new' recomputes everything from the MIDI files; any other
            value reloads the matrix previously saved to 'Data/frequency.csv'.
        file: when given (and data == 'new'), only this file from
            'data/midi' is analysed; otherwise every file in that directory.

    Side effects (data == 'new' only):
        * resets and refills the module-level `c_dt`, `df` and `df2`;
        * writes one CSV per piece through saveMatrix();
        * writes the frequency matrix (fractions) to 'Data/frequency.csv';
        * prints progress and totals.

    Returns:
        DataFrame with the frequency of every bigram in percent.
    """
    # getSong() accumulates into the module-level c_dt, so that is the object
    # that has to be reset here; resetting a local would let counts from
    # earlier runs pile up.
    global c_dt

    # NOTE(review): this path is spelled 'Data/...' while the inputs live
    # under 'data/...'. On a case-sensitive file system these are different
    # directories. Confirm which one is intended.
    if data != 'new':
        df4 = pd.read_csv("Data/frequency.csv", index_col=0)
        return df4.multiply(100)

    c_dt = {i: 0 for i in c}
    pair_count_total = 0
    start_time2 = time.time()

    filepaths = os.listdir('data/midi') if file is None else [file]
    for x, filepath in enumerate(filepaths, start=1):
        print('Begun ' + filepath)
        # NOTE(review): the row index passed to getSong is always 1, so every
        # piece overwrites the same 'Piece Pair Count' cell. Confirm how files
        # are meant to map to rows of the repertoire table.
        c_dt, song_dict, pair_count, start_time = getSong(filepath, 1)
        time_took = time.time() - start_time
        pair_count_total = pair_count_total + pair_count
        saveMatrix(song_dict, filepath)
        print(f'[{x}] Finished proccessing \"{filepath}\" in {round(time_took, 3)} seconds')

    # Measured for both modes; previously it was only set for a single file,
    # which made the all-files run fail at the summary below.
    time_took2 = time.time() - start_time2

    print("Total Bigram Count: {:,.2f}".format(pair_count_total))
    print("Total Note Count: {:,.2f}".format(pair_count_total*2))

    print(f'Finished in {round(time_took2, 3)} seconds')

    # Clear the shared frames in place so neither the last piece's data nor
    # a previous run's data survives in the totals.
    df.drop(df.index, inplace=True)
    df2.loc[:, :] = np.nan

    row = 0
    for key, value in c_dt.items():
        if value > 0:
            # Relative to the total over all pieces, not the last piece's count.
            frequency = value / pair_count_total
            df.loc[row] = [key, frequency, value]
            row += 1

            note_1, note_2 = key.split(' ')
            df2.loc[note_1, note_2] = value

    df3 = df2.fillna(0)
    df4 = df3.div(pair_count_total).round(8)

    # Convert to percentages.
    df5 = df4.multiply(100)

    df4.to_csv('Data/frequency.csv')
    return df5


## Running the analysis

In [None]:
# Recompute everything from the files in data/midi; any other `data` value
# would reload the saved Data/frequency.csv instead.
runAnalysis(data='new')