# 0. Humdrum to wav converter

### libraries and constants

In [41]:
from pathlib import Path
import re
import numpy as np
from IPython.display import Audio
import matplotlib.pyplot as plt
import soundfile as sf

# Debug switches: when True, the conversion cells print their intermediate
# lists (is_print) and play each generated phrase inline (is_sonify).
is_print = False
is_sonify = False

# Synthesis settings: bpm is passed to note_length() to turn note values into
# seconds; fs is the sample rate used for synthesis and for the wav files.
bpm = 80
fs = 22050

# Folder prefix the generated wav files are written to.
save_dir = "data/raw_phrases/"

%matplotlib inline

In [43]:
# Read the complete text file for one country into a single string.
txt_dir = 'Essen_txt_files/'
country = 'deutschl'
# NOTE(review): the 'unicode_escape' codec decodes the bytes as Latin-1 and
# also interprets backslash escape sequences — confirm the file contains no
# literal backslashes, otherwise 'latin-1' is probably what is wanted.
txt_file = Path(txt_dir + country + '_all' + '.txt')
txt = txt_file.read_text(encoding='unicode_escape')

### functions

In [42]:
# function for removing multiple items from list based on indices
def delete_multiple_element(list_object, indices):
    """Remove the elements at the given positions from ``list_object`` in place.

    Parameters
    ----------
    list_object : list
        List to modify. It is mutated; nothing is returned.
    indices : iterable of int
        Positions (relative to the list as passed in) to remove. Order does
        not matter and repeated positions are removed only once. Positions at
        or beyond the end of the list are ignored.
    """
    # Deduplicate first: popping the same position twice would delete the
    # element that slid into it. Go from the highest position down so that
    # earlier pops never shift the positions still to be removed.
    for idx in sorted(set(indices), reverse=True):
        if idx < len(list_object):
            list_object.pop(idx)
            
            
# function to create frequency dictionary from note letter and octave
def get_piano_notes_sharp():
    """Build a note-name -> frequency table with accidentals spelled as sharps.

    Keys are a lower-case pitch name followed by the octave number
    (e.g. ``'a4'``, ``'c#5'``) and cover the 88 keys from ``'a0'`` to
    ``'c8'``. Frequencies follow equal temperament with ``'a4'`` at 440.
    The extra key ``'r'`` maps to ``0.0`` and stands for a rest.

    Returns
    -------
    dict
        Mapping from note name to frequency.
    """
    pitch_classes = ['c', 'c#', 'd', 'd#', 'e', 'f', 'f#', 'g', 'g#', 'a', 'a#', 'b']
    a4_freq = 440  # reference pitch; 'a4' is key number 49
    all_names = np.array([pc + str(octv) for octv in range(0, 9) for pc in pitch_classes])

    # Keep only the span between the lowest and highest piano key.
    first = np.where(all_names == 'a0')[0][0]
    last = np.where(all_names == 'c8')[0][0]
    piano_names = all_names[first:last + 1]

    table = {}
    for key_number, name in enumerate(piano_names, start=1):
        table[name] = 2 ** ((key_number - 49) / 12) * a4_freq
    table['r'] = 0.0  # stop
    return table

def get_piano_notes_flat():
    """Build a note-name -> frequency table with accidentals spelled as flats.

    Keys are a lower-case pitch name followed by the octave number
    (e.g. ``'a4'``, ``'b-4'``; ``'-'`` marks a flat) and cover the 88 keys
    from ``'a0'`` to ``'c8'``. Frequencies follow equal temperament with
    ``'a4'`` at 440. The extra key ``'r'`` maps to ``0.0`` and stands for a
    rest.

    Returns
    -------
    dict
        Mapping from note name to frequency.
    """
    pitch_classes = ['c', 'd-', 'd', 'e-', 'e', 'f', 'g-', 'g', 'a-', 'a', 'b-', 'b']
    a4_freq = 440  # reference pitch; 'a4' is key number 49
    all_names = np.array([pc + str(octv) for octv in range(0, 9) for pc in pitch_classes])

    # Keep only the span between the lowest and highest piano key.
    first = np.where(all_names == 'a0')[0][0]
    last = np.where(all_names == 'c8')[0][0]
    piano_names = all_names[first:last + 1]

    table = {}
    for key_number, name in enumerate(piano_names, start=1):
        table[name] = 2 ** ((key_number - 49) / 12) * a4_freq
    table['r'] = 0.0  # stop
    return table

# function to generate sinewaves from frequency
def get_sine_wave(frequency, duration, sample_rate=44100, amplitude=4096):
    """Generate a sine wave sampled at ``sample_rate``.

    Parameters
    ----------
    frequency : float
        Frequency of the sine in cycles per unit of ``duration``.
    duration : float
        Length of the signal; ``int(sample_rate*duration)`` samples are
        produced.
    sample_rate : int, optional
        Samples per unit of ``duration``.
    amplitude : float, optional
        Peak value of the returned wave.

    Returns
    -------
    numpy.ndarray
        The sampled wave (empty when ``duration`` is 0).
    """
    n_samples = int(sample_rate*duration)
    # Sample k lies at k/sample_rate. A linspace that includes the end point
    # would stretch the spacing to duration/(n_samples-1) and detune the tone.
    t = np.arange(n_samples)/sample_rate # Time axis
    wave = amplitude*np.sin(2*np.pi*frequency*t)
    return wave

# function to split notes pitch and durations
def split_note_duration(str):
    """Separate one token into its pitch characters and its duration characters.

    Digits and ``'.'`` are collected (in order) as the duration; ASCII
    letters, ``'#'`` and ``'-'`` are collected (in order) as the note. Every
    other character is dropped.

    Parameters
    ----------
    str : str
        Token to split, e.g. ``'4.cc#'``.

    Returns
    -------
    tuple of (str, str)
        ``(note, duration)``; either part may be empty.
    """
    pitch_chars = []
    length_chars = []
    for ch in str:
        if ch.isdigit() or ch == '.':
            length_chars.append(ch)
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in ('#', '-'):
            pitch_chars.append(ch)
    return "".join(pitch_chars), "".join(length_chars)

# function to re-write each note as letter+octave
def get_letter_octave(note_original):
    """Convert a repeated-letter pitch into ``letter[accidental]octave`` form.

    The input encodes the octave through letter case and repetition:
    ``'c'`` -> ``'c4'``, ``'cc'`` -> ``'c5'``, ``'C'`` -> ``'c3'``,
    ``'CC'`` -> ``'c2'``. A trailing ``'#'`` or ``'-'`` is kept after the
    letter (``'cc#'`` -> ``'c#5'``, ``'BB-'`` -> ``'b-2'``). A trailing
    ``'n'`` (natural) is dropped, for lower- and upper-case notes alike
    (``'ccn'`` -> ``'c5'``, ``'Cn'`` -> ``'c3'``).

    Parameters
    ----------
    note_original : str
        Pitch part of a token, or ``'r'`` for a rest.

    Returns
    -------
    str
        The note name, or ``'r'`` unchanged for a rest.

    Raises
    ------
    ValueError
        If the input is not one repeated letter a-g / A-G with an optional
        accidental.
    """
    # rests carry no pitch
    if note_original == 'r':
        return note_original

    # A natural sign does not change the note name. Drop it before looking at
    # letter case: the lower-case 'n' would otherwise make an upper-case note
    # look mixed-case.
    pitch = note_original[:-1] if note_original.endswith('n') else note_original

    # split the repeated letter from a trailing accidental
    letters = pitch.rstrip('#-')
    accidental = pitch[len(letters):]

    is_single_repeated_letter = (
        len(letters) > 0
        and letters == letters[0] * len(letters)
        and letters[0].lower() in 'abcdefg'
    )
    if not is_single_repeated_letter:
        raise ValueError("unrecognised note: {0!r}".format(note_original))

    if letters[0].islower():
        # middle and higher octaves: c -> 4, cc -> 5, ...
        octave = len(letters) + 3
    else:
        # lower octaves: C -> 3, CC -> 2, ...
        octave = 4 - len(letters)

    return letters[0].lower() + accidental + str(octave)


# function to calculate each note's length
def note_length(duration, bpm):
    """Convert a reciprocal note value into a length in seconds.

    Parameters
    ----------
    duration : str
        Reciprocal note value with optional trailing dots: ``'4'`` is one
        beat, ``'8'`` half a beat, ``'4.'`` one and a half beats, ``'4..'``
        one and three quarter beats.
    bpm : float
        Tempo in beats per minute.

    Returns
    -------
    float
        Length of the note in seconds.

    Raises
    ------
    ValueError
        If the part before the dots is not an integer (including empty input).
    ZeroDivisionError
        If the note value is ``0``.
    """
    reciprocal = duration.rstrip('.')
    n_dots = len(duration) - len(reciprocal)

    beats = 1/int(reciprocal)*4
    # dotted notes: every dot adds half of the previously added value
    extension = beats
    for _ in range(n_dots):
        extension = extension/2
        beats += extension

    return 60 / bpm * beats

# function to create sinewaves
def sinewave(fs,duration,f0, phase=0):
    """Synthesise a unit-amplitude sine tone.

    Parameters
    ----------
    fs : float
        Sample rate; consecutive samples are ``1/fs`` apart.
    duration : float
        Length of the tone; the time axis runs from 0 up to (not including)
        ``duration``.
    f0 : float
        Frequency of the tone.
    phase : float, optional
        Phase offset in radians added to every sample.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(time_vector, signal)`` with one entry per sample.
    """
    sample_period = 1/fs
    t = np.arange(0, duration, sample_period)
    omega = 2 * np.pi * f0  # angular frequency
    return t, np.sin(omega * t + phase)

In [44]:
# define indices
phrase_start = '{'
phrase_ends = '}'

# Phrase text begins one character after each '{' and stops at each '}'.
# The last two matches of each kind are discarded.
start_indices = [m.start() + 1 for m in re.finditer(phrase_start, txt)][:-2]
end_indices = [m.start() for m in re.finditer(phrase_ends, txt)][:-2]

In [45]:
# save each phrase and print phrases
# Start and end markers are paired by position; surplus markers of either
# kind are ignored.
num_phrases = min(len(start_indices),len(end_indices))
phrases = {} # create phrases dictionary 
seperator = '\n' # separation between notes
for i in range(num_phrases):
    phrases["phrase {0}".format(i+1)] = txt[start_indices[i]:end_indices[i]]
if is_print:
    print("original phrase:\n", phrases)
    print('')

# get rid of fake phrases ('OTL, SCT')
for key in list(phrases.keys()):  # copy the keys: the dict shrinks while looping
    if phrases[key] == 'OTL' or phrases[key] == 'SCT':
        del phrases[key]

# get rid of measure numbers
# Remove each barline token as a whole: the '=' and every non-whitespace
# character that follows it. Deleting only '=' plus one character left residue
# for tokens such as '=100' or '=5:|!', which later aborted the whole phrase.
phrases = {k: re.sub(r'=\S*', '', v) for k, v in phrases.items()}
if is_print:
    print("phrases without measure numbers:\n", phrases)
    
print("Number of phrases:",len(phrases))

Number of phrases: 31756


### define constants for note generation

In [46]:
# pitch values
# Lookup tables from note name (letter, optional accidental, octave number,
# e.g. 'a4') to frequency. One table spells accidentals with '#', the other
# with '-'; both also map 'r' (rest) to 0.0.
note_freqs_sharp = get_piano_notes_sharp()
note_freqs_flat = get_piano_notes_flat()

### store time and note values separately

In [None]:
# Convert every phrase into a sequence of sine tones and save it as a wav file.
# Phrases that cannot be converted are skipped and recorded in `failed_phrases`.

# make sure the output folder exists before the first file is written
Path(save_dir).mkdir(parents=True, exist_ok=True)

# `counter` numbers the output files. It only advances after a successful
# write, so file numbers are consecutive and do not match the "phrase N" keys.
counter = 1
failed_phrases = []  # (phrase text, error description) of every skipped phrase
for phrase in phrases.values():
    try:
        # separate notes into individual items, dropping empty lines
        phrase_split = list(filter(None, phrase.split("\n")))

        if is_print:
            print(phrase_split)

        notes = []
        durations = []
        times = []

        for token in phrase_split:
            # get rid of weird symbols: one-character lines and lines
            # starting with '*' are not notes
            if len(token) == 1 or token[0] == '*':
                continue
            a, b = split_note_duration(token)
            notes.append(a)
            durations.append(b)

        if is_print:
            print("notes:", notes)
            print("durations:", durations)

        # re-write each note in letter+octave format
        notes = [get_letter_octave(note) for note in notes]
        if is_print:
            print("notes:", notes)

        # re-write each note's duration in seconds
        durations = [note_length(d, bpm) for d in durations]
        durations.append(0)
        if is_print:
            print("durations:", durations)

        # re-write notes in frequencies; only flat spellings need the flat table
        frequencies = []
        for note in notes:
            table = note_freqs_flat if "-" in note else note_freqs_sharp
            frequencies.append(table[note])
        if is_print:
            print("frequencies: ", ["%.2f" % f for f in frequencies])

        # re-write durations as times (onset of each note)
        times.append(0)
        for i in range(len(durations)):
            times.append(times[i]+durations[i])
        times = times[:-1]
        if is_print:
            print("times: ", ["%.2f" % t for t in times])

        # sonify and save as wav file
        # Collect the tones and join them once; the leading empty array keeps
        # the result a valid (empty) float array when there are no notes.
        tones = [sinewave(fs, durations[i], frequencies[i])[1]
                 for i in range(len(frequencies))]
        phrase_sonify = np.concatenate([np.array([])] + tones)
        if is_sonify:
            display(Audio(phrase_sonify, rate = fs))
        sf.write(save_dir + country + "_{0}".format(counter)+".wav", phrase_sonify, fs)

        # report the file just written, then move on to the next file number
        print(counter, "out of", len(phrases), "phrases,", "{:.2f}".format(counter/len(phrases)*100), "%", end='\r')
        counter+=1

    except Exception as err:
        # Best effort: skip phrases that cannot be parsed or written, but keep
        # a record of them. KeyboardInterrupt is not an Exception, so the loop
        # can still be interrupted.
        failed_phrases.append((phrase, repr(err)))

print()
print("Wrote", counter - 1, "wav files;", len(failed_phrases), "phrases skipped")

6366 out of 31756 phrases, 20.05 %phrases, 15.15 %

# 