In [7]:
from google.colab import drive

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Project sub-folders on the mounted drive; every path ends with a slash.
data_folder, models_folder, utils_folder = (
    f'/content/drive/My Drive/Colab Notebooks/MARG/Deep Realbook/{name}/'
    for name in ('data', 'models', 'utils')
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
%%writefile '/content/drive/My Drive/Colab Notebooks/MARG/Deep Realbook/utils/config.py'
# Note name -> pitch-class index (0-11, with C = 0).
# Enharmonic spellings share an index (e.g. 'C#' and 'Db' are both 1,
# 'B#' and 'C' are both 0, 'Cb' and 'B' are both 11).
note_to_index = {
    'C': 0, 'B#': 0, 'C#': 1, 'Db': 1, 'D': 2, 'D#': 3, 'Eb': 3,
    'E': 4, 'Fb': 4, 'E#': 5, 'F': 5, 'F#': 6, 'Gb': 6, 'G': 7,
    'G#': 8, 'Ab': 8, 'A': 9, 'A#': 10, 'Bb': 10, 'B': 11, 'Cb': 11
}

# Pitch-class index -> one canonical note name; accidentals are always
# spelled as flats (Db, Eb, Gb, Ab, Bb).
index_to_note = {0: 'C', 1: 'Db', 2: 'D', 3: 'Eb', 4: 'E', 5: 'F', 6: 'Gb', 7: 'G', 8: 'Ab', 9: 'A', 10: 'Bb', 11: 'B'}

# Interval name -> number of semitones above the chord root.
# Compound intervals (9, 11, 13 and their alterations) are folded into a
# single octave by subtracting 12, so every value lies in 1..11.
interval_semitones = {
    'b2': 1, '2': 2, '#2': 3, 'b3': 3, '3': 4,
    '4': 5, '#4': 6, 'b5': 6, '5': 7, '#5': 8, 'b6': 8,
    '6': 9, 'bb7': 9, 'b7': 10, '7': 11,
    'b9': 13 - 12, '9': 14 - 12, '#9': 15 - 12, '11': 17 - 12, '#11': 18 - 12, 'b13': 20 - 12, '13': 21 - 12,
    # Combined labels used by the '7alt' entry of tension_intervals. Each one
    # maps to a single offset: 'b5/#5' -> 8 (the #5), 'b9/#9' -> 1 (the b9).
    'b5/#5': 8, 'b9/#9': 1
}


# Chord quality ("tension") suffix -> interval names sounded above the root.
# The key is the part of a chord symbol that follows the root note, e.g. '-7'
# for "D-7"; the empty key is a plain major triad. The root itself is not
# listed. Every interval name used here is a key of interval_semitones.
tension_intervals = {
    '' : ['3', '5'],
    '9b5': ['3', 'b5', 'b7', '9'],
    '7#11': ['3', '5', 'b7', '#11'],
    '-b6': ['b3', '5', 'b6'],
    '-7': ['b3', '5', 'b7'],
    '7b9#9': ['3', '5', 'b7', 'b9', '#9'],
    '13sus': ['4', '5', 'b7', '9', '11', '13'],
    '7b9b5': ['3', 'b5', 'b7', 'b9'],
    '13#9': ['3', '5', 'b7', '#9', '13'],
    '69': ['3', '5', '6', '9'],
    '^9#11': ['3', '5', '7', '9', '#11'],
    '7b9sus': ['4', '5', 'b7', 'b9'],
    '7b9': ['3', '5', 'b7', 'b9'],
    '7#9b5': ['3', 'b5', 'b7', '#9'],
    '13': ['3', '5', 'b7', '9', '11', '13'],
    '7#9#5': ['3', '#5', 'b7', '#9'],
    '-7b5': ['b3', 'b5', 'b7'],
    '^7#5': ['3', '#5', '7'],
    'h': ['b3', 'b5', 'b7'],
    '7#9#11': ['3', '5', 'b7', '#9', '#11'],
    '7b9#11': ['3', '5', 'b7', 'b9', '#11'],
    '^7': ['3', '5', '7'],
    '7sus': ['4', '5', 'b7'],
    'o7': ['b3', 'b5', 'bb7'],
    '-9': ['b3', '5', 'b7', '9'],
    '7b5': ['3', 'b5', 'b7'],
    '13b9': ['3', '5', 'b7', 'b9', '13'],
    'add9': ['3', '5', '9'],
    '13#11': ['3', '5', 'b7', '9', '#11', '13'],
    '7#9': ['3', '5', 'b7', '#9'],
    '^9': ['3', '5', '7', '9'],
    'sus': ['4', '5'],
    '-6': ['b3', '5', '6'],
    '7b13sus': ['4', '5', 'b7', 'b13'],
    '-': ['b3', '5'],
    '7b9#5': ['3', '#5', 'b7', 'b9'],
    '7b9b13': ['3', '5', 'b7', 'b9', 'b13'],
    'h7': ['b3', 'b5', 'b7'],
    'o^7': ['b3', 'b5', '7'],
    '7#5': ['3', '#5', 'b7'],
    '7alt': ['3', 'b5/#5', 'b7', 'b9/#9'],
    '+': ['3', '#5'],
    '9#11': ['3', '5', 'b7', '9', '#11'],
    '^13': ['3', '5', '7', '9', '11', '13'],
    '9sus': ['4', '5', 'b7', '9'],
    '9': ['3', '5', 'b7', '9'],
    '-11': ['b3', '5', 'b7', '9', '11'],
    '7susadd3': ['3', '4', '5', 'b7'],
    '^': ['3', '5', '7'],
    '2': ['2', '5'],
    '9#5': ['3', '#5', 'b7', '9'],
    '-#5': ['b3', '#5'],
    '6': ['3', '5', '6'],
    '-69': ['b3', '5', '6', '9'],
    '-^7': ['b3', '5', '7'],
    '7': ['3', '5', 'b7'],
    '^7#11': ['3', '5', '7', '#11'],
    '7b13': ['3', '5', 'b7', 'b13'],
    'o': ['b3', 'b5'],
    'h9': ['b3', 'b5', 'b7', '9'],
}

# Reduced variant of tension_intervals.
# Entries that are commented out below are present in tension_intervals but
# deliberately left out of this table. The block at the end adds slash-chord
# keys of the form '<quality>/<bass interval>', whose value is the quality's
# interval list followed by the interval of the bass note above the root.
tension_intervals_reduced = {
    '' : ['3', '5'],
    #'9b5': ['3', 'b5', 'b7', '9'],
    '7#11': ['3', '5', 'b7', '#11'],
    #'-b6': ['b3', '5', 'b6'],
    '-7': ['b3', '5', 'b7'],
    #'7b9#9': ['3', '5', 'b7', 'b9', '#9'],
    #'13sus': ['4', '5', 'b7', '9', '11', '13'],
    #'7b9b5': ['3', 'b5', 'b7', 'b9'],
    '13#9': ['3', '5', 'b7', '#9', '13'],
    '69': ['3', '5', '6', '9'],
    '^9#11': ['3', '5', '7', '9', '#11'],
    '7b9sus': ['4', '5', 'b7', 'b9'],
    '7b9': ['3', '5', 'b7', 'b9'],
    '7#9b5': ['3', 'b5', 'b7', '#9'],
    '13': ['3', '5', 'b7', '9', '11', '13'],
    '7#9#5': ['3', '#5', 'b7', '#9'],
    #'-7b5': ['b3', 'b5', 'b7'],
    '^7#5': ['3', '#5', '7'],
    #'h': ['b3', 'b5', 'b7'],
    '7#9#11': ['3', '5', 'b7', '#9', '#11'],
    '7b9#11': ['3', '5', 'b7', 'b9', '#11'],
    '^7': ['3', '5', '7'],
    '7sus': ['4', '5', 'b7'],
    'o7': ['b3', 'b5', 'bb7'],
    '-9': ['b3', '5', 'b7', '9'],
    '7b5': ['3', 'b5', 'b7'],
    '13b9': ['3', '5', 'b7', 'b9', '13'],
    'add9': ['3', '5', '9'],
    '13#11': ['3', '5', 'b7', '9', '#11', '13'],
    '7#9': ['3', '5', 'b7', '#9'],
    '^9': ['3', '5', '7', '9'],
    'sus': ['4', '5'],
    #'-6': ['b3', '5', '6'],
    #'7b13sus': ['4', '5', 'b7', 'b13'],
    '-': ['b3', '5'],
    '7b9#5': ['3', '#5', 'b7', 'b9'],
    '7b9b13': ['3', '5', 'b7', 'b9', 'b13'],
    'h7': ['b3', 'b5', 'b7'],
    'o^7': ['b3', 'b5', '7'],
    '7#5': ['3', '#5', 'b7'],
    #'7alt': ['3', 'b5/#5', 'b7', 'b9/#9'],
    '+': ['3', '#5'],
    '9#11': ['3', '5', 'b7', '9', '#11'],
    #'^13': ['3', '5', '7', '9', '11', '13'],
    #'9sus': ['4', '5', 'b7', '9'],
    '9': ['3', '5', 'b7', '9'],
    '-11': ['b3', '5', 'b7', '9', '11'],
    '7susadd3': ['3', '4', '5', 'b7'],
    #'^': ['3', '5', '7'],
    #'2': ['2', '5'],
    '9#5': ['3', '#5', 'b7', '9'],
    #'-#5': ['b3', '#5'],
    #'6': ['3', '5', '6'],
    '-69': ['b3', '5', '6', '9'],
    '-^7': ['b3', '5', '7'],
    '7': ['3', '5', 'b7'],
    '^7#11': ['3', '5', '7', '#11'],
    '7b13': ['3', '5', 'b7', 'b13'],
    'o': ['b3', 'b5'],
    #'h9': ['b3', 'b5', 'b7', '9'],

    # Slash chords: the last interval in each list is the bass note.
    '-/3': ['b3', '5', '3'],       # '-/' chord with bass note a major 3rd above the root
    '/4': ['3', '5', '4'],         # '/' chord with bass note a perfect 4th above the root
    '7b9/4': ['3', '5', 'b7', 'b9', '4'],  # '7b9' with bass note a perfect 4th above the root
    'o7/b7': ['b3', 'b5', 'bb7', 'b7'],    # 'o7' with bass note a minor 7th above the root
    'o7/b2': ['b3', 'b5', 'bb7', 'b2'],    # 'o7' with bass note a minor 2nd above the root
    'h7/b2': ['b3', 'b5', 'b7', 'b2'],     # 'h7' with bass note a minor 2nd above the root
    '^7/b6': ['3', '5', '7', 'b6'],        # '^7' with bass note a minor 6th above the root
    'o/3': ['b3', 'b5', '3'],              # 'o' with bass note a major 3rd above the root
    '^7#5/5': ['3', '#5', '7', '5'],       # '^7#5' with bass note a perfect 5th above the root
    '-7/b2': ['b3', '5', 'b7', 'b2'],      # '-7' with bass note a minor 2nd above the root
    'o7/5': ['b3', 'b5', 'bb7', '5'],      # 'o7' with bass note a perfect 5th above the root
    '-6/b7': ['b3', '5', '6', 'b7'],       # '-6' with bass note a minor 7th above the root
    '/b3': ['3', '5', 'b3'],               # '/' with bass note a minor 3rd above the root
    'o7/3': ['b3', 'b5', 'bb7', '3'],      # 'o7' with bass note a major 3rd above the root
    '13b9/4': ['3', '5', 'b7', 'b9', '13', '4'],  # '13b9' with bass note a perfect 4th above the root
    '-/2': ['b3', '5', '2'],               # '-/' with bass note a major 2nd above the root
    '/b5': ['3', '5', 'b5'],               # '/' with bass note a diminished 5th above the root
    '-/4': ['b3', '5', '4'],               # '-/' with bass note a perfect 4th above the root
    '-^7/2': ['b3', '5', '7', '2'],        # '-^7' with bass note a major 2nd above the root
    '7#9/2': ['3', '5', 'b7', '#9', '2'],  # '7#9' with bass note a major 2nd above the root
}

# Song-title lists that are merged into validation_set further down.
# NOTE(review): the suffixes (fr, so, jr, mm1) presumably name the source
# collections the titles were taken from -- confirm and spell them out.
# Some titles occur in more than one list (e.g. "All Of Me" in list_fr and
# list_jr, "Round Midnight" in list_jr and list_mm1).
list_fr = [
    "Blues For Alice", "Stella By Starlight", "Satin Doll", "Daahoud",
    "There Will Never Be Another You", "Don't Get Around Much Anymore",
    "On Green Dolphin Street", "Indiana (Back Home Again In)", "Donna Lee",
    "Honeysuckle Rose", "Scrapple From The Apple", "Autumn Leaves",
    "Girl From Ipanema, The", "Wave", "Misty", "My Funny Valentine",
    "Someday My Prince Will Come", "I Got Rhythm", "Anthropology", "All Of Me",
    "Bye Bye Blackbird", "Epistrophy", "Impressions", "So What", "Nardis",
    "My Romance", "Sweet Georgia Brown", "Dig",
    "What Is This Thing Called Love", "Hot House", "Night And Day",
    "Maiden Voyage", "A Night In Tunisia", "All Blues",
    "Have You Met Miss Jones?", "Woody'n You"
]

list_so = [
    "Secret Love", "Confirmation", "Like Someone In Love", "I Hear A Rhapsody",
    "I Love You", "Our Love is Here to Stay", "Prelude To A Kiss", "Star Dust",
    "Here's That Rainy Day", "Days Of Wine And Roses",
    "Embraceable You", "Body And Soul", "Cherokee",
    "What's New", "Afternoon In Paris", "Alone Together", "Yesterdays",
    "Fee-Fi-Fo-Fum", "All The Things You Are", "Milestones (Old)", "Bluesette",
    "Corcovado", "Don't Blame Me", "In Your Own Sweet Way", "Four",
    "Lady Bird", "Joy Spring", "Minority"
]
# Freedom Jazz Dance, A Day In The Life Of A Fool
# NOTE(review): the two titles above are not part of list_so; presumably they
# were left out on purpose -- confirm and record the reason.

list_jr = [
    "All Of Me", "Beautiful Love", "Everything Happens To Me", "Song Is You, The",
    "Dearly Beloved", "How High The Moon", "Ornithology", "Meditation",
    "I Can't Get Started", "I Got It Bad",
    "End Of A Love Affair, The", "I Remember You", "One Finger Snap",
    "In a Sentimental Mood", "Invitation", "Seven Steps To Heaven",
    "Dolphin Dance", "My One And Only Love", "I'll Take Romance",
    "Up Jumped Spring", "Out Of Nowhere", "Round Midnight",
    "Way You Look Tonight, The", "Sophisticated Lady", "Giant Steps",
    "You Stepped Out Of A Dream", "Tenderly", "When I Fall In Love",
    "Just One Of Those Things", "Over The Rainbow (Somewhere)", "Speak No Evil"
]

list_mm1 = [
    "It Could Happen To You", "Con Alma", "Nica's Dream", "Spring Is Here",
    "It Might As Well Be Spring", "Spring Can Really Hang You Up The Most",
    "Pensativa", "Upper Manhattan Medical Group", "You Don't Know What Love Is",
    "Round Midnight", "Falling Grace", "These Foolish Things", "Once I Loved",
    "Speak Low", "Pent Up House", "Polkadots And Moonbeams", "My Shining Hour",
    "I'm Old Fashioned", "Soul Eyes", "I Thought About You",
    "Everything Happens To Me", "Moment's Notice", "El Gaucho", "Airegin",
    "All God's Chillun Got Rhythm", "Little Willie Leaps", "Angel Eyes",
    "Lament", "But Not For Me", "But Beautiful", "Caravan"
]

# Validation split: the union of every title in list_fr, list_so, list_jr and
# list_mm1. Titles that appear in more than one list collapse to one entry.
validation_set = set().union(list_fr, list_so, list_jr, list_mm1)

Overwriting /content/drive/My Drive/Colab Notebooks/MARG/Deep Realbook/utils/config.py


In [12]:
%%writefile '/content/drive/My Drive/Colab Notebooks/MARG/Deep Realbook/utils/utils.py'

import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks/MARG/Deep Realbook/utils/')

import importlib
import config

importlib.reload(config)  # Reload the module after making changes

from config import note_to_index, index_to_note, interval_semitones, tension_intervals, tension_intervals_reduced

import json

def extract_chords_from_json(json_file):
    """Read a songbook JSON file and return one flat chord list per song.

    The file is expected to hold a top-level ``songs`` list whose items carry
    a ``title`` and a ``music.measures`` list of measures, each measure being
    a list of chord symbols. Missing keys fall back to ``'Unknown'`` (title)
    or an empty list.

    Args:
        json_file: Path of the UTF-8 encoded JSON file.

    Returns:
        dict mapping song title to a list of chord symbols in which measures
        are separated by ``'|'`` and ``None`` entries are replaced by
        ``"NC"`` (no chord). Songs sharing a title overwrite each other.
    """
    with open(json_file, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)

    sequences = {}
    for entry in payload.get('songs', []):
        tokens = []
        for bar in entry.get('music', {}).get('measures', []):
            tokens.extend("NC" if symbol is None else symbol for symbol in bar)
            tokens.append('|')  # close the measure
        # Drop the separator that trails the final measure.
        sequences[entry.get('title', 'Unknown')] = tokens[:-1]
    return sequences


def note_to_vector(note):
    """Return a 12-element one-hot list marking the pitch class of *note*.

    Raises:
        KeyError: if *note* is not a key of ``note_to_index``.
    """
    position = note_to_index[note]
    return [1 if slot == position else 0 for slot in range(12)]

def add_interval_to_vector(vector, root, interval):
    """Set the slot of the note lying *interval* above *root* to 1.

    The vector is modified in place and also returned. Interval names that
    are not keys of ``interval_semitones`` leave the vector untouched.
    """
    semitones = interval_semitones.get(interval)
    if semitones is None:
        return vector
    vector[(note_to_index[root] + semitones) % 12] = 1
    return vector

def get_interval_between_notes(root_note, bass_note):
    """Name the ascending interval from *root_note* up to *bass_note*.

    Returns the first key of ``interval_semitones`` (in dict order) whose
    semitone count equals the distance modulo 12, or ``None`` when no entry
    matches.
    """
    pitch_classes = [note_to_index[name] for name in (root_note, bass_note)]
    distance = (pitch_classes[1] - pitch_classes[0]) % 12
    return next(
        (label for label, semitones in interval_semitones.items()
         if semitones == distance),
        None,
    )

def chord_to_vector(chord):
    """Encode a chord symbol as a 12-element list of 0/1 pitch-class flags.

    Special tokens: ``'|'`` yields ``['|']`` and ``'NC'`` yields twelve
    zeros. Otherwise the symbol is read as root (a letter plus optional
    ``#``/``b``), quality suffix, and an optional ``/bass`` part. A suffix
    missing from ``tension_intervals`` contributes no notes beyond the root.
    """
    # Tokens that are not real chords get fixed representations.
    if chord == '|':
        return ['|']
    if chord == 'NC':
        return [0] * 12

    pieces = chord.split('/')
    symbol = pieces[0]

    # Root is the first character, extended by an accidental when present;
    # whatever follows is the quality suffix.
    root = symbol[0]
    if symbol[1:2] in ('#', 'b'):
        root = symbol[:2]
    tension = symbol[len(root):]

    vector = note_to_vector(root)
    for interval in tension_intervals.get(tension, []):
        vector = add_interval_to_vector(vector, root, interval)

    # Slash chord: add the bass note, but only when it is spelled as a note.
    bass = pieces[1] if len(pieces) > 1 else None
    if bass in note_to_index:
        bass_interval = get_interval_between_notes(root, bass)
        if bass_interval:
            vector = add_interval_to_vector(vector, root, bass_interval)

    return vector

def create_vector_representation(chord_sequences):
    """Convert every chord of every song with ``chord_to_vector``.

    Args:
        chord_sequences: dict mapping a song key to a list of chord symbols.

    Returns:
        dict with the same keys, each mapped to the list of encoded chords;
        ``None`` entries in the input lists are skipped.
    """
    vector_dict = {}
    for title, symbols in chord_sequences.items():
        encoded = []
        for symbol in symbols:
            if symbol is None:
                continue
            encoded.append(chord_to_vector(symbol))
        vector_dict[title] = encoded
    return vector_dict


def get_lexicographically_smallest_rotation(queue):
    """Return the lexicographically smallest rotation of *queue*.

    Args:
        queue: A sliceable sequence (list, tuple or str) of comparable items.

    Returns:
        A new sequence of the same type holding the smallest rotation. An
        empty input yields an empty copy instead of raising ``ValueError``.
    """
    # Rotations are generated lazily; ``default`` covers the empty sequence,
    # for which there is no rotation to compare.
    return min(
        (queue[i:] + queue[:i] for i in range(len(queue))),
        default=queue[:],
    )

def map_vectors_to_categories(vector1, vector2):
    """Label each position of two vectors by how its value pair changes.

    For the pair ``(v1, v2)`` taken from the same position:
    ``(0, 0)`` -> ``'A'``, ``(1, 0)`` -> ``'B'``, ``(0, 1)`` -> ``'C'``,
    and anything else -> ``'D'``. The result is as long as the shorter input.
    """
    categories = []
    for first, second in zip(vector1, vector2):
        if first == 0 and second == 0:
            label = 'A'
        elif first == 1 and second == 0:
            label = 'B'
        elif first == 0 and second == 1:
            label = 'C'
        else:
            label = 'D'
        categories.append(label)
    return categories

def replace_NC_tokens(chords):
    """Replace "no chord" vectors (twelve zeros) with a neighbouring chord.

    Each NC entry takes the nearest real chord before it. NC entries that
    come before any real chord take the first real chord after them. Bar
    tokens (``['|']``) are never used as a replacement, so an NC that opens
    a measure is filled with the last chord of an earlier measure rather
    than with the bar token itself.

    Args:
        chords: List of encoded chords, possibly containing ``['|']`` bar
            tokens. The list is modified in place.

    Returns:
        The same list object. If it holds no real chord, it is unchanged.
    """
    no_chord = [0] * 12
    bar = ['|']

    previous = None  # most recent real chord seen (or borrowed) so far
    for i, item in enumerate(chords):
        if item == bar:
            continue
        if item != no_chord:
            previous = item
            continue

        if previous is None:
            # Leading NC: borrow the first real chord that follows.
            previous = next(
                (c for c in chords[i + 1:] if c != no_chord and c != bar),
                None,
            )
            if previous is None:
                break  # no real chord anywhere; nothing to substitute
        chords[i] = previous

    return chords

def create_circular_representations(songs):
    """Turn each song into a sequence of transposition-invariant tokens.

    Every pair of consecutive chords (bar tokens skipped) becomes one token:
    the position-wise categories from ``map_vectors_to_categories``, rotated
    to their lexicographically smallest form, followed by ``'F'`` when at
    least one bar token lies between the two chords and ``'E'`` otherwise.

    Args:
        songs: dict mapping song name to a list of encoded chords and
            ``['|']`` bar tokens. The lists are passed through
            ``replace_NC_tokens``, which modifies them in place.

    Returns:
        dict mapping song name to its list of token strings.
    """
    new_sequences = {}

    for song_name, chords in songs.items():
        chords = replace_NC_tokens(chords)  # Replace NC tokens
        token_sequence = []
        i = 0
        while i < len(chords) - 1:
            # Skip bar tokens
            if chords[i] == ['|']:
                i += 1
                continue

            # Find the next chord index that is not a bar token
            j = i + 1
            while j < len(chords) and chords[j] == ['|']:
                j += 1

            # Stop when only bar tokens remain after the current chord
            if j >= len(chords):
                break

            # Map vectors to categories and find smallest rotation
            categories = map_vectors_to_categories(chords[i], chords[j])
            smallest_rotation = get_lexicographically_smallest_rotation(categories)
            rotation_str = ''.join(smallest_rotation)

            # Everything between i and j is a bar token, so any gap means a
            # bar line was crossed -- including several bars in a row, which
            # an exact ``j - i == 2`` check would mislabel as 'E'.
            rotation_str += 'F' if j - i > 1 else 'E'

            token_sequence.append(rotation_str)
            i = j  # Move to the next chord

        new_sequences[song_name] = token_sequence

    return new_sequences


def decode_chord_representation(input_str, mapped_results):
    """Look up the chord pair whose category list spells *input_str*.

    Args:
        input_str: Category string, one character per position.
        mapped_results: Iterable of ``(first_chord, next_chord, categories)``
            triples, where ``categories`` is compared against
            ``list(input_str)``.

    Returns:
        ``(first_chord, next_chord)`` of the first matching triple, or
        ``(None, None)`` when nothing matches.
    """
    wanted = list(input_str)
    return next(
        ((first, following) for first, following, cats in mapped_results
         if cats == wanted),
        (None, None),
    )

def calculate_interval(note1, note2):
    """Return the ascending distance in semitones (0-11) from note1 to note2."""
    start, end = note_to_index[note1], note_to_index[note2]
    return (end - start) % 12

def transpose_chord(chord, interval):
    """Transpose a chord symbol upward by *interval* semitones.

    The root is re-spelled with ``index_to_note`` (flat spellings). For a
    slash chord whose bass part is a note name, the bass note is transposed
    by the same amount so the chord keeps its shape; a bass part that is not
    a note name is left as written.

    Args:
        chord: Chord symbol such as ``"C7"`` or ``"C7/E"``.
        interval: Number of semitones to shift by.

    Returns:
        The transposed chord symbol, or ``None`` when no root note can be
        identified at the start of *chord*.
    """
    root = get_root(chord)
    if root is None:
        return None

    transposed_root = index_to_note[(note_to_index[root] + interval) % 12]

    # Split off the bass part so that root and bass are handled separately.
    main_part, slash, bass = chord.partition('/')
    main_part = main_part.replace(root, transposed_root, 1)
    if bass in note_to_index:
        bass = index_to_note[(note_to_index[bass] + interval) % 12]

    return main_part + slash + bass

def get_root(chord):
    """Return the root note name that *chord* starts with, or ``None``.

    Longer note names are tried first, so a chord such as ``"C#7"`` yields
    ``"C#"`` and not the bare letter ``"C"``, which is also a prefix of it.
    """
    # sorted() is stable: names of equal length keep their dict order.
    for note in sorted(note_to_index, key=len, reverse=True):
        if chord.startswith(note):
            return note
    return None

def process_and_transpose_sequences(generated_sequences, mapped_results):
    """Decode generated token sequences into chained, transposed chords.

    Each whitespace-separated token is split into its category string and a
    trailing bar indicator (``'F'`` = bar line follows, anything else = none).
    The category string is decoded to a chord pair with
    ``decode_chord_representation``. From the second pair on, each pair is
    transposed so that its first chord has the root of the previous pair's
    second chord.

    Tokens that cannot be decoded are dropped together with their bar
    indicator, so the indicators stay aligned with the decoded pairs.
    Sequences without any decodable token are skipped entirely.

    Args:
        generated_sequences: Iterable of token strings.
        mapped_results: Iterable of ``(first_chord, next_chord, categories)``
            triples used for decoding.

    Returns:
        Tuple ``(all_sequences_transposed_chords, all_final_sequences)``: per
        kept sequence, the list of transposed chord pairs, and the flat list
        of first chords interleaved with ``'|'`` bar tokens.
    """
    all_sequences_transposed_chords = []
    all_final_sequences = []

    for generated_text in generated_sequences:
        print(f"Generated Sequence: {generated_text}")

        decoded_chords = []
        bar_tokens = []
        for category_str in generated_text.split():
            # Detach the last character and decode the chord pair
            token, bar_token_indicator = category_str[:-1], category_str[-1]
            first_chord, next_chord = decode_chord_representation(token, mapped_results)

            # Record the bar indicator only with a decoded pair; otherwise
            # bar_tokens[i] would no longer belong to decoded_chords[i].
            if first_chord and next_chord:
                decoded_chords.append((first_chord, next_chord))
                bar_tokens.append(bar_token_indicator)

        if not decoded_chords:
            print("No valid chords found in this sequence")
            continue

        transposed_chords = []
        final_sequence = []
        prev_second_chord_root = None
        for i, (first_chord, second_chord) in enumerate(decoded_chords):
            if i != 0:
                # Shift the pair so it starts where the previous pair ended.
                interval = calculate_interval(get_root(first_chord), prev_second_chord_root)
                first_chord = transpose_chord(first_chord, interval)
                second_chord = transpose_chord(second_chord, interval)
            transposed_chords.append((first_chord, second_chord))

            # Only the first chord of each pair is emitted.
            # NOTE(review): the second chord of the last pair never reaches
            # final_sequence -- confirm whether that is intended.
            final_sequence.append(first_chord)
            if bar_tokens[i] == 'F':
                final_sequence.append('|')  # Add bar token

            prev_second_chord_root = get_root(second_chord)

        all_sequences_transposed_chords.append(transposed_chords)
        all_final_sequences.append(final_sequence)

    return all_sequences_transposed_chords, all_final_sequences

Overwriting /content/drive/My Drive/Colab Notebooks/MARG/Deep Realbook/utils/utils.py
