In [72]:
import os
import sys
from glob import glob

import pandas as pd

sys.path.append(os.path.join('..', '..'))


from src.melody import Melody
from src.utils import get_chord_progressions, replace_enharmonic
from src.utils.constants import PITCH_CLS

In [73]:
metadata = []

chord_progs = get_chord_progressions('../..')

folder_o = '../../data/Complete Examples/v1.2'
files_o = [y for x in os.walk(folder_o) for y in glob(os.path.join(x[0], '*.mid'))]

for file in files_o:
    m = Melody(file)
    m.setup()
    if m.song_name in chord_progs:
        m.set_song_structure(chord_progs[m.song_name])
        
    artist = "".join(m.filename.split(' - ')[0]) if len(m.filename.split(' - ')) > 1 else ""
    print(artist)
    
    
    meta = {
        'filename': m.filename.replace('.mid', '.csv'),
        'source': m.source,
        'song_name': m.song_name,
        'key_tonic': m.chord_progression_key,
        'key_mode': 'minor' if m.chord_progression_minor else 'major',
        'tempo': m.tempo
    }
    
    metadata.append(meta)




















































































































































































































































































































Tenderly ['no tempo detected']


























































































Here's That Rainy Day ['no tempo detected']











































































































































































































































































Art Pepper
Art Pepper
Ben Webster
Ben Webster
Benny Carter
Benny Carter
Benny Carter
Bix Beiderbecke
Bob Berg
Branford Marsalis
Cannonball Adderley
Cannonball Adderley
John Coltrane
John Coltrane
John Coltrane
John Coltrane
John Coltrane
John Coltrane
John Coltrane
John Coltrane
Johnny Dodds
Lee Konitz
Lee Konitz
Lee Konitz
Lee Morgan
Lionel Hampton
Louis Armstrong
Miles Davis
Miles Davis
Miles Davis
Miles Davis
Miles Davis
Miles Davis
Miles Davis
Milt Jackson
Clifford Brown
Clifford Brown
Coleman Hawkins
Curtis Fuller
David Liebman
David Liebman
David Liebman
David Murray
Dickie Wells
Dizzy Gillespie
Dizzy Gillespie
Don Ellis
Fats Navarro
Fats Navarro
Freddie Hubbard
Freddie Hubbard
George Coleman
Herbie Hancock
J J Johnson
J J Johnson
John Coltrane
Milt J

In [100]:
df = pd.DataFrame.from_dict(metadata)
df = df.dropna()
df['tempo'] = (df['tempo'] / 5000).apply(round).astype(int)

In [101]:
filename_mapping = {}
for k,v in enumerate(set(df['filename'].unique())):
    filename_mapping[v] = k
filename_mapping

{'You Are Too Beautiful.csv': 0,
 "I Didn't Know What Time It Was.csv": 1,
 "We'll Be Together Again.csv": 2,
 'Nardis (2).csv': 3,
 'Zoot Sims - Night And Day (1).csv': 4,
 'You Took Advantage Of Me.csv': 5,
 'Donna Lee.csv': 6,
 'A Night In Tunisia.csv': 7,
 'Herbie Hancock - Dolores.csv': 8,
 'Ruby My Dear.csv': 9,
 'Old Devil Moon.csv': 10,
 'Beauty And The Beast.csv': 11,
 'John Coltrane - Impressions (2).csv': 12,
 'Wynton Marsalis - April In Paris.csv': 13,
 'Make Someone Happy.csv': 14,
 'Charlie Parker - Blues For Alice.csv': 15,
 'Zoot Sims - All The Things You Are.csv': 16,
 'Straight, No Chaser (2).csv': 17,
 'In A Mellow Tone.csv': 18,
 'Margie.csv': 19,
 'David Liebman - There Will Never Be Another You.csv': 20,
 'In Walked Bud.csv': 21,
 'New York, New York (2).csv': 22,
 'Dickie Wells - I Got Rhythm.csv': 23,
 'My One And Only Love (2).csv': 24,
 'Pepper Adams - A Night In Tunisia.csv': 25,
 'Boplicity.csv': 26,
 'Come Rain Or Come Shine (1).csv': 27,
 'My Ship.csv': 28

In [102]:
source_mapping = {}
for k,v in enumerate(set(df['source'].unique())):
    source_mapping[v] = k
source_mapping

{'Jazz-Midi': 0,
 'MidKar': 1,
 'Real Book': 2,
 'Doug McKenzie': 3,
 'Oocities': 4,
 'JazzPage': 5,
 'Jazz Standards': 6,
 'Weimar DB': 7}

In [103]:
song_name_mapping = {}
for k,v in enumerate(set(df['song_name'].unique())):
    song_name_mapping[v] = k
song_name_mapping

{'Bye Bye Blackbird': 0,
 'Once In A While': 1,
 'Ruby My Dear': 2,
 "It Don't Mean A Thing": 3,
 'My One And Only Love': 4,
 'A Night In Tunisia': 5,
 'Good Bait': 6,
 'Lady Is A Tramp': 7,
 "They Can't Take That Away From Me": 8,
 "'Round Midnight": 9,
 'Since I Fell For You': 10,
 'Yesterdays': 11,
 "You Don't Know What Love Is": 12,
 'Some Other Time': 13,
 'Margie': 14,
 'Girl Talk': 15,
 'Gentle Rain': 16,
 'Bewitched': 17,
 'I Cover The Waterfront': 18,
 'Equinox': 19,
 'Beautiful Love': 20,
 "I'm Beginning To See The Light": 21,
 'The More I See You': 22,
 'Laura': 23,
 'Epistrophy': 24,
 'But Not For Me': 25,
 'Eighty-One': 26,
 'Mean To Me': 27,
 'Accentuate The Positive': 28,
 'The Way You Look Tonight': 29,
 'Hackensack': 30,
 'Polka Dots And Moonbeams': 31,
 'Pick Yourself Up': 32,
 'Reflections': 33,
 'Wave': 34,
 'Stella By Starlight': 35,
 'Peace': 36,
 'Beautiful Friendship': 37,
 'Ju-Ju': 38,
 'Bemsha Swing': 39,
 'All Of Me': 40,
 'Mood Indigo': 41,
 'Sweet Georgia B

In [104]:
tempo_mapping = {}
for k,v in enumerate(set(df['tempo'].sort_values().unique())):
    tempo_mapping[v] = k
tempo_mapping

{545: 0,
 39: 1,
 40: 2,
 41: 3,
 42: 4,
 43: 5,
 44: 6,
 45: 7,
 46: 8,
 47: 9,
 48: 10,
 49: 11,
 50: 12,
 51: 13,
 52: 14,
 53: 15,
 54: 16,
 55: 17,
 56: 18,
 57: 19,
 58: 20,
 59: 21,
 60: 22,
 61: 23,
 62: 24,
 63: 25,
 64: 26,
 65: 27,
 66: 28,
 67: 29,
 68: 30,
 69: 31,
 70: 32,
 71: 33,
 72: 34,
 73: 35,
 74: 36,
 75: 37,
 76: 38,
 77: 39,
 78: 40,
 79: 41,
 80: 42,
 81: 43,
 82: 44,
 83: 45,
 84: 46,
 85: 47,
 86: 48,
 87: 49,
 88: 50,
 89: 51,
 90: 52,
 91: 53,
 92: 54,
 94: 55,
 95: 56,
 96: 57,
 97: 58,
 98: 59,
 99: 60,
 100: 61,
 102: 62,
 103: 63,
 104: 64,
 105: 65,
 106: 66,
 107: 67,
 108: 68,
 109: 69,
 110: 70,
 111: 71,
 112: 72,
 113: 73,
 114: 74,
 115: 75,
 117: 76,
 118: 77,
 120: 78,
 121: 79,
 122: 80,
 125: 81,
 126: 82,
 128: 83,
 129: 84,
 130: 85,
 132: 86,
 133: 87,
 135: 88,
 136: 89,
 138: 90,
 140: 91,
 141: 92,
 143: 93,
 146: 94,
 150: 95,
 154: 96,
 158: 97,
 160: 98,
 162: 99,
 164: 100,
 167: 101,
 171: 102,
 176: 103,
 179: 104,
 182: 105,
 185

In [105]:
def get_key(row):
    tonic = row['key_tonic']
    mode = row['key_mode']
    
    idx = PITCH_CLS.index(replace_enharmonic(tonic))
    
    if mode == 'minor':
        idx = (idx + 3) % 12
        
    return idx

In [106]:
df['key'] = df.apply(get_key, axis=1)

In [107]:
df['filename_idx'] = df['filename'].apply(lambda x: filename_mapping[x])
df['source_idx'] = df['source'].apply(lambda x: source_mapping[x])
df['song_name_idx'] = df['song_name'].apply(lambda x: song_name_mapping[x])
df['key_idx'] = df['key']
df['tempo_idx'] = df['tempo'].apply(lambda x: tempo_mapping[x])

In [108]:
df[['filename_idx', 'source_idx', 'song_name_idx', 'tempo_idx', 'key_idx']]

Unnamed: 0,filename_idx,source_idx,song_name_idx,tempo_idx,key_idx
0,310,3,187,48,10
1,53,3,78,45,0
2,39,3,123,45,8
3,195,3,123,25,8
4,312,3,123,25,8
...,...,...,...,...,...
759,255,7,63,111,3
760,13,7,129,40,0
761,16,7,123,24,8
762,4,7,69,18,0


In [109]:
df

Unnamed: 0,filename,source,song_name,key_tonic,key_mode,tempo,key,filename_idx,source_idx,song_name_idx,key_idx,tempo_idx
0,After You've Gone.csv,Doug McKenzie,After You've Gone,Bb,major,86,10,310,3,187,10,48
1,Alice In Wonderland.csv,Doug McKenzie,Alice In Wonderland,C,major,83,0,53,3,78,0,45
2,All The Things You Are (1).csv,Doug McKenzie,All The Things You Are,Ab,major,83,8,39,3,123,8,45
3,All The Things You Are (2).csv,Doug McKenzie,All The Things You Are,Ab,major,63,8,195,3,123,8,25
4,All The Things You Are (3).csv,Doug McKenzie,All The Things You Are,Ab,major,63,8,312,3,123,8,25
...,...,...,...,...,...,...,...,...,...,...,...,...
759,Woody Shaw - Imagination.csv,Weimar DB,Imagination,Eb,major,200,3,255,7,63,3,111
760,Wynton Marsalis - April In Paris.csv,Weimar DB,April In Paris,C,major,78,0,13,7,129,0,40
761,Zoot Sims - All The Things You Are.csv,Weimar DB,All The Things You Are,Ab,major,62,8,16,7,123,8,24
762,Zoot Sims - Night And Day (1).csv,Weimar DB,Night And Day,C,major,56,0,4,7,69,0,18


In [110]:
df.to_csv('../../data/finalised/metadata.csv')

In [112]:
df['tempo_idx'].max()

119