In [1]:
import pretty_midi as pm
from mido import MidiFile
import music21
import os
from glob import glob
import pandas as pd
import re
import copy

In [2]:
folder = "../data/Complete Examples/Oocities"

files = [y for x in os.walk(folder) for y in glob(os.path.join(x[0], '*.mid'))]

In [3]:
files[9]

'../data/Complete Examples/Oocities/Come Rain Or Come Shine.mid'

In [4]:
melody_names = ['melody', 'melodia', 'melodía', 'lead']
non_melody_names = ['bass', 'bajo', 'basso', 'baixo', 'drum', 'percussion', 'batería', 'bateria', 'chord', 'rhythm',
                     'cymbal', 'clap', 'kick', 'snare', 'hh ', 'hats', 'ride', 'kit']

In [5]:
def get_channels(m):
    channels = set({})
    for t in m.tracks:
        for m in t:
            if m.type == 'note_on':
                channels.add(m.channel)
                    
    return channels

In [6]:
def get_programs_mido(m, channels):
    programs = []
    
    for c in channels:
        programs.append(tuple([0, c]))
    
    for t in m.tracks:
        for m in t:
            if m.type == 'program_change':
                programs.append(tuple([m.program, m.channel]))
                
    return programs

In [7]:
def get_programs_pm(m):
    programs = []
    
    for i in m.instruments:
        programs.append(tuple([i.program, i.is_drum]))
        
    return programs

In [8]:
def filter_instruments(pm_p):
    filtered_instr = [
        i for i in pm_p.instruments 
        if i.is_drum == False
        and all(
            sub not in i.name.lower() for sub in non_melody_names
        )]
    
    return filtered_instr

In [9]:
def get_melody_tracks(pm_p):
    melody_tracks = [i for i in pm_p.instruments if 'solo' in i.name.lower()]
    
    if len(melody_tracks) == 0:
        melody_tracks = [
            i for i in pm_p.instruments
            if any(
                sub in i.name.lower() for sub in melody_names
            )]
    
    return melody_tracks

In [10]:
def extract(file):
    status = 'unprocessed'
    error = None
    
    source = os.path.basename(os.path.dirname(file))
    
    single_melody = False
    multiple_melody = False
    solo_channel = False
    multi_channel = False
    disagreement = False
    single_candidate = False
    
    melody_tracks = []
    filtered_instr = []
    melody_track = None
    
    song_name = os.path.basename(file).replace('.* - ', '').replace('.mid', '')
    song_name = re.sub(r'\([0-9]*\)', '', song_name)

    melody_track_name = None
    
    channels = None
    candidate_names = []
    n_channels = None
    n_instr = None
    mido_p = None
    pm_m = None
    pm_p = None
    midi_type = None
    
    instruments = []
    
    try:
        mido_m = MidiFile(file, clip=True)
        midi_type = mido_m.type

        pm_m = pm.PrettyMIDI(file)

        channels = get_channels(mido_m)
        mido_p = get_programs_mido(mido_m, channels)
        pm_p = get_programs_pm(pm_m)
        
        instruments = pm_m.instruments

        for p, _ in pm_p:
            if p not in [m[0] for m in mido_p]:
                error = 'program mismatch'

        melody_tracks = get_melody_tracks(pm_m)

        n_tracks = len(mido_m.tracks)
        n_channels = len(channels)
        n_instr = len(pm_m.instruments)

        filtered_instr = filter_instruments(pm_m)
        candidate_names = [i.name for i in filtered_instr]

        if n_channels == 1 & n_tracks == 1:
            solo_channel = True
            melody_track = pm_m.instruments[0]    

        if len(melody_tracks) == 1:
            melody_track = melody_tracks[0]
            single_melody = True
            single_candidate = True

        if len(melody_tracks) > 1:
            multiple_melody = True

        if len(filtered_instr) == 1:
            melody_track = filtered_instr[0]
            single_candidate = True

        if len(filtered_instr) > 1:
            multi_channel = True

        if n_channels != n_instr:
            disagreement = True


        if single_melody or solo_channel or single_candidate:
            status = 'processed'
            
            out_path = os.path.join('..', 'data', 'Complete Examples Melodies')
            
           
        if status == 'unprocessed':
            
            out_path = os.path.join('..', 'data', 'Complete Examples Melodies Random')
            
            melody_track  = filtered_instr[0]
           
        
        if melody_track:
            melody_track_name = melody_track.name
            
        if not os.path.exists(os.path.join(out_path, source)):
            os.mkdir(os.path.join(out_path, source))
                
        out_filename = os.path.join(out_path, source, os.path.basename(file))

        pm_melody = copy.deepcopy(pm_m)
        pm_melody.instruments = [melody_track]
        
        print(pm_melody.time_signature_changes)    
        
        pm_melody.write(out_filename) 
            
        
    except Exception as e:
        print(f, e)
        error = e
    
    
    return {
       'filename': file,
       'source': source,
       'song_name': song_name,
       'midi_type' : midi_type,
       'channels': channels,
       'n_channels': n_channels,
       'instruments': instruments,
       'n_instruments': n_instr,
       'programs_mido': mido_p,
       'programs_pm': pm_p,
       'melody_tracks': melody_tracks,
       'melody_track': melody_track,
       'melody_track_name': melody_track_name,
       'candidates': filtered_instr,
       'candidate_names': candidate_names,
       'single_melody': single_melody,
       'multiple_melody': multiple_melody,
       'solo_channel': solo_channel, 
       'single_candidate': single_candidate,
       'multi_channel': multi_channel,
       'disagreement': disagreement,
       'status': status,
       'error': error
   }

In [11]:
len(files)

42

In [12]:
extract(files[9])

[TimeSignature(numerator=4, denominator=4, time=0.0)]


{'filename': '../data/Complete Examples/Oocities/Come Rain Or Come Shine.mid',
 'source': 'Oocities',
 'song_name': 'Come Rain Or Come Shine',
 'midi_type': 1,
 'channels': {1, 2, 9},
 'n_channels': 3,
 'instruments': [Instrument(program=32, is_drum=False, name="Bass"),
  Instrument(program=0, is_drum=True, name="Drums"),
  Instrument(program=0, is_drum=False, name="Piano")],
 'n_instruments': 3,
 'programs_mido': [(0, 1), (0, 2), (0, 9), (32, 1), (0, 2)],
 'programs_pm': [(32, False), (0, True), (0, False)],
 'melody_tracks': [],
 'melody_track': Instrument(program=0, is_drum=False, name="Piano"),
 'melody_track_name': 'Piano',
 'candidates': [Instrument(program=0, is_drum=False, name="Piano")],
 'candidate_names': ['Piano'],
 'single_melody': False,
 'multiple_melody': False,
 'solo_channel': False,
 'single_candidate': True,
 'multi_channel': False,
 'disagreement': False,
 'status': 'processed',
 'error': None}

In [13]:
f='/media/manu/Data/PycharmProjects/thesis/data/Complete Examples Melodies/Oocities/Come Rain Or Come Shine.mid'
m = MidiFile(f, clip=True)

print(m)

for i, track in enumerate(m.tracks):
        for msg in track:
            if msg.type == 'time_signature':
                print(msg)

MidiFile(type=1, ticks_per_beat=480, tracks=[
  MidiTrack([
    MetaMessage('set_tempo', tempo=375000, time=0),
    MetaMessage('time_signature', numerator=4, denominator=4, clocks_per_click=24, notated_32nd_notes_per_beat=8, time=0),
    MetaMessage('key_signature', key='F', time=0),
    MetaMessage('end_of_track', time=1)]),
  MidiTrack([
    MetaMessage('track_name', name='Piano', time=0),
    Message('program_change', channel=0, program=0, time=0),
    Message('note_on', channel=0, note=60, velocity=83, time=56),
    Message('note_on', channel=0, note=64, velocity=77, time=2),
    Message('note_on', channel=0, note=62, velocity=73, time=1),
    Message('note_on', channel=0, note=67, velocity=77, time=3),
    Message('note_on', channel=0, note=57, velocity=56, time=4),
    Message('note_on', channel=0, note=53, velocity=59, time=6),
    Message('note_on', channel=0, note=64, velocity=0, time=78),
    Message('note_on', channel=0, note=57, velocity=0, time=8),
    Message('note_on', 

In [14]:
songs_list = []
for f in files:
    song = extract(f)
    songs_list.append(song)

../data/Complete Examples/Oocities/Invitation.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/A Night In Tunisia.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/All Blues.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/All Of Me.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/Alone Together (1).mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
[TimeSignature(numerator=4, denominator=4, time=0.0)]
../data/Complete Examples/Oocities/Another You.mid 
../data/Complete Examples/Oocities/April In Paris.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/B



../data/Complete Examples/Oocities/Israel.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/Lazybird.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/Maiden Voyage.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/Mean To Me.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/Meditation.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
../data/Complete Examples/Oocities/Misty.mid [Errno 2] No such file or directory: '../data/Complete Examples Melodies Random/Oocities'
[TimeSignature(numerator=3, denominator=4, time=0.0)]
[TimeSignature(numerator=4, denominator=4, time=0.0)]
[TimeSignature(numerator=4, denominator=4, time=0.0)]
../dat



[]
[TimeSignature(numerator=4, denominator=4, time=0.0)]
[TimeSignature(numerator=4, denominator=4, time=0.0)]


In [15]:
columns = ['filename', 'source', 'song_name', 'midi_type', 'channels', 
           'n_channels', 'instruments', 'n_instruments', 
           'programs_mido', 'programs_pm', 'melody_tracks', 'melody_track', 
           'melody_track_name', 'candidates', 'candidate_names', 'single_melody', 'multiple_melody', 
           'solo_channel', 'single_candidate', 'multi_channel', 'disagreement', 'status', 'error']
songs = pd.DataFrame(
   songs_list, 
   columns=columns
)

In [16]:
output_csv = './data/songs/v1.csv'

if not os.path.exists(output_csv):
    songs.to_csv(output_csv)
else:
    print('File already exists!')

FileNotFoundError: [Errno 2] No such file or directory: './data/songs/v1.csv'

In [None]:
songs

In [None]:
songs[songs['error'] == 'program mismatch'][['programs_mido', 'programs_pm']]

In [None]:
songs[~songs['error'].isnull()]['error'].value_counts()

In [None]:
songs[~songs.melody_track.isnull()]

In [None]:
songs[songs.melody_track.isnull()]

In [None]:
songs[songs.multiple_melody].shape

In [None]:
songs[(songs.single_melody | songs.solo_channel)]

In [None]:
songs.shape

In [None]:
songs.single_melody.value_counts()

In [None]:
songs.multiple_melody.value_counts()

In [None]:
songs.solo_channel.value_counts()

In [None]:
songs.multi_channel.value_counts()

In [None]:
songs.single_candidate.value_counts()

In [None]:
songs.disagreement.value_counts()

In [None]:
songs.error.value_counts()

In [None]:
songs.status.value_counts()

In [None]:
songs[songs.status == 'processed']

In [None]:
unpro = songs[songs.status == 'unprocessed'].head(10)

In [None]:
songs.melody_track.isnull().sum()