In [1]:
# Music processing
from pretty_midi import *
# Algorithms
from itertools import takewhile
# File management
import os
from glob import glob
from pathlib import Path
import csv

## Settings

In [2]:
# Jazz music target and destination
corpus_path = r'C:\Users\alext\Desktop\School\2021 Spring\CS 271\Final Project\Feature Extraction\Classical\Classical Corpus (Solo Piano Only)'
export_path = r'C:\Users\alext\Desktop\School\2021 Spring\CS 271\Final Project\Feature Extraction\Classical\Classical Extracted Notes (Solo Piano Only)'

# Minimum number of notes per part
min_notes = 25
# Minimum number of chord phrases per part
min_chords = 1

# 0 = Treble, 1 = Bass
clef = 1

## Export a Music File to a CSV File

In [3]:
def export_file(corpus_path, export_path, file_name):
    # Construct the full path of the music file
    file_path = os.path.join(corpus_path, file_name);
    # Construct the full path of the target csv file
    target_path = os.path.join(export_path, file_name[:-4] + '.csv');
    
    # If the csv already exists, skip this music file
    if os.path.exists(target_path):
        return
    
    # Parse the file and convert it into a stream object
    midi_data = pretty_midi.PrettyMIDI(file_path)
    
    # Get the desired clef
    inst = midi_data.instruments[clef]
    
    # Get the class of instrument
    inst_class = program_to_instrument_class(inst.program)

    # Check for keyboard instruments
    if inst_class == 'Piano' or inst_class == 'Organ':
        # Check if the part meets the minimum note requirement
        if len(inst.notes) >= min_notes:
            # Remove the extension of the music file name
            part_name = file_name[:-4]

            # Export the notes in the part to a csv
            export_part(corpus_path, export_path, inst.notes, part_name)

## Export a Part to a CSV File

In [4]:
def export_part(corpus_path, export_path, part, part_name):
    # Extract the notes of the part
    notes = extract_notes(part)
    
    # If the part doesn't contain enough chords, don't export it
    if notes == None:
        return
    
    # Construct the full path of the target csv file
    csv_path = os.path.join(export_path, part_name + '.csv');
    
    # Write the features list to a csv file
    with open(csv_path, 'w', newline='') as csv_file:
        write = csv.writer(csv_file)
        write.writerows(notes)

## Extract the Notes from a Part

In [5]:
def extract_notes(part):
    # An orderd list of phrases (notes and chords) in the song
    phrase_list = []
    # The phrase currently being processed
    phrase = []
    # The number of chords in the part
    num_chords = 0
    # The start time of the note currently being processed
    curr_time = part[0].start
    
    # Organize all notes into parts and add them to the part list
    for note in part:
        # If this note starts a the same time as the previous,
        # append it to the chord
        if note.start == curr_time:
            phrase.append(str(note.pitch))
        # If this note starts later than the previous one,
        # save the last phrase and construct a new one
        else:
            phrase_list.append(phrase)
            
            if len(phrase) > 1:
                num_chords += 1
            
            phrase = []
            phrase.append(str(note.pitch))
            curr_time = note.start
    
    # Add the last phrase
    phrase_list.append(phrase)
    
    if len(phrase) > 1:
        num_chords += 1
    
    # If the part doesn't contain enough chords, return nothing
    if num_chords < min_chords:
        return None
    
    return phrase_list

## Preprocess the Music Corpus

In [6]:
print('Extracting...\n')

# Go to the corpus directory
os.chdir(corpus_path)

# Get a list of all music files in the corpus
music_files = glob('*.mid')

num_errors = 0

# Export the notes of each score as a csv file
for file_name in music_files:
    try:
        export_file(corpus_path, export_path, file_name)
    except Exception as err:
        print("Skipping File {0}. Error: {1}".format(file_name, err))
        num_errors += 1

print('\nFinished')
print('Total number of errors: ' + str(num_errors))

Extracting...





Skipping File 2mvgmYeZUHTpPtHucAF5ov.mid. Error: list index out of range
Skipping File 2n2nTLX3pqdQopUgXoDif3.mid. Error: list index out of range
Skipping File 2TRSYfaH5ZyxadorDdRMz9.mid. Error: list index out of range
Skipping File 2wYGnCjbfdLLeBV9P3PyuL.mid. Error: list index out of range
Skipping File 2YdxRbpcpmht3fKbBsnwKY.mid. Error: list index out of range
Skipping File 3Jc2jW4gBtpHoWPpwJjLJ3.mid. Error: list index out of range
Skipping File 42waeofCadEwSvTGQo7VqP.mid. Error: list index out of range
Skipping File 45wssfpzEkcb4s42qBLACv.mid. Error: list index out of range
Skipping File 4atYcxiMCB9xXb4CnpGRMt.mid. Error: list index out of range
Skipping File 4bJUa7Nd5um6yBawkN9bCo.mid. Error: list index out of range
Skipping File 4CaYHmGvAz9C3v8kfZjfpq.mid. Error: list index out of range
Skipping File 4KsAPdSCRWJntAzqqRMQWP.mid. Error: list index out of range
Skipping File 4tR8e7CgDr2VATPd5vkcZG.mid. Error: list index out of range
Skipping File 54HwWFozfhVXYc8BMhxDi7.mid. Error: li

Skipping File jfkBDTsp8NRDQUqPZ2F35P.mid. Error: list index out of range
Skipping File jgf4QzTESdwPeZhQKhDC7e.mid. Error: list index out of range
Skipping File JHvEZEeCGQHwP3RAzyTUr8.mid. Error: list index out of range
Skipping File JKMri9rAD97Ncj8yussoso.mid. Error: list index out of range
Skipping File JmhqaLZxcFrcQYnzefGbpG.mid. Error: list index out of range
Skipping File jMtU3RFQ6J5u4QLvMTLjB8.mid. Error: list index out of range
Skipping File jP3izsRtQyGrhe96zKTsHL.mid. Error: list index out of range
Skipping File jQkR8bL4MjUpe2ofzbZAtE.mid. Error: list index out of range
Skipping File JspzSk8chnrUHV8id28AbP.mid. Error: list index out of range
Skipping File jZTQ992CQTgLQtvC8dUUUL.mid. Error: list index out of range
Skipping File k3B8gMJpVmLShkxUvsLCkq.mid. Error: list index out of range
Skipping File K7muSQFBf9XAF4yjS6A2fE.mid. Error: MIDI file has a largest tick of 4295154736, it is likely corrupt
Skipping File kBrvfrxQRmmzituH4trwDR.mid. Error: list index out of range
Skipping F