In [1]:
# Music processing
from pretty_midi import *
# Algorithms
from itertools import takewhile
# File management
import os
from glob import glob
from pathlib import Path
import csv

## Settings

In [2]:
# Where the classical MIDI corpus lives and where its extracted CSVs are written
corpus_path = r'C:\Users\alext\Desktop\School\2021 Spring\CS 271\Final Project\Feature Extraction\Classical\Classical Corpus'
export_path = r'C:\Users\alext\Desktop\School\2021 Spring\CS 271\Final Project\Feature Extraction\Classical\Classical Extracted Notes'

# To process the jazz corpus instead, comment out the pair above and enable this one
#corpus_path = r'C:\Users\alext\Desktop\School\2021 Spring\CS 271\Final Project\Feature Extraction\Jazz\Jazz Corpus'
#export_path = r'C:\Users\alext\Desktop\School\2021 Spring\CS 271\Final Project\Feature Extraction\Jazz\Jazz Extracted Notes'

# A part is only exported when it holds at least this many notes
min_notes = 25
# A part is only exported when it holds at least this many chord phrases
min_chords = 1

## Export a Music File to a CSV File

In [3]:
def export_file(corpus_path, export_path, file_name):
    """Export every qualifying keyboard part of one MIDI file to CSV.

    A part qualifies when it is a non-drum instrument whose program maps to
    the 'Piano' or 'Organ' instrument class and it holds at least
    `min_notes` notes. The first qualifying part is exported under the
    file's base name; later ones get a '-<n>' suffix (n = 2, 3, ...).

    Args:
        corpus_path: Directory containing the music file.
        export_path: Directory the CSV files are written to.
        file_name: Name of the music file inside `corpus_path`.

    Returns:
        None. Returns early, without parsing, when
        '<export_path>/<base name>.csv' already exists.

    Raises:
        Whatever the MIDI parser raises for an unreadable file; the caller
        is expected to handle it.
    """
    # Construct the full path of the music file
    file_path = os.path.join(corpus_path, file_name)
    # Strip the extension whatever its length ('.mid', '.midi', ...)
    base_name = os.path.splitext(file_name)[0]
    # Construct the full path of the target csv file
    target_path = os.path.join(export_path, base_name + '.csv')

    # If the csv already exists, skip this music file
    if os.path.exists(target_path):
        return

    # Parse the file (names come from the notebook's `from pretty_midi import *`)
    midi_data = pretty_midi.PrettyMIDI(file_path)

    # The number of keyboard parts exported so far from this song
    keyboard_parts = 0

    # Iterate over all instrument parts in the score
    for inst in midi_data.instruments:
        # Percussion tracks commonly carry program 0, which would otherwise
        # be classified as 'Piano'; they are not keyboard parts
        if inst.is_drum:
            continue

        # Keep only keyboard instruments
        inst_class = program_to_instrument_class(inst.program)
        if inst_class not in ('Piano', 'Organ'):
            continue

        # Keep only parts that meet the minimum note requirement
        if len(inst.notes) < min_notes:
            continue

        keyboard_parts += 1

        # If this isn't the first keyboard part, append the part number
        part_name = base_name
        if keyboard_parts > 1:
            part_name += '-' + str(keyboard_parts)

        # Export the notes in the part to a csv
        export_part(corpus_path, export_path, inst.notes, part_name)

## Export a Part to a CSV File

In [4]:
def export_part(corpus_path, export_path, part, part_name):
    """Write the phrases of one part to '<export_path>/<part_name>.csv'.

    Each phrase returned by `extract_notes` becomes one CSV row. Nothing is
    written when `extract_notes` returns None.

    Args:
        corpus_path: Unused here; kept so existing callers keep working.
        export_path: Directory the CSV file is written to.
        part: The notes of the part, passed straight to `extract_notes`.
        part_name: File name of the CSV without the '.csv' extension.
    """
    # Extract the notes of the part
    notes = extract_notes(part)

    # If the part was rejected by extract_notes, don't export it
    if notes is None:
        return

    # Construct the full path of the target csv file
    csv_path = os.path.join(export_path, part_name + '.csv')

    # newline='' lets the csv module control line endings itself
    with open(csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerows(notes)

## Extract the Notes from a Part

In [5]:
def extract_notes(part):
    """Group the notes of a part into phrases that start at the same time.

    A phrase is a list of pitch strings; a phrase with more than one pitch
    counts as a chord.

    Args:
        part: Sequence of note objects exposing `start` and `pitch`.

    Returns:
        The list of phrases in order of start time, or None when `part` is
        empty or contains fewer than the module-level `min_chords` chords.
    """
    # An empty part has no first note to read and nothing to export
    if len(part) == 0:
        return None

    # Grouping only merges neighbouring notes, so equal start times must be
    # adjacent. The incoming order is not guaranteed to be by start time
    # (NOTE(review): pretty_midi appears to append notes as they end -
    # confirm). sorted() is stable, so notes sharing a start time keep
    # their original relative order.
    ordered_notes = sorted(part, key=lambda note: note.start)

    # An ordered list of phrases (notes and chords) in the song
    phrase_list = []
    # The phrase currently being processed
    phrase = []
    # The start time shared by the notes of the current phrase
    curr_time = ordered_notes[0].start

    for note in ordered_notes:
        # A later start time closes the current phrase and opens a new one
        if note.start != curr_time:
            phrase_list.append(phrase)
            phrase = []
            curr_time = note.start
        phrase.append(str(note.pitch))

    # Add the last phrase
    phrase_list.append(phrase)

    # Count the phrases that hold more than one pitch
    num_chords = sum(1 for group in phrase_list if len(group) > 1)

    # If the part doesn't contain enough chords, return nothing
    if num_chords < min_chords:
        return None

    return phrase_list

## Preprocess the Music Corpus

In [6]:
print('Extracting...\n')

# Go to the corpus directory
os.chdir(corpus_path)

# Make sure the destination exists; otherwise every csv write below would
# fail and be reported as a skipped file
os.makedirs(export_path, exist_ok=True)

# Get a list of all music files in the corpus
music_files = glob('*.mid')

num_errors = 0

# Export the notes of each score as a csv file
for file_name in music_files:
    try:
        export_file(corpus_path, export_path, file_name)
    except Exception as err:
        # Best effort: one unreadable file is reported and counted, then
        # the run continues with the next file
        print("Skipping File {0}. Error: {1}".format(file_name, err))
        num_errors += 1

print('\nFinished')
print('Total number of errors: ' + str(num_errors))

Extracting...





Skipping File 2iH24VcccnNwNGDxaxE5Lu.mid. Error: running status without last_status
Skipping File 2uEVCciTszWz9oBCZEkHo7.mid. Error: running status without last_status
Skipping File 2vFiGbBPxaMw4YKiahe6Sp.mid. Error: data byte must be in range 0..127
Skipping File 3FbGdmJA9pmnPFz6wbSAvX.mid. Error: running status without last_status
Skipping File 3SzeJqBupEcw3RWqrXnqXH.mid. Error: running status without last_status
Skipping File 3Yvn47uez2jRGYzYTo9Pp7.mid. Error: running status without last_status
Skipping File 42FzccnPgQpu784FNh2bZj.mid. Error: data byte must be in range 0..127
Skipping File 43KktYzTPQZR4ZMrkRRtF8.mid. Error: running status without last_status
Skipping File 4fA6vXybyrcxLB7P7WpEbn.mid. Error: running status without last_status
Skipping File 58zz5WUjxtedjRTNZddkpc.mid. Error: running status without last_status
Skipping File 5LoRn3ewHA96JhLRkHXKvn.mid. Error: data byte must be in range 0..127
Skipping File 5PuxGev8NZeyscYJXaf8fn.mid. Error: running status without last_st

Skipping File QdP3pixmQNXHfLNXnaxVJq.mid. Error: running status without last_status
Skipping File QggJaJiTeJwrUMHyZZgKsK.mid. Error: data byte must be in range 0..127
Skipping File QVoyzAo3d6LDem3RhQNouy.mid. Error: running status without last_status
Skipping File R5zGn2uJ64h6Np3JqEewMS.mid. Error: running status without last_status
Skipping File RqtYEUJHkaxARoZtGPNBXL.mid. Error: running status without last_status
Skipping File St8LkNZBp6zbWFqdn2mCT9.mid. Error: MIDI file has a largest tick of 4295466857, it is likely corrupt
Skipping File Uwy9DqKaYko2mRJEDk89jR.mid. Error: running status without last_status
Skipping File UyGgMQHBRwyf2aHwcAaDun.mid. Error: running status without last_status
Skipping File VDwRpoVafzkYwCRyV4UfGz.mid. Error: running status without last_status
Skipping File VEwN33u9VYhtAxBTyeoiXu.mid. Error: running status without last_status
Skipping File VtQ8uJDKjZagcobttRPNDe.mid. Error: running status without last_status
Skipping File VWMFujaXg4ZheyuasgFWme.mid. Error