In [6]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os


In [7]:
#!pip install pretty_midi

In [18]:
import pretty_midi
import csv

def copy_instrument(original_instrument):
    """Return a new ``pretty_midi.Instrument`` with independent copies of the notes.

    The copy keeps the source instrument's ``program``, ``is_drum`` flag and
    ``name`` so that, for example, a percussion track is still a percussion
    track after copying. Each note is rebuilt as a fresh ``pretty_midi.Note``,
    so mutating the copy's notes never touches ``original_instrument``.

    Only notes are carried over; other per-instrument event lists (such as
    pitch bends or control changes) are not copied by this function.

    Args:
        original_instrument: The instrument to duplicate. Must expose
            ``program``, ``is_drum``, ``name`` and an iterable ``notes``
            whose items have ``velocity``, ``pitch``, ``start`` and ``end``.

    Returns:
        A new ``pretty_midi.Instrument`` containing copies of every note.
    """
    new_instrument = pretty_midi.Instrument(
        program=original_instrument.program,
        # Without these two fields a drum track would be re-created as a
        # melodic, unnamed instrument.
        is_drum=original_instrument.is_drum,
        name=original_instrument.name,
    )

    new_instrument.notes.extend(
        pretty_midi.Note(
            velocity=note.velocity,
            pitch=note.pitch,
            start=note.start,
            end=note.end,
        )
        for note in original_instrument.notes
    )

    return new_instrument

def add_imperfection(midi_data, timing_factor=0.98, pitch_factor=0.98, velocity_factor=0.98):
    """Build a perturbed copy of ``midi_data`` by scaling every note.

    For each note of each instrument (copied via ``copy_instrument``, so the
    input is never mutated):

    * ``start`` and ``end`` are multiplied by ``timing_factor``;
    * ``pitch`` is multiplied by ``pitch_factor``, truncated to an int and
      clamped to 0..127;
    * ``velocity`` is multiplied by ``velocity_factor`` and truncated to an int.

    The result is a fresh ``pretty_midi.PrettyMIDI()`` that only receives the
    copied instruments; nothing else from ``midi_data`` is transferred.

    Args:
        midi_data: Object exposing an iterable ``instruments`` attribute.
        timing_factor: Multiplier applied to note start/end times.
        pitch_factor: Multiplier applied to note pitch numbers.
        velocity_factor: Multiplier applied to note velocities.

    Returns:
        The new ``pretty_midi.PrettyMIDI`` object, or ``None`` as soon as any
        scaled velocity falls outside 0..127 (callers treat ``None`` as
        "skip this augmentation").
    """
    max_midi_value = 127  # upper bound used for both velocity and pitch

    modified_midi_data = pretty_midi.PrettyMIDI()

    for instrument in midi_data.instruments:
        new_instrument = copy_instrument(instrument)

        for note in new_instrument.notes:
            # Compute once; an out-of-range velocity rejects the whole file.
            scaled_velocity = int(note.velocity * velocity_factor)
            if scaled_velocity < 0 or scaled_velocity > max_midi_value:
                return None

            note.start *= timing_factor
            note.end *= timing_factor

            # Previously unbounded: an upward factor could push high notes past
            # 127, leaving a pitch that cannot be stored in a MIDI file.
            note.pitch = max(0, min(int(note.pitch * pitch_factor), max_midi_value))

            # Already validated above, so no further clamping is needed.
            note.velocity = scaled_velocity

        modified_midi_data.instruments.append(new_instrument)

    return modified_midi_data

def process_dataset(dataset_path, factors=(0.98, 1.02, 1.05, 0.95, 0.92)):
    """Write scaled variants of every ``.mid`` file found in ``dataset_path``.

    Each source file is loaded once with ``pretty_midi.PrettyMIDI`` and passed
    to ``add_imperfection`` once per entry of ``factors``; the same factor is
    used for timing, pitch and velocity. Variant number ``i`` (1-based) is
    written next to the source as ``<stem>_modified<i>.mid``.

    Every augmentation is attempted independently: a rejected variant
    (``add_imperfection`` returned ``None``) or a ``ValueError`` raised while
    writing is reported with ``print`` and does not prevent the remaining
    variants of that file from being produced.

    Files whose stem already ends in ``_modified<digits>`` are treated as
    outputs of an earlier run and are not augmented again, so re-running the
    cell does not multiply the dataset.

    Args:
        dataset_path: Directory that holds the MIDI files; outputs are written
            into the same directory.
        factors: Scaling factors, one augmentation per entry. The default
            reproduces the five augmentations this notebook has always used.

    Returns:
        None. Progress and file counts are printed.
    """
    extension = ".mid"
    marker = "_modified"

    # Snapshot the listing once so files written below are never picked up
    # by the loop that is producing them.
    file_names = os.listdir(dataset_path)
    print("Number of files before augmentation: ", len(file_names))

    for midi_file in file_names:
        if not midi_file.endswith(extension):
            continue

        # Strip only the trailing extension; str.replace would also rewrite
        # any ".mid" that happens to occur earlier in the file name.
        stem = midi_file[:-len(extension)]

        # Skip "<name>_modified<N>.mid" files left behind by a previous run.
        _, found_marker, variant_number = stem.rpartition(marker)
        if found_marker and variant_number.isdigit():
            continue

        midi_file_path = os.path.join(dataset_path, midi_file)

        # Load the original MIDI file
        original_midi_data = pretty_midi.PrettyMIDI(midi_file_path)

        for index, factor in enumerate(factors, start=1):
            modified_midi_data = add_imperfection(
                original_midi_data,
                timing_factor=factor,
                pitch_factor=factor,
                velocity_factor=factor,
            )

            # A None result means this variant was rejected; move on to the
            # next factor rather than abandoning the file.
            if modified_midi_data is None:
                print(f"Skipping {midi_file} modification_{index} due to invalid velocity after modification.")
                continue

            modified_output_path = os.path.join(dataset_path, f"{stem}{marker}{index}{extension}")
            try:
                modified_midi_data.write(modified_output_path)
            except ValueError as e:
                print(f"Error writing modified file {index} for {midi_file}: {e}")

    print("Number of files after augmentation: ", len(os.listdir(dataset_path)))
        
def main():
    """Run the augmentation pass over the notebook's MIDI directory."""
    # The dataset lives in a folder relative to the working directory.
    process_dataset("MIDI_Files/")

# Kick off the augmentation only when this code runs as the top-level module
# (the captured output below this cell shows that it did run here); importing
# the code from elsewhere does not trigger it.
if __name__ == "__main__":
    main()


Number of files before augmentation:  61
Skipping ALBW Lorule Castle.mid modification_2 due to invalid velocity after modification.
Skipping ALBW Lorule Castle.mid modification_3 due to invalid velocity after modification.
Skipping FS Staff Roll.mid modification_2 due to invalid velocity after modification.
Skipping FS Staff Roll.mid modification_3 due to invalid velocity after modification.
Skipping FS The Four Links.mid modification_2 due to invalid velocity after modification.
Skipping FS The Four Links.mid modification_3 due to invalid velocity after modification.
Skipping FS Title Theme.mid modification_2 due to invalid velocity after modification.
Skipping FS Title Theme.mid modification_3 due to invalid velocity after modification.
Skipping FSA Hyrule Field.mid modification_2 due to invalid velocity after modification.
Skipping FSA Hyrule Field.mid modification_3 due to invalid velocity after modification.
Skipping FSA Realm.mid modification_2 due to invalid velocity after modif

In [9]:
#import pandas as pd
#df=pd.read_csv('MIDI_Files_Aug/augmented_dataset_records.csv')
#df.head()

In [10]:
# remove MIDI_Files and MIDI_FILES_Aug dirs
#!rm -rf MIDI_Files
#!rm -rf MIDI_Files_Aug

In [17]:
# remove augmented files from MIDI_Files
import os
import glob

# Collect every file in MIDI_Files whose name matches "*_modified*.mid",
# i.e. the naming scheme process_dataset uses for its augmented outputs.
# NOTE(review): this cell carries execution count 17 but sits below the
# augmentation cell (18); on a top-to-bottom run it would delete the files
# that cell has just written — confirm the intended order.
files = glob.glob('MIDI_Files/*_modified*.mid')

for file in files:
    try:
        os.remove(file)
    except Exception as e:
        # Best-effort cleanup: report the failure and keep removing the rest.
        print(f"Error removing file {file}: {e}")