# Chuyển file MIDI thành cấu trúc dữ liệu của Python

Environment: Python 3.10.10

In [14]:
# Mido requires Python version 3.7 or higher.
# !pip install mido

!pip install pretty_midi



## Import thư viện

In [15]:
# import mido

import os
import sys

# from matplotlib import pyplot as plt
# from matplotlib import patches
# from matplotlib import colors
import pretty_midi
import pandas as pd

## Đường dẫn đến file MIDI

In [16]:
# Paths to the sample MIDI files (relative paths, resolved against the
# current working directory).
midi_file_paths = [
    './data/TimeTesting.mid',
    './data/Stickz - Sentio Synth Loop 001 - 90BPM Amin - MIDI.mid',
]

## Chuyển đổi note number sang note name (có thông tin quãng tám)

In [17]:
def note_number_to_name_with_octave(note_number):
    """Convert a MIDI note number into a note name and an octave.

    Black keys are always spelled with sharps (e.g. 61 -> "C#"); the
    enharmonic flat spelling (e.g. "Db") is not reported.

    Args:
        note_number: Integer MIDI note number (60 is middle C, i.e. C4).

    Returns:
        dict: ``{'note_name': str, 'octave': int}``, for example
        ``{'note_name': 'C', 'octave': 4}`` for note number 60.
    """
    note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    # One octave spans len(note_names) == 12 semitones. Note number 0 is the
    # C of octave -1 (so that 60 maps to C4), hence the "- 1" below.
    octave_index, note_index = divmod(note_number, len(note_names))

    # TODO: optionally expose the enharmonic flat spelling (e.g. "C#/Db").
    return {
        'note_name': note_names[note_index],
        'octave': octave_index - 1,
    }

# Example usage: convert one note number and print both fields of the result.
# Expected output for 60: "Note Name: C" followed by "Octave: 4".
note_number = 60  # MIDI note number for middle C
note_info = note_number_to_name_with_octave(note_number)
print(f"Note Name: {note_info['note_name']}")
print(f"Octave: {note_info['octave']}")


Note Name: C
Octave: 4


## MIDI sang Data Structure in Python với thư viện mido 

- Lưu ý: không sử dụng cách này nữa vì mido đọc sai với các nốt quá ngắn; sẽ kiểm tra lại sau.

In [18]:
def midi_to_data_structure(midi_file_path):
    """Read a MIDI file with mido and collect its note messages per track.

    Only ``note_on`` and ``note_off`` messages are kept. Tracks that contain
    no such message are omitted from the result.

    This is a best-effort reader: any exception (missing ``mido`` package,
    unreadable file, ...) is printed as ``Error: <message>`` and whatever was
    collected up to that point is returned.

    Args:
        midi_file_path: Path to the MIDI file to read.

    Returns:
        list[list[dict]]: One inner list per track that has note messages.
        Each dict has the keys ``'type'``, ``'note_number'``, ``'note'``,
        ``'octave'``, ``'velocity'`` and ``'time'``.
    """
    data_structure = []

    try:
        # Imported locally because the notebook's top-level `import mido` is
        # commented out; without it the lookup below raised NameError on every
        # call. It lives inside the try so that a missing package is reported
        # the same best-effort way as any other failure.
        import mido

        midi_file = mido.MidiFile(midi_file_path)

        for track in midi_file.tracks:
            track_data = []
            for message in track:
                if message.type in ('note_on', 'note_off'):
                    note_name = note_number_to_name_with_octave(message.note)
                    track_data.append({
                        'type': message.type,
                        'note_number': message.note,
                        'note': note_name["note_name"],
                        'octave': note_name["octave"],
                        'velocity': message.velocity,
                        # Stored exactly as mido reports it for this message.
                        'time': message.time
                    })
            # Skip tracks without any note message (e.g. metadata-only tracks).
            if track_data:
                data_structure.append(track_data)

    except Exception as e:
        print(f"Error: {e}")

    return data_structure

## Sử dụng hàm

In [19]:
# data_structure = midi_to_data_structure(midi_file_paths[0])

# for track in data_structure:
#     for message in track:
#         print(message)

In [20]:
# data_structure = midi_to_data_structure(midi_file_paths[1])

# for track in data_structure:
#     for message in track:
#         print(message)

## MIDI sang Data Structure in Python với thư viện pretty_midi

In [21]:
def read_midi_file(midi_file_path):
    """Load a MIDI file with pretty_midi and tabulate its notes.

    Prints the first time signature of the file (``None`` when the file has
    no time-signature event) and the initial tempo in BPM.

    The ``Duration`` column is the note length in beats, computed from the
    note length in seconds and the *initial* tempo only; later tempo changes
    in the file are not taken into account.

    Args:
        midi_file_path: Path to the MIDI file to read.

    Returns:
        pandas.DataFrame: One row per note across all instruments, ordered by
        start time, with the columns ``Start``, ``End``, ``Duration``,
        ``Note Name``, ``Octave``, ``Velocity`` and ``Instrument``.
    """
    midi_data = pretty_midi.PrettyMIDI(midi_file_path)

    # Time-signature events are optional, so the list may be empty.
    time_signature_changes = midi_data.time_signature_changes
    time_signature = time_signature_changes[0] if time_signature_changes else None
    print(f"Time Signature: {time_signature}")

    # get_tempo_changes() returns (change_times, tempi) as arrays. Take the
    # first tempo explicitly: converting the whole array with float() fails as
    # soon as the file contains more than one tempo.
    _, tempi = midi_data.get_tempo_changes()
    tempo = float(tempi[0])  # BPM (beats per minute)
    print(f"Tempo: {tempo}")

    beats_per_second = tempo / 60

    midi_list = []
    for instrument in midi_data.instruments:
        for note in instrument.notes:
            note_info = note_number_to_name_with_octave(note.pitch)
            midi_list.append([
                note.start,
                note.end,
                beats_per_second * (note.end - note.start),
                note_info["note_name"],
                note_info["octave"],
                note.velocity,
                instrument.name,
            ])

    # Stable sort: notes starting at the same time keep their original order.
    midi_list.sort(key=lambda row: row[0])

    return pd.DataFrame(
        midi_list,
        columns=['Start', 'End', 'Duration', 'Note Name', 'Octave', 'Velocity', 'Instrument']
    )

## Sử dụng hàm

In [22]:
# Load the timing test file into a DataFrame; read_midi_file also prints the
# time signature and tempo. The printed note (in Vietnamese) states that
# Start/End are in seconds and Duration is in beats.
df = read_midi_file("./data/TimeTesting.mid")
print("Start, End đơn vị là giây, Duration đơn vị là beat")
# Bare expression so the DataFrame is rendered as the cell output.
df

Time Signature: 4/4 at 0.00 seconds
Tempo: 128.0
Start, End đơn vị là giây, Duration đơn vị là beat


Unnamed: 0,Start,End,Duration,Note Name,Octave,Velocity,Instrument
0,0.0,0.039062,0.083333,F,4,100,Steinway D #2 1
1,0.0,0.058594,0.125,F#,4,100,Steinway D #2 1
2,0.0,0.117188,0.25,G,4,100,Steinway D #2 1
3,0.0,0.117188,0.25,C,4,100,Steinway D #2 1
4,0.0,7.5,16.0,A,5,100,Steinway D #2 1
5,0.46875,0.703125,0.5,C#,4,100,Steinway D #2 1
6,0.9375,1.289062,0.75,D,4,100,Steinway D #2 1
7,1.40625,1.875,1.0,D#,4,100,Steinway D #2 1
8,1.875,2.8125,2.0,E,4,100,Steinway D #2 1
9,3.75,5.625,4.0,F,4,100,Steinway D #2 1


In [23]:
# Same as the previous cell, for the synth loop sample. Note that this
# rebinds the notebook-level name `df`.
df = read_midi_file("./data/Stickz - Sentio Synth Loop 001 - 90BPM Amin - MIDI.mid")
print("Start, End đơn vị là giây, Duration đơn vị là beat")
# Bare expression so the DataFrame is rendered as the cell output.
df

Time Signature: 4/4 at 0.00 seconds
Tempo: 89.9999550000225
Start, End đơn vị là giây, Duration đơn vị là beat


Unnamed: 0,Start,End,Duration,Note Name,Octave,Velocity,Instrument
0,0.000000,0.562500,0.843750,C,5,100,1
1,0.000000,1.000001,1.500000,A,1,100,1
2,1.000001,1.333334,0.500000,A,1,100,1
3,1.166667,1.634029,0.701042,C,5,100,1
4,1.333334,2.000001,1.000000,A,1,100,1
...,...,...,...,...,...,...,...
56,18.500009,21.333344,4.250000,F,3,127,1
57,18.500009,21.333344,4.250000,E,4,127,1
58,18.500009,21.333344,4.250000,C,4,127,1
59,18.500009,21.333344,4.250000,F,2,127,1


In [24]:
# Convert a MIDI file into a DataFrame of notes. The intent is to express the
# duration as a note value (half note, quarter note, 1/8, 1/16, 1/32, 1/64, ...),
# but that conversion is not implemented yet: Duration is still in beats.
def read_midi_file_with_note_length(file_path):
    """Load a MIDI file with pretty_midi and tabulate its notes (no printing).

    NOTE: despite the function name, ``Duration`` is currently the note
    length in beats, computed from the note length in seconds and the
    *initial* tempo only; later tempo changes in the file are not taken into
    account.

    Args:
        file_path: Path to the MIDI file to read.

    Returns:
        pandas.DataFrame: One row per note across all instruments, ordered by
        start time, with the columns ``Start``, ``End``, ``Duration``,
        ``Note Name``, ``Octave``, ``Velocity`` and ``Instrument``.
    """
    midi_data = pretty_midi.PrettyMIDI(file_path)

    # get_tempo_changes() returns (change_times, tempi) as arrays. Take the
    # first tempo explicitly: converting the whole array with float() fails as
    # soon as the file contains more than one tempo.
    _, tempi = midi_data.get_tempo_changes()
    tempo = float(tempi[0])  # BPM (beats per minute)

    beats_per_second = tempo / 60

    midi_list = []
    for instrument in midi_data.instruments:
        for note in instrument.notes:
            note_info = note_number_to_name_with_octave(note.pitch)
            midi_list.append([
                note.start,
                note.end,
                beats_per_second * (note.end - note.start),
                note_info["note_name"],
                note_info["octave"],
                note.velocity,
                instrument.name,
            ])

    # Stable sort: notes starting at the same time keep their original order.
    midi_list.sort(key=lambda row: row[0])

    return pd.DataFrame(
        midi_list,
        columns=['Start', 'End', 'Duration', 'Note Name', 'Octave', 'Velocity', 'Instrument']
    )

# Load the timing test file without the time-signature/tempo printout.
df = read_midi_file_with_note_length("./data/TimeTesting.mid")

# The printed note (in Vietnamese) states that Start/End are in seconds and
# Duration is in beats.
print("Start, End đơn vị là giây, Duration đơn vị là beat")

# Bare expression so the DataFrame is rendered as the cell output.
df


Start, End đơn vị là giây, Duration đơn vị là beat


Unnamed: 0,Start,End,Duration,Note Name,Octave,Velocity,Instrument
0,0.0,0.039062,0.083333,F,4,100,Steinway D #2 1
1,0.0,0.058594,0.125,F#,4,100,Steinway D #2 1
2,0.0,0.117188,0.25,G,4,100,Steinway D #2 1
3,0.0,0.117188,0.25,C,4,100,Steinway D #2 1
4,0.0,7.5,16.0,A,5,100,Steinway D #2 1
5,0.46875,0.703125,0.5,C#,4,100,Steinway D #2 1
6,0.9375,1.289062,0.75,D,4,100,Steinway D #2 1
7,1.40625,1.875,1.0,D#,4,100,Steinway D #2 1
8,1.875,2.8125,2.0,E,4,100,Steinway D #2 1
9,3.75,5.625,4.0,F,4,100,Steinway D #2 1


In [25]:
# Convert a note DataFrame back into a MIDI file.
def midi_dataframe_to_midi(df, output_file_path, program=0, initial_tempo=120.0):
    """Write the notes of a DataFrame to a MIDI file using pretty_midi.

    All rows are written to a single instrument; the ``Instrument`` and
    ``Duration`` columns, if present, are not used. Note timing comes from
    ``Start``/``End``.

    Args:
        df: DataFrame with the columns ``Start``, ``End``, ``Note Name``,
            ``Octave`` and ``Velocity``.
        output_file_path: Destination path of the MIDI file.
        program: MIDI program number of the single output instrument.
            Defaults to 0, the value that used to be hard-coded.
        initial_tempo: Tempo in BPM passed to ``pretty_midi.PrettyMIDI``.
            Defaults to 120.0, which is pretty_midi's own default.

    Returns:
        None. The file at ``output_file_path`` is written as a side effect.
    """
    midi = pretty_midi.PrettyMIDI(initial_tempo=initial_tempo)
    instrument = pretty_midi.Instrument(program)

    # Iterate column-wise instead of df.iterrows(): it is faster and does not
    # coerce every row into a single object-dtype Series.
    note_columns = (df["Start"], df["End"], df["Note Name"], df["Octave"], df["Velocity"])
    for start, end, note_name, octave, velocity in zip(*note_columns):
        # int(): an octave stored as a float (e.g. 4.0) would otherwise yield
        # an unparsable name such as "F4.0".
        note_number = pretty_midi.note_name_to_number(f"{note_name}{int(octave)}")
        instrument.notes.append(
            pretty_midi.Note(
                velocity=int(velocity),
                pitch=note_number,
                start=float(start),
                end=float(end),
            )
        )

    midi.instruments.append(instrument)
    midi.write(output_file_path)

# Round trip: read the timing test file into a DataFrame, then write that
# DataFrame back out as a new MIDI file next to the original.
df = read_midi_file_with_note_length("./data/TimeTesting.mid")
midi_dataframe_to_midi(df, "./data/TimeTesting_1.mid")

