# Test how to induce phone effect

In [7]:
import IPython
import torchaudio
import torchaudio.functional as F

In [8]:
fpath = 'test-transcription/hft-transcribed__02_R1_2004_05_Track05.wav'
waveform, sample_rate = torchaudio.load(fpath)

In [9]:
sample_rate

44100

In [65]:
# IPython.display.Audio(data=waveform, rate=sample_rate)

In [56]:
phone_wav = F.highpass_biquad(waveform, sample_rate, cutoff_freq=1200)
phone_wav = F.lowpass_biquad(phone_wav, sample_rate, cutoff_freq=1400)
resample_rate = 6000
phone_wav = F.resample(phone_wav, orig_freq=sample_rate, new_freq=resample_rate, lowpass_filter_width=3)

# MIR_EVAL

In [55]:
import pretty_midi
import numpy as np 
import mir_eval

def midi_to_intervals_and_pitches(midi_file_path):
    """
    This function reads a MIDI file and extracts note intervals and pitches
    suitable for use with mir_eval's transcription evaluation functions.
    """
    # Load the MIDI file
    midi_data = pretty_midi.PrettyMIDI(midi_file_path)
    
    # Prepare lists to collect note intervals and pitches
    notes = []
    for instrument in midi_data.instruments:
        # Skip drum instruments
        if not instrument.is_drum:
            for note in instrument.notes:
                notes.append([note.start, note.end, note.pitch])
    notes = sorted(notes, key=lambda x: x[0])
    notes = np.array(notes)
    intervals, pitches = notes[:, :2], notes[:, 2]
    intervals -= intervals[0][0]
    return intervals, pitches

def midi_to_hz(note, shift=0):
    """
    Convert MIDI to HZ.

    Shift, if != 0, is subtracted from the MIDI note. Use "2" for the hFT augmented model transcriptions, else pitches won't match.
    """
    # the one used in hFT transformer
    return 440.0 * (2.0 ** (note.astype(int) - shift - 69) / 12)
    a = 440 # frequency of A (common value is 440Hz)
    # return (a / 32) * (2 ** ((note - 9) / 12))

# Kong's Alignment Method

In [17]:
import csv
def get_stats(csv_path):
    """Parse aligned results csv file to get results.

    Args:
      csv_path: str, aligned result path, e.g., xx_corresp.txt

    Returns:
      stat_dict, dict, keys: 
          true positive (TP), 
          deletion (D), 
          insertion (I), 
          substitution (S), 
          error rate (ER), 
          ground truth number (N)
    """
    with open(csv_path, 'r') as fr:
        reader = csv.reader(fr, delimiter='\t')
        lines = list(reader)

    lines = lines[1 :]

    TP, D, I, S = 0, 0, 0, 0
    align_counter = []
    ref_counter = []

    for line in lines:
        line = line[0 : -1]
        [alignID, _, _, alignPitch, _, refID, _, _, refPitch, _] = line

        if alignID != '*' and refID != '*':
            if alignPitch == refPitch:
                TP += 1
            else:
                S += 1

        if alignID == '*':
            D += 1

        if refID == '*':
            I += 1

    N = TP + D + S
    ER = (D + I + S) / N
    stat_dict = {'TP': TP, 'D': D, 'I': I, 'S': S, 'ER': ER, 'N': N}
    return stat_dict

In [18]:
import os 
def align_files(ref_fp, est_fp):
    align_tools_dir = '../../2017_midi_alignment'
    ref_fn = os.path.basename(ref_fp)
    est_fn = os.path.basename(est_fp)
    ref_fn_name, ext = os.path.splitext(ref_fn)
    est_fn_name, ext = os.path.splitext(est_fn)
    
    # Copy MIDI files
    cmd = f'cp "{ref_fp}" "{align_tools_dir}/{ref_fn}"; '
    cmd += f'cp "{est_fp}" "{align_tools_dir}/{est_fn}"; '
    print(cmd)
    os.system(cmd)
    
    # Align
    cmd = f'cd {align_tools_dir}; '
    # cmd += f'./MIDIToMIDIAlign.sh {ref_fn_name} {est_fn_name}; '
    cmd += f'./MIDIToMIDIAlign.sh {ref_fn} {est_fn}; '
    print(cmd)
    os.system(cmd)

# Test Sample MIDI

In [None]:
from importlib import reload
import sys
sys.path.insert(0, '../../aria-dl/hFT-Transformer/evaluation/')
import transcribe_new_files as t
import glob
import aria.utils
from importlib import reload 
reload(aria)
import IPython

all_maestro_files = sorted(glob.glob('../../corpus/maestro-v3.0.0/2004/*'))

In [3]:
input_wav_file = 'test-transcription/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.wav'
output_midi_file = 'test-transcription/hft-transcribed__02_R1_2004_05_Track05.midi'
# t.transcribe_file(input_wav_file, output_midi_file)
gold_truth_midi_file = 'test-transcription/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi'

In [None]:
aria.utils.midi_to_audio("test-transcription/hft-transcribed__02_R1_2004_05_Track05.midi")

In [67]:
import IPython
# IPython.display.Audio(data='test-transcription/hft-transcribed__02_R1_2004_05_Track05.wav', rate=44100)

In [68]:
import IPython
# IPython.display.Audio(data=input_wav_file, rate=44100)

#### evaluate using mir_eval

In [56]:
ref_intervals, ref_pitches = midi_to_intervals_and_pitches(gold_truth_midi_file)
est_intervals, est_pitches = midi_to_intervals_and_pitches(output_midi_file)

ref_pitches_hz = midi_to_hz(ref_pitches)
est_pitches_hz = midi_to_hz(est_pitches, shift=2) ## shift=2 because hFT transcribes 2 notes above, for some reason

In [57]:
scores = mir_eval.transcription.evaluate(ref_intervals, ref_pitches_hz, est_intervals, est_pitches_hz)

In [47]:
matched_onsets = mir_eval.transcription.match_note_onsets(ref_intervals, est_intervals)

In [50]:
ref_pitches[[0, 1, 2, 3, 4, 5]]

array([71., 55., 71., 59., 62., 72.])

In [51]:
est_pitches[[0, 1, 2, 3, 4, 5]]

array([73., 57., 73., 61., 64., 74.])

In [64]:
import json
json.dumps(scores, indent=4)

'{\n    "Precision": 0.7708092856226754,\n    "Recall": 0.7613377248543197,\n    "F-measure": 0.7660442291759608,\n    "Average_Overlap_Ratio": 0.8455788515638166,\n    "Precision_no_offset": 0.9976914197768373,\n    "Recall_no_offset": 0.9854319736508741,\n    "F-measure_no_offset": 0.9915238034542095,\n    "Average_Overlap_Ratio_no_offset": 0.7557460905388416,\n    "Onset_Precision": 0.9980761831473643,\n    "Onset_Recall": 0.9858120091208513,\n    "Onset_F-measure": 0.9919061882607865,\n    "Offset_Precision": 0.8117224573553931,\n    "Offset_Recall": 0.8017481631618951,\n    "Offset_F-measure": 0.806704480275317\n}'

### evaluate using kong's method

In [18]:
output_midi_file

'test-transcription/hft-transcribed__02_R1_2004_05_Track05.midi'

In [19]:
align_files(gold_truth_midi_file, output_midi_file)

cp "test-transcription/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi" "../../2017_midi_alignment/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi"; cp "test-transcription/hft-transcribed__02_R1_2004_05_Track05.midi" "../../2017_midi_alignment/hft-transcribed__02_R1_2004_05_Track05.midi"; 
cd ../../2017_midi_alignment; ./MIDIToMIDIAlign.sh MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi hft-transcribed__02_R1_2004_05_Track05.midi; 
File not found: ./MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi_fmt3x.txt
File not found: ./MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi_hmm.txt
File not found: ./MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi_fmt3x.txt
File not found: ./MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi_fmt3x.txt
Fil

Assertion failed: (ifs.is_open()), function ReadFile, file Midi_v170101.hpp, line 177.
./MIDIToMIDIAlign.sh: line 14: 68574 Abort trap: 6           $ProgramFolder/midi2pianoroll 0 $RelCurrentFolder/${I1}
Assertion failed: (ifs.is_open()), function ReadFile, file Midi_v170101.hpp, line 177.
./MIDIToMIDIAlign.sh: line 15: 68575 Abort trap: 6           $ProgramFolder/midi2pianoroll 0 $RelCurrentFolder/${I2}
./MIDIToMIDIAlign.sh: line 17: 68576 Segmentation fault: 11  $ProgramFolder/SprToFmt3x $RelCurrentFolder/${I1}_spr.txt $RelCurrentFolder/${I1}_fmt3x.txt
Assertion failed: (false), function ReadFile, file Fmt3x_v170225.hpp, line 252.
./MIDIToMIDIAlign.sh: line 18: 68577 Abort trap: 6           $ProgramFolder/Fmt3xToHmm $RelCurrentFolder/${I1}_fmt3x.txt $RelCurrentFolder/${I1}_hmm.txt
Assertion failed: (false), function ReadFile, file Hmm_v170225.hpp, line 69.
./MIDIToMIDIAlign.sh: line 20: 68578 Abort trap: 6           $ProgramFolder/ScorePerfmMatcher $RelCurrentFolder/${I1}_hmm.txt $Re

In [17]:
ls ../../2017_midi_alignment/

[34mCode[m[m/
LICENCE.txt
MANUAL.pdf
MIDI-Unprocessed_02_R1_2009_03-06_ORIG_MID--AUDIO_02_R1_2009_02_R1_2009_04_WAV.mid
MIDI-Unprocessed_02_R1_2009_03-06_ORIG_MID--AUDIO_02_R1_2009_02_R1_2009_04_WAV_corresp.txt
MIDI-Unprocessed_02_R1_2009_03-06_ORIG_MID--AUDIO_02_R1_2009_02_R1_2009_04_WAV_match.txt
MIDI-Unprocessed_02_R1_2009_03-06_ORIG_MID--AUDIO_02_R1_2009_02_R1_2009_04_WAV_spr.txt
MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi
[31mMIDIToMIDIAlign.sh[m[m*
[31mMusicXMLToMIDIAlign.sh[m[m*
[34mPrograms[m[m/
Scriabin_op_42_Hf4EIJB4DGc_cut_no_4.mid
Scriabin_op_42_Hf4EIJB4DGc_cut_no_4_corresp.txt
Scriabin_op_42_Hf4EIJB4DGc_cut_no_4_match.txt
Scriabin_op_42_Hf4EIJB4DGc_cut_no_4_spr.txt
[31mcompile.sh[m[m*
ex_align1.mid
[31mex_align2.mid[m[m*
ex_ref.musx
ex_ref.pdf
ex_ref.xml
ex_ref_fmt3x.txt
ex_ref_hmm.txt
hft-transcribed__02_R1_2004_05_Track05.midi
scriabin_etude_op_42_no_4_dery.mid
scriabin_etude_op_42_no_4_dery_fmt3x.txt
scriabin_et

# Test Kong's Samples

In [2]:
import sys
sys.path.insert(0, '..')
import amt.audio

In [None]:
audio_transform = amt.audio.AudioTransform()

In [10]:
ls ../../GiantMIDI-Piano/midis_preview/

Chopin, Frédéric, Études, Op.10, g0hoN6_HDVU.mid
Handel, George Frideric, Air in E major, HWV 425, bNzVz5byPqk.mid
Liszt, Franz, Hungarian Rhapsody No.2, S.244_2, LdH1hSWGFGU.mid
Ravel, Maurice, Jeux d'eau, v-QmwrhO3ec.mid


In [28]:
import pandas as pd
import os 
df = pd.read_csv('../../GiantMIDI-Piano/midis_for_evaluation/groundtruth_maestro_giantmidi-piano.csv', sep='\t')

In [62]:
gt_folder = '../../GiantMIDI-Piano/midis_for_evaluation/ground_truth/'
giant_midi_folder = '../../GiantMIDI-Piano/midis_for_evaluation/giantmidi-piano/'
maestro_midi_folder = '../../GiantMIDI-Piano/midis_for_evaluation/maestro/'
gt_fn, giant_midi_fn, maestro_fn = df[['GroundTruth', 'GiantMIDI-Piano', 'Maestro']].iloc[0]

In [65]:
gt_fp = os.path.join(gt_folder, gt_fn)
giant_midi_fp = os.path.join(giant_midi_folder, giant_midi_fn)
maestro_midi_fp = os.path.join(maestro_midi_folder, maestro_fn)

In [None]:
import mirdata
import mido 
mido.MidiFile(filename=gt_fp)

In [160]:
ref_intervals, ref_pitches = midi_to_intervals_and_pitches(gt_fp)
est_intervals, est_pitches = midi_to_intervals_and_pitches(maestro_midi_fp)

ref_pitches_hz = midi_to_hz(ref_pitches)
est_pitches_hz = midi_to_hz(est_pitches)

In [161]:
mir_eval.transcription.match_notes(
    ref_intervals, ref_pitches_hz, est_intervals, est_pitches_hz
)

[(1, 1), (2, 2), (23, 23), (67, 66), (71, 70), (336, 385), (677, 709)]

In [164]:
# mir_eval.transcription.precision_recall_f1_overlap()
# mir_eval.transcription.evaluate()

In [165]:
scores = mir_eval.transcription.evaluate(ref_intervals, ref_pitches_hz, est_intervals, est_pitches_hz)

In [166]:
scores

OrderedDict([('Precision', 0.008939974457215836),
             ('Recall', 0.008816120906801008),
             ('F-measure', 0.008877615726062145),
             ('Average_Overlap_Ratio', 0.8771661491453748),
             ('Precision_no_offset', 0.04469987228607918),
             ('Recall_no_offset', 0.04408060453400504),
             ('F-measure_no_offset', 0.04438807863031072),
             ('Average_Overlap_Ratio_no_offset', 0.5049151558206666),
             ('Onset_Precision', 0.2771392081736909),
             ('Onset_Recall', 0.27329974811083124),
             ('Onset_F-measure', 0.2752060875079264),
             ('Offset_Precision', 0.4623243933588761),
             ('Offset_Recall', 0.45591939546599497),
             ('Offset_F-measure', 0.45909955611921366)])

In [146]:
import pretty_midi
import numpy as np
import mir_eval

def midi_to_intervals_and_pitches(midi_file_path):
    # Load the MIDI file
    midi_data = pretty_midi.PrettyMIDI(midi_file_path)
    
    intervals = []
    pitches = []
    
    for instrument in midi_data.instruments:
        if not instrument.is_drum:
            for note in instrument.notes:
                start_time = note.start
                end_time = note.end
                intervals.append([start_time, end_time])
                pitches.append(note.pitch)
    
    intervals = np.array(intervals)
    pitches = np.array(pitches)
    
    return intervals, pitches

# Load your reference and estimated MIDI files
ref_intervals, ref_pitches = midi_to_intervals_and_pitches(gt_fp)
est_intervals, est_pitches = midi_to_intervals_and_pitches(giant_midi_fp)
ref_pitches_hz = midi_to_hz(ref_pitches)
est_pitches_hz = midi_to_hz(est_pitches)

# Evaluate using mir_eval
precision, recall, f_measure, _ = mir_eval.transcription.precision_recall_f1_overlap(
    ref_intervals, ref_pitches_hz, est_intervals, est_pitches_hz
)

print(f"Precision: {precision}, Recall: {recall}, F-measure: {f_measure}")

Precision: 0.0012091898428053204, Recall: 0.0012594458438287153, F-measure: 0.0012338062924120913


# Try using the GiantMIDI Method

In [None]:
csv_path = f'{align_tools_dir}/{maestro_fn[: -4]}_corresp.txt'
maestro_stats = get_stats(csv_path)

csv_path = f'{align_tools_dir}/{giant_midi_fn[: -4]}_corresp.txt'
giantmidi_stats = get_stats(csv_path)

In [155]:
giantmidi_stats

{'TP': 780, 'D': 8, 'I': 41, 'S': 6, 'ER': 0.06926952141057935, 'N': 794}

In [66]:
# IPython.display.Audio(data=phone_wav, rate=resample_rate)