In [8]:
import pandas as pd
df = pd.read_csv("predicted_res/daphne/annotated_music_df.csv")
df.head(50)

Unnamed: 0,original_index,filename,type,track,channel,pitch,onset,release,velocity,other,slice_id,distinct_slice_id,duplicated_indices,harmonic_analysis
0,0,daphne_am_bach.mid,time_signature,0,,,0.0,,,"{'numerator': 1, 'denominator': 8, 'clocks_per...",-1,-1,,
1,1,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 121, 'value': 0}",-1,-1,,
2,2,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 7, 'value': 100}",-1,-1,,
3,3,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 10, 'value': 41}",-1,-1,,
4,4,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 91, 'value': 0}",-1,-1,,
5,5,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 93, 'value': 0}",-1,-1,,
6,6,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 2, 'value': 80}",-1,-1,,
7,7,daphne_am_bach.mid,control_change,1,1.0,,0.0,,,"{'control': 121, 'value': 0}",-1,-1,,
8,8,daphne_am_bach.mid,control_change,1,1.0,,0.0,,,"{'control': 7, 'value': 100}",-1,-1,,
9,9,daphne_am_bach.mid,control_change,1,1.0,,0.0,,,"{'control': 10, 'value': 85}",-1,-1,,


In [2]:
df["harmonic_analysis"].unique()

array([nan, 'Ab.IM', 'VMm7', 'IM', 'VMm65', 'VM6', 'VM'], dtype=object)

In [4]:
def parse_harmonic_analysis(rn):
    """
    Parse harmonic analysis notation like 'G.IM', 'g.im/VI', 'IM'
    Removes key prefix and quality indicators (M/m)
    
    Args:
        rn: Roman numeral string
        
    Returns:
        str: cleaned Roman numeral (e.g., 'I', 'V/VI', 'iv')
    """
    if not rn or pd.isna(rn):
        return None
    
    rn = str(rn).strip()
    
    # Remove key prefix (everything before the dot)
    if "." in rn:
        rn = rn.split(".", 1)[1]
    
    # Remove quality indicators M and m
    rn = rn.replace("M", "").replace("m", "")
    
    return rn if rn else None

In [5]:
df_harmonic_cleaned = df.copy()
df_harmonic_cleaned["harmonic_analysis"] = df_harmonic_cleaned["harmonic_analysis"].apply(lambda x: parse_harmonic_analysis(x))
df_harmonic_cleaned[ df_harmonic_cleaned["harmonic_analysis"].notna()]


Unnamed: 0,original_index,filename,type,track,channel,pitch,onset,release,velocity,other,slice_id,distinct_slice_id,duplicated_indices,harmonic_analysis
32,32,daphne_am_bach.mid,note,1,1.0,63.0,0.0,0.5,80.0,,0,0,3233.0,I
42,45,daphne_am_bach.mid,note,2,1.0,51.0,1.5,2.0,80.0,,3,2,,V7
51,57,daphne_am_bach.mid,note,2,1.0,44.0,2.5,3.0,80.0,,6,5,,I
66,77,daphne_am_bach.mid,note,2,1.0,55.0,4.5,5.0,80.0,,11,10,,V65
74,87,daphne_am_bach.mid,note,2,1.0,56.0,5.5,6.0,80.0,,13,11,,I
96,115,daphne_am_bach.mid,note,2,1.0,51.0,9.5,10.1875,80.0,,20,16,,V7
107,129,daphne_am_bach.mid,note,2,1.0,44.0,10.5,11.4375,80.0,,24,20,,I
122,148,daphne_am_bach.mid,note,2,1.0,55.0,12.5,13.0,80.0,,29,25,,V6
128,156,daphne_am_bach.mid,note,2,1.0,55.0,13.5,14.0,80.0,,31,26,,V65
132,161,daphne_am_bach.mid,note,2,1.0,56.0,14.0,14.5,80.0,,32,27,,I


In [5]:
not_narn= df_harmonic_cleaned [ df_harmonic_cleaned ["harmonic_analysis"].notna()]
rn_times = not_narn["onset"].values
rn_times /2
rn_values = not_narn["harmonic_analysis"].values

In [54]:
rn_values

array(['I', 'V7', 'I', 'V65', 'I', 'V7', 'I', 'V6', 'V65', 'I', 'V'],
      dtype=object)

In [6]:
from music21 import converter, note, chord, tempo
def get_score(score, bpm = 120):
        
    score = converter.parse(score)
    for el in score.recurse():
        if isinstance(el, tempo.MetronomeMark):
            bpm = el.number
            break
    sec_per_beat = 60 / bpm

    onset_data = []
    onsets = []

    for n in score.recurse().notes:
        onset_beats = float(n.getOffsetInHierarchy(score))
        onset_seconds = onset_beats * sec_per_beat

        if isinstance(n, note.Note):
            pitch = n.pitch.nameWithOctave
        else:  # chord
            pitch = ".".join(p.nameWithOctave for p in n.pitches)

        onset_data.append({
            "pitch": pitch,
            "onset_beats": float(onset_beats),
            "onset_seconds": float(onset_seconds),
            "duration_beats": float(n.quarterLength),
        })
        onsets.append(onset_seconds)       
    return onsets, score

In [7]:
from music21 import converter, note, chord, tempo
score = converter.parse("test_audio/roman-numeral-daphne.musicxml")
bpm = 120
for el in score.recurse():
    if isinstance(el, tempo.MetronomeMark):
        bpm = el.number
        break
sec_per_beat = 60 / bpm

onset_data = []
onsets = []

for n in score.recurse().notes:
    onset_beats = float(n.getOffsetInHierarchy(score))
    onset_seconds = onset_beats * sec_per_beat

    if isinstance(n, note.Note):
        pitch = n.pitch.nameWithOctave
    else:  # chord
        pitch = ".".join(p.nameWithOctave for p in n.pitches)

    onset_data.append({
        "pitch": pitch,
        "onset_beats": float(onset_beats),
        "onset_seconds": float(onset_seconds),
        "duration_beats": float(n.quarterLength),
    })
    onsets.append(onset_seconds)

In [23]:
from music21 import roman, key
from music21 import harmony
from music21 import expressions
def add_rn (score, df_harmonic_cleaned):
    part = score.parts[0]
    part_flat = part.flatten()
    notes = [n for n in part_flat.notes]  # includes Chord objects
    note_onsets = [float(n.offset) for n in notes]
    onset_col = "onset"
    rn_col = "harmonic_analysis"
    tolerance = 0.05
# 4) For each lyric onset, find closest note and attach text
    for _, row in df_harmonic_cleaned.iterrows():
        target_onset = float(row[onset_col])
        text = str(row[rn_col])
        

        # find index of closest note onset
        if not note_onsets:
            continue
        best_idx = min(
            range(len(note_onsets)),
            key=lambda i: abs(note_onsets[i] - target_onset)
        )
        diff = abs(note_onsets[best_idx] - target_onset)

        # only attach if close enough
        if diff <= tolerance and text != "None":
            n = notes[best_idx]
            n.lyric= text
            # onset = note_onsets[best_idx]
            # k = key.Key("C")
            # rn = roman.RomanNumeral(text, k)
            # rn.writeAsChord = False
            # part.insert(onset, rn)   # if you want, or just skip this line

            # # 2) visible analysis in the exported score: staff text
            # expr = expressions.TextExpression(text)
            # part.insert(onset, expr)

    return score
    #score.write("musicxml", fp="predicted_res/daphne/output_score_as_l.musicxml")


In [24]:
import pandas as pd
from music21 import converter, note, chord, tempo
def update_musicxml (df_path, orig_xml_path, output_xml_path):
    df = pd.read_csv(df_path)
    df_harmonic_cleaned = df.copy()
    # convert harmonic predictions to rn
    df_harmonic_cleaned["harmonic_analysis"] = df_harmonic_cleaned["harmonic_analysis"].apply(lambda x: parse_harmonic_analysis(x))
    onsets, score = get_score(orig_xml_path)
    score = add_rn (score, df_harmonic_cleaned)
    score.write("musicxml", fp=output_xml_path)

In [None]:
update_musicxml (df_path = "predicted_res/wiese/annotated_music_df.csv", orig_xml_path ="test_audio/roman-numeral-daphne.musicxml" ,
                  output_xml_path= "predicted_res/daphne/output_score_as_l.musicxml")

In [None]:
import xml.etree.ElementTree as ET

def strip_ns(tag):
    # turns "{ns}harmony" â†’ "harmony"
    if "}" in tag:
        return tag.split("}", 1)[1]
    return tag

def remove_all_harmony(in_path, out_path):
    tree = ET.parse(in_path)
    root = tree.getroot()

    # iterate over all elements and remove children named "harmony"
    for parent in root.iter():
        # make a copy of children list so we can modify while iterating
        for child in list(parent):
            if strip_ns(child.tag) == "harmony":
                parent.remove(child)

    tree.write(out_path, encoding="utf-8", xml_declaration=True)
#tree.write("predicted_res/daphne/cleaned.musicxml", encoding="utf-8", xml_declaration=True)
remove_all_harmony(in_path="predicted_res/daphne/scores.musicxml", out_path="predicted_res/daphne/cleaned.musicxml")

In [20]:
root

<Element 'score-partwise' at 0x1546868af970>

In [64]:
def remove_all_lyrics(mxl_in, mxl_out):
    score = converter.parse(mxl_in)

    # Go through all elements that can have lyrics (notes, chords)
    for el in score.recurse():
        if hasattr(el, "lyrics"):
            el.lyrics = []   # clear all attached lyrics

    score.write("musicxml", fp=mxl_out)
    print(f"Saved score without lyrics to {mxl_out}")

In [None]:
#remove_all_lyrics("test_audio/daphne.musicxml", "test_audio/daphne_wo.musicxml")

Saved score without lyrics to test_audio/daphne.musicxml


In [83]:
# for el in score.recurse():
#     if hasattr(el, "lyrics"):
#         el.lyrics = []
# part = score.parts[1]
for i, p in enumerate(score.parts):
    count = sum(1 for n in p.recurse().notes if n.lyrics)
    print(i, count)

0 26
1 0
2 0


In [88]:
for i, part in enumerate(score.parts):
    print(f"Part {i}: {part.partName}")

Part 0: Voice
Part 1: Piano
Part 2: Piano


In [None]:

from music21 import roman, key
from music21 import expressions
# for el in score.recurse():
#     if hasattr(el, "lyrics"):
#         el.lyrics = []
part = score.parts[0]
part_flat = part.flatten()
notes = [n for n in part_flat.notes]  # includes Chord objects
note_onsets = [float(n.offset) for n in notes]
onset_col = "onset"
rn_col = "harmonic_analysis"
tolerance = 0.05
# 4) For each lyric onset, find closest note and attach text
for _, row in df_harmonic_cleaned .iterrows():
    target_onset = float(row[onset_col])
    text = str(row[rn_col])
    

    # find index of closest note onset
    if not note_onsets:
        continue
    best_idx = min(
        range(len(note_onsets)),
        key=lambda i: abs(note_onsets[i] - target_onset)
    )
    diff = abs(note_onsets[best_idx] - target_onset)

    # only attach if close enough
    if diff <= tolerance and text != "None":
        n = notes[best_idx]
        #n.addLyric(text)
        n.lyric= text
        onset = note_onsets[best_idx]

    # 1) (optional) keep RN object for *Python-side* analysis only
        k = key.Key("C")
        rn = roman.RomanNumeral(text, k)
        rn.writeAsChord = False
        #part.insert(onset, rn)   # if you want, or just skip this line

        # 2) visible analysis in the exported score: staff text
        expr = expressions.TextExpression(text)
        part.insert(onset, expr)

        print(f"Attached RN {rn} and text '{text}' at onset {onset}")
            
        print(f"Attached {text}")
score.write("musicxml", fp="predicted_res/daphne/output_score_as_l.musicxml")


Attached RN <music21.roman.RomanNumeral I in C major> and text 'I' at onset 0.0
Attached I
Attached RN <music21.roman.RomanNumeral V7 in C major> and text 'V7' at onset 1.5
Attached V7
Attached RN <music21.roman.RomanNumeral I in C major> and text 'I' at onset 2.5
Attached I
Attached RN <music21.roman.RomanNumeral V65 in C major> and text 'V65' at onset 4.5
Attached V65
Attached RN <music21.roman.RomanNumeral I in C major> and text 'I' at onset 5.5
Attached I
Attached RN <music21.roman.RomanNumeral V7 in C major> and text 'V7' at onset 9.5
Attached V7
Attached RN <music21.roman.RomanNumeral I in C major> and text 'I' at onset 10.5
Attached I
Attached RN <music21.roman.RomanNumeral V6 in C major> and text 'V6' at onset 12.5
Attached V6
Attached RN <music21.roman.RomanNumeral V65 in C major> and text 'V65' at onset 13.5
Attached V65
Attached RN <music21.roman.RomanNumeral I in C major> and text 'I' at onset 14.0
Attached I
Attached RN <music21.roman.RomanNumeral V in C major> and text 'V

PosixPath('/rwthfs/rz/cluster/home/ui556004/projects/musicbert_hf/predicted_res/daphne/output_score_as_l.musicxml')

In [25]:
import music21
print(music21.__version__)

9.3.0


In [16]:
import xml.etree.ElementTree as ET

def add_functions_from_lyrics(in_path, out_path, remove_lyrics=False):
    tree = ET.parse(in_path)
    root = tree.getroot()

    # Handle namespace if present
    if root.tag.startswith("{"):
        ns_uri = root.tag.split("}")[0].strip("{")
        def q(tag):  # qualify tag with namespace
            return f"{{{ns_uri}}}{tag}"
    else:
        ns_uri = None
        def q(tag):
            return tag

    # Find all measures
    measure_path = ".//" + (q("measure") if ns_uri else "measure")

    for measure in root.findall(measure_path):
        # We need a list() copy because we'll insert while iterating
        children = list(measure)
        for i, elem in enumerate(children):
            if elem.tag != q("note"):
                continue

            # Find lyrics on this note
            lyrics = elem.findall(q("lyric"))
            if not lyrics:
                continue

            # Take the first lyric's text as the RN
            text_el = lyrics[0].find(q("text"))
            if text_el is None:
                continue

            rn = (text_el.text or "").strip()
            if not rn or rn in ("None",):
                continue

            # Create <harmony><function>RN</function></harmony>
            harmony_el = ET.Element(q("harmony"))
            harmony_el.set("placement", "below")  # optional
            func_el = ET.SubElement(harmony_el, q("function"))
            func_el.text = rn

            # Insert harmony just before this <note>
            insert_index = measure.getchildren().index(elem)
            measure.insert(insert_index, harmony_el)

            # Optionally remove lyrics so only function remains
            if remove_lyrics:
                for l in lyrics:
                    elem.remove(l)

    tree.write(out_path, encoding="utf-8", xml_declaration=True)


In [17]:
add_functions_from_lyrics("predicted_res/daphne/output_score_as_l.musicxml",
                              "predicted_res/daphne/output_score_as_l.musicxml",
                              remove_lyrics=False)

AttributeError: 'xml.etree.ElementTree.Element' object has no attribute 'getchildren'

In [None]:
onset_data

[{'pitch': 'E-4',
  'onset_beats': 0.0,
  'onset_seconds': 0.0,
  'duration_beats': 0.5},
 {'pitch': 'C5',
  'onset_beats': 0.5,
  'onset_seconds': 0.25,
  'duration_beats': 0.5},
 {'pitch': 'C5',
  'onset_beats': 1.0,
  'onset_seconds': 0.5,
  'duration_beats': 0.5},
 {'pitch': 'C5',
  'onset_beats': 1.5,
  'onset_seconds': 0.75,
  'duration_beats': 0.5},
 {'pitch': 'B-4',
  'onset_beats': 2.0,
  'onset_seconds': 1.0,
  'duration_beats': 0.5},
 {'pitch': 'B-4',
  'onset_beats': 2.5,
  'onset_seconds': 1.25,
  'duration_beats': 0.0},
 {'pitch': 'A-4',
  'onset_beats': 2.5,
  'onset_seconds': 1.25,
  'duration_beats': 1.0},
 {'pitch': 'A-4',
  'onset_beats': 3.5,
  'onset_seconds': 1.75,
  'duration_beats': 0.5},
 {'pitch': 'A-4',
  'onset_beats': 4.0,
  'onset_seconds': 2.0,
  'duration_beats': 0.5},
 {'pitch': 'D-5',
  'onset_beats': 4.5,
  'onset_seconds': 2.25,
  'duration_beats': 0.5},
 {'pitch': 'D-5',
  'onset_beats': 5.0,
  'onset_seconds': 2.5,
  'duration_beats': 0.5},
 {'pitc

In [41]:
m = converter.parse("test_audio/daphne_am_bach.mid")

bpm = None
for el in m.recurse():
    if isinstance(el, tempo.MetronomeMark) and el.number is not None:
        bpm = el.number
        break

print("BPM:", bpm)

BPM: 120


In [3]:
import pandas as pd
from music21 import converter, note, chord

def parse_harmonic_analysis(rn):
    """
    Parse harmonic analysis notation like 'G.IM', 'g.im/VI', 'IM'
    Removes key prefix and quality indicators (M/m)
    
    Args:
        rn: Roman numeral string
        
    Returns:
        str: cleaned Roman numeral (e.g., 'I', 'V/VI', 'iv')
    """
    if not rn or pd.isna(rn):
        return None
    
    rn = str(rn).strip()
    
    # Remove key prefix (everything before the dot)
    if "." in rn:
        rn = rn.split(".", 1)[1]
    
    # Remove quality indicators M and m
    rn = rn.replace("M", "").replace("m", "")
    
    return rn if rn else None


def add_roman_numerals_to_score(score_path, csv_path, output_path, lyric_number=2):
    """
    Add Roman numeral annotations from CSV to a music score as lyrics
    
    Args:
        score_path: Path to input MusicXML/MXL file
        csv_path: Path to CSV file with columns 'onset' and 'harmonic_analysis'
        output_path: Path for output MusicXML file
        lyric_number: Which lyric line to use (1=first line, 2=second line, etc.)
    """
    
    # Load the score
    print(f"Loading score from {score_path}...")
    score = converter.parse(score_path)
    
    # Load the CSV
    print(f"Loading annotations from {csv_path}...")
    df = pd.read_csv(csv_path)
    
    # Validate required columns
    if 'onset' not in df.columns or 'harmonic_analysis' not in df.columns:
        raise ValueError("CSV must contain 'onset' and 'harmonic_analysis' columns")
    
    # Filter and sort annotations
    df_rn = df.dropna(subset=['harmonic_analysis']).sort_values('onset').reset_index(drop=True)
    
    print(f"Found {len(df_rn)} Roman numeral annotations")
    print("\nFirst few annotations:")
    print(df_rn[['onset', 'harmonic_analysis']].head())
    
    # Determine which part to annotate
    if 'track' in df.columns and not df['track'].isna().all():
        # Use track information if available
        part_tracks = df_rn['track'].dropna().unique()
        print(f"\nAnnotating tracks: {sorted(part_tracks)}")
    else:
        # Annotate the first part (usually melody/soprano)
        print("\nNo track information found, annotating first part")
    
    # Process each annotation
    annotations_added = 0
    for idx, row in df_rn.iterrows():
        onset = float(row['onset'])
        rn_text = str(row['harmonic_analysis'])
        
        # Parse the Roman numeral
        cleaned_rn = parse_harmonic_analysis(rn_text)
        if not cleaned_rn:
            continue
        
        # Determine which part to use
        if 'track' in df.columns and not pd.isna(row['track']):
            part_idx = int(row['track']) - 1  # Assuming 1-based track numbers
            part_idx = max(0, min(part_idx, len(score.parts) - 1))
        else:
            part_idx = 0
        
        part = score.parts[part_idx]
        
        # Find the note/chord at this onset
        flat_part = part.flatten()
        elements_at_onset = flat_part.getElementsByOffset(
            onset,
            mustBeginInSpan=False,
            mustFinishInSpan=False,
            classList=[note.Note, chord.Chord]
        )
        
        # Add lyric to the first note/chord found at this onset
        if elements_at_onset:
            target = elements_at_onset[0]
            target.lyric = cleaned_rn
            # Or use specific lyric number: target.addLyric(cleaned_rn, lyric_number)
            annotations_added += 1
            
            if annotations_added <= 5:  # Show first few for debugging
                print(f"  Added '{cleaned_rn}' at onset {onset} to {type(target).__name__}")
        else:
            print(f"  Warning: No note found at onset {onset} for '{cleaned_rn}'")
    
    print(f"\nSuccessfully added {annotations_added} annotations")
    
    # Write the annotated score
    print(f"Writing annotated score to {output_path}...")
    score.write('musicxml', fp=output_path)
    print("Done!")


# if __name__ == "__main__":
#     # Example usage
#     score_file = "input_score.mxl"
#     csv_file = "roman_numerals.csv"
#     output_file = "annotated_score.musicxml"
    
#     add_roman_numerals_to_score(score_file, csv_file, output_file)
    
#     # If you want to use a specific lyric line (e.g., line 2 for harmony below text):
#     # add_roman_numerals_to_score(score_file, csv_file, output_file, lyric_number=2)

In [6]:
add_roman_numerals_to_score(score_path= "test_audio/daphne.musicxml", csv_path= "predicted_res/daphne/annotated_music_df.csv", output_path= "predicted_res/daphne/output_score1.musicxml")


Loading score from test_audio/daphne.musicxml...
Loading annotations from predicted_res/daphne/annotated_music_df.csv...
Found 11 Roman numeral annotations

First few annotations:
   onset harmonic_analysis
0    0.0             Ab.IM
1    1.5              VMm7
2    2.5                IM
3    4.5             VMm65
4    5.5                IM

Annotating tracks: [np.int64(1), np.int64(2)]
  Added 'I' at onset 0.0 to Note
  Added 'V7' at onset 1.5 to NoChord
  Added 'I' at onset 2.5 to NoChord
  Added 'V65' at onset 4.5 to NoChord
  Added 'I' at onset 5.5 to NoChord

Successfully added 11 annotations
Writing annotated score to predicted_res/daphne/output_score1.musicxml...
Done!


In [None]:
from music21 import converter, harmony

# Load your MusicXML
def add_rn (score_path, output_path, df)
score = converter.parse('your_score.musicxml')

# Add chord symbols at specific measures/beats
for chord_data in your_harmony_list:  # Your extracted data
    m = score.measure(chord_data['measure'])
    beat_offset = chord_data['beat']
    
    # Create chord symbol
    cs = harmony.ChordSymbol(chord_data['harmony'])
    
    # Insert at the right position
    m.insert(beat_offset, cs)

# Save
score.write('musicxml', 'annotated_score.musicxml')

In [None]:
score_path= "test_audio/daphne.musicxml", csv_path= "predicted_res/daphne/annotated_music_df.csv", output_path= "predicted_res/daphne/output_score1.musicxml"
add_harmony_to_midi('sonata.mid', 'sonata.csv', 'sonata_annotated.musicxml')

In [1]:
import pandas as pd
from music21 import converter, note, chord, harmony, meter, tempo, key
from music21.stream import Score, Part, Measure
import numpy as np
import warnings
warnings.filterwarnings('ignore')
def parse_harmonic_analysis(rn):
    """
    Parse harmonic analysis notation like 'G.IM', 'g.im/VI', 'IM'
    Removes key prefix and quality indicators (M/m)
    
    Args:
        rn: Roman numeral string
        
    Returns:
        str: cleaned Roman numeral (e.g., 'I', 'V/VI', 'iv')
    """
    if not rn or pd.isna(rn):
        return None
    
    rn = str(rn).strip()
    
    # Remove key prefix (everything before the dot)
    if "." in rn:
        rn = rn.split(".", 1)[1]
    
    # Remove quality indicators M and m
    rn = rn.replace("M", "").replace("m", "")
    
    return rn if rn else None
def add_roman_numerals_to_score(score_path="test_audio/daphne.musicxml", 
                                csv_path="predicted_res/daphne/annotated_music_df.csv", 
                                output_path="predicted_res/daphne/output_score1.musicxml"):
    """
    Add Roman numeral harmonic analysis from CSV to MusicXML score.
    
    Args:
        score_path (str): Path to input MusicXML file
        csv_path (str): Path to CSV with harmonic analysis
        output_path (str): Path to output MusicXML file with annotations
    """
    
    # Load the score
    print(f"Loading score from {score_path}...")
    score = converter.parse(score_path)
    
    # Load the CSV data
    print(f"Loading harmonic analysis from {csv_path}...")
    df = pd.read_csv(csv_path)
    
    # Check if required columns exist
    required_columns = ['onset', 'harmonic_analysis']
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"CSV must contain '{col}' column")
    
    # Clean data: remove rows without harmonic analysis
    df_clean = df[df['harmonic_analysis'].notna()].copy()
    df_clean = df_clean[df_clean['harmonic_analysis'] != '']
    df_clean = df_clean.reset_index(drop=True)
    
    print(f"Found {len(df_clean)} harmonic annotations")
    
    # Get the main part (usually the first part with notes)
    main_part = score.parts[0] if len(score.parts) > 0 else score
    
    # Convert score to flat representation for easier timing calculations
    score_flat = main_part.flat
    
    # Get tempo and meter info
    # Find the first tempo marking
    tempo_marking = score_flat.getElementsByClass(tempo.MetronomeMark)
    bpm = 120  # default
    if tempo_marking:
        bpm = tempo_marking[0].number
    
    # Find time signatures
    time_sigs = score_flat.getElementsByClass(meter.TimeSignature)
    
    # Create a mapping from onset time (in seconds) to measure and beat
    print("Creating timing map...")
    
    # Method 1: Use music21's offset system
    # We'll find the closest note to each onset time
    
    # Get all notes and their offsets
    all_notes = list(score_flat.notesAndRests)
    note_offsets = []
    note_measures = []
    note_beats = []
    
    for n in all_notes:
        # Get the offset from the beginning of the score
        offset = n.offset
        note_offsets.append(offset)
        
        # Get measure number and beat
        measure_num = n.measureNumber
        note_measures.append(measure_num)
        
        # Calculate beat within measure
        beat = n.beat if hasattr(n, 'beat') else 1
        note_beats.append(beat)
    
    # Convert note offsets to seconds based on tempo
    # In music21, offset is in quarters, so we need to convert to seconds
    # quarters per minute = bpm, so seconds per quarter = 60 / bpm
    seconds_per_quarter = 60.0 / bpm
    note_times = [offset * seconds_per_quarter for offset in note_offsets]
    
    # For each harmonic analysis entry, find the closest note
    print("Aligning harmonic analysis with score...")
    
    # We'll store annotations to add them later
    annotations_to_add = []
    
    for idx, row in df_clean.iterrows():
        onset_time = float(row['onset'])
        roman_numeral = str(row['harmonic_analysis']).strip()
        
        # Skip empty or invalid Roman numerals
        if not roman_numeral or roman_numeral.lower() == 'nan':
            continue
        roman_numeral = parse_harmonic_analysis(roman_numeral)
        # Find the note closest in time to this onset
        if note_times:
            # Find index of closest note time
            closest_idx = np.argmin([abs(t - onset_time) for t in note_times])
            closest_time_diff = abs(note_times[closest_idx] - onset_time)
            
            # Only accept if reasonably close (within 0.5 seconds)
            if closest_time_diff < 0.5:
                measure_num = note_measures[closest_idx]
                beat_num = note_beats[closest_idx]
                note_offset = note_offsets[closest_idx]
                
                annotations_to_add.append({
                    'measure': measure_num,
                    'beat': beat_num,
                    'offset': note_offset,
                    'roman_numeral': roman_numeral,
                    'original_onset': onset_time
                })
            else:
                print(f"Warning: Could not find note close to onset {onset_time}s (closest: {closest_time_diff:.2f}s away)")
        else:
            print(f"Warning: No notes found in score")
    
    # Sort annotations by measure and beat
    annotations_to_add.sort(key=lambda x: (x['measure'], x['beat']))
    
    # Add chord symbols to the score
    print(f"Adding {len(annotations_to_add)} chord symbols to score...")
    
    # We need to work with the hierarchical structure
    # Create a dictionary to organize annotations by measure
    annotations_by_measure = {}
    for ann in annotations_to_add:
        measure_num = ann['measure']
        if measure_num not in annotations_by_measure:
            annotations_by_measure[measure_num] = []
        annotations_by_measure[measure_num].append(ann)
    
    # Add chord symbols to each measure
    for part in score.parts:
        for measure in part.getElementsByClass('Measure'):
            measure_num = measure.number
            if measure_num in annotations_by_measure:
                for ann in annotations_by_measure[measure_num]:
                    # Create a chord symbol
                    # Format Roman numeral for music21
                    roman_text = ann['roman_numeral']
                    
                    # Try to parse as Roman numeral chord symbol
                    # try:
                    #     chord_symbol = harmony.ChordSymbol(roman_text)
                        
                    #     # Set the offset within the measure
                    #     # Convert beat to offset (assuming quarter note beat)
                    #     beat_offset = ann['beat'] - 1  # music21 uses 0-based offsets
                        
                    #     # Insert at the calculated offset
                    #     measure.insert(beat_offset, chord_symbol)
                        
                    #     # Alternatively, add as text annotation
                    #     # from music21 import expressions
                    #     # text_exp = expressions.TextExpression(roman_text)
                    #     # measure.insert(beat_offset, text_exp)
                        
                    # except Exception as e:
                    #     print(f"Warning: Could not create chord symbol for '{roman_text}': {e}")
                    #     # Add as plain text expression instead
                    from music21 import expressions
                    text_exp = expressions.TextExpression(roman_text)
                    beat_offset = ann['beat'] - 1
                    measure.insert(beat_offset, text_exp)

    # Save the annotated score
    print(f"Saving annotated score to {output_path}...")
    score.write('musicxml', fp=output_path)
    
    # Also create a simpler version with just the Roman numerals as lyrics
    # This might be more reliable for some notation software
    print("Creating alternative version with lyrics...")
    score_lyrics = converter.parse(score_path)
    
    # Add Roman numerals as lyrics to the first part
    main_part_lyrics = score_lyrics.parts[0] if len(score_lyrics.parts) > 0 else score_lyrics
    
    # Group annotations by measure and beat to avoid duplicates
    added_positions = set()
    
    for ann in annotations_to_add:
        measure_num = ann['measure']
        beat_num = ann['beat']
        roman_text = ann['roman_numeral']
        
        position_key = (measure_num, beat_num)
        if position_key in added_positions:
            continue  # Skip duplicates at same position
        
        # Find notes at this measure and beat
        for note_obj in main_part_lyrics.recurse().notes:
            if (hasattr(note_obj, 'measureNumber') and 
                note_obj.measureNumber == measure_num and
                hasattr(note_obj, 'beat') and
                abs(note_obj.beat - beat_num) < 0.5):
                
                # Add lyric
                note_obj.addLyric(roman_text, lyricNumber=3)  # Use lyric number 3 for harmony
                added_positions.add(position_key)
                break
    
    # Save lyrics version
    lyrics_output_path = output_path.replace('.musicxml', '_with_lyrics.musicxml')
    score_lyrics.write('musicxml', fp=lyrics_output_path)
    
    print(f"Saved main version to: {output_path}")
    print(f"Saved lyrics version to: {lyrics_output_path}")
    
    return score, annotations_to_add


# Example usage
# if __name__ == "__main__":
#     # Test with sample files
#     try:
#         score, annotations = add_roman_numerals_to_score(
#             score_path="test_audio/daphne.musicxml",
#             csv_path="predicted_res/daphne/annotated_music_df.csv",
#             output_path="predicted_res/daphne/output_score.musicxml"
#         )
        
#         print(f"\nAdded {len(annotations)} annotations:")
#         for i, ann in enumerate(annotations[:10]):  # Show first 10
#             print(f"  {i+1}. M{ann['measure']} B{ann['beat']:.1f}: {ann['roman_numeral']}")
        
#         if len(annotations) > 10:
#             print(f"  ... and {len(annotations) - 10} more")
            
#     except Exception as e:
#         print(f"Error: {e}")
#         print("\nTroubleshooting tips:")
#         print("1. Make sure music21 is installed: pip install music21")
#         print("2. Check that your CSV has 'onset' and 'harmonic_analysis' columns")
#         print("3. Ensure the MusicXML file is valid")

In [3]:
add_roman_numerals_to_score(score_path= "test_audio/daphne.musicxml", csv_path= "predicted_res/daphne/annotated_music_df.csv", output_path= "predicted_res/daphne/output_score1.musicxml")

Loading score from test_audio/daphne.musicxml...
Loading harmonic analysis from predicted_res/daphne/annotated_music_df.csv...
Found 11 harmonic annotations
Creating timing map...
Aligning harmonic analysis with score...
Adding 5 chord symbols to score...
Saving annotated score to predicted_res/daphne/output_score1.musicxml...
Creating alternative version with lyrics...
Saved main version to: predicted_res/daphne/output_score1.musicxml
Saved lyrics version to: predicted_res/daphne/output_score1_with_lyrics.musicxml


(<music21.stream.Score 0x14f70378bf90>,
 [{'measure': 0,
   'beat': 2.5,
   'offset': 0.0,
   'roman_numeral': 'I',
   'original_onset': 0.0},
  {'measure': 2,
   'beat': 1.0,
   'offset': 2.5,
   'roman_numeral': 'V7',
   'original_onset': 1.5},
  {'measure': 3,
   'beat': 1.5,
   'offset': 5.0,
   'roman_numeral': 'I',
   'original_onset': 2.5},
  {'measure': 5,
   'beat': 1.5,
   'offset': 9.0,
   'roman_numeral': 'V65',
   'original_onset': 4.5},
  {'measure': 6,
   'beat': 1.0,
   'offset': 10.5,
   'roman_numeral': 'I',
   'original_onset': 5.5}])

In [10]:
import pandas as pd
from music21 import converter, stream, note, chord, tempo, midi
import numpy as np
import warnings
warnings.filterwarnings('ignore')

def add_roman_numerals_with_midi(score_path="test_audio/daphne.musicxml",
                                 midi_path="test_audio/daphne.mid",  # New: MIDI file
                                 csv_path="predicted_res/daphne/annotated_music_df.csv", 
                                 output_path="predicted_res/daphne/output_score.musicxml"):
    """
    Add Roman numeral harmonic analysis from CSV to MusicXML score using MIDI for timing.
    
    Args:
        score_path (str): Path to input MusicXML file
        midi_path (str): Path to MIDI file (same performance as MusicXML)
        csv_path (str): Path to CSV with harmonic analysis
        output_path (str): Path to output MusicXML file with annotations
    """
    
    print(f"Loading score from {score_path}...")
    score = converter.parse(score_path)
    
    print(f"Loading MIDI from {midi_path}...")
    midi_stream = converter.parse(midi_path)
    
    print(f"Loading harmonic analysis from {csv_path}...")
    df = pd.read_csv(csv_path)
    
    # Check required columns
    required_columns = ['onset', 'harmonic_analysis']
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"CSV must contain '{col}' column")
    
    # Clean data
    df_clean = df[df['harmonic_analysis'].notna()].copy()
    df_clean = df_clean[df_clean['harmonic_analysis'] != '']
    df_clean = df_clean.reset_index(drop=True)
    
    print(f"Found {len(df_clean)} harmonic annotations")
    
    # Method 1: Use MIDI to get exact timing mapping
    # -------------------------------------------------
    
    # Get all notes from MIDI with their onset times
    midi_notes = []
    for element in midi_stream.flat.notesAndRests:
        if isinstance(element, note.Note):
            midi_notes.append({
                'onset': element.offset,  # MIDI offset in quarters
                'pitch': element.pitch.midi,
                'duration': element.duration.quarterLength
            })
        elif isinstance(element, chord.Chord):
            for n in element.notes:
                midi_notes.append({
                    'onset': element.offset,
                    'pitch': n.pitch.midi,
                    'duration': element.duration.quarterLength
                })
    
    # Get tempo from MIDI
    tempo_markings = midi_stream.flat.getElementsByClass(tempo.MetronomeMark)
    if tempo_markings:
        bpm = tempo_markings[0].number
    else:
        # Try to get from score instead
        score_tempo = score.flat.getElementsByClass(tempo.MetronomeMark)
        bpm = score_tempo[0].number if score_tempo else 120
    
    print(f"Tempo: {bpm} BPM")
    seconds_per_quarter = 60.0 / bpm
    
    # Convert MIDI onset times to seconds
    for n in midi_notes:
        n['onset_seconds'] = n['onset'] * seconds_per_quarter
    
    # Get MusicXML notes with their positions
    xml_notes = []
    for part in score.parts:
        for n in part.recurse().notes:
            if isinstance(n, note.Note) or isinstance(n, chord.Chord):
                xml_notes.append({
                    'element': n,
                    'measure': n.measureNumber if hasattr(n, 'measureNumber') else 0,
                    'beat': n.beat if hasattr(n, 'beat') else 1,
                    'offset': n.offset
                })
    
    print(f"Found {len(midi_notes)} MIDI notes and {len(xml_notes)} MusicXML notes")
    
    # Method 2: Direct mapping using common onset times
    # -------------------------------------------------
    print("Mapping annotations to score...")
    
    # For each CSV annotation, find closest MIDI note
    annotations_to_add = []
    
    for idx, row in df_clean.iterrows():
        csv_onset = float(row['onset'])
        roman_num = str(row['harmonic_analysis']).strip()
        
        if not roman_num or roman_num.lower() == 'nan':
            continue
        
        # Find closest MIDI note by time
        if midi_notes:
            # Calculate time differences
            time_diffs = [abs(n['onset_seconds'] - csv_onset) for n in midi_notes]
            closest_midi_idx = np.argmin(time_diffs)
            closest_diff = time_diffs[closest_midi_idx]
            closest_midi_note = midi_notes[closest_midi_idx]
            
            if closest_diff < 0.2:  # Tight tolerance for good matches
                # Now find corresponding MusicXML note
                # We can match by:
                # 1. Similar timing (offset in quarters)
                # 2. Or find notes in same relative position
                
                midi_offset_quarters = closest_midi_note['onset']
                
                # Find MusicXML note with similar offset
                xml_matches = []
                for xml_note in xml_notes:
                    # Compare offsets (allow some tolerance)
                    offset_diff = abs(xml_note['offset'] - midi_offset_quarters)
                    if offset_diff < 0.5:  # Half a quarter note tolerance
                        xml_matches.append((offset_diff, xml_note))
                
                if xml_matches:
                    # Take the closest match
                    xml_matches.sort(key=lambda x: x[0])
                    best_match = xml_matches[0][1]
                    
                    annotations_to_add.append({
                        'measure': best_match['measure'],
                        'beat': best_match['beat'],
                        'offset': best_match['offset'],
                        'roman_numeral': roman_num,
                        'xml_note': best_match['element'],
                        'midi_onset': closest_midi_note['onset_seconds'],
                        'csv_onset': csv_onset,
                        'time_diff': closest_diff
                    })
                else:
                    print(f"Note: Could not find MusicXML note for MIDI onset {midi_offset_quarters}")
            else:
                print(f"Warning: No close MIDI note for CSV onset {csv_onset}s (closest: {closest_diff:.3f}s)")
        else:
            print("Warning: No MIDI notes found")
    
    print(f"Mapped {len(annotations_to_add)} annotations to score notes")
    
    # Method 3: If direct mapping fails, use proportional positioning
    # ----------------------------------------------------------------
    if len(annotations_to_add) < len(df_clean) * 0.5:  # Less than 50% success
        print("Using proportional positioning method...")
        
        # Get total duration from MIDI
        if midi_notes:
            max_midi_time = max([n['onset_seconds'] for n in midi_notes])
        else:
            # Estimate from CSV
            max_midi_time = df_clean['onset'].max() if len(df_clean) > 0 else 60
        
        # Get total duration from score (in quarters)
        score_duration = 0
        if xml_notes:
            score_duration = max([n['offset'] for n in xml_notes])
        
        # Map each CSV annotation proportionally
        for idx, row in df_clean.iterrows():
            csv_onset = float(row['onset'])
            roman_num = str(row['harmonic_analysis']).strip()
            
            if not roman_num or roman_num.lower() == 'nan':
                continue
            
            # Skip if already mapped
            already_mapped = any(a['csv_onset'] == csv_onset for a in annotations_to_add)
            if already_mapped:
                continue
            
            # Calculate proportional position in score
            proportion = csv_onset / max_midi_time if max_midi_time > 0 else 0
            target_offset = proportion * score_duration
            
            # Find closest MusicXML note
            if xml_notes:
                offset_diffs = [abs(n['offset'] - target_offset) for n in xml_notes]
                closest_idx = np.argmin(offset_diffs)
                closest_note = xml_notes[closest_idx]
                
                annotations_to_add.append({
                    'measure': closest_note['measure'],
                    'beat': closest_note['beat'],
                    'offset': closest_note['offset'],
                    'roman_numeral': roman_num,
                    'xml_note': closest_note['element'],
                    'midi_onset': csv_onset,
                    'csv_onset': csv_onset,
                    'time_diff': offset_diffs[closest_idx],
                    'method': 'proportional'
                })
    
    # Add as lyrics
    print(f"Adding {len(annotations_to_add)} annotations as lyrics...")
    
    # Group by note to avoid duplicate lyrics on same note
    notes_with_lyrics = {}
    
    for ann in annotations_to_add:
        note_key = id(ann['xml_note'])
        if note_key not in notes_with_lyrics:
            notes_with_lyrics[note_key] = {
                'note': ann['xml_note'],
                'numerals': []
            }
        
        if ann['roman_numeral'] not in notes_with_lyrics[note_key]['numerals']:
            notes_with_lyrics[note_key]['numerals'].append(ann['roman_numeral'])
    
    # Add lyrics to notes
    lyric_number = 3  # Use verse 3 for harmonic analysis
    
    for note_data in notes_with_lyrics.values():
        lyric_text = "/".join(note_data['numerals'])
        note_data['note'].addLyric(lyric_text, lyricNumber=lyric_number)
    
    # Save the score
    print(f"Saving to {output_path}...")
    score.write('musicxml', fp=output_path)
    
    # Print summary
    print(f"\n=== SUMMARY ===")
    print(f"Total CSV annotations: {len(df_clean)}")
    print(f"Successfully mapped: {len(annotations_to_add)}")
    print(f"Added to {len(notes_with_lyrics)} unique notes")
    
    if annotations_to_add:
        print("\nFirst 10 annotations:")
        for i, ann in enumerate(annotations_to_add[:10]):
            method = ann.get('method', 'direct')
            print(f"  {i+1}. M{ann['measure']} B{ann['beat']:.1f}: {ann['roman_numeral']} "
                  f"(CSV: {ann['csv_onset']:.2f}s, diff: {ann.get('time_diff', 0):.3f}s, method: {method})")
    
    return score, annotations_to_add


# Alternative: Even simpler approach - parse MIDI and XML into comparable structures
def add_roman_numerals_simple_midi(score_path, midi_path, csv_path, output_path):
    """
    Simple method: Parse both files and match by measure/beat structure.
    """
    from music21 import converter, instrument
    
    print("Using simple MIDI alignment method...")
    
    # Parse files
    score = converter.parse(score_path)
    midi_score = converter.parse(midi_path)
    
    # Get CSV data
    df = pd.read_csv(csv_path)
    df_clean = df[df['harmonic_analysis'].notna()].copy()
    df_clean = df_clean[df_clean['harmonic_analysis'] != '']
    
    print(f"Found {len(df_clean)} annotations")
    
    # Create a mapping from measure/beat to notes in the score
    score_note_map = {}
    for part in score.parts:
        for n in part.recurse().notes:
            if hasattr(n, 'measureNumber') and hasattr(n, 'beat'):
                key = (n.measureNumber, round(n.beat * 2) / 2)  # Round to nearest half-beat
                if key not in score_note_map:
                    score_note_map[key] = []
                score_note_map[key].append(n)
    
    # Create similar mapping for MIDI
    midi_note_map = {}
    for n in midi_score.flat.notes:
        # We need to approximate measure/beat in MIDI
        # This assumes MIDI and score have similar structure
        offset = n.offset
        
        # Simple approximation: 4 quarters per measure
        approx_measure = int(offset // 4) + 1
        approx_beat = (offset % 4) + 1
        
        key = (approx_measure, round(approx_beat * 2) / 2)
        if key not in midi_note_map:
            midi_note_map[key] = []
        midi_note_map[key].append(n)
    
    # For each CSV annotation, find matching position
    annotations_added = 0
    
    for idx, row in df_clean.iterrows():
        csv_onset = float(row['onset'])
        roman_num = str(row['harmonic_analysis']).strip()
        
        if not roman_num:
            continue
        
        # Convert CSV onset to approximate measure/beat
        # Assuming 120 BPM if not known
        bpm = 120
        quarters = csv_onset * (bpm / 60.0)
        approx_measure = int(quarters // 4) + 1
        approx_beat = (quarters % 4) + 1
        
        # Round to nearest half-beat
        search_key = (approx_measure, round(approx_beat * 2) / 2)
        
        # Look for notes at this position in score
        if search_key in score_note_map:
            # Add to first note at this position
            note_to_annotate = score_note_map[search_key][0]
            
            # Check if already has this lyric
            existing_lyrics = note_to_annotate.lyrics
            lyric_exists = any(l.text == roman_num for l in existing_lyrics if l.text)
            
            if not lyric_exists:
                note_to_annotate.addLyric(roman_num, lyricNumber=3)
                annotations_added += 1
        else:
            # Try nearby positions
            for offset in [0.5, -0.5, 1.0, -1.0]:
                alt_key = (search_key[0], search_key[1] + offset)
                if alt_key in score_note_map:
                    note_to_annotate = score_note_map[alt_key][0]
                    note_to_annotate.addLyric(roman_num, lyricNumber=3)
                    annotations_added += 1
                    break
    
    # Save
    score.write('musicxml', fp=output_path)
    print(f"Added {annotations_added} annotations as lyrics")
    
    return score, annotations_added


# Main function that tries different methods
def add_roman_numerals_to_score(score_path="test_audio/daphne.musicxml",
                                csv_path="predicted_res/daphne/annotated_music_df.csv",
                                output_path="predicted_res/daphne/output_score.musicxml",
                                midi_path=None):
    """
    Main function with fallback methods.
    """
    
    # Try to find MIDI file automatically if not provided
    if midi_path is None:
        # Check common locations
        import os
        possible_midi_paths = [
            score_path.replace('.musicxml', '.mid'),
            score_path.replace('.xml', '.mid'),
            csv_path.replace('annotated_music_df.csv', 'daphne_am_bach.mid'),
            'test_audio/daphne.mid',
            'daphne.mid'
        ]
        
        for path in possible_midi_paths:
            if os.path.exists(path):
                midi_path = path
                print(f"Found MIDI file: {midi_path}")
                break
    
    if midi_path: #nd os.path.exists(midi_path):
        print("Using MIDI file for accurate timing alignment...")
        try:
            return add_roman_numerals_with_midi(score_path, midi_path, csv_path, output_path)
        except Exception as e:
            print(f"MIDI method failed: {e}")
            print("Falling back to timing method...")
    
    # Fallback to the original timing method
    print("Using timing-based alignment (no MIDI)...")
    
    # (Include the original timing-based function here, or import it)
    # For now, using a simplified version:
    from music21 import converter
    import numpy as np
    
    score = converter.parse(score_path)
    df = pd.read_csv(csv_path)
    df_clean = df[df['harmonic_analysis'].notna()].copy()
    
    # Simple fallback: add to first note of each measure
    measures_with_annotations = {}
    for idx, row in df_clean.iterrows():
        csv_onset = float(row['onset'])
        roman_num = str(row['harmonic_analysis']).strip()
        
        if roman_num and roman_num.lower() != 'nan':
            # Simple mapping: group by measure (assuming 2 seconds per measure at 120 BPM)
            approx_measure = int(csv_onset // 2) + 1
            if approx_measure not in measures_with_annotations:
                measures_with_annotations[approx_measure] = []
            if roman_num not in measures_with_annotations[approx_measure]:
                measures_with_annotations[approx_measure].append(roman_num)
    
    # Add to first note in each measure
    lyric_number = 3
    annotations_added = 0
    
    for measure_num, numerals in measures_with_annotations.items():
        for part in score.parts:
            for n in part.recurse().notes:
                if hasattr(n, 'measureNumber') and n.measureNumber == measure_num:
                    lyric_text = "/".join(numerals)
                    n.addLyric(lyric_text, lyricNumber=lyric_number)
                    annotations_added += 1
                    break  # Only add to first note in measure
            if annotations_added >= len(measures_with_annotations):
                break
    
    score.write('musicxml', fp=output_path)
    print(f"Added {annotations_added} annotations to {len(measures_with_annotations)} measures")
    
    return score, annotations_added


# if __name__ == "__main__":
#     import sys
    
#     # Parse command line arguments
#     score_path = sys.argv[1] if len(sys.argv) > 1 else "test_audio/daphne.musicxml"
#     csv_path = sys.argv[2] if len(sys.argv) > 2 else "predicted_res/daphne/annotated_music_df.csv"
#     output_path = sys.argv[3] if len(sys.argv) > 3 else "predicted_res/daphne/output_score.musicxml"
    
#     # Look for MIDI file
#     midi_path = None
#     if len(sys.argv) > 4:
#         midi_path = sys.argv[4]
    
#     score, result = add_roman_numerals_to_score(
#         score_path=score_path,
#         csv_path=csv_path,
#         output_path=output_path,
#         midi_path=midi_path
#     )
    
#     print(f"\nDone! Check {output_path} for the annotated score.")

In [11]:
import os
score, result = add_roman_numerals_to_score(
    score_path= "test_audio/daphne.musicxml", 
    csv_path= "predicted_res/daphne/annotated_music_df.csv", 
    output_path= "predicted_res/daphne/output_score2.musicxml",
        midi_path="test_audio/daphne_am_bach.mid"
    )

Using MIDI file for accurate timing alignment...
Loading score from test_audio/daphne.musicxml...
Loading MIDI from test_audio/daphne_am_bach.mid...
Loading harmonic analysis from predicted_res/daphne/annotated_music_df.csv...
Found 11 harmonic annotations
Tempo: 120 BPM
Found 113 MIDI notes and 104 MusicXML notes
Mapping annotations to score...
Note: Could not find MusicXML note for MIDI onset 3.0
Note: Could not find MusicXML note for MIDI onset 5.0
Note: Could not find MusicXML note for MIDI onset 9.0
Mapped 1 annotations to score notes
Using proportional positioning method...
Adding 11 annotations as lyrics...
Saving to predicted_res/daphne/output_score2.musicxml...

=== SUMMARY ===
Total CSV annotations: 11
Successfully mapped: 11
Added to 5 unique notes

First 10 annotations:
  1. M0 B2.5: Ab.IM (CSV: 0.00s, diff: 0.000s, method: direct)
  2. M1 B1.5: VMm7 (CSV: 1.50s, diff: 0.138s, method: proportional)
  3. M1 B1.5: IM (CSV: 2.50s, diff: 0.103s, method: proportional)
  4. M1 B2

In [None]:
add_roman_numerals_to_score(score_path= "test_audio/daphne.musicxml", csv_path= "predicted_res/daphne/annotated_music_df.csv", output_path= "predicted_res/daphne/output_score1.musicxml")