In [1]:
import pandas as pd
df = pd.read_csv("predicted_res/daphne/annotated_music_df.csv")
df.head()

Unnamed: 0,original_index,filename,type,track,channel,pitch,onset,release,velocity,other,slice_id,distinct_slice_id,duplicated_indices,harmonic_analysis
0,0,daphne_am_bach.mid,time_signature,0,,,0.0,,,"{'numerator': 1, 'denominator': 8, 'clocks_per...",-1,-1,,
1,1,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 121, 'value': 0}",-1,-1,,
2,2,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 7, 'value': 100}",-1,-1,,
3,3,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 10, 'value': 41}",-1,-1,,
4,4,daphne_am_bach.mid,control_change,0,0.0,,0.0,,,"{'control': 91, 'value': 0}",-1,-1,,


In [3]:
import pandas as pd
from music21 import converter, note, chord

def parse_harmonic_analysis(rn):
    """
    Parse harmonic analysis notation like 'G.IM', 'g.im/VI', 'IM'
    Removes key prefix and quality indicators (M/m)
    
    Args:
        rn: Roman numeral string
        
    Returns:
        str: cleaned Roman numeral (e.g., 'I', 'V/VI', 'iv')
    """
    if not rn or pd.isna(rn):
        return None
    
    rn = str(rn).strip()
    
    # Remove key prefix (everything before the dot)
    if "." in rn:
        rn = rn.split(".", 1)[1]
    
    # Remove quality indicators M and m
    rn = rn.replace("M", "").replace("m", "")
    
    return rn if rn else None


def add_roman_numerals_to_score(score_path, csv_path, output_path, lyric_number=2):
    """
    Add Roman numeral annotations from CSV to a music score as lyrics
    
    Args:
        score_path: Path to input MusicXML/MXL file
        csv_path: Path to CSV file with columns 'onset' and 'harmonic_analysis'
        output_path: Path for output MusicXML file
        lyric_number: Which lyric line to use (1=first line, 2=second line, etc.)
    """
    
    # Load the score
    print(f"Loading score from {score_path}...")
    score = converter.parse(score_path)
    
    # Load the CSV
    print(f"Loading annotations from {csv_path}...")
    df = pd.read_csv(csv_path)
    
    # Validate required columns
    if 'onset' not in df.columns or 'harmonic_analysis' not in df.columns:
        raise ValueError("CSV must contain 'onset' and 'harmonic_analysis' columns")
    
    # Filter and sort annotations
    df_rn = df.dropna(subset=['harmonic_analysis']).sort_values('onset').reset_index(drop=True)
    
    print(f"Found {len(df_rn)} Roman numeral annotations")
    print("\nFirst few annotations:")
    print(df_rn[['onset', 'harmonic_analysis']].head())
    
    # Determine which part to annotate
    if 'track' in df.columns and not df['track'].isna().all():
        # Use track information if available
        part_tracks = df_rn['track'].dropna().unique()
        print(f"\nAnnotating tracks: {sorted(part_tracks)}")
    else:
        # Annotate the first part (usually melody/soprano)
        print("\nNo track information found, annotating first part")
    
    # Process each annotation
    annotations_added = 0
    for idx, row in df_rn.iterrows():
        onset = float(row['onset'])
        rn_text = str(row['harmonic_analysis'])
        
        # Parse the Roman numeral
        cleaned_rn = parse_harmonic_analysis(rn_text)
        if not cleaned_rn:
            continue
        
        # Determine which part to use
        if 'track' in df.columns and not pd.isna(row['track']):
            part_idx = int(row['track']) - 1  # Assuming 1-based track numbers
            part_idx = max(0, min(part_idx, len(score.parts) - 1))
        else:
            part_idx = 0
        
        part = score.parts[part_idx]
        
        # Find the note/chord at this onset
        flat_part = part.flatten()
        elements_at_onset = flat_part.getElementsByOffset(
            onset,
            mustBeginInSpan=False,
            mustFinishInSpan=False,
            classList=[note.Note, chord.Chord]
        )
        
        # Add lyric to the first note/chord found at this onset
        if elements_at_onset:
            target = elements_at_onset[0]
            target.lyric = cleaned_rn
            # Or use specific lyric number: target.addLyric(cleaned_rn, lyric_number)
            annotations_added += 1
            
            if annotations_added <= 5:  # Show first few for debugging
                print(f"  Added '{cleaned_rn}' at onset {onset} to {type(target).__name__}")
        else:
            print(f"  Warning: No note found at onset {onset} for '{cleaned_rn}'")
    
    print(f"\nSuccessfully added {annotations_added} annotations")
    
    # Write the annotated score
    print(f"Writing annotated score to {output_path}...")
    score.write('musicxml', fp=output_path)
    print("Done!")


# if __name__ == "__main__":
#     # Example usage
#     score_file = "input_score.mxl"
#     csv_file = "roman_numerals.csv"
#     output_file = "annotated_score.musicxml"
    
#     add_roman_numerals_to_score(score_file, csv_file, output_file)
    
#     # If you want to use a specific lyric line (e.g., line 2 for harmony below text):
#     # add_roman_numerals_to_score(score_file, csv_file, output_file, lyric_number=2)

In [6]:
add_roman_numerals_to_score(score_path= "test_audio/daphne.musicxml", csv_path= "predicted_res/daphne/annotated_music_df.csv", output_path= "predicted_res/daphne/output_score1.musicxml")


Loading score from test_audio/daphne.musicxml...
Loading annotations from predicted_res/daphne/annotated_music_df.csv...
Found 11 Roman numeral annotations

First few annotations:
   onset harmonic_analysis
0    0.0             Ab.IM
1    1.5              VMm7
2    2.5                IM
3    4.5             VMm65
4    5.5                IM

Annotating tracks: [np.int64(1), np.int64(2)]
  Added 'I' at onset 0.0 to Note
  Added 'V7' at onset 1.5 to NoChord
  Added 'I' at onset 2.5 to NoChord
  Added 'V65' at onset 4.5 to NoChord
  Added 'I' at onset 5.5 to NoChord

Successfully added 11 annotations
Writing annotated score to predicted_res/daphne/output_score1.musicxml...
Done!


In [None]:
from music21 import converter, harmony

# Load your MusicXML
def add_rn (score_path, output_path, df)
score = converter.parse('your_score.musicxml')

# Add chord symbols at specific measures/beats
for chord_data in your_harmony_list:  # Your extracted data
    m = score.measure(chord_data['measure'])
    beat_offset = chord_data['beat']
    
    # Create chord symbol
    cs = harmony.ChordSymbol(chord_data['harmony'])
    
    # Insert at the right position
    m.insert(beat_offset, cs)

# Save
score.write('musicxml', 'annotated_score.musicxml')

In [None]:
score_path= "test_audio/daphne.musicxml", csv_path= "predicted_res/daphne/annotated_music_df.csv", output_path= "predicted_res/daphne/output_score1.musicxml"
add_harmony_to_midi('sonata.mid', 'sonata.csv', 'sonata_annotated.musicxml')

In [10]:
import pandas as pd
from music21 import converter, note, chord, harmony, meter, tempo, key
from music21.stream import Score, Part, Measure
import numpy as np
import warnings
warnings.filterwarnings('ignore')

def add_roman_numerals_to_score(score_path="test_audio/daphne.musicxml", 
                                csv_path="predicted_res/daphne/annotated_music_df.csv", 
                                output_path="predicted_res/daphne/output_score1.musicxml"):
    """
    Add Roman numeral harmonic analysis from CSV to MusicXML score.
    
    Args:
        score_path (str): Path to input MusicXML file
        csv_path (str): Path to CSV with harmonic analysis
        output_path (str): Path to output MusicXML file with annotations
    """
    
    # Load the score
    print(f"Loading score from {score_path}...")
    score = converter.parse(score_path)
    
    # Load the CSV data
    print(f"Loading harmonic analysis from {csv_path}...")
    df = pd.read_csv(csv_path)
    
    # Check if required columns exist
    required_columns = ['onset', 'harmonic_analysis']
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"CSV must contain '{col}' column")
    
    # Clean data: remove rows without harmonic analysis
    df_clean = df[df['harmonic_analysis'].notna()].copy()
    df_clean = df_clean[df_clean['harmonic_analysis'] != '']
    df_clean = df_clean.reset_index(drop=True)
    
    print(f"Found {len(df_clean)} harmonic annotations")
    
    # Get the main part (usually the first part with notes)
    main_part = score.parts[0] if len(score.parts) > 0 else score
    
    # Convert score to flat representation for easier timing calculations
    score_flat = main_part.flat
    
    # Get tempo and meter info
    # Find the first tempo marking
    tempo_marking = score_flat.getElementsByClass(tempo.MetronomeMark)
    bpm = 120  # default
    if tempo_marking:
        bpm = tempo_marking[0].number
    
    # Find time signatures
    time_sigs = score_flat.getElementsByClass(meter.TimeSignature)
    
    # Create a mapping from onset time (in seconds) to measure and beat
    print("Creating timing map...")
    
    # Method 1: Use music21's offset system
    # We'll find the closest note to each onset time
    
    # Get all notes and their offsets
    all_notes = list(score_flat.notesAndRests)
    note_offsets = []
    note_measures = []
    note_beats = []
    
    for n in all_notes:
        # Get the offset from the beginning of the score
        offset = n.offset
        note_offsets.append(offset)
        
        # Get measure number and beat
        measure_num = n.measureNumber
        note_measures.append(measure_num)
        
        # Calculate beat within measure
        beat = n.beat if hasattr(n, 'beat') else 1
        note_beats.append(beat)
    
    # Convert note offsets to seconds based on tempo
    # In music21, offset is in quarters, so we need to convert to seconds
    # quarters per minute = bpm, so seconds per quarter = 60 / bpm
    seconds_per_quarter = 60.0 / bpm
    note_times = [offset * seconds_per_quarter for offset in note_offsets]
    
    # For each harmonic analysis entry, find the closest note
    print("Aligning harmonic analysis with score...")
    
    # We'll store annotations to add them later
    annotations_to_add = []
    
    for idx, row in df_clean.iterrows():
        onset_time = float(row['onset'])
        roman_numeral = str(row['harmonic_analysis']).strip()
        
        # Skip empty or invalid Roman numerals
        if not roman_numeral or roman_numeral.lower() == 'nan':
            continue
        
        # Find the note closest in time to this onset
        if note_times:
            # Find index of closest note time
            closest_idx = np.argmin([abs(t - onset_time) for t in note_times])
            closest_time_diff = abs(note_times[closest_idx] - onset_time)
            
            # Only accept if reasonably close (within 0.5 seconds)
            if closest_time_diff < 0.5:
                measure_num = note_measures[closest_idx]
                beat_num = note_beats[closest_idx]
                note_offset = note_offsets[closest_idx]
                
                annotations_to_add.append({
                    'measure': measure_num,
                    'beat': beat_num,
                    'offset': note_offset,
                    'roman_numeral': roman_numeral,
                    'original_onset': onset_time
                })
            else:
                print(f"Warning: Could not find note close to onset {onset_time}s (closest: {closest_time_diff:.2f}s away)")
        else:
            print(f"Warning: No notes found in score")
    
    # Sort annotations by measure and beat
    annotations_to_add.sort(key=lambda x: (x['measure'], x['beat']))
    
    # Add chord symbols to the score
    print(f"Adding {len(annotations_to_add)} chord symbols to score...")
    
    # We need to work with the hierarchical structure
    # Create a dictionary to organize annotations by measure
    annotations_by_measure = {}
    for ann in annotations_to_add:
        measure_num = ann['measure']
        if measure_num not in annotations_by_measure:
            annotations_by_measure[measure_num] = []
        annotations_by_measure[measure_num].append(ann)
    
    # Add chord symbols to each measure
    for part in score.parts:
        for measure in part.getElementsByClass('Measure'):
            measure_num = measure.number
            if measure_num in annotations_by_measure:
                for ann in annotations_by_measure[measure_num]:
                    # Create a chord symbol
                    # Format Roman numeral for music21
                    roman_text = ann['roman_numeral']
                    
                    # Try to parse as Roman numeral chord symbol
                    try:
                        chord_symbol = harmony.ChordSymbol(roman_text)
                        
                        # Set the offset within the measure
                        # Convert beat to offset (assuming quarter note beat)
                        beat_offset = ann['beat'] - 1  # music21 uses 0-based offsets
                        
                        # Insert at the calculated offset
                        measure.insert(beat_offset, chord_symbol)
                        
                        # Alternatively, add as text annotation
                        # from music21 import expressions
                        # text_exp = expressions.TextExpression(roman_text)
                        # measure.insert(beat_offset, text_exp)
                        
                    except Exception as e:
                        print(f"Warning: Could not create chord symbol for '{roman_text}': {e}")
                        # Add as plain text expression instead
                        from music21 import expressions
                        text_exp = expressions.TextExpression(roman_text)
                        beat_offset = ann['beat'] - 1
                        measure.insert(beat_offset, text_exp)
    
    # Save the annotated score
    print(f"Saving annotated score to {output_path}...")
    score.write('musicxml', fp=output_path)
    
    # Also create a simpler version with just the Roman numerals as lyrics
    # This might be more reliable for some notation software
    print("Creating alternative version with lyrics...")
    score_lyrics = converter.parse(score_path)
    
    # Add Roman numerals as lyrics to the first part
    main_part_lyrics = score_lyrics.parts[0] if len(score_lyrics.parts) > 0 else score_lyrics
    
    # Group annotations by measure and beat to avoid duplicates
    added_positions = set()
    
    for ann in annotations_to_add:
        measure_num = ann['measure']
        beat_num = ann['beat']
        roman_text = ann['roman_numeral']
        
        position_key = (measure_num, beat_num)
        if position_key in added_positions:
            continue  # Skip duplicates at same position
        
        # Find notes at this measure and beat
        for note_obj in main_part_lyrics.recurse().notes:
            if (hasattr(note_obj, 'measureNumber') and 
                note_obj.measureNumber == measure_num and
                hasattr(note_obj, 'beat') and
                abs(note_obj.beat - beat_num) < 0.5):
                
                # Add lyric
                note_obj.addLyric(roman_text, lyricNumber=3)  # Use lyric number 3 for harmony
                added_positions.add(position_key)
                break
    
    # Save lyrics version
    lyrics_output_path = output_path.replace('.musicxml', '_with_lyrics.musicxml')
    score_lyrics.write('musicxml', fp=lyrics_output_path)
    
    print(f"Saved main version to: {output_path}")
    print(f"Saved lyrics version to: {lyrics_output_path}")
    
    return score, annotations_to_add


# Example usage
# if __name__ == "__main__":
#     # Test with sample files
#     try:
#         score, annotations = add_roman_numerals_to_score(
#             score_path="test_audio/daphne.musicxml",
#             csv_path="predicted_res/daphne/annotated_music_df.csv",
#             output_path="predicted_res/daphne/output_score.musicxml"
#         )
        
#         print(f"\nAdded {len(annotations)} annotations:")
#         for i, ann in enumerate(annotations[:10]):  # Show first 10
#             print(f"  {i+1}. M{ann['measure']} B{ann['beat']:.1f}: {ann['roman_numeral']}")
        
#         if len(annotations) > 10:
#             print(f"  ... and {len(annotations) - 10} more")
            
#     except Exception as e:
#         print(f"Error: {e}")
#         print("\nTroubleshooting tips:")
#         print("1. Make sure music21 is installed: pip install music21")
#         print("2. Check that your CSV has 'onset' and 'harmonic_analysis' columns")
#         print("3. Ensure the MusicXML file is valid")

In [11]:
add_roman_numerals_to_score(score_path= "test_audio/daphne.musicxml", csv_path= "predicted_res/daphne/annotated_music_df.csv", output_path= "predicted_res/daphne/output_score1.musicxml")

Loading score from test_audio/daphne.musicxml...
Loading harmonic analysis from predicted_res/daphne/annotated_music_df.csv...
Found 11 harmonic annotations
Creating timing map...
Aligning harmonic analysis with score...
Adding 5 chord symbols to score...
Saving annotated score to predicted_res/daphne/output_score1.musicxml...
Creating alternative version with lyrics...
Saved main version to: predicted_res/daphne/output_score1.musicxml
Saved lyrics version to: predicted_res/daphne/output_score1_with_lyrics.musicxml


(<music21.stream.Score 0x15183738a310>,
 [{'measure': 0,
   'beat': 2.5,
   'offset': 0.0,
   'roman_numeral': 'Ab.IM',
   'original_onset': 0.0},
  {'measure': 2,
   'beat': 1.0,
   'offset': 2.5,
   'roman_numeral': 'VMm7',
   'original_onset': 1.5},
  {'measure': 3,
   'beat': 1.5,
   'offset': 5.0,
   'roman_numeral': 'IM',
   'original_onset': 2.5},
  {'measure': 5,
   'beat': 1.5,
   'offset': 9.0,
   'roman_numeral': 'VMm65',
   'original_onset': 4.5},
  {'measure': 6,
   'beat': 1.0,
   'offset': 10.5,
   'roman_numeral': 'IM',
   'original_onset': 5.5}])