In [67]:
# Import dependencies
import numpy as np
import pretty_midi
import xml.etree.ElementTree as ET
import sys
import importlib

# Import project modules
from voicing import Voicing
from utils import MPE_MIDI_Exporter

In [68]:
# Load the iReal XML dataset and locate the one song this notebook tests.
import xmlTranslator as xmlT
from utils import get_project_root

# Dataset folder, given relative to the project root
directory = '/dataset/iRealXML'
root = get_project_root()
myPath = f"{root}{directory}"

print("Loading dataset for 'Something' test...")
theChordDataset, theDurationsDataset, all_meta = xmlT.parse_info_from_XML(myPath)

# Index of the first metadata entry titled exactly "Something" (None if absent)
something_id = next(
    (idx for idx, entry in enumerate(all_meta) if entry['song_name'] == 'Something'),
    None,
)
if something_id is None:
    raise ValueError("'Something' not found in dataset!")

# Later cells read the chord token sequences through this alias
dataset = theChordDataset
print(f"‚úì Found 'Something' at index {something_id}")
print("‚úì Ready to test slash chords")

Loading dataset for 'Something' test...


  0%|          | 0/4005 [00:00<?, ?it/s]

(4005,) (4005,) (4005,)
‚úì Found 'Something' at index 3986
‚úì Ready to test slash chords


## 1. Find "Something" in Dataset

In [69]:
# List every title that contains "Something" and remember the exact-title match.
something_id = None
for idx, entry in enumerate(all_meta):
    title = entry['song_name']
    if 'Something' not in title:
        continue
    print(f"[{idx}] {title} - {entry['tonality']}")
    # No break: if the exact title occurs more than once, the last one wins
    if title == 'Something':
        something_id = idx

if something_id is None:
    print("‚ö†Ô∏è  'Something' not found in dataset")
else:
    print(f"\n‚úì Found 'Something' at index {something_id}")
    print(f"Metadata: {all_meta[something_id]}")

[267] Something From Everyone - G major
[766] You Do Something To Me - Eb major
[1263] You Do Something To Me 1 - Eb major
[2063] Tell Me Something Good 1 - Ab major
[2303] Something 1 - C major
[2333] Tell Me Something Good - Ab major
[2404] This Could Be The Start Of Something Big - Eb major
[3986] Something - C major

‚úì Found 'Something' at index 3986
Metadata: {'composer': 'The Beatles (George Harrison)', 'style': 'Rock Pop', 'song_name': 'Something', 'tonality': 'C major', 'midi_key': 12, 'time_signature': '4/4', 'decade': 'Null', 'software': 'iReal Pro 2022.2', 'encoding_date': '2022-10-16'}


## 2. Extract Chord Sequence

In [70]:
def extract_readable_chords(token_sequence):
    """Collect the chords of a tokenized song as readable strings.

    A chord starts at a '.' token. Every following token up to (but not
    including) the next structural token or 'Form_*' marker belongs to that
    chord, except tokens that parse as a number (durations), which are skipped.

    Args:
        token_sequence: Sequence of string tokens for one song.

    Returns:
        List of ``(dot_index, chord_text)`` tuples, where ``dot_index`` is the
        position of the chord's '.' token and ``chord_text`` is its parts
        joined with single spaces (e.g. ``'G dominant / D'``). A '.' that is
        followed by no chord tokens contributes nothing.
    """
    structural = {'.', '|', ':|', '|:', 'N.C.', '<end>'}

    def _is_duration(token):
        # Only a failed numeric parse means "not a duration"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        try:
            float(token)
        except (TypeError, ValueError):
            return False
        return True

    chords = []
    i = 0
    while i < len(token_sequence):
        if token_sequence[i] != '.':
            i += 1
            continue

        # Start of a chord: look ahead and gather its parts
        chord_parts = []
        j = i + 1
        while j < len(token_sequence):
            next_token = token_sequence[j]

            # Stop at the next structural element (it is re-examined by the outer loop)
            if next_token in structural or next_token.startswith('Form_'):
                break

            if not _is_duration(next_token):
                chord_parts.append(next_token)
            j += 1

        if chord_parts:
            chords.append((i, ' '.join(chord_parts)))
        i = j

    return chords

# Extract the readable chord sequence for the song found by the dataset lookup
# earlier in the notebook. The index is deliberately NOT hard-coded here, so
# the cell keeps working if the dataset order changes.
if something_id is None:
    raise ValueError("'Something' not found in dataset - run the lookup cell first")
something_tokenized = dataset[something_id]
something_chords = extract_readable_chords(something_tokenized)

print("="*80)
print("CHORD SEQUENCE FOR 'SOMETHING'")
print("="*80)
for pos, chord in something_chords:
    # Highlight slash chords (their text contains the '/' token)
    marker = " ‚Üê SLASH CHORD" if '/' in chord else ""
    print(f"[{pos:3d}] {chord:<40}{marker}")

print(f"\nTotal chords: {len(something_chords)}")
slash_count = sum(1 for _, c in something_chords if '/' in c)
print(f"Slash chords: {slash_count}")

CHORD SEQUENCE FOR 'SOMETHING'
[  4] F major                                 
[  8] Eb major                                
[ 12] G dominant / D                           ‚Üê SLASH CHORD
[ 20] C major                                 
[ 25] C major-seventh                         
[ 30] C dominant                              
[ 35] F major                                 
[ 39] F major / E                              ‚Üê SLASH CHORD
[ 46] D dominant                              
[ 51] G dominant                              
[ 55] G dominant / A                           ‚Üê SLASH CHORD
[ 61] G dominant / B                           ‚Üê SLASH CHORD
[ 69] A minor                                 
[ 73] A minor add #7                          
[ 79] A minor-seventh / G                      ‚Üê SLASH CHORD
[ 85] D dominant-ninth                        
[ 91] F major                                 
[ 95] Eb major                                
[ 99] G dominant / D                       

## 3. Verify XML Extraction

In [71]:
# Check the original XML file for "Something": list every <harmony> chord and
# flag the slash chords (those that carry a bass note).
xml_path = '../dataset/iRealXML/Something.xml'
tree = ET.parse(xml_path)
# Stored under its own name so the project-root `root` set while loading the
# dataset is not overwritten by an XML element.
xml_root = tree.getroot()


def _spell_note(step_el, alter_el):
    """Return the note name for a step/alter element pair, or None.

    Args:
        step_el: Element holding the note letter, or None when absent.
        alter_el: Element holding the alteration in semitones, or None.

    Returns:
        None when ``step_el`` is None; otherwise the step text followed by one
        '#' per semitone up or one 'b' per semitone down (so +2 gives '##').
    """
    if step_el is None:
        return None
    name = step_el.text
    if alter_el is not None and alter_el.text:
        # float() first so decimal text such as "1.0" is accepted as well
        semitones = int(float(alter_el.text))
        name += ('#' if semitones > 0 else 'b') * abs(semitones)
    return name


# Extract all chords from XML
xml_chords = []
harmonies = xml_root.findall('.//{*}harmony')

print("="*80)
print("ORIGINAL XML CHORDS (from iReal Pro)")
print("="*80)

for i, harmony in enumerate(harmonies):
    root_step = harmony.find('.//{*}root-step')
    if root_step is None:
        # No root: nothing to display (the index i is still consumed)
        continue

    root_note = _spell_note(root_step, harmony.find('.//{*}root-alter'))

    kind = harmony.find('.//{*}kind')
    kind_text = kind.get('text', '') if kind is not None else ''
    kind_name = kind.text if kind is not None else 'major'

    # Bass note for slash chords (None when the harmony has no bass-step)
    bass_note = _spell_note(harmony.find('.//{*}bass-step'),
                            harmony.find('.//{*}bass-alter'))

    chord_display = f"{root_note}{kind_text}"
    if bass_note:
        chord_display += f"/{bass_note}"

    xml_chords.append({
        'root': root_note,
        'kind': kind_name,
        'bass': bass_note or None,
        'display': chord_display
    })

    if bass_note:
        print(f"[{i:2d}] {chord_display:<30} (bass: {bass_note})  ‚Üê SLASH CHORD")
    else:
        print(f"[{i:2d}] {chord_display}")

print(f"\nTotal chords in XML: {len(xml_chords)}")
xml_slash_count = sum(1 for c in xml_chords if c['bass'] is not None)
print(f"Slash chords in XML: {xml_slash_count}")

ORIGINAL XML CHORDS (from iReal Pro)
[ 0] F
[ 1] Eb
[ 2] G7/D                           (bass: D)  ‚Üê SLASH CHORD
[ 3] C
[ 4] Cmaj7
[ 5] C7
[ 6] F
[ 7] F/E                            (bass: E)  ‚Üê SLASH CHORD
[ 8] D7
[ 9] G7
[10] G7/A                           (bass: A)  ‚Üê SLASH CHORD
[11] G7/B                           (bass: B)  ‚Üê SLASH CHORD
[12] Am
[13] Am
[14] Am7/G                          (bass: G)  ‚Üê SLASH CHORD
[15] D9
[16] F
[17] Eb
[18] G7/D                           (bass: D)  ‚Üê SLASH CHORD
[19] A
[20] A
[21] A/G#                           (bass: G#)  ‚Üê SLASH CHORD
[22] A/F#                           (bass: F#)  ‚Üê SLASH CHORD
[23] A/E                            (bass: E)  ‚Üê SLASH CHORD
[24] D
[25] G
[26] A
[27] A
[28] A/G#                           (bass: G#)  ‚Üê SLASH CHORD
[29] A/F#                           (bass: F#)  ‚Üê SLASH CHORD
[30] A/E                            (bass: E)  ‚Üê SLASH CHORD
[31] D
[32] G
[33] C
[34] C
[35] Cmaj7
[36] C7
[37] F
[38]

## 4. Test Corrected Slash Chord Implementation

In [72]:
# CRITICAL FIX: Convert XML chord names to internal tokens FIRST!
# This cell runs three steps in a fixed order: (1) translate chord names,
# (2) insert 'maj' after bare notes, (3) re-import voicing and convert the
# preprocessed tokens to MIDI voicings, then print the first slash chords.
print("\n" + "="*80)
print("STEP 1: Converting XML chord names to internal tokens (replaceTheseChords)")
print("="*80)

# Re-import and reload so the current on-disk xmlTranslator code is the one executed
import xmlTranslator as xmlT
import importlib
importlib.reload(xmlT)

# Convert XML chord names like 'dominant' -> 'dom7', 'major' -> 'maj', etc.
# The single song is wrapped in a list and the first result is unwrapped again.
# NOTE(review): meaning of the second positional argument (False) is not visible here - confirm.
corrected_sequence = xmlT.replaceTheseChords([something_tokenized], False)
something_tokenized_corrected = corrected_sequence[0]

print(f"‚úì Converted: {len(something_tokenized)} ‚Üí {len(something_tokenized_corrected)} tokens")
print("="*80 + "\n")

# STEP 2: Add 'maj' tokens to single notes
print("="*80)
print("STEP 2: Adding 'maj' tokens to single notes")
print("="*80)

# Temporary instance, used only for its all_notes / structural_elements attributes
from voicing import Voicing as TempVoicing
voicing_temp = TempVoicing()

# Process the sequence to add 'maj' after single notes
processed_sequence = []
added_maj_count = 0

for i, token in enumerate(something_tokenized_corrected):
    processed_sequence.append(token)
    
    # Check if this is a single note that needs 'maj'
    # (the last token is never examined because a following token is required)
    if i < len(something_tokenized_corrected) - 1:
        next_token = something_tokenized_corrected[i + 1]
        previous = something_tokenized_corrected[i - 1] if i > 0 else ''
        
        # If current token is a note, and next is structural, and previous wasn't slash
        # (a note right after '/' is a slash-chord bass, so it gets no 'maj')
        if token in voicing_temp.all_notes and next_token != 'N.C.' and previous != '/' \
           and (next_token in voicing_temp.structural_elements or next_token.startswith('Form_')):
            processed_sequence.append('maj')
            added_maj_count += 1

print(f"‚úì Added {added_maj_count} 'maj' tokens")
print(f"‚úì Tokens: {len(something_tokenized_corrected)} ‚Üí {len(processed_sequence)}")
print("="*80 + "\n")

# STEP 3: Convert to voicing with FULLY preprocessed sequence
# FORCE RELOAD - delete from sys.modules completely
# Every loaded module whose name contains 'voicing' (case-insensitive) is dropped,
# so the import below executes the module code again.
# NOTE(review): the substring match would also drop any unrelated module with
# 'voicing' in its name - confirm that is acceptable.
for mod_name in list(sys.modules.keys()):
    if 'voicing' in mod_name.lower():
        del sys.modules[mod_name]

import voicing as voicing_module

# Create new Voicing instance
voicing_corrected = voicing_module.Voicing()

# Convert "Something" with FULLY PREPROCESSED sequence
# The call yields two values: the converted events and a status flag.
something_midi_corrected, status = voicing_corrected.convert_chords_to_voicing(processed_sequence)
print(f"Conversion status: {status}")
print(f"Converted {len(something_midi_corrected)} events")

# Count REAL chords (3+ notes)
# Each event unpacks as (midi, duration, label); zero entries in midi are not counted as notes.
real_chords = sum(1 for m,d,l in something_midi_corrected if len([n for n in m if n>0])>=3)
print(f"‚úì REAL CHORDS WITH 3+ NOTES: {real_chords}")

print("\nTesting corrected slash chord implementation:")
print("="*90)

# Find and display first 5 slash chords
# A slash marker is an event whose third field (the label) equals '/'.
slash_indices = [i for i, item in enumerate(something_midi_corrected) if item[2] == '/']
print(f"Found {len(slash_indices)} slash markers\n")

for slash_num, idx in enumerate(slash_indices[:5], 1):
    print(f"--- SLASH CHORD #{slash_num} at index {idx} ---")
    
    # Show chord before slash, slash marker, and bass note after
    # Window is idx-2 .. idx+2, clamped to the bounds of the event list.
    for i in range(max(0, idx-2), min(len(something_midi_corrected), idx+3)):
        midi, duration, label = something_midi_corrected[i]
        
        if i == idx-2:
            # Root note
            # NOTE(review): printed as "Root", but this is simply the event two
            # positions before the marker - confirm it really is the chord root.
            note_name = pretty_midi.note_number_to_name(midi[0]) if midi[0] > 0 else "silence"
            print(f"[{i:>3}]  {str(label):<20} {str(midi):<50} ‚Üê Root: {note_name}")
        elif i == idx-1:
            # Full chord
            notes = [pretty_midi.note_number_to_name(m) for m in midi if m > 0]
            print(f"[{i:>3}]  {str(label):<20} {str(midi):<50} ‚Üê Chord: {notes}")
        elif i == idx:
            print(f"[{i:>3}]  {str(label):<20} {str(midi):<50} ‚Üê SLASH MARKER")
        elif i == idx+1:
            # Slash bass note (should have full voicing with new bass + moved root)
            notes = [pretty_midi.note_number_to_name(m) for m in midi if m > 0]
            note_count = len(notes)
            print(f"[{i:>3}]  {str(label):<20} {str(midi):<50} ‚Üê BASS+CHORD: {notes}")
            if len(notes) > 1:
                print(f"       ‚Üí {note_count} notes: Bass={notes[0]}, OldRoot+12={notes[1]}")
        # i == idx+2 falls through: that event is inside the window but not printed
    print()

print("="*90)
print("‚úì Expected: Each slash chord should have:")
print("  1. New bass note at [0]")
print("  2. Old root moved up octave (+12) at [1]")
print("  3. Rest of chord voicing preserved")



STEP 1: Converting XML chord names to internal tokens (replaceTheseChords)


  0%|          | 0/1 [00:00<?, ?it/s]

‚úì Converted: 300 ‚Üí 300 tokens

STEP 2: Adding 'maj' tokens to single notes
‚úì Added 0 'maj' tokens
‚úì Tokens: 300 ‚Üí 300

Conversion status: True
Converted 135 events
‚úì REAL CHORDS WITH 3+ NOTES: 71

Testing corrected slash chord implementation:
Found 18 slash markers

--- SLASH CHORD #1 at index 7 ---
[  5]  maj                  [51, 55, 58, 0, 0, 0, 0, 0]                        ‚Üê Root: D#3
[  6]  dom7                 [43, 47, 50, 53, 0, 0, 0, 0]                       ‚Üê Chord: ['G2', 'B2', 'D3', 'F3']
[  7]  /                    [0, 0, 0, 0, 0, 0, 0, 0]                           ‚Üê SLASH MARKER
[  8]  D                    [50, 55, 47, 53, 0, 0, 0, 0]                       ‚Üê BASS+CHORD: ['D3', 'G3', 'B2', 'F3']
       ‚Üí 4 notes: Bass=D3, OldRoot+12=G3

--- SLASH CHORD #2 at index 19 ---
[ 17]  maj                  [53, 57, 60, 0, 0, 0, 0, 0]                        ‚Üê Root: F3
[ 18]  maj                  [53, 57, 60, 0, 0, 0, 0, 0]                        ‚Üê Chord: ['

In [73]:
# Inspect the converted events directly rather than assuming their layout.
print("="*100)
print("RAW DATA INSPECTION - What's ACTUALLY in something_midi_corrected?")
print("="*100)

print(f"\nTotal elements: {len(something_midi_corrected)}")
print("\nFirst 50 elements with ALL details:\n")

for i, (midi, duration, label) in enumerate(something_midi_corrected[:50]):
    sounding = [n for n in midi if n > 0]
    note_count = len(sounding)
    notes = [pretty_midi.note_number_to_name(n) for n in sounding]

    # Classify the event by how many non-zero MIDI numbers it carries
    if midi == [0, 0, 0, 0, 0, 0, 0, 0]:
        marker = "‚Üê EMPTY"
    elif note_count >= 3:
        marker = "‚Üê FULL CHORD ‚úì"
    elif note_count == 1:
        marker = "‚Üê SINGLE NOTE (root?)"
    else:
        marker = "‚Üê PARTIAL"

    print(f"[{i:3d}] label='{label:<15}' dur={duration:4.1f} notes={note_count} {str(notes):<40} {marker}")

# Tally the whole list: note counts are computed once and reused
note_totals = [len([n for n in m if n > 0]) for m, _d, _l in something_midi_corrected]
empty = sum(1 for m, _d, _l in something_midi_corrected if m == [0, 0, 0, 0, 0, 0, 0, 0])
single = note_totals.count(1)
full = sum(1 for total in note_totals if total >= 3)
partial = note_totals.count(2)

print(f"\n{'='*100}")
print("SUMMARY:")
print(f"  Empty MIDI:     {empty}")
print(f"  Single notes:   {single}")
print(f"  Partial (2):    {partial}")
print(f"  Full chords(3+): {full}")
print(f"{'='*100}")

RAW DATA INSPECTION - What's ACTUALLY in something_midi_corrected?

Total elements: 135

First 50 elements with ALL details:

[  0] label='<style>        ' dur= 0.0 notes=0 []                                       ‚Üê EMPTY
[  1] label='Rock Pop       ' dur= 0.0 notes=0 []                                       ‚Üê EMPTY
[  2] label='Form_intro     ' dur= 0.0 notes=0 []                                       ‚Üê EMPTY
[  3] label='|              ' dur= 0.0 notes=0 []                                       ‚Üê EMPTY
[  4] label='maj            ' dur= 2.0 notes=3 ['F3', 'A3', 'C4']                       ‚Üê FULL CHORD ‚úì
[  5] label='maj            ' dur= 1.0 notes=3 ['D#3', 'G3', 'A#3']                     ‚Üê FULL CHORD ‚úì
[  6] label='dom7           ' dur= 1.0 notes=4 ['G2', 'B2', 'D3', 'F3']                 ‚Üê FULL CHORD ‚úì
[  7] label='/              ' dur= 1.0 notes=0 []                                       ‚Üê EMPTY
[  8] label='D              ' dur= 1.0 notes=4 ['D3', 'G3', 'B2

In [74]:
# DIAGNOSTIC: show the first 30 tokens at each preprocessing stage
print("="*80)
print("DIAGNOSTIC: Checking conversion results")
print("="*80)

# (heading, token sequence) for every stage, in pipeline order
stages = (
    ("\nBEFORE replaceTheseChords (first 30 tokens):", something_tokenized),
    ("\nAFTER replaceTheseChords (first 30 tokens):", something_tokenized_corrected),
    ("\nAFTER adding 'maj' tokens (first 30 tokens):", processed_sequence),
)
for heading, sequence in stages:
    print(heading)
    for i, token in enumerate(sequence[:30]):
        print(f"  [{i:2d}] {token}")

print("\n" + "="*80)


DIAGNOSTIC: Checking conversion results

BEFORE replaceTheseChords (first 30 tokens):
  [ 0] <style>
  [ 1] Rock Pop
  [ 2] Form_intro
  [ 3] |
  [ 4] .
  [ 5] 2.0
  [ 6] F
  [ 7] major
  [ 8] .
  [ 9] 1.0
  [10] Eb
  [11] major
  [12] .
  [13] 1.0
  [14] G
  [15] dominant
  [16] /
  [17] D
  [18] Form_Segno
  [19] |:
  [20] .
  [21] 4.0
  [22] C
  [23] major
  [24] |
  [25] .
  [26] 4.0
  [27] C
  [28] major-seventh
  [29] |

AFTER replaceTheseChords (first 30 tokens):
  [ 0] <style>
  [ 1] Rock Pop
  [ 2] Form_intro
  [ 3] |
  [ 4] .
  [ 5] 2.0
  [ 6] F
  [ 7] maj
  [ 8] .
  [ 9] 1.0
  [10] Eb
  [11] maj
  [12] .
  [13] 1.0
  [14] G
  [15] dom7
  [16] /
  [17] D
  [18] Form_Segno
  [19] |:
  [20] .
  [21] 4.0
  [22] C
  [23] maj
  [24] |
  [25] .
  [26] 4.0
  [27] C
  [28] maj7
  [29] |

AFTER adding 'maj' tokens (first 30 tokens):
  [ 0] <style>
  [ 1] Rock Pop
  [ 2] Form_intro
  [ 3] |
  [ 4] .
  [ 5] 2.0
  [ 6] F
  [ 7] maj
  [ 8] .
  [ 9] 1.0
  [10] Eb
  [11] maj
  [12] .
  [13]

In [75]:
# ROOT CAUSE ANALYSIS: Why is export_to_midi broken?
# Re-enacts the "dot" look-ahead on the converted events and reports which
# element each '.' would capture, then lists the real (3+ note) chords.
print("="*100)
print("EXPORT FUNCTION DEBUG - Understanding the BROKEN logic")
print("="*100)

print("\nThe export function looks for '.' markers and then searches FORWARD for the chord.")
print("Let's see what it's actually capturing:\n")

# Simulate what export_to_midi does
# Labels that end the look-ahead (together with any 'Form_*' label)
after_chords = {'.', '|', ':|', '|:', 'N.C.', '<end>', '/'}
dot_captures = []

for i, element in enumerate(something_midi_corrected):
    chord = element[2]

    if chord == '.' and i < len(something_midi_corrected) - 2:
        ref = i
        counter = 0
        doIt = True

        # Look ahead until a structural element is reached; counter ends up as
        # the number of consecutive non-structural elements after the dot
        while doIt and ref < len(something_midi_corrected)-1:
            counter += 1
            ref += 1
            next_element = something_midi_corrected[ref]
            next_label = next_element[2]

            if next_label in after_chords or str(next_label).startswith('Form_'):
                doIt = False
                counter -= 1

        if counter > 0:
            # The captured element is the last non-structural one of that run
            captured = something_midi_corrected[i+counter]
            midi_data = captured[0]
            label = captured[2]
            note_count = len([n for n in midi_data if n > 0])

            dot_captures.append({
                'dot_index': i,
                'captured_index': i+counter,
                'label': label,
                'midi': midi_data,
                'note_count': note_count
            })

            # Only the first 10 captures are printed
            if len(dot_captures) <= 10:
                print(f"Dot at [{i:3d}] ‚Üí captured [{i+counter:3d}] '{label}': {midi_data[:4]}... ({note_count} notes)")

print(f"\nTotal dots processed: {len(dot_captures)}")
print(f"\nPROBLEM DIAGNOSIS:")

# Check what labels are being captured
captured_labels = {}
for cap in dot_captures:
    label = cap['label']
    captured_labels[label] = captured_labels.get(label, 0) + 1

print(f"\nWhat the export function is capturing:")
for label, count in captured_labels.items():
    print(f"  {label}: {count} times")

# Check for full chord voicings being skipped
print(f"\n\nWhat's in the FULL converted data (showing all chord types):")
full_chords = []
for i, (midi, duration, label) in enumerate(something_midi_corrected):
    if midi != [0, 0, 0, 0, 0, 0, 0, 0]:
        note_count = len([n for n in midi if n > 0])
        if note_count >= 3:  # Real chords
            full_chords.append((i, label, midi, note_count))

print(f"\nTotal REAL chords in converted data: {len(full_chords)}")
print("First 10 real chords:")
for i, label, midi, note_count in full_chords[:10]:
    notes = [pretty_midi.note_number_to_name(n) for n in midi if n > 0]
    print(f"  [{i:3d}] '{label}': {notes} ({note_count} notes)")

print(f"\n{'='*100}")
print("THE BUG: Export function's 'dot logic' is NOT finding the full chord voicings!")
print("It's only capturing root notes or partial data, not the complete chords.")
# Closing rule: the original string lacked the f-prefix and opening brace, so
# it printed the literal text ='*100} instead of a line of '=' characters.
print(f"{'='*100}")

EXPORT FUNCTION DEBUG - Understanding the BROKEN logic

The export function looks for '.' markers and then searches FORWARD for the chord.
Let's see what it's actually capturing:


Total dots processed: 0

PROBLEM DIAGNOSIS:

What the export function is capturing:


What's in the FULL converted data (showing all chord types):

Total REAL chords in converted data: 71
First 10 real chords:
  [  4] 'maj': ['F3', 'A3', 'C4'] (3 notes)
  [  5] 'maj': ['D#3', 'G3', 'A#3'] (3 notes)
  [  6] 'dom7': ['G2', 'B2', 'D3', 'F3'] (4 notes)
  [  8] 'D': ['D3', 'G3', 'B2', 'F3'] (4 notes)
  [ 11] 'maj': ['C3', 'E3', 'G3'] (3 notes)
  [ 13] 'maj7': ['C3', 'E3', 'G3', 'B3'] (4 notes)
  [ 15] 'dom7': ['C3', 'E3', 'G3', 'A#3'] (4 notes)
  [ 17] 'maj': ['F3', 'A3', 'C4'] (3 notes)
  [ 18] 'maj': ['F3', 'A3', 'C4'] (3 notes)
  [ 20] 'E': ['E3', 'F4', 'A3', 'C4'] (4 notes)

THE BUG: Export function's 'dot logic' is NOT finding the full chord voicings!
It's only capturing root notes or partial data, not the c

## 5. Export and Verify MIDI

Export the corrected MIDI and verify the slash chords are correct in the final output.

In [76]:
# Use the WORKING export_to_midi method from voicing.py, then read the file back.
from itertools import groupby

# One directory/name pair feeds both the export call and the verification path
export_dir = "../dataset/midi_files/mpe/"
export_name = "TESTING_Something_FIXED"
output_path = f"{export_dir}{export_name}.mid"

print("Exporting 'Something' to MIDI using voicing.export_to_midi()...")
voicing_corrected.export_to_midi(something_midi_corrected, export_name, export_dir)

print(f"‚úì Exported to: {output_path}")

# Load and verify
midi_data = pretty_midi.PrettyMIDI(output_path)
print(f"\nMIDI File: {export_name}.mid")
print(f"Duration: {midi_data.get_end_time():.2f} seconds")
print(f"Total instruments (MPE channels): {len(midi_data.instruments)}")

# Gather the notes of every instrument, ordered by start time (stable sort)
all_notes = [note for inst in midi_data.instruments for note in inst.notes]
all_notes.sort(key=lambda n: n.start)

print(f"Total notes: {len(all_notes)}")

# Show chord summary - notes sharing a start time (rounded to 10 ms) form one chord
if all_notes:
    print("\nChord progression (first 10 chords):")
    chord_count = 0
    grouped = groupby(all_notes, key=lambda n: round(n.start, 2))
    for chord_count, (time, notes_at_time) in enumerate(grouped, start=1):
        pitches = [pretty_midi.note_number_to_name(n.pitch) for n in notes_at_time]
        print(f"  Time {time:6.2f}s: {pitches}")
        if chord_count >= 10:
            break

    print(f"\n‚úì SUCCESS! Song has {chord_count}+ chords")
else:
    print("\n‚ö†Ô∏è  NO NOTES IN MIDI FILE!")

Exporting 'Something' to MIDI using voicing.export_to_midi()...
‚úì MIDI file created: TESTING_Something_FIXED.mid
‚úì Exported to: ../dataset/midi_files/mpe/TESTING_Something_FIXED.mid

MIDI File: TESTING_Something_FIXED.mid
Duration: 74.00 seconds
Total instruments (MPE channels): 1
Total notes: 253

Chord progression (first 10 chords):
  Time   0.00s: ['F3', 'A3', 'C4']
  Time   1.00s: ['D#3', 'G3', 'A#3']
  Time   1.50s: ['G2', 'B2', 'D3', 'F2']
  Time   2.00s: ['D3', 'G3', 'B2', 'F2']
  Time   2.50s: ['C3', 'E3', 'G3']
  Time   4.50s: ['C3', 'E3', 'G3', 'B2']
  Time   6.50s: ['C3', 'E3', 'G3', 'A#3']
  Time   8.50s: ['F3', 'A3', 'C4']
  Time  10.00s: ['F3', 'A3', 'C4']
  Time  10.50s: ['E3', 'F4', 'A3', 'C3']

‚úì SUCCESS! Song has 10+ chords


In [77]:
# BRUTAL HONESTY DIAGNOSTIC: Compare what we WANTED vs what we GOT
# Compares the 3+ note voicings in the converted events with the note groups
# read back from the exported MIDI file, then prints a 0-100 score.
print("="*100)
print("STUPIDITY DIAGNOSTIC REPORT")
print("="*100)

# 1. What we WANTED (from converted data)
print("\n1. EXPECTED CHORDS (from convert_chords_to_voicing):")
print("-"*100)
expected_chords = []
for i, (midi, duration, label) in enumerate(something_midi_corrected):
    # Only count actual chord voicings (skip structural markers)
    if midi != [0, 0, 0, 0, 0, 0, 0, 0] and len([n for n in midi if n > 0]) >= 3:
        notes = [pretty_midi.note_number_to_name(n) for n in midi if n > 0]
        expected_chords.append((i, notes, midi))
        if len(expected_chords) <= 15:
            print(f"  [{i:3d}] {str(notes):<50} {midi}")

print(f"\nTotal expected chords: {len(expected_chords)}")

# 2. What we GOT (from MIDI file)
print("\n2. ACTUAL MIDI FILE OUTPUT:")
print("-"*100)
# Only the file load is guarded, so a bug in the comparison below surfaces as
# its own traceback instead of being reported as a MIDI loading error.
try:
    midi_data = pretty_midi.PrettyMIDI(output_path)
except Exception as e:
    print(f"\n‚ùå ERROR LOADING MIDI: {e}")
    import traceback
    traceback.print_exc()
    stupidity_score = 100
    print(f"\nSTUPIDITY SCORE: {stupidity_score}/100 - CAN'T EVEN LOAD THE FILE!")
else:
    # Collect all notes
    all_notes = []
    for inst in midi_data.instruments:
        all_notes.extend(inst.notes)
    all_notes = sorted(all_notes, key=lambda n: (n.start, n.pitch))
    
    # Group by start time (rounded to 1 ms)
    from itertools import groupby
    actual_chords = []
    for time, notes_at_time in groupby(all_notes, key=lambda n: round(n.start, 3)):
        notes_list = list(notes_at_time)
        pitches = [n.pitch for n in notes_list]
        pitch_names = [pretty_midi.note_number_to_name(p) for p in pitches]
        actual_chords.append((time, pitch_names, pitches))
        if len(actual_chords) <= 15:
            print(f"  Time {time:6.2f}s: {str(pitch_names):<50} {pitches}")
    
    print(f"\nTotal actual chords: {len(actual_chords)}")
    
    # 3. BRUTAL COMPARISON
    print("\n3. REALITY CHECK:")
    print("-"*100)
    
    expected_count = len(expected_chords)
    actual_count = len(actual_chords)
    missing_chords = expected_count - actual_count
    # Guarded: with no expected chords the percentage would divide by zero
    loss_pct = 100*missing_chords/expected_count if expected_count else 0.0
    
    print(f"  Expected: {expected_count} chords")
    print(f"  Got:      {actual_count} chords")
    print(f"  Missing:  {missing_chords} chords ({loss_pct:.1f}% loss!)")
    
    if len(actual_chords) == 0:
        print("\n  ‚ùå MIDI FILE IS COMPLETELY EMPTY!")
        print("  ‚ùå EXPORT FUNCTION WROTE NOTHING!")
        print("  ‚ùå TOTAL FAILURE!")
        stupidity_score = 100
        
    elif len(actual_chords) == 1:
        print("\n  ‚ùå MIDI FILE HAS ONLY ONE CHORD!")
        print("  ‚ùå Lost 99.9% of the song!")
        stupidity_score = 100
        
    elif missing_chords > expected_count * 0.9:
        print(f"\n  ‚ùå LOST {loss_pct:.0f}% OF THE SONG!")
        print("  ‚ùå EXPORT FUNCTION IS COMPLETELY BROKEN!")
        stupidity_score = 100
        
    elif missing_chords > expected_count * 0.5:
        print(f"\n  ‚ùå LOST MORE THAN HALF THE CHORDS!")
        print("  ‚ùå EXPORT LOGIC IS FUNDAMENTALLY BROKEN!")
        stupidity_score = 95
        
    else:
        # Check if chords actually match: the k-th expected voicing is compared
        # with the k-th time group, as sorted lists of non-zero pitches
        mismatches = 0
        for i in range(min(len(expected_chords), len(actual_chords))):
            exp_midi = expected_chords[i][2]
            act_midi = actual_chords[i][2]
            exp_notes = sorted([n for n in exp_midi if n > 0])
            act_notes = sorted(act_midi)
            if exp_notes != act_notes:
                mismatches += 1
                # Only the first 3 mismatches are printed
                if mismatches <= 3:
                    print(f"  ‚ùå Chord {i}: Expected {exp_notes} but got {act_notes}")
        
        if mismatches > expected_count * 0.5:
            print(f"\n  ‚ùå {mismatches}/{expected_count} chords are WRONG!")
            print("  ‚ùå VOICING IS COMPLETELY BROKEN!")
            stupidity_score = 90
        elif mismatches > expected_count * 0.2:
            print(f"\n  ‚ö†Ô∏è  {mismatches}/{expected_count} chords don't match")
            stupidity_score = 70
        elif mismatches > 0:
            print(f"\n  ‚ö†Ô∏è  {mismatches} chords have differences")
            stupidity_score = 30
        else:
            print("\n  ‚úì Chords look correct!")
            stupidity_score = 5
    
    print(f"\n{'='*100}")
    print(f"STUPIDITY SCORE: {stupidity_score}/100")
    print(f"{'='*100}")
    
    if stupidity_score >= 90:
        print("\nüí©üí©üí© VERDICT: CATASTROPHICALLY BROKEN")
        print("The export function is writing almost nothing. The 'continue' fix broke everything.")
        print("Need to revert to original export logic and fix the ROOT CAUSE of empty MIDI chords.")
        
    elif stupidity_score >= 70:
        print("\nüí©üí© VERDICT: SERIOUSLY BROKEN")
        print("Export produces wrong chords. Voicing logic is broken.")
        
    elif stupidity_score >= 30:
        print("\nüí© VERDICT: PARTIALLY BROKEN")
        print("Has some issues but mostly works.")
        
    else:
        print("\n‚úì VERDICT: WORKING")

STUPIDITY DIAGNOSTIC REPORT

1. EXPECTED CHORDS (from convert_chords_to_voicing):
----------------------------------------------------------------------------------------------------
  [  4] ['F3', 'A3', 'C4']                                 [53, 57, 60, 0, 0, 0, 0, 0]
  [  5] ['D#3', 'G3', 'A#3']                               [51, 55, 58, 0, 0, 0, 0, 0]
  [  6] ['G2', 'B2', 'D3', 'F2']                           [43, 47, 50, 41, 0, 0, 0, 0]
  [  8] ['D3', 'G3', 'B2', 'F2']                           [50, 55, 47, 41, 0, 0, 0, 0]
  [ 11] ['C3', 'E3', 'G3']                                 [48, 52, 55, 0, 0, 0, 0, 0]
  [ 13] ['C3', 'E3', 'G3', 'B2']                           [48, 52, 55, 47, 0, 0, 0, 0]
  [ 15] ['C3', 'E3', 'G3', 'A#3']                          [48, 52, 55, 58, 0, 0, 0, 0]
  [ 17] ['F3', 'A3', 'C4']                                 [53, 57, 60, 0, 0, 0, 0, 0]
  [ 18] ['F3', 'A3', 'C4']                                 [53, 57, 60, 0, 0, 0, 0, 0]
  [ 20] ['E3', 'F4', 'A3', 'C3

## Summary

**Slash Chord Fix**:
- Keep full chord voicing
- Move old root UP one octave (+12 semitones)
- Add new bass note at the beginning in bass range

**Example: G7/D**
- G7 chord: `[43, 65, 71]` (G2, F4, B4)
- Old root G2 (43) → move up to G3 (55)
- Add D bass (50) at beginning
- **Result**: `[50, 55, 65, 71]` (D3, G3, F4, B4) ✓