In [1]:
import pandas as pd
import music21
import re
import ast

## Almacenamos los acordes que music21 reconoce de forma simbólica

In [None]:
# Gather the chord types that music21 recognises (later written to .csv).
# chord_types holds the keys of CHORD_TYPES and chord_data the value stored
# under each key, both in the dictionary's iteration order.

chord_types = list(music21.harmony.CHORD_TYPES)
chord_data = [music21.harmony.CHORD_TYPES[kind] for kind in chord_types]

In [None]:
# Transpose chord_data into parallel tuples. Unpacking into two names only
# works when the transposed result has exactly two items.
chord_notes, chord_symbols = zip(*chord_data)

In [None]:
# Display the second transposed tuple for visual inspection.
chord_symbols

In [None]:
# Assemble the three parallel sequences into one table and write it to CSV
# without the index column.
df_chord_types = pd.DataFrame(
    {
        'Chord Type': chord_types,
        'Chord Notes': chord_notes,
        'Chord Symbols': chord_symbols,
    }
)
df_chord_types.to_csv(
    '/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/chord_types.csv',
    index=False,
)

## Clear Mapping Pitch Class

In [None]:
# American (letter) note names and their sharp / flat spellings, written
# with an 's' or 'b' suffix respectively.
NOTES_AMERICAN = list('CDEFGAB')
NOTES_AMERICAN_SHARP = [letter + 's' for letter in NOTES_AMERICAN]
NOTES_AMERICAN_FLAT = [letter + 'b' for letter in NOTES_AMERICAN]

# Latin (solfege) note names in the same order, with the same suffix scheme.
NOTES_LATIN = 'do re mi fa sol la si'.split()
NOTES_LATIN_SHARP = [syllable + 's' for syllable in NOTES_LATIN]
NOTES_LATIN_FLAT = [syllable + 'b' for syllable in NOTES_LATIN]

# Latin -> American lookup tables, one per accidental family.
NOTES = {latin: american for latin, american in zip(NOTES_LATIN, NOTES_AMERICAN)}
NOTES_SHARP = {latin: american for latin, american in zip(NOTES_LATIN_SHARP, NOTES_AMERICAN_SHARP)}
NOTES_FLAT = {latin: american for latin, american in zip(NOTES_LATIN_FLAT, NOTES_AMERICAN_FLAT)}

In [None]:
def extract_chord_symbol(chord, note):
    """Split a chord name into its tonic and the remaining symbol.

    Parameters
    ----------
    chord : str
        Full chord name. Its first ``len(tonic)`` characters are dropped to
        obtain the symbol.
    note : str
        Tonic in Latin spelling; must be a key of NOTES, NOTES_SHARP or
        NOTES_FLAT.

    Returns
    -------
    tuple
        ``(tonic, symbol, chord)`` where ``tonic`` is the American spelling
        of ``note``, ``symbol`` is ``chord[len(tonic):]`` and ``chord`` is
        returned unchanged.

    Raises
    ------
    ValueError
        If ``note`` is not present in any of the three lookup tables.
    """
    # The three tables have disjoint keys, so the lookup order does not
    # affect the result.
    for table in (NOTES, NOTES_SHARP, NOTES_FLAT):
        if note in table:
            tonic = table[note]
            break
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(f"Unknown tonic note {note!r} for chord {chord!r}")

    symbol = chord[len(tonic):]
    return tonic, symbol, chord

In [None]:
# Locations of the chord mapping table and of the progressions dataset.
mapping_path = '/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/chords_mapping.csv'
progresions_path = '/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/chordonomicon_v2.csv'

# Load both tables.
df_mapping = pd.read_csv(mapping_path)
df_progresions_meta = pd.read_csv(progresions_path, low_memory=False)

# Columns used by the following cells.
df_chords = df_mapping['Chords']
df_progresions = df_progresions_meta['chords']

# 'Notes' and 'Degrees' are stored as text; parse each cell as a Python literal.
df_notes = df_mapping['Notes'].apply(ast.literal_eval)
df_degrees = df_mapping['Degrees'].apply(ast.literal_eval)

In [None]:
ALL_NOTES = NOTES_AMERICAN + NOTES_AMERICAN_SHARP + NOTES_AMERICAN_FLAT

# First element of every row's note list; handed to extract_chord_symbol as
# the tonic name of that row's chord.
notes_in = [n[0] for n in df_notes]

chords_by_tonic = {}
symbols_by_tonic = {}
degrees_by_tonic = {}
notes_by_tonic = {}

chord_degrees_by_tonic = {}

# Stringify every progression once up front; the membership test below runs
# once per candidate chord and previously re-converted every progression
# each time.
progresions_text = [str(prog) for prog in df_progresions]

for NOTE in ALL_NOTES:

    # Only 'C' is processed: it is the first entry of ALL_NOTES and the loop
    # stops at the first note that is not 'C'.
    if NOTE != 'C':
        break

    chords = []
    symbols = []
    chords_degrees = []
    chords_notes = []

    for chord, degrees, notes, note_in in zip(df_chords, df_degrees, df_notes, notes_in):

        tonic, symbol, _ = extract_chord_symbol(chord, note_in)

        if tonic != NOTE:
            continue

        # A symbol counts as "used" when it appears followed by a space
        # or by a '/' in any progression.
        symbol_space = symbol + " "
        symbol_inv = symbol + "/"

        # NOTE(review): this is a plain substring test, so an empty symbol
        # (needle " ") matches any progression containing a space, and a
        # short symbol also matches inside a longer one that ends with it.
        if any(symbol_space in text or symbol_inv in text for text in progresions_text):
            # Values are wrapped in literal double quotes before storage.
            chords.append(f'"{chord}"')
            symbols.append(f'"{symbol}"')
            chords_degrees.append(f'"{str(degrees)}"')
            chords_notes.append(f'"{str(notes)}"')

    chords = tuple(chords)
    symbols = tuple(symbols)
    chord_degrees = tuple(chords_degrees)
    chord_notes = tuple(chords_notes)

    chord_degrees_by_tonic[NOTE] = (chords, symbols, chord_degrees, chord_notes)

# The four tuples are built in lockstep, so the four lengths must agree.
print(len(chord_degrees_by_tonic['C'][0]))
print(len(chord_degrees_by_tonic['C'][1]))
print(len(chord_degrees_by_tonic['C'][2]))
print(len(chord_degrees_by_tonic['C'][3]))

In [None]:
# Persist the chords collected for tonic C: one column per parallel tuple
# (chords, symbols, degrees, notes).
c_chords, c_symbols, c_degrees, c_notes = chord_degrees_by_tonic['C']
df_chords_and_grades = pd.DataFrame(
    {
        'Chords': c_chords,
        'Symbols': c_symbols,
        'Degrees': c_degrees,
        'Notes': c_notes,
    }
)

# The index column is written as well (no index=False here).
df_chords_and_grades.to_csv('/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/Chords_Symbols.csv')

## Validar mapeo por grados

In [None]:
# Inputs: the symbol mapping under validation and the C-chord symbols table.
mapping_path = '/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/ChordSymbol_Mapping_v1_(gpt5).csv'
symbols_path = '/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/Chords_Symbols.csv'

df_symbols = pd.read_csv(symbols_path)
df_mapping = pd.read_csv(mapping_path)

# Keep only the mapping rows whose 'Original Symbol' also occurs in the
# 'Symbols' column of the symbols table.
symbol_is_known = df_mapping['Original Symbol'].isin(df_symbols['Symbols'])
df_mapping = df_mapping[symbol_is_known]

In [None]:
# Display the filtered mapping table.
df_mapping

In [None]:
# Display the symbols table.
df_symbols

In [None]:
# 'Notes' column of the filtered mapping and 'Degrees' column of the symbols
# table; the following cells compare them row by row.
notes = df_mapping['Notes']
degrees = df_symbols['Degrees']

In [None]:
# Convert Notes to a pitch class set --> "['C', 'E', 'G', 'B-']" >> "[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0]"

def convert_to_psc(note):
    """Return the 12-slot pitch-class vector of a chord as quoted text.

    ``note`` is passed to ``music21.chord.Chord``. Every pitch class the
    resulting chord reports is marked with 1 in a list of twelve slots, all
    others with 0. The list is returned as its string form wrapped in
    literal double quotes.
    """
    sounding = set(music21.chord.Chord(note).pitchClasses)
    pcs = [1 if pitch_class in sounding else 0 for pitch_class in range(12)]
    return f'"{pcs}"'


In [None]:
# For every stored note list: strip the surrounding double quotes, parse the
# text as a Python literal and convert it to its quoted pitch-class vector.
notes_degrees = [
    convert_to_psc(ast.literal_eval(raw_notes.strip('"')))
    for raw_notes in notes
]


In [None]:
# Inspect the container types of the three sequences used in the comparison below.
type(notes_degrees), type(degrees), type(notes)

In [None]:
# Compare the computed pitch-class vectors with the stored degrees, row by row.
# Both Series are read by position (.iloc): notes_degrees is a plain list, and
# `notes` comes from a filtered frame whose index labels need not be 0..n-1,
# so mixing label lookup with positional lookup could pair different rows.
erro_index = []
for i, computed in enumerate(notes_degrees):
    expected = degrees.iloc[i]
    if computed != expected:
        erro_index.append(i)
        print(f"Error en la fila {i}: {computed} != {expected} --> Acorde {notes.iloc[i]}")

In [None]:
# For every mismatching position, print the position followed by the symbol
# found at that position in each table, so the two can be compared by eye.
for i in erro_index:
    print(i)
    print(df_mapping['Original Symbol'].iloc[i])
    print(df_symbols['Symbols'].iloc[i])
    print('---------------------')

In [None]:
# Renumber the rows from 0 and discard the previous index labels.
df_mapping = df_mapping.reset_index(drop=True)

In [None]:
# Write the filtered, re-indexed mapping to CSV without the index column.
df_mapping.to_csv('/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/ChordSymbol_Mapping_v2.csv', index=False)

## Validar si music21 reconoce

In [None]:
# Load the chord-symbol mapping to be checked against music21.
# NOTE(review): this reads 'ChordSymbol_Mapping_v2_(gpt5).csv', whereas the
# previous section writes 'ChordSymbol_Mapping_v2.csv' — confirm the file is
# produced outside this notebook.
mapping_chord_symbol_path = "/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/ChordSymbol_Mapping_v2_(gpt5).csv"
df_chords = pd.read_csv(mapping_chord_symbol_path)

In [None]:
# Display the loaded mapping table.
df_chords

In [None]:
# Remove the literal double quotes from every chord symbol, then drop the
# first remaining character of each value (non-string entries stay missing).
df_chords_symbol = df_chords["ChordSymbol"].str.replace('"', '').str[1:]

In [None]:
# Re-attach a "C" root to every symbol and ask music21 to parse it.
chords = "C" + df_chords_symbol
check = 0  # number of chords music21 parsed successfully

chords_m21 = []
pitches_m21 = []

for c in chords:
    try:
        # Parse once and read both attributes from the same object.
        parsed = music21.harmony.ChordSymbol(c)
        chord_m21 = parsed.figure
        pitch_m21 = [str(p) for p in parsed.pitches]
        check += 1
    except Exception:
        # Deliberate best effort: any chord music21 cannot handle is recorded
        # as "Unknown" in both result lists so they stay aligned with the input.
        chord_m21 = "Unknown"
        pitch_m21 = "Unknown"
    chords_m21.append(chord_m21)
    pitches_m21.append(pitch_m21)

print(f"Se convirtieron {check} acordes correctamente")

In [None]:
# Store the music21 results as new columns; every value is formatted into
# text and wrapped in literal double quotes.
df_chords['chords_m21'] = [f'"{c}"' for c in chords_m21] 
df_chords['pitches_m21'] = [f'"{c}"' for c in pitches_m21]

In [None]:
# Display the mapping table including the two new music21 columns.
df_chords

In [None]:

# Write the extended mapping to CSV without the index column.
df_chords.to_csv('/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/ChordSymbol_Mapping_v3_(gpt5).csv', index=False)

## Validar reconocimiento por Music21

In [None]:
# Reload the v3 mapping from disk.
mapping_path = '/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/ChordSymbol_Mapping_v3_(gpt5).csv'
df_chords = pd.read_csv(mapping_path)

In [None]:
# Display the reloaded mapping table.
df_chords

In [None]:
# Convert to lists: parse the stored text of both columns as Python literals.
# NOTE(review): the next cell calls str.replace on these values, so the parse
# is expected to yield strings here (presumably because the stored text is
# wrapped in an extra pair of double quotes) — confirm.
df_notes_val = df_chords['Notes'].apply(ast.literal_eval)
df_notes_test = df_chords['pitches_m21'].apply(ast.literal_eval)

In [None]:
# Check whether music21 produced the expected notes for every chord type.

def _sorted_pitch_names(raw):
    """Return the sorted pitch names found in a list-like text value.

    Quotes, brackets and spaces are removed, every run of digits is deleted
    and the remainder is split on commas and sorted alphabetically.
    Non-string input is converted with ``str`` first.
    """
    text = raw if isinstance(raw, str) else str(raw)
    for unwanted in ("'", "[", "]", " "):
        text = text.replace(unwanted, "")
    return sorted(re.sub(r'\d+', '', text).split(","))


erro_count = 0
erro = []        # 'Correct' / 'Fail', one entry per row
chord_erro = []  # 'Original Symbol' of every failing row

for i in range(len(df_notes_val)):
    c_val = _sorted_pitch_names(df_notes_val.iloc[i])
    c_test = _sorted_pitch_names(df_notes_test.iloc[i])

    # Compare the complete lists. A position-by-position loop over c_val
    # alone accepts extra notes in c_test and raises IndexError when c_test
    # is shorter.
    if c_val == c_test:
        check = 'Correct'
    else:
        check = 'Fail'
        erro_count += 1
        chord_erro.append(df_chords['Original Symbol'].iloc[i])

    erro.append(check)

In [None]:
# Display the number of chords whose notes did not match.
erro_count

In [None]:
# Build a single mapping between 'Original Symbol' and 'ChordSymbol'.
# NOTE(review): str.replace('C', '') removes every 'C' in the value, not only
# a leading root letter — confirm no symbol contains another 'C'.
df_mirex_mapping = pd.DataFrame({
    'Original Symbol': df_chords['Original Symbol'],
    'ChordSymbol': df_chords['ChordSymbol'].str.replace('C', '')
})


In [None]:
# Write the two-column mapping to CSV without the index column.
df_mirex_mapping.to_csv('/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/mirex_mapping.csv', index=False)

## Filtrar mapeo por Music21

In [None]:
# Load the symbol mapping and the table of chord types exported from music21.
mirex_mapping = pd.read_csv('/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/mirex_mapping.csv')
music21_chord_types = pd.read_csv('/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/chord_types.csv')

In [None]:
# Column from the mapping table that will be matched against music21.
chord_symbol = mirex_mapping['ChordSymbol']

# Columns of the music21 chord-type table.
chord_types = music21_chord_types['Chord Type']
chord_notes = music21_chord_types['Chord Notes']
chord_symbols_m21 = music21_chord_types['Chord Symbols']


In [None]:
# Bring both sides to a comparable text form: drop the double quotes and turn
# every 'b' into '-' in the mapping symbols; drop the commas from the music21
# note strings.
chord_symbol_ = chord_symbol.str.replace('"', '').str.replace('b', '-')
chord_notes_ = chord_notes.astype(str).str.replace(',', '')

In [None]:
# Prefix every symbol with '1' after converting it to text.
# NOTE(review): presumably so it lines up with music21 note strings that
# begin with the root degree '1' — confirm.
chord_symbol_ = '1' + chord_symbol_.astype(str)

In [None]:
# Pair every mapping symbol with the music21 chord type whose normalised note
# string is identical to it.
chord_couples = []

# Exactly one entry per mapping symbol, so both lists can later be attached
# to the mapping table as columns.
symbol_chord = []
type_chord = []

for i, chord in enumerate(chord_symbol_):

    chord_couple = None

    for j, chord_21 in enumerate(chord_notes_):

        if chord == chord_21:
            chord_couple = ([chord, i], [chord_21, j])
            chord_couples.append(chord_couple)

            # j is a position produced by enumerate, so read by position.
            symbol_chord.append(chord_symbols_m21.iloc[j])
            type_chord.append(chord_types.iloc[j])

            print(f"Match found: {chord} == {chord_21}")
            # Stop at the first match: a second identical note string would
            # append another entry and misalign the lists with the mapping rows.
            break

    if not chord_couple:
        symbol_chord.append("Unknown")
        type_chord.append("Unknown")
        print(f"No match for: {chord}")

print(f"Total matches found: {len(chord_couples)}")
            

In [None]:
# Attach the match results as new columns; every value is formatted into text
# and wrapped in literal double quotes.
mirex_mapping['ChordSymbol_m21'] = [f'"{s}"' for s in symbol_chord]
mirex_mapping['ChordType_m21'] = [f'"{t}"' for t in type_chord]

In [None]:
# Write the extended mapping to CSV without the index column.
mirex_mapping.to_csv('/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/mirex_mapping_v2.csv', index=False)

## Recoger progresiones que solo contienen símbolos reconocidos por music21

In [2]:
# Locations of the progressions dataset and of the v2 symbol mapping.
progresions_path = '/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/chordonomicon_v2.csv'
mirex_mapping_path = '/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/mirex_mapping_v2.csv'

# Load both tables.
df_progresions_meta = pd.read_csv(progresions_path, low_memory=False)
df_mirex_mapping = pd.read_csv(mirex_mapping_path)

In [3]:
# Chord progression text of every row in the dataset.
df_progresions = df_progresions_meta['chords']

# Original symbols and the music21 symbol matched to each of them.
Original_Symbol = df_mirex_mapping['Original Symbol']
Chord_Symbol_m21 = df_mirex_mapping['ChordSymbol_m21']

In [4]:
# Remove the literal double quotes that wrap every stored value.
Chord_Symbol_m21 = Chord_Symbol_m21.str.replace('"', '')

In [5]:
# Index labels of the rows whose music21 symbol is "Unknown".
indices = Chord_Symbol_m21[Chord_Symbol_m21 == "Unknown"].index

# Strip the literal double quotes, select the original symbols at those
# labels and display them.
Original_Symbol = Original_Symbol.str.replace('"', '')
Original_Symbol_unknown = Original_Symbol.loc[indices]
Original_Symbol_unknown

4        majs9
5           b9
6         b7b9
7          7b9
10       7sus4
11    maj7sus4
12        add9
13       add11
14       add13
15         11s
16        11b9
17    majs911s
19     maj911s
21       b11b9
22        13b9
23       1311s
24         13b
26    maj1311s
28       1113b
33       minb9
34     minadd9
35    minadd11
36    minadd13
41    min1113b
45    dimadd11
47     dim11b9
49     dim13b9
Name: Original Symbol, dtype: object

In [6]:
# Distinct symbols without a music21 equivalent; progressions that mention
# any of them are filtered out below.
acordes_excluir = set(Original_Symbol_unknown)
acordes_excluir

{'1113b',
 '11b9',
 '11s',
 '1311s',
 '13b',
 '13b9',
 '7b9',
 '7sus4',
 'add11',
 'add13',
 'add9',
 'b11b9',
 'b7b9',
 'b9',
 'dim11b9',
 'dim13b9',
 'dimadd11',
 'maj1311s',
 'maj7sus4',
 'maj911s',
 'majs9',
 'majs911s',
 'min1113b',
 'minadd11',
 'minadd13',
 'minadd9',
 'minb9'}

In [7]:
def contiene_acorde_excluir(progresion, acordes=None):
    """Tell whether a progression mentions any of the excluded chord symbols.

    Parameters
    ----------
    progresion : str
        Progression text. Any non-string value (for example a missing value
        read as NaN) is reported as containing no excluded chord.
    acordes : iterable of str, optional
        Symbols to look for. Defaults to the module-level ``acordes_excluir``.

    Returns
    -------
    bool
        True if at least one symbol occurs in ``progresion``.

    Notes
    -----
    NOTE(review): matching is a plain substring test, so a symbol also
    matches inside longer tokens (e.g. 'b9' is found inside 'Bb9'). A
    progression can therefore be flagged without containing the excluded
    chord as a standalone symbol — confirm this over-exclusion is acceptable.
    """
    if not isinstance(progresion, str):
        # `x in progresion` raises TypeError for non-iterables such as float NaN.
        return False
    if acordes is None:
        acordes = acordes_excluir
    return any(acorde in progresion for acorde in acordes)

# Keep only the progressions for which contiene_acorde_excluir returns False.
progresions_filtradas = df_progresions[~df_progresions.apply(contiene_acorde_excluir)]

In [8]:
# Display the progressions that passed the filter.
progresions_filtradas

0         <intro_1> C <verse_1> F C E7 Amin C F C G7 C F...
1         <intro_1> E D A/Cs E D A/Cs <verse_1> E D A/Cs...
2         <intro_1> Csmin <verse_1> A Csmin A Csmin A Cs...
3         <intro_1> D Dmaj7 D Dmaj7 <verse_1> Emin A D G...
4         <intro_1> C <verse_1> G C G C <chorus_1> F Dmi...
                                ...                        
679801    D A D Bmin Amin D G Gmin F Emin A D Bmin Amin ...
679802    D G D A G D A D G D A G D A D G D A G D A D G ...
679804    E Fs E Fs E Fs E Fs E Fs E Fs B Cs Fs B Cs Fs ...
679805    E Csmin Fsmin B E Csmin Fsmin B E Csmin Fsmin ...
679806    A B7 E7 A Fs7 A E7 A D A D B7 A B7 E7 A Fs7 A ...
Name: chords, Length: 616252, dtype: object

In [9]:
# Index labels of the progressions that passed the filter.
indices_progresiones = progresions_filtradas.index

In [10]:
# Filter df_progresions_meta using the index labels of progresions_filtradas.
df_progresions_meta_filtrado = df_progresions_meta.loc[progresions_filtradas.index]

In [11]:
# Write the filtered metadata table to CSV without the index column.
df_progresions_meta_filtrado.to_csv('/mnt/c/Users/nehem/OneDrive - Universidad de Chile/Universidad/6to año/Data/MIDI/preprocced/Chordomicon/chordonomicon_v2_filtrered.csv', index=False)