<a href="https://colab.research.google.com/github/ShakhovaP/musical-chord-recognition/blob/main/chord_transcript.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [133]:
import csv
import numpy as np
import os
import re



In [134]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [135]:
CHORDS_DIR = '/content/gdrive/MyDrive/data/data_transcripts'


In [136]:
def _read_file(song_path):
    """Reads file with chord transcriptions of the song

        :param song_path (string): path to the song transcription file
        :return chord_transcription (list): ChordStartTime, ChordEndTime and ChordLabel for each chord 
    """
    try:
        with open(song_path, "r") as csv_file:
            try:
                data = csv.reader(csv_file, delimiter=" ")
                chord_transcription = [[float(row[0]), float(row[1]), str(row[2])] for row in data]
                return chord_transcription
            except:
                print(f"Problem in processing {os.path.basename(song_path)}.\n"
                        "Data format is incorrect!")
                # [print(row[0]) for row in data]   
    except OSError:
        print(f"Can't open {os.path.basename(song_path)}")

# t = read_song(FILE_NAME)
# t = np.array(t)[:, 2]
# print(t.tolist())


In [137]:
def count_unique_values(data):
    """Counts how many times each value of data was used

        :param data (list):
        :return (dictionary): dictionary where keys are all values used in data and values are the numbers of times they were used
    """
    data = np.array(data).reshape(-1)
    # res = {}
    values, counts = np.unique(data, return_counts=True)  
    return dict(zip(values, counts))

def _get_column(matrix, i):
    """Returns needed column of given matrix

        :param matrix (list): given matrix
        :param i (int): column index

        :return (list): list of values of column with index i in matrix
    """
    return [row[i] for row in matrix]

In [138]:
def get_chord_transcripts(dirname):
    """Reads and saves chord transcriptions of each song from the directory

        :param dirname (string): name of the directory with transcription files
        :return transcriptions (dictionary): dictionary where keys are song transcription file names and 
                                                              values are [ [ChordStartTime, ChordEndTime, Chord], ...] of each song
    """
    transcriptions = {}
    for filename in os.scandir(dirname):
        if filename.is_file():
            t_chords = _read_file(filename)
            transcriptions[os.path.basename(filename)] = t_chords
    return transcriptions

def count_chord_types(song_transcripts_arr):
    """Counts how many times each chord was used in input dataset

        :param song_transcripts_arr (list): list of chord transcriptions
        :return chord_counts (dictionary): dictionary where keys are chord labels and values are the numbers of times they were used
    """
    all_used_chords = []
    for st in song_transcripts_arr:
        all_used_chords += _get_column(st, 2)
    chord_counts = count_unique_values(all_used_chords)
    return chord_counts

In [139]:
def _simplify_chord(chord):
    """Replaces complicated chord with simple analog

        :param chord (string):  input chord
        :return base (string): simplified chord
    """
    base = re.search("^[A-GN][#b]?", chord).group()   # find the base note in chord label)
    n = ["A", "B", "C", "D", "E", "F", "G"]
    if re.search(".*min.*|^[A-G][b#]?m.*", chord):    # if the chord is minor
        base += "m"                                   # add "m" to the base note
    
    if "#" in base:
        n.reverse()
        index = n.index(base[0]) - 1
        base += re.sub("[A-G]#", "|" + n[index] + "b", base)
    elif "b" in base:
        index = n.index(base[0]) - 1
        base = re.sub("[A-G]b", n[index] + "#", base) + "|" + base

    return base

def simplify_song(chord_transcript):
    """Replace all complicated chords with simple analogs

        :param song_transcript (list): list of [ChordStartTime, ChordEndTime, ChordLabel]
        :return 
    """
    for row in chord_transcript:
        row[2] = _simplify_chord(row[2])

In [140]:
# Getting a chord transcriptions for every song in directory
chord_transcriptions = get_chord_transcripts(CHORDS_DIR)


# New Section

In [141]:
# Count and print all chords and how many times they had been used
print('ALL USED CHORDS')
chord_types = count_chord_types(chord_transcriptions.values())
print("\nNumber of chord classes: ", len(list(chord_types.keys())))
print('\n')
[print(f'{key}: {value} ( {round(value*100/sum(chord_types.values()), 2)}% )') 
                                            for key, value in chord_types.items()]


ALL USED CHORDS

Number of chord classes:  219


A: 360 ( 11.79% )
A/3: 12 ( 0.39% )
A/5: 6 ( 0.2% )
A:(1): 4 ( 0.13% )
A:7: 38 ( 1.24% )
A:7(#9): 1 ( 0.03% )
A:7/3: 1 ( 0.03% )
A:9: 2 ( 0.07% )
A:maj: 22 ( 0.72% )
A:maj(9): 1 ( 0.03% )
A:maj/5: 3 ( 0.1% )
A:maj6: 1 ( 0.03% )
A:min: 64 ( 2.1% )
A:min(2): 2 ( 0.07% )
A:min/5: 2 ( 0.07% )
A:min/b3: 2 ( 0.07% )
A:min7: 22 ( 0.72% )
A:min7(*5): 1 ( 0.03% )
A:min7/b7: 1 ( 0.03% )
A:sus4: 4 ( 0.13% )
A:sus4/5: 2 ( 0.07% )
Ab: 7 ( 0.23% )
Ab:maj: 30 ( 0.98% )
Ab:maj(*3): 1 ( 0.03% )
Ab:maj/5: 7 ( 0.23% )
Ab:maj6: 6 ( 0.2% )
Ab:maj7: 4 ( 0.13% )
B: 36 ( 1.18% )
B/3: 6 ( 0.2% )
B/5: 10 ( 0.33% )
B:(1): 1 ( 0.03% )
B:7: 40 ( 1.31% )
B:7(#9): 2 ( 0.07% )
B:9: 8 ( 0.26% )
B:dim: 6 ( 0.2% )
B:maj: 58 ( 1.9% )
B:maj(*3): 4 ( 0.13% )
B:maj/3: 1 ( 0.03% )
B:maj/5: 2 ( 0.07% )
B:maj/9: 1 ( 0.03% )
B:min: 72 ( 2.36% )
B:min/5: 14 ( 0.46% )
B:min/6: 6 ( 0.2% )
B:min/7: 6 ( 0.2% )
B:min/b7: 6 ( 0.2% )
B:sus2: 1 ( 0.03% )
Bb: 26 ( 0.85% )
Bb/5: 3 ( 0.1% )


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [142]:
# Simplify complicated chords: replace them with one of the list of 24 base chords
[simplify_song(transcript) for transcript in chord_transcriptions.values()]

print('\nSIMPLIFIED CHORDS:\n')
simplified_chord_types = count_chord_types(chord_transcriptions.values())
[print(f'{key}: {value} ( {round(value*100/sum(simplified_chord_types.values()), 2)}% )') 
                                            for key, value in simplified_chord_types.items()]

chord_classes = list(simplified_chord_types.keys())
print(chord_classes)


SIMPLIFIED CHORDS:

A: 457 ( 14.96% )
A#m|Bbm: 3 ( 0.1% )
A#|Bb: 100 ( 3.27% )
Am: 94 ( 3.08% )
B: 176 ( 5.76% )
Bm: 104 ( 3.41% )
C: 219 ( 7.17% )
C#m|Dbm: 37 ( 1.21% )
C#|Db: 34 ( 1.11% )
Cm: 35 ( 1.15% )
D: 400 ( 13.1% )
D#m|Ebm: 2 ( 0.07% )
D#|Eb: 62 ( 2.03% )
Dm: 72 ( 2.36% )
E: 313 ( 10.25% )
Em: 117 ( 3.83% )
F: 78 ( 2.55% )
F#m|Gbm: 106 ( 3.47% )
F#|Gb: 65 ( 2.13% )
Fm: 34 ( 1.11% )
G: 330 ( 10.81% )
G#m|Abm: 53 ( 1.74% )
G#|Ab: 57 ( 1.87% )
Gm: 19 ( 0.62% )
N: 87 ( 2.85% )
['A', 'A#m|Bbm', 'A#|Bb', 'Am', 'B', 'Bm', 'C', 'C#m|Dbm', 'C#|Db', 'Cm', 'D', 'D#m|Ebm', 'D#|Eb', 'Dm', 'E', 'Em', 'F', 'F#m|Gbm', 'F#|Gb', 'Fm', 'G', 'G#m|Abm', 'G#|Ab', 'Gm', 'N']
