<a href="https://colab.research.google.com/github/ShakhovaP/musical-chord-recognition/blob/main/chord_transcript_stft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import csv
import numpy as np
import os
import re

In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

In [2]:
# Directory passed to get_chord_transcripts() as the location of the chord transcription files
CHORDS_DIR = '/content/gdrive/MyDrive/data_cut/tr_7'

In [None]:
def _read_file(song_path):
    """Reads file with chord transcriptions of the song

        Every non-empty line must hold three space-separated fields:
        ChordStartTime, ChordEndTime and ChordLabel. Blank lines are skipped.
        Problems are reported with print() instead of being raised.

        :param song_path (string or path-like): path to the song transcription file
        :return chord_transcription (list): [ChordStartTime (float), ChordEndTime (float), ChordLabel (str)]
                                            for each chord, or None if the file can't be opened or
                                            its content doesn't match the expected format
    """
    try:
        # newline="" lets the csv module do its own line-ending handling
        with open(song_path, "r", newline="") as csv_file:
            try:
                rows = csv.reader(csv_file, delimiter=" ")
                # `if row` drops blank lines, which csv.reader yields as empty lists
                return [[float(row[0]), float(row[1]), str(row[2])] for row in rows if row]
            except (ValueError, IndexError, csv.Error):
                # ValueError: non-numeric time; IndexError: fewer than three fields
                print(f"Problem in processing {os.path.basename(song_path)}.\n"
                      "Data format is incorrect!")
    except OSError:
        print(f"Can't open {os.path.basename(song_path)}")
    return None


In [None]:
def count_unique_values(data):
    """Builds a frequency table of the values found in data

        :param data (list): values to tally; nested lists are flattened first
        :return (dictionary): maps each distinct value to the number of times it occurs in data
    """
    flattened = np.array(data).ravel()
    distinct, occurrences = np.unique(flattened, return_counts=True)
    return {value: count for value, count in zip(distinct, occurrences)}

def _get_column(matrix, i):
    """Extracts a single column from a row-major matrix

        :param matrix (list): sequence of rows, each indexable by i
        :param i (int): index of the column to extract

        :return (list): the i-th element of every row, in row order
    """
    column = []
    for row in matrix:
        column.append(row[i])
    return column

In [None]:
def get_chord_transcripts(dirname):
    """Reads and saves chord transcriptions of each song from the directory

        Only regular files directly inside dirname are read. Files for which
        _read_file returns None (it has already printed the reason) are left out,
        so every value of the result is a usable transcription.

        :param dirname (string): name of the directory with transcription files
        :return transcriptions (dictionary): dictionary where keys are song transcription file names and
                                             values are [ [ChordStartTime, ChordEndTime, Chord], ...] of each song
    """
    transcriptions = {}
    # the context manager closes the directory iterator as soon as the loop is done
    with os.scandir(dirname) as entries:
        for entry in entries:
            if not entry.is_file():
                continue
            t_chords = _read_file(entry.path)
            if t_chords is None:
                # a None entry would crash later code that iterates the transcriptions
                continue
            transcriptions[entry.name] = t_chords
    return transcriptions

def count_chord_types(song_transcripts_arr):
    """Tallies the occurrences of every chord label across a set of songs

        :param song_transcripts_arr (list): chord transcriptions, each a list of rows whose
                                            element at index 2 is the chord label
        :return chord_counts (dictionary): maps each chord label to the number of times it occurs
    """
    labels = [
        label
        for transcript in song_transcripts_arr
        for label in _get_column(transcript, 2)
    ]
    return count_unique_values(labels)

In [None]:
# Semitone offset of each natural note above C
_NATURAL_PITCH = {"C": 0, "D": 2, "E": 4, "F": 5, "G": 7, "A": 9, "B": 11}
# The 12 pitch classes spelled with sharps and with flats, indexed by semitone above C
_SHARP_NAMES = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")
_FLAT_NAMES = ("C", "Db", "D", "Eb", "E", "F", "Gb", "G", "Ab", "A", "Bb", "B")


def _simplify_chord(chord):
    """Replaces complicated chord with simple analog

        The result is the root note, followed by "m" when the chord is minor.
        Roots that need an accidental are written with both spellings, sharp
        first, e.g. "C#|Db" or "C#m|Dbm". Roots are resolved by pitch, so
        "Cb" gives "B" and "E#" gives "F". A label whose root is "N" gives "N".

        :param chord (string):  input chord
        :return base (string): simplified chord
        :raises ValueError: if chord doesn't start with a root note (A-G or N)
    """
    root_match = re.match(r"[A-GN][#b]?", chord)
    if root_match is None:
        raise ValueError(f"Unrecognized chord label: {chord!r}")
    root = root_match.group()
    if root[0] == "N":
        return "N"

    # Minor if "min" appears anywhere, or for shorthand such as "Am"/"C#m7".
    # The (?!aj) lookahead keeps "Cmaj7" from being read as "Cm" + "aj7".
    is_minor = "min" in chord or re.match(r"[A-G][#b]?m(?!aj)", chord) is not None
    quality = "m" if is_minor else ""

    pitch = _NATURAL_PITCH[root[0]]
    if root.endswith("#"):
        pitch += 1
    elif root.endswith("b"):
        pitch -= 1
    pitch %= 12  # wraps B# to C and Cb to B

    sharp_name = _SHARP_NAMES[pitch]
    flat_name = _FLAT_NAMES[pitch]
    if sharp_name == flat_name:
        # natural note: a single spelling
        return sharp_name + quality
    return f"{sharp_name}{quality}|{flat_name}{quality}"

def simplify_song(chord_transcript):
    """Rewrites every chord label of a transcription with its simplified form, in place

        :param chord_transcript (list): list of [ChordStartTime, ChordEndTime, ChordLabel]; the
                                        label (index 2) of each row is overwritten
        :return: None
    """
    for entry in chord_transcript:
        original_label = entry[2]
        entry[2] = _simplify_chord(original_label)

In [None]:
# Load the chord transcription of every song file in CHORDS_DIR
chord_transcriptions = get_chord_transcripts(CHORDS_DIR)

In [None]:
# Count and print every chord label together with how many times it is used
print('ALL USED CHORDS')
chord_types = count_chord_types(chord_transcriptions.values())
print("\nNumber of chord classes: ", len(chord_types))
print('\n')
# Sum once rather than once per printed line. A plain loop (instead of a list
# comprehension used only for its side effects) also keeps the notebook from
# echoing a list of None values as the cell result.
total_chords = sum(chord_types.values())
for key, value in chord_types.items():
    print(f'{key}: {value} ( {round(value*100/total_chords, 2)}% )')

In [None]:
# Simplify complicated chords: replace every label, in place, with its simplified chord class
for transcript in chord_transcriptions.values():
    simplify_song(transcript)

print('\nSIMPLIFIED CHORDS:\n')
simplified_chord_types = count_chord_types(chord_transcriptions.values())
# Sum once rather than once per printed line
total_simplified = sum(simplified_chord_types.values())
for key, value in simplified_chord_types.items():
    print(f'{key}: {value} ( {round(value*100/total_simplified, 2)}% )')

# The distinct simplified labels are the chord classes
chord_classes = list(simplified_chord_types)
print(chord_classes)