<h1>Template-based chord recognition</h1>
Adapted from the resources accompanying the textbook "Fundamentals of Music Processing".
The first two code cells after the imports define helper functions for feature normalisation and for plotting, respectively. They are included so that this notebook can run on its own, even if you don't have LibFMP installed.

In [None]:
import numpy as np
from matplotlib import pyplot as plt
import librosa
import IPython.display as ipd
%matplotlib inline

In [None]:
def normalize_feature_sequence(X, norm='2', threshold=0.0001, v=None):
    """Normalizes the columns of a feature sequence

    From notebook: C3/C3S1_FeatureNormalization.ipynb

    Args:
        X: Feature sequence, a 2-D array of shape (K, N) with one feature
            vector per column
        norm: The norm to be applied. '1' (sum of absolute values), '2'
            (Euclidean), 'max' (maximum absolute value) or 'z' (zero mean
            and unit sample standard deviation)
        threshold: Columns whose norm (for 'z': standard deviation) does not
            exceed this value are replaced by `v` instead of being normalized
        v: Used instead of normalization at or below `threshold`. If None,
            uses the unit vector for the given norm (the zero vector for 'z')

    Returns:
        X_norm: Normalized feature sequence, float array of shape (K, N)

    Raises:
        ValueError: If `norm` is not one of '1', '2', 'max' or 'z'
    """
    if norm not in ('1', '2', 'max', 'z'):
        # Previously an unknown norm fell through every branch and silently
        # produced an all-zero result.
        raise ValueError(f"unsupported norm {norm!r}; expected '1', '2', 'max' or 'z'")

    X = np.asarray(X)
    K, N = X.shape
    X_norm = np.zeros((K, N))
    if X.size == 0:
        # Nothing to normalize (and np.max would reject an empty reduction).
        return X_norm

    # For every norm, compute the per-column scale, the matrix to be divided
    # by that scale, and the default replacement column.
    numerator = X
    if norm == '1':
        scale = np.sum(np.abs(X), axis=0)
        default_v = np.ones(K) / K
    elif norm == '2':
        scale = np.sqrt(np.sum(X ** 2, axis=0))
        default_v = np.ones(K) / np.sqrt(K)
    elif norm == 'max':
        scale = np.max(np.abs(X), axis=0)
        default_v = np.ones(K)
    else:  # norm == 'z'
        numerator = X - np.sum(X, axis=0) / K
        if K > 1:
            scale = np.sqrt(np.sum(numerator ** 2, axis=0) / (K - 1))
        else:
            # The sample standard deviation is undefined for one value. NaN
            # never exceeds the threshold, so every column falls back to `v`
            # without a division by zero.
            scale = np.full(N, np.nan)
        default_v = np.zeros(K)

    fallback = default_v if v is None else np.asarray(v)

    keep = scale > threshold
    X_norm[:, keep] = numerator[:, keep] / scale[keep]
    # Shape (K, 1) (or (1, 1) for a scalar `v`) broadcasts over all the
    # columns being replaced.
    X_norm[:, ~keep] = fallback.reshape(-1, 1)
    return X_norm

In [None]:
def plot_matrix(X, Fs=1, Fs_F=1, T_coef=None, F_coef=None, xlabel='Time (seconds)', ylabel='Frequency (Hz)', title='',
                dpi=72, colorbar=True, colorbar_aspect=20.0, ax=None, figsize=(6, 3), **kwargs):
    """Plot a matrix, e.g. a spectrogram or a tempogram

    From notebook: B/B_PythonVisualization.ipynb

    Args:
        X: The matrix
        Fs: Sample rate for axis 1
        Fs_F: Sample rate for axis 0
        T_coef: Time coefficients. If None, will be computed, based on Fs.
        F_coef: Frequency coefficients. If None, will be computed, based on Fs_F.
        xlabel: Label for x axis
        ylabel: Label for y axis
        title: Title for plot
        dpi: Dots per inch
        colorbar: Create a colorbar.
        colorbar_aspect: Aspect used for colorbar, in case only a single axes is used.
        ax: Either (1.) a list of two axes (first used for matrix, second for colorbar), or (2.) a list with a single
            axes (used for matrix), or (3.) None (an axes will be created).
        figsize: Width, height in inches
        **kwargs: Keyword arguments for matplotlib.pyplot.imshow. 'extent', 'cmap', 'aspect' and 'origin' receive
            defaults (coefficient-based extent, 'gray_r', 'auto', 'lower') when not given.

    Returns:
        fig: The created matplotlib figure or None if ax was given.
        ax: The used axes.
        im: The image plot
    """
    def _half_steps(coef, rate):
        # Half the spacing at the start and at the end of a coefficient axis.
        # With fewer than two coefficients there is no spacing to measure
        # (indexing coef[1] used to raise IndexError), so fall back to the
        # nominal spacing 1 / rate.
        if len(coef) < 2:
            half = 0.5 / rate
            return half, half
        return (coef[1] - coef[0]) / 2, (coef[-1] - coef[-2]) / 2

    fig = None
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)
        ax = [ax]
    if T_coef is None:
        T_coef = np.arange(X.shape[1]) / Fs
    if F_coef is None:
        F_coef = np.arange(X.shape[0]) / Fs_F

    if 'extent' not in kwargs:
        # Extend the image by half a cell on every side so that each
        # coefficient sits at the centre of its cell.
        x_ext1, x_ext2 = _half_steps(T_coef, Fs)
        y_ext1, y_ext2 = _half_steps(F_coef, Fs_F)
        kwargs['extent'] = [T_coef[0] - x_ext1, T_coef[-1] + x_ext2, F_coef[0] - y_ext1, F_coef[-1] + y_ext2]
    kwargs.setdefault('cmap', 'gray_r')
    kwargs.setdefault('aspect', 'auto')
    kwargs.setdefault('origin', 'lower')

    im = ax[0].imshow(X, **kwargs)

    if len(ax) == 2 and colorbar:
        # Dedicated colorbar axes supplied by the caller.
        plt.colorbar(im, cax=ax[1])
    elif len(ax) == 2 and not colorbar:
        # Keep the layout but hide the unused colorbar axes.
        ax[1].set_axis_off()
    elif len(ax) == 1 and colorbar:
        # Make the matrix axes current so the colorbar attaches to it.
        plt.sca(ax[0])
        plt.colorbar(im, aspect=colorbar_aspect)

    ax[0].set_xlabel(xlabel)
    ax[0].set_ylabel(ylabel)
    ax[0].set_title(title)

    # Only adjust the layout of a figure created here; a caller-supplied
    # layout is left untouched.
    if fig is not None:
        plt.tight_layout()

    return fig, ax, im


def plot_chromagram(*args, chroma_yticks=np.arange(12), **kwargs):
    """Plots a chromagram via plot_matrix() and labels the y axis with pitch-class names

    From Notebook: B/B_PythonVisualization.ipynb

    Args:
        *args: Positional arguments, passed on to plot_matrix()
        chroma_yticks: Row indices (0 = 'C', ..., 11 = 'B') that get a y tick and a pitch-class label
        **kwargs: Keyword arguments, passed on to plot_matrix(); 'ylabel' defaults to 'Chroma'

    Returns:
        fig, ax, im: exactly what plot_matrix() returns
    """
    kwargs.setdefault('ylabel', 'Chroma')
    fig, ax, im = plot_matrix(*args, **kwargs)

    pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    tick_labels = [pitch_classes[k] for k in chroma_yticks]

    # The first axes holds the matrix; a second one, if any, is the colorbar.
    matrix_axes = ax[0]
    matrix_axes.set_yticks(np.array(chroma_yticks))
    matrix_axes.set_yticklabels(tick_labels)

    return fig, ax, im

<h2>Initialise templates for all major and minor triads</h2>

In [None]:
# From FMP_0.1.1/C5/C5S2_ChordRec_Templates.html by Christof Weiß and Meinard Müller

def generate_template_matrix(templates):
    """For each input template, create 12 templates,
    one for each transposition of the input template.

    Args:
        templates: Array of shape (12, T) with one chroma template per
            column. A single template may also be given as a 1-D array of
            length 12.

    Returns:
        template_matrix: Array of shape (12, 12 * T). Column `12 * t + shift`
            holds template `t` cyclically shifted upwards by `shift` pitch
            classes, i.e. all 12 transpositions of the first template come
            first, then those of the second, and so on.

    Raises:
        AssertionError: If `templates` does not have 12 rows.
    """
    templates = np.asarray(templates)
    if templates.ndim == 1:
        # Treat a single flat template as one column.
        templates = templates[:, np.newaxis]

    # Kept as an assertion so that callers see the same exception type as before.
    assert templates.ndim == 2 and templates.shape[0] == 12, 'input "templates" has wrong size!'

    template_matrix = np.zeros((12, 12 * templates.shape[1]))

    for shift in range(12):
        # The strided slice picks one column per input template; np.roll
        # along axis 0 transposes every template by `shift` pitch classes.
        template_matrix[:, shift::12] = np.roll(templates, shift, axis=0)

    return template_matrix


# Exercise placeholders: replace each `*** ... ***` with the 12 values of the
# C major / C minor triad template. The double brackets build a single row,
# which `.T` turns into the 12-row column that generate_template_matrix()
# requires.
template_cmaj = np.array([[ *** Major triad template here *** ]]).T
template_cmin = np.array([[ *** Minor triad template here *** ]]).T
# Stack the two templates as columns; the result then holds the 12
# transpositions of the major template followed by the 12 transpositions of
# the minor template.
templates = generate_template_matrix(np.concatenate((template_cmaj, template_cmin), axis=1))

# Display templates
fig = plt.figure(figsize=(10, 4))
chroma_label = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
# Chord labels in the same order as the columns of `templates`:
# 12 major chords, then 12 minor chords (suffix 'm').
chord_label_maj = chroma_label
chord_label_min = [s + 'm' for s in chroma_label]
chord_labels = chord_label_maj + chord_label_min
# The extent makes every cell one unit wide and high, so the ticks placed at
# integer + 0.5 below sit at the cell centres.
plt.imshow(templates, origin='lower', aspect='auto', cmap='bwr', extent=[0, 24, 0, 12])
plt.clim([-1, 1])
plt.xlabel('Chord')
plt.xticks(np.arange(24) + 0.5, chord_labels)
plt.ylabel('Pitch class')
plt.yticks(np.arange(12) + 0.5, chroma_label)
# Vertical line separating the major chords from the minor chords.
# NOTE(review): matplotlib documents ymin/ymax of axvline as fractions of the
# axes height (0..1), so ymax = 12 was presumably meant to be 1 - confirm.
plt.axvline(x=12, ymin=0, ymax = 12, linewidth=1, color='k')
plt.tight_layout()

<h2>Chord recognition example</h2>

In [None]:
import matplotlib.gridspec as gridspec

def analysis_template_match(f_chroma, templates, apply_normalization=True, norm_output='2'):
    """Match a chromagram against a set of chord templates

    The columns of both inputs are normalized with the '2' norm before the
    matching step. The matching step itself is an exercise placeholder that
    still has to be filled in.

    Args:
        f_chroma: Chromagram; must have 12 rows (presumably one column per
            time frame - confirm against the caller)
        templates: Chord templates; must have 12 rows, one template per column
        apply_normalization: If True, the columns of the result are normalized
            using `norm_output`
        norm_output: Norm passed to normalize_feature_sequence() for the result

    Returns:
        f_analysis: Result of the matching step, column-normalized if
            `apply_normalization` is True
    """
    
    assert f_chroma.shape[0] == 12, 'input "f_chroma" has wrong size!'
    assert templates.shape[0] == 12, 'input "templates" has wrong size!'
    
    # Bring chroma vectors and templates to unit Euclidean length.
    chroma_normalized = normalize_feature_sequence(f_chroma, norm='2')
    templates_normalized = normalize_feature_sequence(templates, norm='2')
    
    # Exercise placeholder: the statement below is incomplete until the
    # matching expression is supplied.
    f_analysis =  #  *** match chromagram to chord templates here *** 
    if apply_normalization:
         f_analysis = normalize_feature_sequence(f_analysis, norm=norm_output)

    return f_analysis


# load wav file
Fs = 22050
fn_wav = 'FMP_C5_F01_Beatles_LetItBe-mm1-4_Original.wav'
x, Fs = librosa.load(fn_wav, sr=Fs)
#fn_wav = '../lectures/audio/felicidad30.wav'
#x, Fs = librosa.load(fn_wav, sr=Fs, duration=10)
# Show an audio player for the loaded signal.
ipd.display(ipd.Audio(x, rate=Fs))

# define parameters
N = 4096         # analysis window length in samples
H = Fs // 10     # 0.1s hop size
gamma = 100      # strength of the logarithmic compression applied below
norm_p = '2'     # norm used for the chroma vectors and the templates

# Compute chroma features with elliptic filter bank
P = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
# Logarithmic compression log(1 + gamma * P).
P_compressed = np.log(1.0 + gamma * P)
# norm=None: no normalization inside librosa, it is applied explicitly on the next line.
C_nonorm = librosa.feature.chroma_cqt(C=P_compressed, bins_per_octave=12, n_octaves=7, fmin=librosa.midi_to_hz(24), norm=None)
C = normalize_feature_sequence(C_nonorm, norm=norm_p)

# Template-based chord recognition (for each time point and each template, the output matrix
# contains the dot product of the template and the chroma vector)
# norm_output has no effect here because apply_normalization is False.
f_analysis = analysis_template_match(C, templates, apply_normalization=False, norm_output='1')

# Compute binary matrix indicating maximal entries only
# (if several entries of a column tie for the maximum, all of them are marked)
f_analysis_max = (f_analysis == f_analysis.max(axis=0)).astype(int)

# Compute normalized binary templates of analysis
# The matrix product selects, for every column of f_analysis_max, the
# normalized template(s) of the marked chord(s).
templates_normalized = normalize_feature_sequence(templates, norm=norm_p)
template_sequence = np.matmul(templates_normalized, f_analysis_max)


# Figure with four stacked plots; the narrow second grid column holds the colorbars.
plt.figure(figsize=(10, 12))
gs = gridspec.GridSpec(4, 2, width_ratios=[1, 0.02], height_ratios=[1, 1.8, 1.8, 1])

ax0 = plt.subplot(gs[0, 0])
ax1 = plt.subplot(gs[0, 1])
ax2 = plt.subplot(gs[1, 0])
ax3 = plt.subplot(gs[1, 1])
ax4 = plt.subplot(gs[2, 0])
ax5 = plt.subplot(gs[2, 1])
ax6 = plt.subplot(gs[3, 0])
ax7 = plt.subplot(gs[3, 1])

# Fs/H is the feature rate in frames per second (10 with the settings above).
plot_chromagram(C, ax=[ax0, ax1], Fs=Fs/H, clim=[0, 1], xlabel='')
ax0.set_title('Chromagram (input feature to match against templates)')

plot_matrix(f_analysis, ax=[ax2, ax3], Fs=Fs/H, clim=[0, 1], ylabel='Chord', xlabel='')
ax2.set_yticks(np.arange(24))
ax2.set_yticklabels(chord_labels)
ax2.set_title('Strength of match to each chord template (dot products of templates and chroma vectors)')

plot_matrix(f_analysis_max, ax=[ax4, ax5], Fs=Fs/H, clim=[0, 1], ylabel='Chord', xlabel='')
ax4.set_yticks(np.arange(24))
ax4.set_yticklabels(chord_labels)
ax4.set_title('Best matching chord at each point in time')

plot_chromagram(template_sequence, ax=[ax6, ax7], Fs=Fs/H, clim=[0, 1])
ax6.set_title('Pitch classes of best matching chords')
plt.tight_layout()
#plt.savefig('LetItBe-intro-chordDetection.pdf')

In [None]:
from scipy.signal import medfilt as medfilt
# Length of the median filter in frames (scipy's medfilt requires odd kernel sizes).
width = 5
# Kernel (1, width): the median is taken along the second axis of f_analysis
# only, so every row is smoothed independently of the others.
smoothed_f = medfilt(f_analysis, (1,width))
# Binary matrix marking the maximal entries of every column after smoothing
# (tied maxima are all marked).
max_smoothed = (smoothed_f == smoothed_f.max(axis=0)).astype(int)

# One plot plus a narrow axes for its colorbar.
plt.figure(figsize=(10, 4))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 0.02], height_ratios=[1])
ax0 = plt.subplot(gs[0, 0])
ax1 = plt.subplot(gs[0, 1])

plot_matrix(max_smoothed, ax=[ax0, ax1], Fs=Fs/H, clim=[0, 1], ylabel='Chord', xlabel='')
ax0.set_yticks(np.arange(24))
ax0.set_yticklabels(chord_labels)
ax0.set_title('Best matching chords after smoothing over time')