In [3]:
import librosa
import os
import numpy as np
import scipy.signal as sp

In [6]:
# Tempo (BPM) is extracted first because the later steps need it
def get_bpm(path):
    """Load an audio file and run librosa's beat tracker on it.

    Parameters
    ----------
    path : str or path-like
        Audio file handed to ``librosa.load`` with its default options.

    Returns
    -------
    tuple
        ``(tempo, beat_frames, sr)``: the tempo estimate and the beat frame
        positions exactly as ``librosa.beat.beat_track`` returns them,
        followed by the sample rate reported by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(path)

    bpm, frames = librosa.beat.beat_track(y=signal, sr=sample_rate)

    return bpm, frames, sample_rate

In [28]:
# Key-profile weights for major and minor keys (the "KS" templates used for
# key matching). Index 0 is the weight of the tonic, index 1 the pitch class
# one semitone above it, and so on. Each profile is scaled to sum to 1.
KS_MAJOR, KS_MINOR = (
    weights / weights.sum()
    for weights in (
        np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]),
        np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]),
    )
)

# Pitch-class names in chroma-bin order (sharps only), starting at C
NOTE_NAMES = 'C C# D D# E F F# G G# A A# B'.split()

def _pearson_r(a, b):
    """Pearson correlation of two equal-length 1-D vectors (neither may be constant)."""
    a_centered = a - a.mean()
    b_centered = b - b.mean()
    denom = np.sqrt(np.dot(a_centered, a_centered) * np.dot(b_centered, b_centered))
    return np.dot(a_centered, b_centered) / denom


def detect_key_librosa(path, use_cqt=True):
    """Estimate the musical key of an audio file by template matching on chroma.

    The file is loaded as mono at its native sample rate, a chromagram is
    computed and averaged over time, and the resulting pitch-class vector is
    correlated with KS_MAJOR and KS_MINOR rotated to each of the 12 tonics.

    Parameters
    ----------
    path : str or path-like
        Audio file passed to ``librosa.load``.
    use_cqt : bool, default True
        Use ``librosa.feature.chroma_cqt`` when True, otherwise
        ``librosa.feature.chroma_stft``.

    Returns
    -------
    tuple or None
        ``(key, score)`` where ``key`` is a string such as ``"A minor"`` and
        ``score`` is the Pearson correlation (between -1 and 1) of the mean
        chroma with the winning template. ``None`` is returned when no key
        can be estimated: silent audio (all-zero chroma), perfectly flat
        chroma, or chroma containing non-finite values.
    """
    y, sr = librosa.load(path, sr=None, mono=True)
    # Compute chroma (CQT chroma is often robust for music)
    if use_cqt:
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
    else:
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = chroma.mean(axis=1)  # 12-d vector

    # Non-finite values (e.g. the mean of zero frames) cannot be matched.
    if chroma_mean.size == 0 or not np.all(np.isfinite(chroma_mean)):
        return None
    # A constant vector (this includes the all-zero, silent case) carries no
    # tonal information and has an undefined correlation.
    if chroma_mean.max() == chroma_mean.min():
        return None

    # Correlation is used instead of a raw dot product: the major and minor
    # profiles have different spreads, so a dot product systematically
    # favours one mode over the other. Correlation is also scale-invariant,
    # so the chroma vector does not need to be normalised first.
    best_score, best_tonic, best_mode = -np.inf, None, None
    for tonic in range(12):
        # np.roll(profile, tonic) moves the tonic weight to chroma bin `tonic`
        for mode, profile in (('major', KS_MAJOR), ('minor', KS_MINOR)):
            score = _pearson_r(chroma_mean, np.roll(profile, tonic))
            if score > best_score:
                best_score, best_tonic, best_mode = score, tonic, mode

    tonic_name = NOTE_NAMES[best_tonic]
    return f"{tonic_name} {best_mode}", best_score

In [7]:
base_dir = 'Music/wav_files'
song = 'wakemeup-avicii.wav'
path = os.path.join(base_dir, song)

tempo, beat_frames, sr = get_bpm(path)

# Depending on the librosa version, beat_track hands back the tempo either as
# a plain scalar or as a 1-element array; np.atleast_1d makes the [0] lookup
# valid in both cases.
print("Tempo:", int(np.atleast_1d(tempo)[0])) # this matches what I saw online for this song, so that's good

Tempo: 123


In [8]:
# Convert the beat positions from frame indices to timestamps, using the
# sample rate that get_bpm reported for this file
beat_times = librosa.frames_to_time(beat_frames, sr=sr)

In [14]:
beat_times.shape
# Sanity check: about 493 beats is what a roughly 4-minute song at 123 BPM
# should give (4 * 123 = 492)

(493,)

In [None]:
# Scratch check of dict key equality: 1 and 1.0 compare equal and hash the
# same, so they are the same key. The literal collapses to one entry, {1: "B"}
# (the first key object is kept, the last value wins).
ex = {1:"A", 1.0:"B"}

In [None]:
# detect_key_librosa returns None rather than a (key, score) pair when it
# cannot estimate a key, so guard the unpacking against that case.
key_result = detect_key_librosa(path)
key, score = key_result if key_result is not None else (None, None)
print(key, score)