## Librosa


In [4]:
import librosa
import numpy as np

def estimate_key(chroma):
    """Estimate the musical key of a chromagram by triad template matching.

    The time-averaged chroma vector is scored against 24 binary templates:
    a major and a minor triad rooted on each of the 12 pitch classes. The
    best-scoring template gives the tonic and the mode; on a tie the first
    candidate wins, so major is preferred over minor.

    Args:
        chroma: Array of shape (12, n_frames) whose rows are indexed in the
            order C, C#, ..., B, or an already averaged vector of length 12.

    Returns:
        Tuple ``(key, mode)`` where ``key`` is a pitch-class name such as
        ``'C#'`` and ``mode`` is ``'major'`` or ``'minor'``.
    """
    # Triad templates in chromatic order starting at the tonic:
    # major = root, major third, fifth; minor = root, minor third, fifth.
    major = np.array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
    minor = np.array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0])

    # Row i holds the template transposed to tonic i.
    major_profiles = np.array([np.roll(major, i) for i in range(12)])
    minor_profiles = np.array([np.roll(minor, i) for i in range(12)])

    # Collapse the time axis once; a 1-D input is taken as already averaged.
    chroma = np.asarray(chroma)
    mean_chroma = chroma.mean(axis=1) if chroma.ndim > 1 else chroma

    # Keep the 24 candidates side by side: indices 0-11 are major keys and
    # 12-23 are minor keys. Adding the two score vectors instead would leave
    # only 12 entries, and the mode could then never come out as 'minor'.
    scores = np.concatenate([
        np.dot(major_profiles, mean_chroma),
        np.dot(minor_profiles, mean_chroma),
    ])
    best = int(np.argmax(scores))
    mode = 'minor' if best >= 12 else 'major'
    key_idx = best % 12

    # Mapping index to key
    keys = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    return keys[key_idx], mode

def detect_tempo_and_key(audio_files):
    """Print the estimated tempo and key of each audio file.

    Each file is loaded with librosa, its tempo is taken from the beat
    tracker run on the onset-strength envelope, and its key is estimated
    from a constant-Q chromagram via ``estimate_key``.

    Args:
        audio_files: Iterable of paths to audio files readable by
            ``librosa.load``.
    """
    for audio_file in audio_files:
        print(f"Analyzing {audio_file}")
        y, sr = librosa.load(audio_file)

        # Detect tempo
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
        # The tracker can hand the tempo back as a one-element array, which
        # would print as "[135.99...] BPM"; reduce it to a plain float.
        tempo = float(np.atleast_1d(tempo)[0])

        # Extract chroma features
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)

        # Estimate key
        key, mode = estimate_key(chroma)

        # Output results
        print(f"Detected Tempo: {tempo} BPM")
        print(f"Estimated Key: {key} {mode}")
        print("----------")

# Absolute paths of the WAV recordings to analyse in this cell
audio_files = [
    '/workspaces/Playful-Project/A Place For My Head.wav',
    '/workspaces/Playful-Project/Lovestory.wav',
    '/workspaces/Playful-Project/my_immortal.wav'
]

# Run the librosa-based tempo and key detection on every listed file;
# the results are printed, nothing is returned.
detect_tempo_and_key(audio_files)


Analyzing /workspaces/Playful-Project/A Place For My Head.wav
Detected Tempo: [135.99917763] BPM
Estimated Key: D# major
----------
Analyzing /workspaces/Playful-Project/Lovestory.wav
Detected Tempo: [117.45383523] BPM
Estimated Key: D major
----------
Analyzing /workspaces/Playful-Project/my_immortal.wav
Detected Tempo: [143.5546875] BPM
Estimated Key: A major
----------


## Aubio

In [5]:
import aubio
import numpy as np

def detect_tempo_and_key(audio_files):
    """Print aubio's tempo estimate and the most frequent pitch of each file.

    Every file is streamed block by block through an aubio tempo tracker and
    a YIN pitch detector. The BPM reported by the tracker and the note name
    of the most populated pitch-histogram bin are printed.

    Args:
        audio_files: Iterable of paths to audio files readable by
            ``aubio.source``.
    """
    hop_size = 512  # Number of samples read per block
    win_size = hop_size * 4  # Window size for analysis

    for audio_file in audio_files:
        print(f"Analyzing {audio_file}")

        # Open the file first, requesting rate 0 (use the native samplerate
        # of the file), so the detectors below are built with the real rate
        # instead of the placeholder 0.
        source = aubio.source(audio_file, 0, hop_size)
        samplerate = source.samplerate

        # Create aubio tempo detection object
        tempo = aubio.tempo("default", win_size, hop_size, samplerate)

        # Create pitch detection object
        pitch_o = aubio.pitch("yin", win_size, hop_size, samplerate)
        pitch_o.set_unit("Hz")

        pitches = []

        while True:
            samples, read = source()
            # Feed the beat tracker; its BPM estimate is read after the loop.
            tempo(samples)
            pitch = pitch_o(samples)[0]
            if pitch:  # skip frames reported as 0
                pitches.append(pitch)
            if read < hop_size:  # a short read marks the end of the file
                break

        # Estimate BPM
        bpm = tempo.get_bpm()

        # Estimate the key based on the most frequent pitch: take the left
        # edge of the fullest histogram bin and name the nearest note.
        if pitches:
            histogram, bin_edges = np.histogram(pitches, bins="auto")
            peak = bin_edges[np.argmax(histogram)]
            key = frequency_to_note(peak)
        else:
            key = "Unknown"

        # Output results
        print(f"Detected Tempo: {bpm} BPM")
        print(f"Most Frequent Pitch (Est. Key): {key}")
        print("----------")

def frequency_to_note(frequency):
    """Return the name of the equal-tempered note nearest to *frequency*.

    The note is located by counting semitones above C0, with A4 tuned to
    440 Hz, and rounding to the nearest whole semitone.

    Args:
        frequency: Frequency in Hz; must be positive and finite.

    Returns:
        Note name followed by its octave number, e.g. ``"A4"`` or ``"C#2"``.

    Raises:
        ValueError: If *frequency* is zero, negative, NaN or infinite.
    """
    # The logarithm below is undefined for these inputs; fail with a clear
    # message instead of an obscure error from round().
    if not (np.isfinite(frequency) and frequency > 0):
        raise ValueError(
            f"frequency must be a positive, finite number of Hz, got {frequency!r}"
        )

    A4 = 440
    C0 = A4 * pow(2, -4.75)  # C0 lies 4.75 octaves (57 semitones) below A4
    # Coerce to a plain int so the result is always usable as a list index,
    # whatever numeric type was passed in.
    h = int(round(12 * float(np.log2(frequency / C0))))
    octave, n = divmod(h, 12)
    notes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
    return notes[n] + str(octave)

# Absolute paths of the WAV recordings to analyse in this cell
audio_files = [
    '/workspaces/Playful-Project/A Place For My Head.wav',
    '/workspaces/Playful-Project/Lovestory.wav',
    '/workspaces/Playful-Project/my_immortal.wav'
]

# Run the aubio-based tempo and pitch detection on every listed file;
# the results are printed, nothing is returned.
detect_tempo_and_key(audio_files)


Analyzing /workspaces/Playful-Project/A Place For My Head.wav
Detected Tempo: 136.94447326660156 BPM
Most Frequent Pitch (Est. Key): F1
----------
Analyzing /workspaces/Playful-Project/Lovestory.wav
Detected Tempo: 121.2387466430664 BPM
Most Frequent Pitch (Est. Key): G#1
----------
Analyzing /workspaces/Playful-Project/my_immortal.wav
Detected Tempo: 137.12925720214844 BPM
Most Frequent Pitch (Est. Key): C#2
----------


## Essentia

In [9]:
import essentia.standard as es

def analyze_audio(audio_files):
    """Print Essentia's tempo, beat and key estimates for each audio file.

    Each path is loaded as a mono signal, passed through
    ``RhythmExtractor2013`` (multifeature method) and ``KeyExtractor``, and
    the values they return are printed.

    Args:
        audio_files: Iterable of audio file paths.
    """
    for audio_file in audio_files:
        print(f"Analyzing {audio_file}")

        # Decode the file into a mono sample array
        audio = es.MonoLoader(filename=audio_file)()

        # Rhythm analysis: the fourth returned value is not used here
        rhythm = es.RhythmExtractor2013(method="multifeature")(audio)
        bpm, beats, beats_confidence = rhythm[0], rhythm[1], rhythm[2]
        beats_intervals = rhythm[4]

        # Key analysis
        key, scale, strength = es.KeyExtractor()(audio)

        # Report everything for this file
        print(f"Detected BPM: {bpm}")
        print(f"Key: {key} {scale}, Strength: {strength}")
        print("Beat positions (sec):", beats)
        print("Beat interval (sec):", beats_intervals)
        print("Confidence in beat detection:", beats_confidence)
        print("----------")

# Absolute paths of the WAV recordings to analyse in this cell
audio_files = [
    '/workspaces/Playful-Project/A Place For My Head.wav',
    '/workspaces/Playful-Project/Lovestory.wav',
    '/workspaces/Playful-Project/my_immortal.wav'
]

# Run the Essentia rhythm and key analysis on every listed file;
# the results are printed, nothing is returned.
analyze_audio(audio_files)


[   INFO   ] MusicExtractorSVM: no classifier models were configured by default


Analyzing /workspaces/Playful-Project/A Place For My Head.wav
Detected BPM: 134.62326049804688
Key: Eb minor, Strength: 0.8908191919326782
Beat positions (sec): [  0.3947392   0.8126984   1.2538775   1.6834466   2.1362357   2.577415
   3.053424    3.5062132   3.9473922   4.4117913   4.87619     5.3173695
   5.7585487   6.2229476   6.664127    7.1285257   7.569705    8.010884
   8.475284    8.916462    9.357641    9.822041   10.26322    10.727619
  11.168798   11.621587   12.074376   12.527165   12.979955   13.444353
  13.885532   14.326712   14.791111   15.25551    15.696689   16.161089
  16.602268   17.043446   17.484625   17.949024   18.390203   18.854603
  19.295782   19.760181   20.20136    20.66576    21.09533    21.548117
  22.000906   22.465305   22.918095   23.359274   23.800453   24.253242
  24.706032   25.147211   25.6        26.064398   26.517187   26.958366
  27.399546   27.852335   28.305124   28.757914   29.210703   29.651882
  30.116282   30.557459   31.010248   31.46303

## Librosa and music21

In [3]:
import librosa
import numpy as np
import music21

def get_chromatic_profile(audio_file):
    """Build a normalized pitch-class profile for one audio file.

    The signal is loaded with ``sr=None``, pre-emphasized, reduced to its
    harmonic component, and turned into a constant-Q chromagram (36 bins
    per octave over 7 octaves). The chromagram is averaged over time and
    scaled so that the twelve weights sum to 1.

    Args:
        audio_file: Path to the audio file.

    Returns:
        Dict mapping the pitch-class names ``'C'``, ``'C#'``, ..., ``'B'``
        to their share of the averaged chroma energy.
    """
    signal, rate = librosa.load(audio_file, sr=None)
    emphasized = librosa.effects.preemphasis(signal)

    # Only the harmonic part is analysed; the percussive part is discarded.
    harmonic, _ = librosa.effects.hpss(emphasized)

    chromagram = librosa.feature.chroma_cqt(
        y=harmonic, sr=rate, bins_per_octave=36, n_octaves=7
    )

    # One averaged value per pitch class, labelled in chromatic order from C
    mean_energy = np.mean(chromagram, axis=1)
    names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    weights = dict(zip(names, mean_energy))

    # Scale the weights so they add up to 1
    norm = sum(weights.values())
    return {name: weight / norm for name, weight in weights.items()}

def estimate_key_from_profile(profile):
    """Estimate a key from a pitch-class weight profile using music21.

    Every pitch class with a positive weight is added to a stream as one
    note lasting ``weight * 4`` quarter lengths, and the stream is handed
    to music21's key analysis.

    Args:
        profile: Mapping from pitch-class name to weight.

    Returns:
        The object returned by ``Stream.analyze('key')``.
    """
    weighted_notes = music21.stream.Stream()
    for pitch_name in profile:
        share = profile[pitch_name]
        if not share > 0:
            # Pitch classes without positive weight contribute no note
            continue
        weighted_notes.append(
            music21.note.Note(pitch_name, quarterLength=share * 4)
        )

    return weighted_notes.analyze('key')

def process_files(audio_files):
    """Print the estimated key of every file in *audio_files*.

    For each path the chromatic profile is computed and passed to the
    music21-based key estimator; the tonic name and mode are printed.

    Args:
        audio_files: Iterable of audio file paths.
    """
    for audio_file in audio_files:
        print(f"Analyzing {audio_file}")
        # Profile extraction and key estimation chained in one step
        key = estimate_key_from_profile(get_chromatic_profile(audio_file))
        print(f"File: {audio_file}")
        print(f"Estimated Key: {key.tonic.name} {key.mode}")
        print("----------")

# Absolute paths of the WAV recordings to analyse in this cell
# (only two files are listed here)
audio_files = [
    '/workspaces/Playful-Project/A Place For My Head.wav',
    '/workspaces/Playful-Project/Lovestory.wav',

]

# Estimate and print the key of each listed file
process_files(audio_files)


Analyzing /workspaces/Playful-Project/A Place For My Head.wav
File: /workspaces/Playful-Project/A Place For My Head.wav
Estimated Key: G# minor
----------
Analyzing /workspaces/Playful-Project/Lovestory.wav
File: /workspaces/Playful-Project/Lovestory.wav
Estimated Key: D major
----------


## Aubio and music21

In [4]:
import aubio
import numpy as np
import music21

def process_audio_with_aubio(audio_file):
    """Estimate the key of an audio file from aubio's frame-wise pitches.

    The file is read in blocks of 512 samples at the rate reported by the
    opened source. Each block is passed to aubio's "default" pitch detector
    (2048-sample window, tolerance 0.8, output in Hz). Every positive pitch
    becomes a music21 note, and the resulting stream is analysed for its key.

    Args:
        audio_file: Path to the audio file.

    Returns:
        The object returned by ``Stream.analyze('key')``.
    """
    hop = 512  # samples read per block
    fft_size = 2048  # analysis window length

    # Rate 0 is requested here; the actual rate is read back from the source
    reader = aubio.source(audio_file, 0, hop)

    detector = aubio.pitch("default", fft_size, hop, reader.samplerate)
    detector.set_unit("Hz")
    detector.set_tolerance(0.8)

    # One pitch estimate per block; a short read marks the final block
    frame_pitches = []
    finished = False
    while not finished:
        block, n_read = reader()
        frame_pitches.append(detector(block)[0])
        finished = n_read < hop

    # Each positive estimate becomes a note of 0.25 quarter lengths
    melody = music21.stream.Stream()
    for hz in frame_pitches:
        if not hz > 0:
            continue
        tone = music21.note.Note()
        tone.pitch.frequency = hz
        tone.quarterLength = 0.25
        melody.append(tone)

    return melody.analyze('key')

# Absolute paths of the WAV recordings to analyse in this cell
audio_files = [
    '/workspaces/Playful-Project/A Place For My Head.wav',
    '/workspaces/Playful-Project/Lovestory.wav',
    '/workspaces/Playful-Project/my_immortal.wav'
]

# Estimate the key of each file from its aubio pitch track and print
# the tonic name and mode on one line per file
for file in audio_files:
    key = process_audio_with_aubio(file)
    print(f"File: {file}, Detected Key: {key.tonic.name} {key.mode}")


File: /workspaces/Playful-Project/A Place For My Head.wav, Detected Key: G# minor
File: /workspaces/Playful-Project/Lovestory.wav, Detected Key: D major
File: /workspaces/Playful-Project/my_immortal.wav, Detected Key: F# minor


## Crepe and music21

In [5]:
import crepe
import scipy.io.wavfile
import numpy as np
import music21

def process_audio_with_crepe(audio_file):
    """Estimate the key of a WAV file from CREPE's pitch track.

    The file is read with SciPy and averaged down to one channel if it has
    several. CREPE (with Viterbi smoothing) then predicts a frequency and a
    confidence per frame. Frames with confidence above 0.5 and a positive
    frequency become music21 notes, and the stream is analysed for its key.

    Args:
        audio_file: Path to the WAV file.

    Returns:
        The object returned by ``Stream.analyze('key')``.
    """
    rate, samples = scipy.io.wavfile.read(audio_file)
    if samples.ndim > 1:
        # Multi-channel input: average the channels into one
        samples = np.mean(samples, axis=1)

    # Only the frequency and confidence tracks are needed
    _, frequency, confidence, _ = crepe.predict(samples, rate, viterbi=True)

    # Keep confident, non-zero estimates in their original order
    reliable = (confidence > 0.5) & (frequency > 0)

    # Each retained estimate becomes a note of 0.25 quarter lengths
    melody = music21.stream.Stream()
    for hz in frequency[reliable]:
        tone = music21.note.Note()
        tone.pitch.frequency = hz
        tone.quarterLength = 0.25
        melody.append(tone)

    return melody.analyze('key')

# Absolute paths of the WAV recordings to analyse in this cell
audio_files = [
    '/workspaces/Playful-Project/A Place For My Head.wav',
    '/workspaces/Playful-Project/Lovestory.wav',
    '/workspaces/Playful-Project/my_immortal.wav'
]

# Estimate the key of each file from its CREPE pitch track and print
# the tonic name and mode on one line per file
for file in audio_files:
    key = process_audio_with_crepe(file)
    print(f"File: {file}, Detected Key: {key.tonic.name} {key.mode}")


2024-05-11 21:52:25.065357: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-11 21:52:26.149265: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-11 21:52:28.446631: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-11 21:52:35.182763: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 33554432 exceeds 10% of free system memory.
2024-05-11 21:52:35.207218: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 33554432 exceeds 10% of free system memory.
2024-05-11 21:52:35.213638: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 335

[1m578/578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m308s[0m 532ms/step
File: /workspaces/Playful-Project/A Place For My Head.wav, Detected Key: B major


  sr, audio = scipy.io.wavfile.read(audio_file)


[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 533ms/step
File: /workspaces/Playful-Project/Lovestory.wav, Detected Key: B minor
[1m829/829[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m440s[0m 531ms/step
File: /workspaces/Playful-Project/my_immortal.wav, Detected Key: F# minor
