In [None]:
import librosa as lb
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import soundfile as sf
from scipy.stats import pearsonr

In [2]:
# Decode the example track into a waveform `y` and its sample rate `sr`
# so the cells below can work on the already-loaded audio.
y, sr = lb.load('./goat.mp3')

In [3]:
def calculate_bpm(audio_path, sr=22050):
    """Estimate the tempo of a recording in beats per minute.

    Parameters
    ----------
    audio_path : str, os.PathLike or array-like
        Either the path of an audio file, which is decoded with ``lb.load``,
        or an already-decoded waveform (this notebook calls the function
        with the samples returned by ``lb.load``).
    sr : int, optional
        Sample rate. When ``audio_path`` is a path it is forwarded to
        ``lb.load`` as the target rate; when ``audio_path`` is a waveform it
        is taken as the rate of those samples. The default of 22050 matches
        the rate ``lb.load`` resamples to when no rate is requested.

    Returns
    -------
    float
        The estimated global tempo in BPM.
    """
    # Work on the caller's input instead of whatever `y`/`sr` happen to be
    # defined in the notebook namespace.
    if isinstance(audio_path, (str, os.PathLike)):
        samples, sample_rate = lb.load(audio_path, sr=sr)
    else:
        samples, sample_rate = np.asarray(audio_path), sr

    onset_env = lb.onset.onset_strength(y=samples, sr=sample_rate)
    tempo, _ = lb.beat.beat_track(onset_envelope=onset_env, sr=sample_rate)

    # Depending on the librosa version the tempo comes back as a scalar or as
    # an array; atleast_1d makes indexing safe for both.
    return float(np.atleast_1d(tempo)[0])

# Estimate the tempo of the loaded waveform; the bare name on the last line
# makes the notebook show the value as the cell output.
bpm = calculate_bpm(y)
bpm

107.666015625

In [4]:
def calc_speed(bpm, changed_bpm):
    """Return the rate factor that turns a tempo of ``bpm`` into ``changed_bpm``.

    Parameters
    ----------
    bpm : float
        Current tempo of the recording.
    changed_bpm : float
        Desired tempo.

    Returns
    -------
    float
        ``changed_bpm / bpm``; values below 1 slow the audio down, values
        above 1 speed it up.

    Raises
    ------
    ZeroDivisionError
        If ``bpm`` is zero. Raised explicitly because a numpy zero would
        otherwise produce ``inf`` with only a warning.
    """
    if bpm == 0:
        raise ZeroDivisionError("bpm must be non-zero to compute a speed factor")
    return changed_bpm / bpm

In [None]:
def change_speed_and_pitch(audio_path, speed_factor, output_path, sr=22050):
    """Time-stretch a recording and write the result to ``output_path``.

    Despite the name, only the tempo is altered: the audio goes through
    ``lb.effects.time_stretch``, which changes speed while keeping pitch.

    Parameters
    ----------
    audio_path : str, os.PathLike or array-like
        Path of the audio file to process, or an already-decoded waveform.
    speed_factor : float
        Stretch rate passed to ``time_stretch``.
    output_path : str
        Destination file handed to ``sf.write``.
    sr : int, optional
        Target rate when loading from a path, or the rate of the supplied
        waveform. The default of 22050 matches the rate ``lb.load`` resamples
        to when no rate is requested.

    Returns
    -------
    None
    """
    # Read the audio the caller asked for rather than the notebook-level
    # `y`/`sr` variables.
    if isinstance(audio_path, (str, os.PathLike)):
        samples, sample_rate = lb.load(audio_path, sr=sr)
    else:
        samples, sample_rate = np.asarray(audio_path), sr

    # Change speed (tempo) but keep pitch
    stretched = lb.effects.time_stretch(samples, rate=speed_factor)

    # Save the processed file
    sf.write(output_path, stretched, sample_rate)
    print(f"Processed file saved at {output_path}")

# Stretch the track by the ratio 26 / bpm (i.e. towards a tempo of 26 BPM)
# and write the result to ./moreslo_goat.mp3.
change_speed_and_pitch('./goat.mp3', calc_speed(bpm, 26), './moreslo_goat.mp3')

Processed file saved at ./moreslo_goat.mp3


In [None]:
def pitch_shift(audio_path, n_steps, output_path):
    """Transpose an audio file by ``n_steps`` and save it to ``output_path``.

    The file at ``audio_path`` is decoded with ``lb.load``, passed through
    ``lb.effects.pitch_shift`` and written out with ``sf.write`` at the
    sample rate it was loaded with. Nothing is returned.
    """
    samples, sample_rate = lb.load(audio_path)
    sf.write(
        output_path,
        lb.effects.pitch_shift(samples, sr=sample_rate, n_steps=n_steps),
        sample_rate,
    )

# Example: pitch_shift('input.mp3', 2, 'output.mp3')  # Shifts up by 2 semitones

In [None]:
# Krumhansl-Schmuckler key profiles (empirical data).
# Each entry rates how well a pitch class "fits" the key; index 0 is the
# tonic and the remaining entries ascend chromatically from it.

# Pitch-class names in chromatic order, starting at C.
PITCH_CLASSES = "C C# D D# E F F# G G# A A# B".split()

# Major-key ratings, written out for C major (C, C#, D, ..., B).
krumhansl_major_profile_base = np.array(
    [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
)

# Minor-key ratings, written out for C minor so that the same rotation as
# for the major profile yields every other minor key.
krumhansl_minor_profile_base = np.array(
    [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]
)


def generate_key_profiles():
    """Build the 24 key profiles (12 major and 12 minor).

    For every tonic the C-based profile is rotated so that its first entry
    lands on that tonic's pitch class, then scaled to sum to 1. The returned
    dict maps names such as ``"C major"`` or ``"A# minor"`` to the resulting
    12-element arrays, ordered tonic by tonic with major before minor.
    """
    modes = (
        ("major", krumhansl_major_profile_base),
        ("minor", krumhansl_minor_profile_base),
    )
    profiles = {}
    for shift, tonic in enumerate(PITCH_CLASSES):
        for mode, base in modes:
            rotated = np.roll(base, shift)
            profiles[f"{tonic} {mode}"] = rotated / rotated.sum()
    return profiles


# Build the lookup table once, when this cell is executed.
ALL_KEY_PROFILES = generate_key_profiles()

def detect_key_krumhansl(audio_path):
    """Estimate the musical key of an audio file with the Krumhansl-Schmuckler method.

    The file is decoded, its chromagram is summed over time into a single
    12-bin pitch-class profile, and that profile is correlated (Pearson)
    against every entry of ``ALL_KEY_PROFILES``. The best-correlating key
    wins.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to analyse.

    Returns
    -------
    tuple of (str, float)
        The detected key name and its correlation. On failure the name is one
        of the ``"Unknown..."`` labels and the correlation is ``0.0``; errors
        are printed rather than raised.
    """
    try:
        # 1. Load the audio file (librosa is imported as `lb` in this notebook).
        y, sr = lb.load(audio_path)
        if len(y) == 0:
            print("Warning: Audio file is empty or could not be loaded properly.")
            return "Unknown", 0.0

        # 2. Extract chroma features: one row per pitch class, one column per frame.
        chromagram = lb.feature.chroma_stft(y=y, sr=sr)

        # 3. Collapse the time axis so each pitch class has a single total.
        song_chroma_profile = np.sum(chromagram, axis=1)

        # Handle cases where the song is silent or has no chromatic content;
        # this also protects the normalisation below from dividing by zero.
        total_energy = np.sum(song_chroma_profile)
        if total_energy == 0:
            print("Warning: No chromatic content found in the audio.")
            return "Unknown (No chromatic content)", 0.0

        song_chroma_profile_normalized = song_chroma_profile / total_energy

        # 4. Compare with every Krumhansl-Schmuckler profile.
        best_key = None
        max_correlation = -np.inf

        for key_name, key_profile in ALL_KEY_PROFILES.items():
            correlation, _ = pearsonr(song_chroma_profile_normalized, key_profile)

            # A NaN correlation never compares greater, so such keys are
            # skipped and the first key reaching the maximum is kept.
            if correlation > max_correlation:
                max_correlation = correlation
                best_key = key_name

        if best_key is None:
            return "Unknown (Correlation failed)", 0.0

        return best_key, float(max_correlation)

    except Exception as e:
        # Deliberate best-effort: report the problem and return a sentinel
        # instead of interrupting the notebook.
        print(f"Error during key detection: {e}")
        return "Unknown (Error)", 0.0

# --- Example Usage ---
if __name__ == "__main__":
    # Replace with the path to your actual song
    audio_file_path = "path/to/your/actual/song.mp3"

    # detect_key_krumhansl catches every exception itself and returns an
    # "Unknown" label, so a missing file can never surface here as
    # FileNotFoundError. Check for the file explicitly instead.
    if not os.path.isfile(audio_file_path):
        print(f"Audio file not found: {audio_file_path}. Skipping this test.")
    else:
        try:
            print(f"Testing with user-provided file: {audio_file_path}")
            detected_key, confidence = detect_key_krumhansl(audio_file_path)
            print(f"Detected key: {detected_key}, Confidence (Correlation): {confidence:.4f}")
        except Exception as e:
            print(f"Could not process {audio_file_path} due to: {e}")

In [None]:
# def change_speed_and_pitch(audio_data, sr, speed_factor):
#     """
#     Changes the speed of audio data without saving to file
#     Parameters:
#         audio_data: numpy array of audio samples
#         sr: sample rate
#         speed_factor: factor to change speed (e.g., 1.2 for 20% faster)
#     Returns:
#         processed audio data as numpy array
#     """
#     # Change speed (tempo) but keep pitch
#     y_processed = lb.effects.time_stretch(audio_data, rate=speed_factor)
    
#     return y_processed, sr

In [None]:
# # Load audio once
# y, sr = lb.load("input.mp3")

# # Process audio when speed change is requested (e.g., from a slider in UI)
# def on_speed_change(speed_factor):
#     processed_audio, sr = change_speed_and_pitch(y, sr, speed_factor)
#     # Here you would send processed_audio to your audio playback system
#     return processed_audio