In [None]:
import librosa as lb
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import soundfile as sf
import noisereduce as nr
from scipy.stats import pearsonr
from IPython.display import Audio
import pyloudnorm as pyln
from pedalboard import Pedalboard, Compressor, Limiter, Gain

# Load Song

In [None]:
# Track analysed and processed in this notebook.
SONG_PATH = "/kaggle/input/untangle-songs/fake friend.mp3"

# mono=False asks librosa not to downmix, so a stereo file keeps its channels.
song, sample_rate = lb.load(SONG_PATH, mono=False)

# Calculate BPM

In [None]:
def calculate_bpm(audio_path, sr=None):
    """Estimate the global tempo of a track in beats per minute.

    Parameters
    ----------
    audio_path : str, path-like or np.ndarray
        Either a path to an audio file, or an already-loaded waveform
        (mono ``(n,)`` or multichannel ``(channels, n)``, the layout
        produced by ``lb.load``).
    sr : int, optional
        Sample rate. Required when a waveform is passed. When a path is
        passed it is forwarded to ``lb.load`` as the target sample rate;
        if omitted, ``lb.load`` is called with its own default.

    Returns
    -------
    float
        Estimated tempo in BPM.

    Raises
    ------
    ValueError
        If a waveform is passed without ``sr``.
    """
    if isinstance(audio_path, np.ndarray):
        if sr is None:
            raise ValueError("sr is required when passing a waveform instead of a path")
        y = audio_path
    elif sr is None:
        y, sr = lb.load(audio_path)
    else:
        y, sr = lb.load(audio_path, sr=sr)

    # Tempo is estimated from a single channel; downmix multichannel input.
    y = lb.to_mono(y)

    onset_env = lb.onset.onset_strength(y=y, sr=sr)
    tempo, _ = lb.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # Depending on the librosa version, tempo is a scalar or a 1-element
    # array; normalise to a plain float either way.
    return float(np.atleast_1d(tempo)[0])


# Reuse the waveform already loaded above instead of decoding the file again.
bpm = calculate_bpm(song, sr=sample_rate)
bpm

# Modify tempo of the song

In [None]:
def calc_speed(bpm, changed_bpm):
    """Return the rate multiplier that takes a track from `bpm` to `changed_bpm`.

    A value above 1 means faster than the original, below 1 means slower.
    """
    speed_factor = changed_bpm / bpm
    return speed_factor

In [None]:
def change_speed_and_pitch(audio_path, speed_factor, output_path):
    """Time-stretch an audio file and write the result to `output_path`.

    Despite the name, only the tempo changes: `lb.effects.time_stretch`
    is used, and the pitch is meant to stay as it was.

    Parameters
    ----------
    audio_path : str or path-like
        File to read with ``lb.load``.
    speed_factor : float
        Stretch rate passed to ``lb.effects.time_stretch`` (e.g. the value
        returned by ``calc_speed``).
    output_path : str or path-like
        Destination file written with ``sf.write``.
    """
    # Load the file named by the caller; previously the function relied on
    # `y` / `sr` globals that are never defined in this notebook.
    y, sr = lb.load(audio_path)

    # Change speed (tempo) but keep pitch
    y_fast = lb.effects.time_stretch(y, rate=speed_factor)

    # Save the processed file
    sf.write(output_path, y_fast, sr)
    print(f"Processed file saved at {output_path}")

change_speed_and_pitch('./goat.mp3', calc_speed(bpm, 26), './moreslo_goat.mp3')

# Detect Key 

In [None]:
# Pitch classes in chromatic order; index i is the tonic of the i-th key.
PITCH_CLASSES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Krumhansl profile for C major, one weight per pitch class (C, C#, ..., B).
krumhansl_major_profile_base = np.array([
    6.35, 2.23, 3.48, 2.33,   # C   C#  D   D#
    4.38, 4.09, 2.52, 5.19,   # E   F   F#  G
    2.39, 3.66, 2.29, 2.88,   # G#  A   A#  B
])

# Krumhansl profile for C minor (tonic C, minor third Eb, dominant G).
krumhansl_minor_profile_base = np.array([
    6.33, 2.68, 3.52, 5.38,   # C   C#  D   Eb
    2.60, 3.53, 2.54, 4.75,   # E   F   F#  G
    3.98, 2.69, 3.34, 3.17,   # G#  A   A#  B
])


def generate_key_profiles():
    """
    Build the 24 Krumhansl-Schmuckler key profiles (12 major, 12 minor).

    Each base profile is rotated so its tonic weight lands on the key's
    pitch class, then scaled to sum to 1. Keys are inserted as
    "C major", "C minor", "C# major", "C# minor", ...
    """
    templates = (
        ("major", krumhansl_major_profile_base),
        ("minor", krumhansl_minor_profile_base),
    )
    profiles = {}
    for shift, tonic in enumerate(PITCH_CLASSES):
        for mode, template in templates:
            rotated = np.roll(template, shift)
            profiles[f"{tonic} {mode}"] = rotated / np.sum(rotated)
    return profiles


# Built once up front so key detection can reuse the same table.
ALL_KEY_PROFILES = generate_key_profiles()

def detect_key_krumhansl(audio_path):
    """Guess the musical key of an audio file by Krumhansl-Schmuckler matching.

    The file is loaded with ``lb.load``, its chromagram is summed over time
    into a 12-bin pitch-class distribution, and that distribution is
    correlated (Pearson) against every entry of ``ALL_KEY_PROFILES``.

    Returns
    -------
    tuple[str, float]
        ``(key_name, correlation)`` for the best-matching key. On any
        failure a label starting with "Unknown" and ``0.0`` is returned;
        exceptions are printed, not raised.
    """
    try:
        # 1. Decode the file
        waveform, rate = lb.load(audio_path)
        if len(waveform) == 0:
            print("Warning: Audio file is empty or could not be loaded properly.")
            return "Unknown", 0.0

        # 2-3. Chroma features, collapsed over the time axis
        chroma = lb.feature.chroma_stft(y=waveform, sr=rate)
        pitch_energy = np.sum(chroma, axis=1)

        # A silent track has nothing to correlate against
        if np.sum(pitch_energy) == 0:
            print("Warning: No chromatic content found in the audio.")
            return "Unknown (No chromatic content)", 0.0

        pitch_distribution = pitch_energy / np.sum(pitch_energy)

        # 4. Keep the profile with the highest correlation; the strict ">"
        #    means the first profile wins on ties.
        best_key, best_score = None, -np.inf
        for name, template in ALL_KEY_PROFILES.items():
            score, _pvalue = pearsonr(pitch_distribution, template)
            if score > best_score:
                best_key, best_score = name, score

        # Nothing was selected (e.g. every comparison above was False)
        if best_key is None:
            return "Unknown (Correlation failed)", 0.0

        return best_key, float(best_score)

    except Exception as e:
        print(f"Error during key detection: {e}")
        return "Unknown (Error)", 0.0

if __name__ == "__main__":

    # detect_key_krumhansl loads the audio itself via lb.load, so it must be
    # given the file path, not the already-decoded `song` array.
    audio_file_path = "/kaggle/input/untangle-songs/fake friend.mp3"

    try:
        print(f"Testing with user-provided file: {audio_file_path}")
        detected_key, confidence = detect_key_krumhansl(audio_file_path)
        print(f"Detected key: {detected_key}, Confidence (Correlation): {confidence:.4f}")
    except FileNotFoundError:
        print(f"Audio file not found: {audio_file_path}. Skipping this test.")
    except Exception as e:
        print(f"Could not process {audio_file_path} due to: {e}")

# Shift Key
Each step is equivalent to one semitone (a half step).

In [None]:
def pitch_shift(audio_path, n_steps, output_path):
    """Transpose an audio file by `n_steps` and write the result to disk.

    Parameters
    ----------
    audio_path : str or path-like
        File read with ``lb.load``.
    n_steps : int or float
        Shift amount handed to ``lb.effects.pitch_shift``; positive values
        shift up, as in the usage example below this function.
    output_path : str or path-like
        Destination written with ``sf.write``.
    """
    # Decode the source file
    samples, rate = lb.load(audio_path)

    # Transpose, then persist at the same sample rate
    transposed = lb.effects.pitch_shift(samples, sr=rate, n_steps=n_steps)
    sf.write(output_path, transposed, rate)

# Example: pitch_shift('input.mp3', 2, 'output.mp3')  # Shifts up by 2 semitones

# Noise Reduction

In [None]:
# Denoise the loaded track, then render an inline player for the result.
noise_subtracted = nr.reduce_noise(y=song, sr=sample_rate)
denoised_player = Audio(data=noise_subtracted, rate=sample_rate)
display(denoised_player)

# Loudness Determination & Normalisation.

In [None]:
TARGET_LUFS = -14.0
TARGET_PEAK_DBFS = -1.0 # Target peak after any processing to avoid clipping


def _meter_layout(audio):
    """Return `audio` in the (samples, channels) layout pyloudnorm expects.

    `song` is loaded with mono=False and pedalboard is fed the same array,
    so multichannel audio in this cell is (channels, samples). pyloudnorm's
    Meter wants the transpose. 1-D (mono) arrays are returned unchanged.
    """
    return audio.T if audio.ndim == 2 else audio


# --- 1. Initial Loudness Analysis ---
meter = pyln.Meter(sample_rate)
og_lufs = meter.integrated_loudness(_meter_layout(song))
og_lra = meter.loudness_range(_meter_layout(song))
og_dbfs = 20 * np.log10(np.max(np.abs(song)))

print(f"\n--- Initial Audio Stats ---")
print(f"Integrated Loudness: {og_lufs:.2f} LUFS")
print(f"Loudness Range (LRA): {og_lra:.2f} LU")
print(f"Peak: {og_dbfs:.2f} dBFS")

# Work on a copy so `song` stays available for the before/after comparison.
processed_song_arr = song.copy()

# --- 2. Conditional Processing based on LRA ---
if og_lra < 7.0 or og_lra > 16.0:
    print(f"\n--- LRA ({og_lra:.2f} LU) is outside 7-16 LU range. Applying LUFS Normalization. ---")
    gain_db = TARGET_LUFS - og_lufs
    gain_linear = 10 ** (gain_db / 20.0)  # dB -> linear amplitude factor
    processed_song_arr = processed_song_arr * gain_linear
    print(f"Applied {gain_db:.2f} dB gain for LUFS normalization.")

else:
    print(f"\n--- LRA ({og_lra:.2f} LU) is within 7-16 LU range. Applying Dynamic Range Compression. ---")

    # Define compressor and limiter settings. Future-Scope: Modifiable via UI. Unlikely to be implemented.
    board = Pedalboard([
        Compressor(threshold_db=-20, ratio=4, attack_ms=5.0, release_ms=150.0),
        Limiter(threshold_db=TARGET_PEAK_DBFS, release_ms=50.0) # Limiter to prevent peaks
    ])

    # Apply effects
    # Pedalboard expects float32, librosa might return float64
    processed_song_arr = board(processed_song_arr.astype(np.float32), sample_rate=sample_rate)
    print("Applied Compressor and Limiter.")

    # After compression, the LUFS will have changed. Re-normalize to target LUFS.
    compressed_lufs = meter.integrated_loudness(_meter_layout(processed_song_arr))
    gain_db_compressed = TARGET_LUFS - compressed_lufs

    # Apply gain using Pedalboard.Gain for consistency and to handle float32
    gain_board = Pedalboard([Gain(gain_db=gain_db_compressed)])
    processed_song_arr = gain_board(processed_song_arr.astype(np.float32), sample_rate = sample_rate)
    print(f"Applied {gain_db_compressed:.2f} dB gain to reach target LUFS post-compression.")


# --- 3. Final Peak Normalization/Limiting (Safety Net) ---
# Ensure the final audio does not exceed TARGET_PEAK_DBFS
current_peak_linear = np.max(np.abs(processed_song_arr))
target_peak_linear = 10 ** (TARGET_PEAK_DBFS / 20.0)

if current_peak_linear > target_peak_linear and current_peak_linear > 0: # also check current_peak_linear > 0 to avoid div by zero if silent
    print(f"Final peak ({20*np.log10(current_peak_linear):.2f} dBFS) exceeds target. Applying peak normalization.")
    peak_norm_gain = target_peak_linear / current_peak_linear
    processed_song_arr = processed_song_arr * peak_norm_gain
else:
    print("Final peak is within target limits.")

# --- 4. Final Loudness Check & Output ---
final_lufs = meter.integrated_loudness(_meter_layout(processed_song_arr))
final_lra = meter.loudness_range(_meter_layout(processed_song_arr))
final_peak_dbfs = 20 * np.log10(np.max(np.abs(processed_song_arr)))

print(f"\n--- Processed Audio Stats ---")
print(f"Final Integrated Loudness: {final_lufs:.2f} LUFS (Target: {TARGET_LUFS:.2f} LUFS)")
print(f"Final Loudness Range (LRA): {final_lra:.2f} LU")
print(f"Final Peak: {final_peak_dbfs:.2f} dBFS (Target: <= {TARGET_PEAK_DBFS:.2f} dBFS)")


# NOTE(review): IPython's Audio rescales array data by default, so the two
# players may sound equally loud; confirm whether normalize=False is wanted.
print("\nOriginal Audio:")
display(Audio(data=song, rate = sample_rate))

print("\nProcessed Audio:")
display(Audio(data=processed_song_arr, rate = sample_rate))

In [None]:
# def change_speed_and_pitch(audio_data, sr, speed_factor):
#     """
#     Changes the speed of audio data without saving to file
#     Parameters:
#         audio_data: numpy array of audio samples
#         sr: sample rate
#         speed_factor: factor to change speed (e.g., 1.2 for 20% faster)
#     Returns:
#         processed audio data as numpy array
#     """
#     # Change speed (tempo) but keep pitch
#     y_processed = lb.effects.time_stretch(audio_data, rate=speed_factor)
    
#     return y_processed, sr

In [None]:
# # Load audio once
# y, sr = lb.load("input.mp3")

# # Process audio when speed change is requested (e.g., from a slider in UI)
# def on_speed_change(speed_factor):
#     processed_audio, sr = change_speed_and_pitch(y, sr, speed_factor)
#     # Here you would send processed_audio to your audio playback system
#     return processed_audio