In [3]:
# Working Step 1
import numpy as np
import librosa
import soundfile as sf

def load_audio(file_path):
    """Read an audio file with librosa without forcing a sample rate.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
    """
    # sr=None is passed through so librosa does not resample to its default rate.
    return librosa.load(file_path, sr=None)

def reduce_silences(audio, sr, top_db=22, max_reduction_factor=0.011):
    """Shorten the silent gaps between the non-silent parts of a signal.

    The non-silent intervals reported by ``librosa.effects.split`` are kept
    unchanged. Each gap *between* two consecutive intervals is replaced by a
    shorter run of zeros. Anything before the first interval or after the last
    one is dropped.

    The kept fraction of a gap is
    ``max_reduction_factor * gap_seconds``, clamped to the range [0.01, 1.0],
    so a gap is never made longer than it originally was.

    Args:
        audio: 1-D array of samples (assumed mono, as returned by
            ``librosa.load`` with its defaults).
        sr: Sample rate of ``audio`` in Hz; used to express gaps in seconds.
        top_db: Threshold forwarded to ``librosa.effects.split``.
        max_reduction_factor: Per-second scale applied to the gap duration to
            obtain the kept fraction of that gap.

    Returns:
        A new array with the same dtype as ``audio``. It is empty when no
        non-silent interval is detected.
    """
    audio = np.asarray(audio)

    # Detect non-silent intervals as (start, end) sample indices.
    non_silent_intervals = librosa.effects.split(audio, top_db=top_db)
    if len(non_silent_intervals) == 0:
        # Nothing audible: np.concatenate would raise on an empty list.
        return np.zeros(0, dtype=audio.dtype)

    pieces = []
    last_index = len(non_silent_intervals) - 1
    for i, (start, end) in enumerate(non_silent_intervals):
        pieces.append(audio[start:end])
        if i == last_index:
            break

        silence_duration = non_silent_intervals[i + 1][0] - end
        # Floor of 0.01 as before; ceiling of 1.0 so a long gap (or a large
        # max_reduction_factor) can never stretch the silence.
        reduction_factor = min(1.0, max(0.01, max_reduction_factor * (silence_duration / sr)))
        reduced_silence_duration = int(silence_duration * reduction_factor)
        # Match the input dtype so the result is not silently upcast to float64.
        pieces.append(np.zeros(reduced_silence_duration, dtype=audio.dtype))

    return np.concatenate(pieces)

def speed_up_audio(audio, sr, speed_factor=0.6):
    """Time-stretch a signal with librosa, leaving the pitch alone.

    Args:
        audio: Samples to stretch.
        sr: Sample rate of ``audio``; accepted but not used by this function.
        speed_factor: The stretch rate handed to librosa is ``1 / speed_factor``,
            so values below 1 give a rate above 1.

    Returns:
        The result of ``librosa.effects.time_stretch``.
    """
    stretch_rate = 1 / speed_factor
    return librosa.effects.time_stretch(audio, rate=stretch_rate)

def process_audio(vocals_path, output_path, max_reduction_factor=0.011, speed_factor=0.6):
    """Load a vocal track, shorten its silences, time-stretch it and save it.

    Args:
        vocals_path: Audio file to read.
        output_path: Destination file written with ``soundfile.write``.
        max_reduction_factor: Forwarded to ``reduce_silences``.
        speed_factor: Forwarded to ``speed_up_audio``.
    """
    samples, sample_rate = load_audio(vocals_path)

    # Stage 1: compress the gaps between non-silent regions.
    condensed = reduce_silences(
        samples,
        sample_rate,
        max_reduction_factor=max_reduction_factor,
    )

    # Stage 2: time-stretch the condensed signal.
    stretched = speed_up_audio(condensed, sample_rate, speed_factor=speed_factor)

    # The output keeps the sample rate of the input file.
    sf.write(output_path, stretched, sample_rate)

def main():
    """Run the step-1 pipeline on the hard-coded input and output files."""
    process_audio(
        "Nav-On-His-First-Beat.wav",
        "NavBeatSpeedDelay.wav",
        max_reduction_factor=0.011,
        speed_factor=0.6,
    )


# Entry point: only runs when this code executes as the main module.
if __name__ == "__main__":
    main()

In [4]:
# Working Step 2
import numpy as np
import librosa
import soundfile as sf
import audioread

def simple_delay_reverb(audio, sr, delay_ms=300, decay=0, repeats=2):
    """Add decaying delayed copies of a signal to itself, then peak-normalize.

    On every repeat the *current* output (including echoes already added) is
    shifted right by the delay, scaled by ``decay`` and added back. With the
    default ``decay=0`` no echo is audible and only the normalization has an
    effect.

    Args:
        audio: 1-D array of samples (assumed mono). It is not modified.
        sr: Sample rate in Hz, used to convert ``delay_ms`` to samples.
        delay_ms: Echo delay in milliseconds.
        decay: Gain applied to each delayed copy.
        repeats: Number of echo passes.

    Returns:
        A new floating-point array of the same length whose peak absolute
        value is 1.0. A silent or empty input is returned unscaled instead of
        producing NaNs or raising.
    """
    output = np.copy(audio)
    if not np.issubdtype(output.dtype, np.floating):
        # Integer PCM cannot take the in-place float accumulation below.
        output = output.astype(np.float64)

    delay_samples = int(sr * delay_ms / 1000)
    # A zero delay would make output[:-0] an empty slice (shape mismatch), and
    # a delay of at least the signal length leaves no room for any echo.
    # Both cases contribute nothing, so the echo passes are skipped.
    if 0 < delay_samples < len(output):
        for _ in range(repeats):
            delayed = np.zeros_like(output)
            delayed[delay_samples:] = output[:-delay_samples]
            output += delayed * decay

    # Guard the normalization: np.max raises on an empty array and a zero
    # peak would divide 0/0.
    peak = np.max(np.abs(output)) if output.size else 0.0
    if peak > 0:
        output = output / peak
    return output

def load_audio_with_fallback(file_path, sr=None):
    """Load audio with librosa, decoding through audioread if that fails.

    The fallback path mirrors what ``librosa.load`` returns by default: a mono
    float32 signal scaled to [-1, 1] together with its sample rate.

    Args:
        file_path: Path of the audio file to read.
        sr: Target sample rate. ``None`` keeps the file's own rate.

    Returns:
        ``(samples, sample_rate)``. ``sample_rate`` is ``sr`` when one was
        requested, otherwise the rate reported by the decoder.
    """
    try:
        return librosa.load(file_path, sr=sr)
    except Exception as e:
        # Deliberately broad: any decoding failure triggers the fallback.
        print(f"Failed to load with librosa due to: {e}. Falling back to audioread.")
        with audioread.audio_open(file_path) as f:
            native_sr = f.samplerate
            channels = getattr(f, "channels", 1) or 1
            # audioread yields raw 16-bit little-endian signed PCM buffers.
            chunks = [np.frombuffer(chunk, dtype="<i2") for chunk in f]

    if chunks:
        # Scale int16 PCM to [-1, 1] so levels match the librosa path.
        data = np.concatenate(chunks).astype(np.float32) / 32768.0
    else:
        data = np.zeros(0, dtype=np.float32)

    if channels > 1:
        # Samples are interleaved per frame; drop a trailing partial frame
        # and average the channels down to mono.
        usable = len(data) - len(data) % channels
        data = data[:usable].reshape(-1, channels).mean(axis=1)

    if sr is not None and sr != native_sr:
        # Keyword arguments: librosa >= 0.10 rejects positional sample rates.
        data = librosa.resample(data, orig_sr=native_sr, target_sr=sr)
        return data, sr

    # Report the real rate instead of echoing back a None `sr`.
    return data, native_sr

def modify_vocals(vocals_filename, beat_filename, output_filename, volume_gain=2.0, beat_start_time=0.0, pitch_shift_semitones=0):
    """Mix a vocal track over a beat and write the peak-normalized result.

    Steps: load both files (the beat at the vocals' sample rate), optionally
    pitch-shift the vocals, run them through ``simple_delay_reverb`` with its
    default settings, skip the first ``beat_start_time`` seconds of the beat,
    apply ``volume_gain`` to the vocals, trim both to the shorter length, sum
    them and normalize the peak to 1.0.

    Args:
        vocals_filename: Path of the vocal track.
        beat_filename: Path of the beat track.
        output_filename: Destination written with ``soundfile.write``.
        volume_gain: Multiplier applied to the vocals before mixing. Because
            the mix is normalized afterwards, this sets the vocals-to-beat
            balance rather than the absolute loudness.
        beat_start_time: Offset in seconds at which the beat starts playing.
        pitch_shift_semitones: Semitones to shift the vocals; 0 disables it.

    Raises:
        ValueError: If ``beat_start_time`` is negative or lies at or beyond
            the end of the beat track.
    """
    if beat_start_time < 0:
        # A negative offset would otherwise slice from the END of the beat.
        raise ValueError("Beat start time must not be negative.")

    vocals, sr_vocals = load_audio_with_fallback(vocals_filename, sr=None)
    beat, sr_beat = load_audio_with_fallback(beat_filename, sr=sr_vocals)

    if pitch_shift_semitones != 0:
        # Keyword arguments: librosa >= 0.10 makes `sr` and `n_steps`
        # keyword-only, and older releases accept the same names.
        vocals = librosa.effects.pitch_shift(vocals, sr=sr_vocals, n_steps=pitch_shift_semitones)

    vocals = simple_delay_reverb(vocals, sr_vocals)

    start_sample = int(beat_start_time * sr_beat)
    if start_sample >= len(beat):
        raise ValueError("Beat start time exceeds the length of the beat track.")
    beat = beat[start_sample:]

    vocals = vocals * volume_gain

    # Mix only the overlapping part of the two tracks.
    min_length = min(len(vocals), len(beat))
    mixed = vocals[:min_length] + beat[:min_length]

    # Guard the normalization against an empty or completely silent mix,
    # which would raise or fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    sf.write(output_filename, mixed, sr_vocals)

def main():
    """Mix the hard-coded vocal and beat files into the final output file."""
    modify_vocals(
        r"NavBeatSpeedDelay.wav",
        r"beat_client.wav",
        r"Final_Pr_O_1.wav",
        volume_gain=1.0,
        beat_start_time=1.955555,
        pitch_shift_semitones=0,
    )


# Entry point: only runs when this code executes as the main module.
if __name__ == "__main__":
    main()