### Final version of the Phonk Beat (Maybe)

In [4]:
import numpy as np
import librosa
import soundfile as sf

def load_audio_with_fallback(file_path, sr=None, start_offset=0):
    """Load an audio file through librosa, starting ``start_offset`` seconds in.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate handed to ``librosa.load`` unchanged.
        start_offset: Value forwarded as librosa's ``offset`` argument.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.

    Raises:
        Exception: Any loading error is printed and then re-raised. Despite
            the function name, no audioread fallback is implemented here.
    """
    try:
        samples, sample_rate = librosa.load(file_path, sr=sr, offset=start_offset)
    except Exception as load_error:
        print(f"Failed to load with librosa due to: {load_error}")
        raise load_error
    return samples, sample_rate

def time_stretch_audio_to_match(audio, sr, target_length):
    """Time-stretch ``audio`` so its duration becomes ``target_length`` seconds.

    Args:
        audio: Audio samples accepted by librosa.
        sr: Sample rate of ``audio``.
        target_length: Desired duration in seconds; must be positive.

    Returns:
        A ``(stretched_audio, sr)`` tuple.

    Raises:
        ValueError: If ``audio`` has zero duration or ``target_length`` is
            not positive, since no finite stretch rate exists then.
    """
    current_length = librosa.get_duration(y=audio, sr=sr)
    if current_length <= 0 or target_length <= 0:
        raise ValueError(
            "Cannot time-stretch: both the current and the target duration must be positive."
        )
    # librosa's ``rate`` is a speed multiplier (rate > 1 shortens the signal),
    # so reaching the target duration needs current / target, not the inverse.
    stretch_rate = current_length / target_length
    return librosa.effects.time_stretch(audio, rate=stretch_rate), sr

def mix_and_sync_vocals_with_beat(vocals_filename, beat_filename, output_filename, beat_delay_seconds=0):
    """Stretch the vocals to the beat's duration, mix both and write the result.

    Args:
        vocals_filename: Path of the vocal track; loaded at its native rate.
        beat_filename: Path of the beat; loaded at the vocals' sample rate.
        output_filename: Destination file for the peak-normalised mix.
        beat_delay_seconds: Passed as the load offset for the beat, i.e. this
            many seconds at the start of the beat file are skipped.
    """
    vocals, sr_vocals = load_audio_with_fallback(vocals_filename, sr=None)
    beat, sr_beat = load_audio_with_fallback(beat_filename, sr=sr_vocals, start_offset=beat_delay_seconds)

    # Time-stretch vocals to match the beat's length
    beat_length_seconds = librosa.get_duration(y=beat, sr=sr_beat)
    vocals_stretched, _ = time_stretch_audio_to_match(vocals, sr_vocals, beat_length_seconds)

    # Stretching is only approximate, so zero-pad the shorter track at the end
    # until both have the same number of samples.
    max_length = max(len(vocals_stretched), len(beat))
    vocals_stretched = np.pad(vocals_stretched, (0, max_length - len(vocals_stretched)))
    beat = np.pad(beat, (0, max_length - len(beat)))

    # Mix the stretched vocals and the beat
    mixed = vocals_stretched + beat
    # Peak-normalise; a silent or empty mix is left untouched because dividing
    # by a zero peak would fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    sf.write(output_filename, mixed, sr_vocals)

def main():
    """Mix the vocal take with the client beat, skipping 3 s of the beat."""
    mix_and_sync_vocals_with_beat(
        r"latest_input.wav",
        r"beat_client.wav",
        r"Phonk_Final_Synced2.wav",
        beat_delay_seconds=3.0,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [14]:
import numpy as np
import librosa
import soundfile as sf
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw


def load_audio_with_fallback(file_path, sr=None, start_offset=0):
    """Load an audio file through librosa, starting ``start_offset`` seconds in.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate handed to ``librosa.load`` unchanged.
        start_offset: Value forwarded as librosa's ``offset`` argument.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.

    Raises:
        Exception: Any loading error is printed and then re-raised; no actual
            fallback loader exists in this function.
    """
    try:
        samples, sample_rate = librosa.load(path=file_path, sr=sr, offset=start_offset)
    except Exception as load_error:
        print(f"Failed to load with librosa due to: {load_error}")
        raise load_error
    return samples, sample_rate


def time_stretch_audio_to_match(audio, sr, target_length):
    """Time-stretch ``audio`` so its duration becomes ``target_length`` seconds.

    Args:
        audio: Audio samples accepted by librosa.
        sr: Sample rate of ``audio``.
        target_length: Desired duration in seconds; must be positive.

    Returns:
        A ``(stretched_audio, sr)`` tuple.

    Raises:
        ValueError: If ``audio`` has zero duration or ``target_length`` is
            not positive.
    """
    current_length = librosa.get_duration(y=audio, sr=sr)
    if current_length <= 0 or target_length <= 0:
        raise ValueError(
            "Cannot time-stretch: both the current and the target duration must be positive."
        )
    # librosa's ``rate`` is a speed multiplier (rate > 1 shortens the signal),
    # so reaching the target duration needs current / target, not the inverse.
    stretch_rate = current_length / target_length
    return librosa.effects.time_stretch(audio, rate=stretch_rate), sr

def align_tracks_dtw(track1, track2, sr):
    """Return the FastDTW warping path between ``track1`` and ``track2``.

    Only the path half of fastdtw's ``(distance, path)`` result is returned;
    the distance is discarded and ``sr`` is accepted but not used. The path
    is not applied to either track here: segment-wise stretching driven by
    it would need a more involved implementation.

    NOTE(review): the tracks are handed to fastdtw with scipy's ``euclidean``
    as the metric; whether that accepts scalar samples from 1-D arrays
    depends on the installed scipy - confirm before relying on this.
    """
    _, warp_path = fastdtw(track1, track2, dist=euclidean)
    return warp_path

def mix_and_sync_vocals_with_beat(vocals_filename, beat_filename, output_filename, beat_delay_seconds=0):
    """Overlay a beat on a vocal track and write the peak-normalised mix.

    The beat is loaded at the vocals' sample rate, placed
    ``beat_delay_seconds`` into a silent buffer as long as the vocals, and
    trimmed so the mix never runs past the end of the vocals.

    Args:
        vocals_filename: Path of the vocal track; loaded at its native rate.
        beat_filename: Path of the beat track.
        output_filename: Destination file for the mix.
        beat_delay_seconds: Seconds of silence inserted before the beat.
            NOTE(review): the same value is also used as the load offset
            into the beat file, so a non-zero delay both skips that much of
            the beat and shifts it later - confirm this is intended.
    """
    vocals, sr_vocals = load_audio_with_fallback(vocals_filename, sr=None)
    beat, _ = load_audio_with_fallback(beat_filename, sr=sr_vocals, start_offset=beat_delay_seconds)

    # Calculate the number of samples to delay the beat by
    beat_delay_samples = int(sr_vocals * beat_delay_seconds)

    # Silent buffer the size of the vocals that receives the delayed beat.
    beat_padded = np.zeros_like(vocals)
    # Clamp at zero: when the delay is longer than the vocals there is no room
    # for the beat, and a negative length would make the slice assignment fail.
    beat_length = max(0, min(len(beat), len(beat_padded) - beat_delay_samples))
    if beat_length:
        beat_padded[beat_delay_samples:beat_delay_samples + beat_length] = beat[:beat_length]

    # Mix the vocals and the adjusted beat
    mixed = vocals + beat_padded
    # Peak-normalise to prevent clipping; a silent or empty mix is left as-is
    # because dividing by a zero peak would fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    sf.write(output_filename, mixed, sr_vocals)

def main():
    """Mix the vocal take with the client beat without any beat delay."""
    mix_and_sync_vocals_with_beat(
        r"latest_input.wav",
        r"beat_client.wav",
        r"Phonk_Final_Synced2.wav",
        beat_delay_seconds=0,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [18]:
import numpy as np
import librosa
import soundfile as sf
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw


def load_audio_with_fallback(file_path, sr=None, start_offset=0):
    """Load an audio file through librosa, starting ``start_offset`` seconds in.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate handed to ``librosa.load`` unchanged.
        start_offset: Value forwarded as librosa's ``offset`` argument.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.

    Raises:
        Exception: Any loading error is printed and then re-raised; no actual
            fallback loader exists in this function.
    """
    try:
        samples, sample_rate = librosa.load(path=file_path, sr=sr, offset=start_offset)
    except Exception as load_error:
        print(f"Failed to load with librosa due to: {load_error}")
        raise load_error
    return samples, sample_rate


def time_stretch_audio_to_match(audio, sr, target_length):
    """Time-stretch ``audio`` so its duration becomes ``target_length`` seconds.

    Args:
        audio: Audio samples accepted by librosa.
        sr: Sample rate of ``audio``.
        target_length: Desired duration in seconds; must be positive.

    Returns:
        A ``(stretched_audio, sr)`` tuple.

    Raises:
        ValueError: If ``audio`` has zero duration or ``target_length`` is
            not positive.
    """
    current_length = librosa.get_duration(y=audio, sr=sr)
    if current_length <= 0 or target_length <= 0:
        raise ValueError(
            "Cannot time-stretch: both the current and the target duration must be positive."
        )
    # librosa's ``rate`` is a speed multiplier (rate > 1 shortens the signal),
    # so reaching the target duration needs current / target, not the inverse.
    stretch_rate = current_length / target_length
    return librosa.effects.time_stretch(audio, rate=stretch_rate), sr

def align_tracks_dtw(track1, track2, sr):
    """Return the FastDTW warping path between ``track1`` and ``track2``.

    Only the path half of fastdtw's ``(distance, path)`` result is returned;
    the distance is discarded and ``sr`` is accepted but not used. The path
    is not applied to either track here: segment-wise stretching driven by
    it would need a more involved implementation.

    NOTE(review): whether scipy's ``euclidean`` accepts the scalar samples
    of 1-D tracks depends on the installed scipy - confirm before use.
    """
    _, warp_path = fastdtw(track1, track2, dist=euclidean)
    return warp_path

def mix_and_sync_vocals_with_beat(vocals_filename, beat_filename, output_filename, beat_delay_seconds=0, vocal_stretch_factor=0.83):
    """Time-stretch the vocals, overlay the beat and write the normalised mix.

    Args:
        vocals_filename: Path of the vocal track; loaded at its native rate.
        beat_filename: Path of the beat; loaded at the vocals' sample rate.
        output_filename: Destination file for the mix.
        beat_delay_seconds: Seconds of silence inserted before the beat.
            NOTE(review): the same value is also used as the load offset
            into the beat file - confirm the double use is intended.
        vocal_stretch_factor: Passed to librosa as ``rate`` (a speed
            multiplier: values below 1 slow the vocals down). ``1.0`` skips
            the stretch entirely.
    """
    vocals, sr_vocals = load_audio_with_fallback(vocals_filename, sr=None)
    beat, _ = load_audio_with_fallback(beat_filename, sr=sr_vocals, start_offset=beat_delay_seconds)

    # Stretch the vocals by the specified factor
    if vocal_stretch_factor != 1.0:
        vocals = librosa.effects.time_stretch(vocals, rate=vocal_stretch_factor)

    # Calculate the number of samples to delay the beat by
    beat_delay_samples = int(sr_vocals * beat_delay_seconds)

    # Silent buffer the size of the (stretched) vocals that receives the beat.
    beat_padded = np.zeros_like(vocals)
    # Clamp at zero: when the delay is longer than the vocals there is no room
    # for the beat, and a negative length would make the slice assignment fail.
    beat_length = max(0, min(len(beat), len(beat_padded) - beat_delay_samples))
    if beat_length:
        beat_padded[beat_delay_samples:beat_delay_samples + beat_length] = beat[:beat_length]

    # Mix the vocals and the adjusted beat
    mixed = vocals + beat_padded
    # Peak-normalise to prevent clipping; a silent or empty mix is left as-is
    # because dividing by a zero peak would fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    sf.write(output_filename, mixed, sr_vocals)

def main():
    """Mix the vocal take with the client beat using the default stretch."""
    mix_and_sync_vocals_with_beat(
        r"latest_input.wav",
        r"beat_client.wav",
        r"Phonk_Final_Synced3.wav",
        beat_delay_seconds=0,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [19]:
import numpy as np
import librosa
import soundfile as sf
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw


def load_audio_with_fallback(file_path, sr=None, start_offset=0):
    """Load an audio file through librosa, starting ``start_offset`` seconds in.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate handed to ``librosa.load`` unchanged.
        start_offset: Value forwarded as librosa's ``offset`` argument.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.

    Raises:
        Exception: Any loading error is printed and then re-raised; no actual
            fallback loader exists in this function.
    """
    try:
        samples, sample_rate = librosa.load(path=file_path, sr=sr, offset=start_offset)
    except Exception as load_error:
        print(f"Failed to load with librosa due to: {load_error}")
        raise load_error
    return samples, sample_rate


def time_stretch_audio_to_match(audio, sr, target_length):
    """Time-stretch ``audio`` so its duration becomes ``target_length`` seconds.

    Args:
        audio: Audio samples accepted by librosa.
        sr: Sample rate of ``audio``.
        target_length: Desired duration in seconds; must be positive.

    Returns:
        A ``(stretched_audio, sr)`` tuple.

    Raises:
        ValueError: If ``audio`` has zero duration or ``target_length`` is
            not positive.
    """
    current_length = librosa.get_duration(y=audio, sr=sr)
    if current_length <= 0 or target_length <= 0:
        raise ValueError(
            "Cannot time-stretch: both the current and the target duration must be positive."
        )
    # librosa's ``rate`` is a speed multiplier (rate > 1 shortens the signal),
    # so reaching the target duration needs current / target, not the inverse.
    stretch_rate = current_length / target_length
    return librosa.effects.time_stretch(audio, rate=stretch_rate), sr

def align_tracks_dtw(track1, track2, sr):
    """Return the FastDTW warping path between ``track1`` and ``track2``.

    Only the path half of fastdtw's ``(distance, path)`` result is returned;
    the distance is discarded and ``sr`` is accepted but not used. The path
    is not applied to either track here: segment-wise stretching driven by
    it would need a more involved implementation.

    NOTE(review): whether scipy's ``euclidean`` accepts the scalar samples
    of 1-D tracks depends on the installed scipy - confirm before use.
    """
    _, warp_path = fastdtw(track1, track2, dist=euclidean)
    return warp_path

def mix_and_sync_vocals_with_beat(vocals_filename, beat_filename, output_filename, beat_delay_seconds=0, vocal_stretch_factor=0.83):
    """Time-stretch the vocals, overlay the beat and write the normalised mix.

    Args:
        vocals_filename: Path of the vocal track; loaded at its native rate.
        beat_filename: Path of the beat; loaded at the vocals' sample rate.
        output_filename: Destination file for the mix.
        beat_delay_seconds: Seconds of silence inserted before the beat.
            NOTE(review): the same value is also used as the load offset
            into the beat file - confirm the double use is intended.
        vocal_stretch_factor: Passed to librosa as ``rate`` (a speed
            multiplier: values below 1 slow the vocals down). ``1.0`` skips
            the stretch entirely.
    """
    vocals, sr_vocals = load_audio_with_fallback(vocals_filename, sr=None)
    beat, _ = load_audio_with_fallback(beat_filename, sr=sr_vocals, start_offset=beat_delay_seconds)

    # Stretch the vocals by the specified factor
    if vocal_stretch_factor != 1.0:
        vocals = librosa.effects.time_stretch(vocals, rate=vocal_stretch_factor)

    # Calculate the number of samples to delay the beat by
    beat_delay_samples = int(sr_vocals * beat_delay_seconds)

    # Silent buffer the size of the (stretched) vocals that receives the beat.
    beat_padded = np.zeros_like(vocals)
    # Clamp at zero: when the delay is longer than the vocals there is no room
    # for the beat, and a negative length would make the slice assignment fail.
    beat_length = max(0, min(len(beat), len(beat_padded) - beat_delay_samples))
    if beat_length:
        beat_padded[beat_delay_samples:beat_delay_samples + beat_length] = beat[:beat_length]

    # Mix the vocals and the adjusted beat
    mixed = vocals + beat_padded
    # Peak-normalise to prevent clipping; a silent or empty mix is left as-is
    # because dividing by a zero peak would fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    sf.write(output_filename, mixed, sr_vocals)

def main():
    """Mix the vocal take with ``Beat.wav`` using the default stretch."""
    mix_and_sync_vocals_with_beat(
        r"latest_input.wav",
        r"Beat.wav",
        r"Phonk_Final_Synced4.wav",
        beat_delay_seconds=0,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [8]:
import numpy as np
import librosa
import soundfile as sf
import pyrubberband as pyrb

def load_audio_with_fallback(file_path, sr=None, start_offset=0):
    """Load an audio file through librosa, starting ``start_offset`` seconds in.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate handed to ``librosa.load`` unchanged.
        start_offset: Value forwarded as librosa's ``offset`` argument.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.

    Raises:
        Exception: Any loading error is printed and then re-raised; no actual
            fallback loader exists in this function.
    """
    try:
        samples, sample_rate = librosa.load(file_path, sr=sr, offset=start_offset)
    except Exception as load_error:
        print(f"Failed to load with librosa due to: {load_error}")
        raise load_error
    return samples, sample_rate

def mix_and_sync_vocals_with_beat(vocals_filename, beat_filename, output_filename, beat_delay_seconds=0, vocal_stretch_factor=0.8):
    """Stretch the vocals with pyrubberband, overlay the beat and write the mix.

    Args:
        vocals_filename: Path of the vocal track; loaded at its native rate.
        beat_filename: Path of the beat; loaded at the vocals' sample rate.
        output_filename: Destination file for the peak-normalised mix.
        beat_delay_seconds: Seconds of silence inserted before the beat.
            NOTE(review): the same value is also used as the load offset
            into the beat file - confirm the double use is intended.
        vocal_stretch_factor: Rate handed to ``pyrb.time_stretch``; ``1.0``
            skips the stretch entirely.
    """
    vocals, sr_vocals = load_audio_with_fallback(vocals_filename, sr=None)
    beat, _ = load_audio_with_fallback(beat_filename, sr=sr_vocals, start_offset=beat_delay_seconds)

    # Stretch the vocals using pyrubberband
    if vocal_stretch_factor != 1.0:
        vocals = pyrb.time_stretch(vocals, sr_vocals, vocal_stretch_factor)

    # Calculate the number of samples to delay the beat by
    beat_delay_samples = int(sr_vocals * beat_delay_seconds)

    # Silent buffer the size of the (stretched) vocals that receives the beat.
    beat_padded = np.zeros_like(vocals)
    # Clamp at zero: when the delay is longer than the vocals there is no room
    # for the beat, and a negative length would make the slice assignment fail.
    beat_length = max(0, min(len(beat), len(beat_padded) - beat_delay_samples))
    if beat_length:
        beat_padded[beat_delay_samples:beat_delay_samples + beat_length] = beat[:beat_length]

    # Mix the vocals and the adjusted beat
    mixed = vocals + beat_padded
    # Peak-normalise to prevent clipping; a silent or empty mix is left as-is
    # because dividing by a zero peak would fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    sf.write(output_filename, mixed, sr_vocals)

def main():
    """Mix the vocal take with the client beat at a 0.8 stretch rate."""
    mix_and_sync_vocals_with_beat(
        "latest_input.wav",
        "beat_client.wav",
        "Phonk_Final_Synced2_1.wav",
        beat_delay_seconds=0,
        vocal_stretch_factor=0.8,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [9]:
import numpy as np
import librosa
import soundfile as sf
import pyrubberband as pyrb

def load_audio_with_fallback(file_path, sr=None, start_offset=0):
    """Load an audio file through librosa, starting ``start_offset`` seconds in.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate handed to ``librosa.load`` unchanged.
        start_offset: Value forwarded as librosa's ``offset`` argument.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.

    Raises:
        Exception: Any loading error is printed and then re-raised; no actual
            fallback loader exists in this function.
    """
    try:
        samples, sample_rate = librosa.load(file_path, sr=sr, offset=start_offset)
    except Exception as load_error:
        print(f"Failed to load with librosa due to: {load_error}")
        raise load_error
    return samples, sample_rate

def stretch_audio(file_path, output_filename, stretch_factor=0.8):
    """Time-stretch one audio file with pyrubberband and save the result.

    Args:
        file_path: Input audio file; loaded at its native sample rate.
        output_filename: Where the processed audio is written.
        stretch_factor: Rate handed to ``pyrb.time_stretch``. A factor of
            exactly ``1.0`` bypasses pyrubberband and writes the audio as
            loaded.
    """
    samples, sample_rate = load_audio_with_fallback(file_path, sr=None)

    # Only invoke pyrubberband when the factor actually changes the timing.
    if stretch_factor == 1.0:
        result = samples
    else:
        result = pyrb.time_stretch(samples, sample_rate, stretch_factor)

    sf.write(output_filename, result, sample_rate)

def main():
    """Write a 0.8-rate stretched copy of the vocal take, without any beat."""
    # Third argument is the stretch factor; adjust as needed.
    stretch_audio("latest_input.wav", "Audio_002_Without_Beat.wav", 0.8)


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

#### Mr. Krystof wants the beat synced without adding any delay to the audio, and without slowing down the source audio, so that is what we decided to provide.


In [17]:
import numpy as np
import librosa
import soundfile as sf
import pyrubberband as pyrb

def load_audio(file_path, sr=None):
    """Load an audio file with librosa, reporting failures instead of raising.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate handed to ``librosa.load`` unchanged.

    Returns:
        ``(audio, sample_rate)`` on success, or ``(None, None)`` after
        printing the error when loading fails for any reason.
    """
    try:
        samples, sample_rate = librosa.load(file_path, sr=sr)
    except Exception as load_error:
        print(f"Error loading audio: {load_error}")
        return None, None
    return samples, sample_rate

def stretch_audio(audio, sr, stretch_factor=1.0):
    """Time-stretch ``audio`` with pyrubberband.

    Args:
        audio: Audio samples.
        sr: Sample rate of ``audio``.
        stretch_factor: Rate handed to ``pyrb.time_stretch``.

    Returns:
        The stretched audio, or the very same ``audio`` object when
        ``stretch_factor`` equals ``1.0`` (pyrubberband is not called).
    """
    if stretch_factor == 1.0:
        return audio
    return pyrb.time_stretch(audio, sr, stretch_factor)

def synchronize_beat(vocals_audio, beat_audio, sr):
    """Loop or trim the beat so it has exactly as many samples as the vocals.

    The beat is never stretched; it simply starts together with the vocals.

    Args:
        vocals_audio: Vocal samples; only the length is used.
        beat_audio: Beat samples to fit to that length.
        sr: Sample rate (accepted but not used).

    Returns:
        The beat repeated end-to-end and cut when it is shorter than the
        vocals, trimmed when it is longer, or silence of the vocals' length
        when the beat is empty.
    """
    vocals_length = len(vocals_audio)
    beat_length = len(beat_audio)

    if beat_length == 0:
        # An empty beat cannot be looped (the repeat count would divide by
        # zero); mixing silence leaves the vocals unchanged instead.
        return np.zeros(vocals_length, dtype=np.asarray(beat_audio).dtype)

    if beat_length < vocals_length:
        # Integer ceiling division: enough repeats to cover the vocals.
        loop_count = -(-vocals_length // beat_length)
        beat_audio = np.tile(beat_audio, loop_count)

    # Cut off whatever extends past the end of the vocals.
    return beat_audio[:vocals_length]

def process_audio(vocals_path, beat_path, output_path, stretch_factor=1.0):
    """Stretch the vocals, lay the looped/trimmed beat under them and save.

    Returns early without writing anything when either file fails to load
    (``load_audio`` reports the failure and yields ``None``).

    Args:
        vocals_path: Vocal track; loaded at its native sample rate.
        beat_path: Beat track; loaded at the vocals' sample rate.
        output_path: Destination of the peak-normalised mix. A write failure
            is printed rather than raised.
        stretch_factor: Forwarded to ``stretch_audio`` for the vocals.
    """
    vocals, sr = load_audio(vocals_path)
    if vocals is None:
        return
    vocals = stretch_audio(vocals, sr, stretch_factor)

    beat, _ = load_audio(beat_path, sr)
    if beat is None:
        return

    beat_synced = synchronize_beat(vocals, beat, sr)

    # Mix vocals and beat
    mixed = vocals + beat_synced
    # Peak-normalise to prevent clipping; a silent or empty mix is left as-is
    # because dividing by a zero peak would fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    try:
        sf.write(output_path, mixed, sr)
    except Exception as e:
        print(f"Failed to write output file: {e}")

def main():
    """Mix the unstretched vocal take with the client beat."""
    # Last argument is the stretch factor; change if needed.
    process_audio(
        "latest_input.wav",
        "beat_client.wav",
        "Final_Output_Synced_Beat_S.wav",
        1.0,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

#### One more change was proposed: the pauses between words should be shortened while the playback speed of the file stays the same.

In [None]:
#NEW - NOT WORKING MAYBE

import numpy as np
import librosa
import soundfile as sf

def load_audio(file_path):
    """Load ``file_path`` with librosa, keeping the file's own sample rate.

    Returns:
        The ``(audio, sample_rate)`` pair from ``librosa.load(..., sr=None)``.
    """
    samples, native_rate = librosa.load(file_path, sr=None)
    return samples, native_rate

def reduce_silences(audio, sr, top_db=10, silence_reduction_factor=0.1):
    """Shorten the silent gaps in ``audio`` while keeping the sounding parts.

    Args:
        audio: Audio samples as a NumPy array.
        sr: Sample rate (accepted but not used).
        top_db: Threshold passed to ``librosa.effects.split``.
        silence_reduction_factor: Fraction of each silent gap that is kept.

    Returns:
        A new array in which every gap before a non-silent interval
        (including leading silence) is replaced by zeros of the reduced
        length. Silence after the last interval is dropped. If no non-silent
        interval is found, an empty array is returned.
    """
    # Detect non-silent intervals
    non_silent_intervals = librosa.effects.split(audio, top_db=top_db)

    processed_audio_segments = []
    last_end = 0

    for start, end in non_silent_intervals:
        # Replace the gap since the previous interval with a shortened one.
        if start > last_end:
            silence_duration = start - last_end
            reduced_silence_duration = int(silence_duration * silence_reduction_factor)
            # Match the audio dtype so concatenation does not promote it.
            processed_audio_segments.append(np.zeros(reduced_silence_duration, dtype=audio.dtype))

        # Append only the sounding part. Slicing from ``last_end`` would copy
        # the original silence back in, on top of the shortened gap above.
        processed_audio_segments.append(audio[start:end])
        last_end = end

    if not processed_audio_segments:
        # Entirely silent input: np.concatenate cannot handle an empty list.
        return audio[:0]

    # Concatenate all segments
    return np.concatenate(processed_audio_segments)

def process_audio(vocals_path, output_path, silence_reduction_factor=0.1):
    """Load a track, shorten its silences and write it to ``output_path``.

    Args:
        vocals_path: Input audio file.
        output_path: Destination file, written at the input's sample rate.
        silence_reduction_factor: Forwarded to ``reduce_silences``.
    """
    samples, sample_rate = load_audio(vocals_path)
    shortened = reduce_silences(
        samples, sample_rate, silence_reduction_factor=silence_reduction_factor
    )
    sf.write(output_path, shortened, sample_rate)

def main():
    """Shorten the silences of the already-mixed track to 10 %."""
    process_audio(
        "Final_Output_Synced_Beat.wav",
        "reduced_silence_output_3.wav",
        silence_reduction_factor=0.1,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [3]:
# OLD - SEEMS TO WORK WAITING FOR CLIENT APPROVAL -- Approved but last change needed

import numpy as np
import librosa
import soundfile as sf

def load_audio(file_path):
    """Load ``file_path`` with librosa, keeping the file's own sample rate.

    Returns:
        The ``(audio, sample_rate)`` pair from ``librosa.load(..., sr=None)``.
    """
    samples, native_rate = librosa.load(file_path, sr=None)
    return samples, native_rate

def reduce_silences(audio, sr, top_db=25, reduce_by_factor=0.3):
    """Shorten the silent gaps between the non-silent parts of ``audio``.

    Args:
        audio: Audio samples as a NumPy array.
        sr: Sample rate (accepted but not used).
        top_db: Threshold passed to ``librosa.effects.split``.
        reduce_by_factor: Fraction of each inner gap that is kept.

    Returns:
        The non-silent intervals joined by zero-filled gaps of the reduced
        length. Silence before the first and after the last interval is
        dropped. An empty array is returned when nothing is non-silent.
    """
    # Detect non-silent intervals
    non_silent_intervals = librosa.effects.split(audio, top_db=top_db)
    if len(non_silent_intervals) == 0:
        # Entirely silent input: np.concatenate cannot handle an empty list.
        return audio[:0]

    processed_audio = []
    last_index = len(non_silent_intervals) - 1

    for i, (start, end) in enumerate(non_silent_intervals):
        # Keep the sounding segment untouched.
        processed_audio.append(audio[start:end])

        if i < last_index:
            # Gap between this interval and the next one, in samples.
            silence_duration = non_silent_intervals[i + 1][0] - end
            reduced_silence_duration = int(silence_duration * reduce_by_factor)
            # Match the audio dtype so concatenation does not promote it.
            processed_audio.append(np.zeros(reduced_silence_duration, dtype=audio.dtype))

    # Concatenate all processed audio segments back together
    return np.concatenate(processed_audio)

def process_audio(vocals_path, output_path, reduce_by_factor=0.3):
    """Load a track, shorten its silences and write it to ``output_path``.

    Args:
        vocals_path: Input audio file.
        output_path: Destination file, written at the input's sample rate.
        reduce_by_factor: Forwarded to ``reduce_silences``.
    """
    samples, sample_rate = load_audio(vocals_path)
    shortened = reduce_silences(samples, sample_rate, reduce_by_factor=reduce_by_factor)
    sf.write(output_path, shortened, sample_rate)

def main():
    """Shorten the silences of the already-mixed track to 30 %."""
    process_audio(
        "Final_Output_Synced_Beat.wav",
        "reduced_silence_output_F.wav",
        reduce_by_factor=0.3,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [16]:
# 15/03/24 - Last Changes Oncoming

import numpy as np
import librosa
import soundfile as sf

def load_audio(file_path):
    """Load ``file_path`` with librosa, keeping the file's own sample rate.

    Returns:
        The ``(audio, sample_rate)`` pair from ``librosa.load(..., sr=None)``.
    """
    samples, native_rate = librosa.load(file_path, sr=None)
    return samples, native_rate

def reduce_silences(audio, sr, top_db=22, reduce_by_factor=0.0011):
    """Shorten the silent gaps between the non-silent parts of ``audio``.

    Args:
        audio: Audio samples as a NumPy array.
        sr: Sample rate (accepted but not used).
        top_db: Threshold passed to ``librosa.effects.split``.
        reduce_by_factor: Fraction of each inner gap that is kept.

    Returns:
        The non-silent intervals joined by zero-filled gaps of the reduced
        length. Silence before the first and after the last interval is
        dropped. An empty array is returned when nothing is non-silent.
    """
    # Detect non-silent intervals
    non_silent_intervals = librosa.effects.split(audio, top_db=top_db)
    if len(non_silent_intervals) == 0:
        # Entirely silent input: np.concatenate cannot handle an empty list.
        return audio[:0]

    processed_audio = []
    last_index = len(non_silent_intervals) - 1

    for i, (start, end) in enumerate(non_silent_intervals):
        # Keep the sounding segment untouched.
        processed_audio.append(audio[start:end])

        if i < last_index:
            # Gap between this interval and the next one, in samples.
            silence_duration = non_silent_intervals[i + 1][0] - end
            reduced_silence_duration = int(silence_duration * reduce_by_factor)
            # Match the audio dtype so concatenation does not promote it.
            processed_audio.append(np.zeros(reduced_silence_duration, dtype=audio.dtype))

    # Concatenate all processed audio segments back together
    return np.concatenate(processed_audio)

def process_audio(vocals_path, output_path, reduce_by_factor=0.0011):
    """Load a track, shorten its silences and write it to ``output_path``.

    Args:
        vocals_path: Input audio file.
        output_path: Destination file, written at the input's sample rate.
        reduce_by_factor: Forwarded to ``reduce_silences``.
    """
    samples, sample_rate = load_audio(vocals_path)
    shortened = reduce_silences(samples, sample_rate, reduce_by_factor=reduce_by_factor)
    sf.write(output_path, shortened, sample_rate)

def main():
    """Collapse the silences of the already-mixed track almost entirely."""
    process_audio(
        "Final_Output_Synced_Beat.wav",
        "reduced_silence_output_F2.wav",
        reduce_by_factor=0.0011,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

#### While researching, it occurred to me that the delay may not be getting reduced because the beat is already mixed into the audio. What if we process the vocals standalone first, and only add the beat once the vocal audio turns out right?

##### Below is a trial of the experiment described above.


# Experimental

# Seems to have worked as the client said - "This is Perfect!"

In [52]:
import numpy as np
import librosa
import soundfile as sf

def load_audio(file_path):
    """Load ``file_path`` with librosa, keeping the file's own sample rate.

    Returns:
        The ``(audio, sample_rate)`` pair from ``librosa.load(..., sr=None)``.
    """
    samples, native_rate = librosa.load(file_path, sr=None)
    return samples, native_rate

def reduce_silences(audio, sr, top_db=22, max_reduction_factor=0.011):
    """Shorten silent gaps, keeping a fraction that grows with gap length.

    For a gap of ``g`` samples the kept fraction is
    ``max(0.01, max_reduction_factor * g / sr)``, so at least 1 % of every
    gap is kept and longer gaps keep a larger share.

    Args:
        audio: Audio samples as a NumPy array.
        sr: Sample rate, used to express each gap in seconds.
        top_db: Threshold passed to ``librosa.effects.split``.
        max_reduction_factor: Per-second multiplier in the formula above.

    Returns:
        The non-silent intervals joined by zero-filled gaps of the reduced
        length. Silence before the first and after the last interval is
        dropped. An empty array is returned when nothing is non-silent.
    """
    # Detect non-silent intervals
    non_silent_intervals = librosa.effects.split(audio, top_db=top_db)
    if len(non_silent_intervals) == 0:
        # Entirely silent input: np.concatenate cannot handle an empty list.
        return audio[:0]

    processed_audio = []
    last_index = len(non_silent_intervals) - 1

    for i, (start, end) in enumerate(non_silent_intervals):
        processed_audio.append(audio[start:end])
        if i < last_index:
            silence_duration = non_silent_intervals[i + 1][0] - end
            reduction_factor = max(0.01, max_reduction_factor * (silence_duration / sr))
            reduced_silence_duration = int(silence_duration * reduction_factor)
            # Match the audio dtype so concatenation does not promote it.
            processed_audio.append(np.zeros(reduced_silence_duration, dtype=audio.dtype))

    return np.concatenate(processed_audio)

def speed_up_audio(audio, sr, speed_factor=0.6):
    """Time-stretch ``audio`` with librosa at a rate of ``1 / speed_factor``.

    The pitch is left unchanged by the time-stretch.

    Args:
        audio: Audio samples.
        sr: Sample rate (accepted but not used).
        speed_factor: Reciprocal of the rate given to librosa, so values
            below 1 (such as the default 0.6) make the audio faster.

    Returns:
        The time-stretched audio.
    """
    playback_rate = 1 / speed_factor
    return librosa.effects.time_stretch(audio, rate=playback_rate)

def process_audio(vocals_path, output_path, max_reduction_factor=0.011, speed_factor=0.6):
    """Shorten the silences of a track, speed it up and save the result.

    Args:
        vocals_path: Input audio file.
        output_path: Destination file, written at the input's sample rate.
        max_reduction_factor: Forwarded to ``reduce_silences``.
        speed_factor: Forwarded to ``speed_up_audio``.
    """
    samples, sample_rate = load_audio(vocals_path)
    tightened = reduce_silences(samples, sample_rate, max_reduction_factor=max_reduction_factor)
    sped_up = speed_up_audio(tightened, sample_rate, speed_factor=speed_factor)
    sf.write(output_path, sped_up, sample_rate)

def main():
    """Tighten and speed up the raw vocal take before any beat is added."""
    process_audio(
        "latest_input.wav",
        "reduced_delay_speed_up_output.wav",
        max_reduction_factor=0.011,
        speed_factor=0.6,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

##### Next we add the beat and merge it with the audio, keeping the two in sync.

In [60]:
import numpy as np
import librosa
import soundfile as sf
import pyrubberband as pyrb

def load_audio(file_path, sr=None):
    """Load an audio file with librosa, reporting failures instead of raising.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate handed to ``librosa.load`` unchanged.

    Returns:
        ``(audio, sample_rate)`` on success, or ``(None, None)`` after
        printing the error when loading fails for any reason.
    """
    try:
        samples, sample_rate = librosa.load(file_path, sr=sr)
    except Exception as load_error:
        print(f"Error loading audio: {load_error}")
        return None, None
    return samples, sample_rate

def stretch_audio(audio, sr, stretch_factor=1.0):
    """Time-stretch ``audio`` with pyrubberband.

    Args:
        audio: Audio samples.
        sr: Sample rate of ``audio``.
        stretch_factor: Rate handed to ``pyrb.time_stretch``.

    Returns:
        The stretched audio, or the very same ``audio`` object when
        ``stretch_factor`` equals ``1.0`` (pyrubberband is not called).
    """
    if stretch_factor == 1.0:
        return audio
    return pyrb.time_stretch(audio, sr, stretch_factor)

def synchronize_beat(vocals_audio, beat_audio, sr):
    """Loop or trim the beat so it has exactly as many samples as the vocals.

    The beat is never stretched; it simply starts together with the vocals.

    Args:
        vocals_audio: Vocal samples; only the length is used.
        beat_audio: Beat samples to fit to that length.
        sr: Sample rate (accepted but not used).

    Returns:
        The beat repeated end-to-end and cut when it is shorter than the
        vocals, trimmed when it is longer, or silence of the vocals' length
        when the beat is empty.
    """
    vocals_length = len(vocals_audio)
    beat_length = len(beat_audio)

    if beat_length == 0:
        # An empty beat cannot be looped (the repeat count would divide by
        # zero); mixing silence leaves the vocals unchanged instead.
        return np.zeros(vocals_length, dtype=np.asarray(beat_audio).dtype)

    if beat_length < vocals_length:
        # Integer ceiling division: enough repeats to cover the vocals.
        loop_count = -(-vocals_length // beat_length)
        beat_audio = np.tile(beat_audio, loop_count)

    # Cut off whatever extends past the end of the vocals.
    return beat_audio[:vocals_length]

def process_audio(vocals_path, beat_path, output_path, stretch_factor=1.0):
    """Stretch the vocals, lay the looped/trimmed beat under them and save.

    Returns early without writing anything when either file fails to load
    (``load_audio`` reports the failure and yields ``None``).

    Args:
        vocals_path: Vocal track; loaded at its native sample rate.
        beat_path: Beat track; loaded at the vocals' sample rate.
        output_path: Destination of the peak-normalised mix. A write failure
            is printed rather than raised.
        stretch_factor: Forwarded to ``stretch_audio`` for the vocals.
    """
    vocals, sr = load_audio(vocals_path)
    if vocals is None:
        return
    vocals = stretch_audio(vocals, sr, stretch_factor)

    beat, _ = load_audio(beat_path, sr)
    if beat is None:
        return

    beat_synced = synchronize_beat(vocals, beat, sr)

    # Mix vocals and beat
    mixed = vocals + beat_synced
    # Peak-normalise to prevent clipping; a silent or empty mix is left as-is
    # because dividing by a zero peak would fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    try:
        sf.write(output_path, mixed, sr)
    except Exception as e:
        print(f"Failed to write output file: {e}")

def main():
    """Mix the tightened vocal take with the client beat at a 0.95 stretch."""
    # Last argument is the stretch factor; change if needed.
    process_audio(
        "reduced_delay_speed_up_output.wav",
        "beat_client.wav",
        "Audio_003_1503.wav",
        0.95,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [76]:
#Poojan latest
import numpy as np
import librosa
import soundfile as sf
import audioread

def simple_delay_reverb(audio, sr, delay_ms=300, decay=0, repeats=2):
    """Add decaying delayed copies of ``audio`` to itself and peak-normalise.

    On each of ``repeats`` passes, the running output shifted by the delay
    and scaled by ``decay`` is added back onto the output. With the default
    ``decay=0`` the echoes contribute nothing and only the normalisation
    has an effect.

    Args:
        audio: Input samples; copied, never modified in place.
        sr: Sample rate, used to convert ``delay_ms`` into samples.
        delay_ms: Echo delay in milliseconds; must not be negative.
        decay: Gain applied to each delayed copy.
        repeats: Number of feedback passes.

    Returns:
        The processed signal scaled to a peak of 1, or returned unscaled
        when it is silent or empty.

    Raises:
        ValueError: If ``delay_ms`` is negative.
    """
    delay_samples = int(sr * delay_ms / 1000)
    if delay_samples < 0:
        raise ValueError("delay_ms must not be negative.")

    output = np.copy(audio)
    total = len(output)
    for _ in range(repeats):
        delayed = np.zeros_like(output)
        # A delay at least as long as the signal pushes the echo entirely out
        # of range. The explicit end index (instead of ``[:-delay_samples]``)
        # keeps a zero delay working, since ``[:-0]`` would select nothing.
        if delay_samples < total:
            delayed[delay_samples:] = output[:total - delay_samples]
        output += delayed * decay

    # A zero peak (silent or empty signal) would otherwise divide by zero.
    peak = np.max(np.abs(output)) if output.size else 0.0
    if peak > 0:
        output = output / peak
    return output

def load_audio_with_fallback(file_path, sr=None):
    """Load audio with librosa, decoding through audioread if that fails.

    Args:
        file_path: Path of the audio file to read.
        sr: Target sample rate, or ``None`` to keep the file's own rate.

    Returns:
        An ``(audio, sample_rate)`` tuple. On the fallback path the audio is
        mono float data scaled to [-1, 1) and ``sample_rate`` is the rate the
        data is actually at (the file's rate when ``sr`` is ``None``).
    """
    try:
        return librosa.load(file_path, sr=sr)
    except Exception as e:
        print(f"Failed to load with librosa due to: {e}. Falling back to audioread.")
        with audioread.audio_open(file_path) as f:
            native_sr = f.samplerate
            channels = f.channels
            # audioread yields raw 16-bit little-endian PCM buffers; joining
            # them first also copes with a file that yields no buffers.
            raw = b"".join(f)

        # Scale int16 PCM to floats so the level matches what the librosa
        # path returns.
        data = np.frombuffer(raw, dtype="<i2").astype(np.float32) / 32768.0
        if channels > 1:
            # Samples are interleaved per frame: drop any trailing partial
            # frame, then average the channels down to mono.
            usable = len(data) - len(data) % channels
            data = data[:usable].reshape(-1, channels).mean(axis=1)

        if sr is None or sr == native_sr:
            # Report the real rate; returning ``None`` would break callers
            # that multiply by the sample rate.
            return data, native_sr
        # Keyword arguments work on both older and current librosa releases.
        return librosa.resample(data, orig_sr=native_sr, target_sr=sr), sr

def modify_vocals(vocals_filename, beat_filename, output_filename, volume_gain=2.0, beat_start_time=0.0, pitch_shift_semitones=0):
    """Process the vocals, align them with a beat excerpt and write the mix.

    The vocals are optionally pitch-shifted, passed through
    ``simple_delay_reverb`` and scaled by ``volume_gain``. The beat is cut
    to start at ``beat_start_time``. Both tracks are then trimmed to the
    shorter length, summed and peak-normalised.

    Args:
        vocals_filename: Vocal track; loaded at its native sample rate.
        beat_filename: Beat track; loaded at the vocals' sample rate.
        output_filename: Destination file for the mix.
        volume_gain: Linear gain applied to the vocals before mixing.
        beat_start_time: Position in the beat, in seconds, where the excerpt
            starts.
        pitch_shift_semitones: Semitones to shift the vocals by; ``0`` skips
            the pitch shift.

    Raises:
        ValueError: If ``beat_start_time`` is negative or lies at or beyond
            the end of the beat.
    """
    vocals, sr_vocals = load_audio_with_fallback(vocals_filename, sr=None)
    beat, sr_beat = load_audio_with_fallback(beat_filename, sr=sr_vocals)

    if pitch_shift_semitones != 0:
        # Keyword arguments: recent librosa releases no longer accept ``sr``
        # and ``n_steps`` positionally, while older ones accept both forms.
        vocals = librosa.effects.pitch_shift(vocals, sr=sr_vocals, n_steps=pitch_shift_semitones)

    vocals = simple_delay_reverb(vocals, sr_vocals)

    start_sample = int(beat_start_time * sr_beat)
    if start_sample < 0:
        # A negative index would silently slice from the END of the beat.
        raise ValueError("Beat start time must not be negative.")
    if start_sample >= len(beat):
        raise ValueError("Beat start time exceeds the length of the beat track.")
    beat = beat[start_sample:]

    vocals = vocals * volume_gain

    # Trim both tracks to the shorter one so they can be summed sample-wise.
    min_length = min(len(vocals), len(beat))
    mixed = vocals[:min_length] + beat[:min_length]

    # Peak-normalise; a silent or empty mix is left as-is because dividing by
    # a zero peak would fill the output with NaNs.
    peak = np.max(np.abs(mixed)) if mixed.size else 0.0
    if peak > 0:
        mixed = mixed / peak

    sf.write(output_filename, mixed, sr_vocals)

def main():
    """Mix the tightened vocals with the beat starting about 1.96 s in."""
    modify_vocals(
        r"reduced_delay_speed_up_output.wav",
        r"beat_client.wav",
        r"Trial_P_O_3.wav",
        volume_gain=1.0,
        beat_start_time=1.955555,
        pitch_shift_semitones=0,
    )


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()