In [63]:
import librosa
import numpy as np
from pydub import AudioSegment
import speech_recognition as sr
from pydub import AudioSegment
from pydub.playback import play
import time
import json
from AHAPpy import generate_ahap as ga
import os.path


def pitch_shift(audio, sample_rate, n_steps):
    """Pitch-shift a waveform with librosa.

    Thin wrapper around ``librosa.effects.pitch_shift``.

    Args:
        audio: Waveform samples, passed through to librosa unchanged.
        sample_rate: Sampling rate of ``audio``; forwarded as librosa's ``sr``.
        n_steps: Number of steps to shift by; forwarded as ``n_steps``.

    Returns:
        Whatever ``librosa.effects.pitch_shift`` returns for these arguments.
    """
    shifted = librosa.effects.pitch_shift(
        audio,
        sr=sample_rate,
        n_steps=n_steps,
    )
    return shifted

def time_stretch(audio, rate):
    """Time-stretch a waveform with librosa.

    Thin wrapper around ``librosa.effects.time_stretch``.

    Args:
        audio: Waveform samples, passed through to librosa unchanged.
        rate: Stretch factor; forwarded as librosa's ``rate`` keyword.

    Returns:
        Whatever ``librosa.effects.time_stretch`` returns for these arguments.
    """
    stretched = librosa.effects.time_stretch(audio, rate=rate)
    return stretched

def change_amplitude(audio_segment, db_change):
    """Return ``audio_segment + db_change``.

    Args:
        audio_segment: Object supporting ``+`` with ``db_change``. The caller
            in this file passes a pydub ``AudioSegment``, for which adding a
            number is a gain change in dB.
        db_change: Amount to add (positive is louder for an ``AudioSegment``).

    Returns:
        The result of the addition; the input is not modified here.
    """
    adjusted = audio_segment + db_change
    return adjusted

def apply_equalizer(audio_segment, low_gain, mid_gain, high_gain):
    """Apply a basic three-band equalizer and return the re-mixed result.

    The input is split into three bands using its own filter methods, each
    band is gained independently, and the bands are overlaid back together:

    * low  - low-pass at 200
    * mid  - high-pass at 200 followed by low-pass at 2000
    * high - high-pass at 2000

    Args:
        audio_segment: Object exposing ``low_pass_filter``,
            ``high_pass_filter``, ``apply_gain`` and ``overlay`` (a pydub
            ``AudioSegment`` in this file).
        low_gain: Gain passed to ``apply_gain`` for the low band.
        mid_gain: Gain passed to ``apply_gain`` for the mid band.
        high_gain: Gain passed to ``apply_gain`` for the high band.

    Returns:
        The low band overlaid with the mid band, then with the high band.
    """
    low_cutoff, high_cutoff = 200, 2000

    low_band = audio_segment.low_pass_filter(low_cutoff)
    low_band = low_band.apply_gain(low_gain)

    mid_band = audio_segment.high_pass_filter(low_cutoff)
    mid_band = mid_band.low_pass_filter(high_cutoff)
    mid_band = mid_band.apply_gain(mid_gain)

    high_band = audio_segment.high_pass_filter(high_cutoff)
    high_band = high_band.apply_gain(high_gain)

    low_plus_mid = low_band.overlay(mid_band)
    return low_plus_mid.overlay(high_band)

def detect_keywords(audio_file, keywords, retries=3, delay=5):
    """Transcribe an audio file and report which keywords it contains.

    The file is transcribed with ``Recognizer.recognize_google``; the
    transcript is printed and then searched (case-insensitively, by substring)
    for each keyword.

    Args:
        audio_file: Path or file-like object accepted by ``sr.AudioFile``.
        keywords: Iterable of keyword strings to look for.
        retries: Maximum number of transcription attempts when the
            recognition request fails with ``sr.RequestError``.
        delay: Seconds to sleep between failed attempts.

    Returns:
        A list of the keywords (in their original spelling and order) found in
        the transcript. Always a list: ``[]`` is returned when the speech is
        unintelligible (``sr.UnknownValueError``), when every attempt fails,
        or when ``retries`` is not positive.
    """
    # Nothing to attempt: return an empty list instead of implicitly
    # falling off the end of the function with None.
    if retries <= 0:
        return []

    recognizer = sr.Recognizer()

    for attempt in range(retries):
        try:
            # Keep the file open only while reading it; the network request
            # below does not need the file handle.
            with sr.AudioFile(audio_file) as source:
                audio = recognizer.record(source)
            transcript = recognizer.recognize_google(audio)
        except sr.RequestError as e:
            print(f"Request error: {e}. Attempt {attempt + 1}/{retries}")
            # Only wait if another attempt will actually follow.
            if attempt < retries - 1:
                time.sleep(delay)
            continue
        except sr.UnknownValueError:
            # Speech could not be understood; retrying would not help.
            return []

        print(transcript)
        # Lowercase once, and lowercase each keyword too, so that keywords
        # containing capitals can still match.
        transcript_lower = transcript.lower()
        return [kw for kw in keywords if kw.lower() in transcript_lower]

    # Every attempt ended in a RequestError.
    return []

def adjust_audio_duration(audio, duration_seconds):
    """Trim or loop ``audio`` so that it lasts ``duration_seconds``.

    Lengths are measured with ``len(audio)`` and compared against the target
    in milliseconds (the unit used by the pydub segments this file passes in).

    Args:
        audio: Sliceable, repeatable clip (``len``, ``[:n]``, ``* int`` and
            ``+`` are used), e.g. a pydub ``AudioSegment``.
        duration_seconds: Target duration in seconds; may be an int or float.
            It is rounded to the nearest whole millisecond.

    Returns:
        The clip cut to the target length if it was longer, repeated (plus a
        partial repeat) if it was shorter, or unchanged if it already matched.

    Raises:
        ValueError: If ``audio`` is empty but a positive duration is
            requested, since an empty clip cannot be looped to fill time.
    """
    # Work in whole milliseconds. A float here (e.g. from duration_seconds=1.5)
    # would give a float loop count, and repeating a clip needs an integer.
    desired_duration_ms = int(round(duration_seconds * 1000))
    current_duration_ms = len(audio)

    if current_duration_ms > desired_duration_ms:
        # Trim the audio if it's longer than the desired duration
        audio = audio[:desired_duration_ms]
    elif current_duration_ms < desired_duration_ms:
        if current_duration_ms == 0:
            raise ValueError(
                "Cannot loop an empty audio clip to fill the requested duration."
            )
        # Loop the audio if it's shorter than the desired duration:
        # whole repeats first, then a partial repeat for the remainder.
        loop_count, remaining_duration = divmod(
            desired_duration_ms, current_duration_ms
        )
        audio = audio * loop_count + audio[:remaining_duration]

    print("Successfully adjusted audio.")
    return audio

def overlay_background_sound(audio_segment, background_file, volume=-10):
    """Mix a background sound file underneath ``audio_segment``.

    Args:
        audio_segment: Foreground segment; its ``overlay`` method is used.
        background_file: Path of the background audio, loaded with
            ``AudioSegment.from_file``.
        volume: Gain passed to ``apply_gain`` on the background before mixing
            (default ``-10``).

    Returns:
        The result of overlaying the gained background onto ``audio_segment``
        with ``loop=True``.
    """
    bed = AudioSegment.from_file(background_file)
    bed = bed.apply_gain(volume)
    mixed = audio_segment.overlay(bed, loop=True)
    return mixed

def process_audio(speech_file_path, sound_file_paths, duration_seconds, pitch_steps, time_stretch_rate, amp_change, eq_settings, keywords_to_sounds,
                  output_path="/Users/user/Downloads/embrace_audio/Output.wav",
                  output_background_path="/Users/user/Downloads/embrace_audio/Output-background.wav"):
    """Process a speech recording and mix it over a layered background bed.

    Pipeline:
      1. Load the speech file, pitch-shift and time-stretch it.
      2. Convert it to a 16-bit mono pydub segment, then apply the amplitude
         change and the three-band equalizer.
      3. Fit every background file to ``duration_seconds``, overlay them into
         one background bed and export it to ``output_background_path``.
      4. Generate an AHAP file from the exported background bed.
      5. Fit the processed speech to ``duration_seconds``, overlay the
         exported background bed and export to ``output_path``.

    Args:
        speech_file_path: Path of the speech recording to process.
        sound_file_paths: Non-empty sequence of background sound file paths.
        duration_seconds: Target duration of the outputs, in seconds.
        pitch_steps: Steps passed to ``pitch_shift``.
        time_stretch_rate: Rate passed to ``time_stretch``.
        amp_change: Value passed to ``change_amplitude``.
        eq_settings: ``(low_gain, mid_gain, high_gain)`` unpacked into
            ``apply_equalizer``.
        keywords_to_sounds: Accepted for interface compatibility; currently
            unused because keyword detection is disabled.
        output_path: Where the final mix is written (WAV).
        output_background_path: Where the background bed is written (WAV).
            The AHAP file is generated into this file's directory.

    Returns:
        None. Results are written to disk.

    Raises:
        ValueError: If ``sound_file_paths`` is empty.
    """
    # Fail fast, before the expensive speech processing, rather than with an
    # IndexError once the background files are reached.
    if not sound_file_paths:
        raise ValueError("sound_file_paths must contain at least one background sound file.")

    print ("output_path:", output_path)

    # Load audio file (sr=None keeps the file's native sampling rate)
    audio, sample_rate = librosa.load(speech_file_path, sr=None)

    # Pitch Shifting
    shifted_audio = pitch_shift(audio, sample_rate, pitch_steps)

    # Time Stretching
    stretched_audio = time_stretch(shifted_audio, time_stretch_rate)

    # Convert to pydub AudioSegment for amplitude and EQ.
    # Clip to [-1, 1] first: processing can overshoot that range, and
    # out-of-range floats do not saturate when cast to int16.
    pcm_samples = (np.clip(stretched_audio, -1.0, 1.0) * 32767).astype(np.int16)
    audio_segment = AudioSegment(
        pcm_samples.tobytes(),
        frame_rate=sample_rate,
        sample_width=2,
        channels=1
    )

    # Amplitude Manipulation
    audio_segment = change_amplitude(audio_segment, amp_change)

    # Spectrum Manipulation (Basic EQ)
    audio_segment = apply_equalizer(audio_segment, *eq_settings)

    # Build the background bed: the first file is the base layer and every
    # later file is overlaid on top, each fitted to the target duration.
    sound_segment = None
    for sound_file_path in sound_file_paths:
        layer = AudioSegment.from_file(sound_file_path)
        layer = adjust_audio_duration(layer, duration_seconds)
        if sound_segment is None:
            sound_segment = layer
        else:
            sound_segment = sound_segment.overlay(layer, loop=True)

    sound_segment.export(output_background_path, format="wav")

    # Generate AHAP by background sound
    ga.convert_wav_to_ahap(output_background_path, os.path.dirname(output_background_path), "sfx", "none")
    print("New AHAP file generated successfully.")

    audio_segment = adjust_audio_duration(audio_segment, duration_seconds)
    audio_segment = overlay_background_sound(audio_segment, output_background_path)

    # Export the final audio
    audio_segment.export(output_path, format="wav")
    
"""
    # Load the AHAP file and modify with new merged audio
    with open(sound_file_path.split('.', 1)[0] + '_combined.ahap', 'r') as file:
        data = json.load(file)

    # Define the new event to be added
    new_event = {
        "Event": {
            "Time": 0.0,
            "EventType":"AudioCustom",
            "EventWaveformPath":output_path,
              "EventParameters":
              [
                  {"ParameterID":"AudioVolume","ParameterValue":0.75}
              ]
        }
    }

    # Append the new event to the "Pattern" list
    data["Pattern"].append(new_event)

    # Save the modified JSON data back to the file
    with open(sound_file_path.split('.', 1)[0] + '_combined.ahap', 'w') as file:
        json.dump(data, file, indent=4)
    
    print("New event added successfully.")


    # Overlay background sounds based on detected keywords
    for keyword in detected_keywords:
        background_file = keywords_to_sounds[keyword]
        print(background_file)
        audio_segment = overlay_background_sound(audio_segment, background_file)
"""

'\n    # Load the AHAP file and modify with new merged audio\n    with open(sound_file_path.split(\'.\', 1)[0] + \'_combined.ahap\', \'r\') as file:\n        data = json.load(file)\n\n    # Define the new event to be added\n    new_event = {\n        "Event": {\n            "Time": 0.0,\n            "EventType":"AudioCustom",\n            "EventWaveformPath":output_path,\n              "EventParameters":\n              [\n                  {"ParameterID":"AudioVolume","ParameterValue":0.75}\n              ]\n        }\n    }\n\n    # Append the new event to the "Pattern" list\n    data["Pattern"].append(new_event)\n\n    # Save the modified JSON data back to the file\n    with open(sound_file_path.split(\'.\', 1)[0] + \'_combined.ahap\', \'w\') as file:\n        json.dump(data, file, indent=4)\n    \n    print("New event added successfully.")\n\n\n    # Overlay background sounds based on detected keywords\n    for keyword in detected_keywords:\n        background_file = keywords_to_sou

In [65]:
#file_path="/Users/user/Downloads/Mountain.mp3",
#output_path="/Users/user/Downloads/audio_output/output.wav",

# Maps a spoken keyword to the background sound file associated with it.
# NOTE(review): process_audio accepts this mapping but does not read it while
# the keyword-detection step is disabled, so it has no effect on the output.
keywords_to_sounds = {
    "focus": "/Users/user/Downloads/audio_output/sounds/birds-singing.mp3",
    "sitting": "/Users/user/Downloads/audio_output/sounds/heavy-rain.mp3",
}

# Example usage: process one speech recording and mix it over three layered
# background sounds. Outputs are written to disk by process_audio.
process_audio(
    speech_file_path="/Users/user/Downloads/embrace_audio/Ana.wav",
    sound_file_paths=["/Users/user/Downloads/embrace_audio/Waves.wav",
                    "/Users/user/Downloads/embrace_audio/Thunder.wav",
                     "/Users/user/Downloads/embrace_audio/Canon.mp3"],
    duration_seconds=60,         # Target length of the outputs, in seconds
    pitch_steps=0,               # Pitch shift in semitones; 0 leaves the pitch unchanged
    time_stretch_rate=1,      # Time-stretch rate; 1 keeps the original speed
    amp_change=5,                # Increase volume by 5 dB
    eq_settings=(-3, 0, 3),       # Low gain: -3 dB, Mid gain: 0 dB, High gain: +3 dB
    keywords_to_sounds=keywords_to_sounds
)

output_path: /Users/user/Downloads/embrace_audio/Output.wav
Successfully adjusted audio.
Successfully adjusted audio.
Successfully adjusted audio.


Processing transient events: 100%|████████████████████████████████████| 545/545 [00:08<00:00, 65.93it/s]
Processing continuous events: 100%|██████████████████████████████████| 600/600 [00:02<00:00, 293.33it/s]


AHAP files generated successfully in 40.49 seconds.
Generated files:
 - /Users/user/Downloads/embrace_audio/Output-background_combined.ahap
New AHAP file generated successfully.
Successfully adjusted audio.
