## Audio Script

In [1]:
import argparse
import json
import librosa
import numpy as np
from pydub import AudioSegment
import time
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import librosa.display

def convert_wav_to_ahap(input_wav, output_dir, mode, split):
    try:
        # Start timing
        start_time = time.time()

        # Load audio file using pydub
        audio = AudioSegment.from_file(input_wav, format=os.path.splitext(input_wav)[-1][1:])

        # Convert to mono and set sample rate to 44.1 kHz
        audio = audio.set_channels(1).set_frame_rate(48000)

        # Convert to numpy array
        audio_data = np.array(audio.get_array_of_samples())

        # Convert to float32 in the range [-1, 1]
        audio_data = audio_data.astype(np.float32) / 32768.0

        sample_rate = audio.frame_rate
        duration = len(audio_data) / sample_rate

        # Perform HPSS once
        harmonic, percussive = librosa.effects.hpss(audio_data)

        # Isolate bass using a low-pass filter
        bass = librosa.effects.hpss(audio_data, margin=(1.0, 20.0))[0]

        # Use the directory of the input WAV file if output_dir is not provided
        if not output_dir:
            output_dir = os.path.dirname(input_wav)

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        output_files = []

        if split == "none":
            ahap_data = generate_ahap(audio_data, sample_rate, mode, harmonic, percussive, bass, duration, split)
            output_ahap = os.path.join(output_dir, os.path.basename(input_wav).replace(os.path.splitext(input_wav)[-1], '.ahap'))
            write_ahap_file(output_ahap, ahap_data)
            output_files.append(output_ahap)
        else:
            splits = ['bass', 'vocals', 'drums', 'other']
            for split_type in splits:
                if split != "all" and split != split_type:
                    continue
                ahap_data = generate_ahap(audio_data, sample_rate, mode, harmonic, percussive, bass, duration, split_type)
                output_ahap = os.path.join(output_dir, os.path.basename(input_wav).replace(os.path.splitext(input_wav)[-1], f'_{split_type}.ahap'))
                write_ahap_file(output_ahap, ahap_data)
                output_files.append(output_ahap)

        # End timing
        end_time = time.time()
        elapsed_time = end_time - start_time

        print(f"AHAP files generated successfully in {elapsed_time:.2f} seconds.")
        print("Generated files:")
        for file in output_files:
            print(f" - {file}")
    except Exception as e:
        print("Error:", e)

def write_ahap_file(output_ahap, ahap_data):
    # Write AHAP content to file
    with open(output_ahap, 'w') as f:
        json.dump(ahap_data, f, indent=4)

def generate_ahap(audio_data, sample_rate, mode, harmonic, percussive, bass, duration, split):
    """
    Generate AHAP content with both transient and continuous events.
    """
    pattern = []

    # Detect onsets for transients
    onsets = librosa.onset.onset_detect(y=audio_data, sr=sample_rate)

    # Convert onsets to time
    event_times = librosa.frames_to_time(onsets, sr=sample_rate)
    
    # Plot the audio waveform, components, and event markers
    ## Uncomment it only if you want to see the plot of the audio data
    # plot_audio_components(audio_data, harmonic, percussive, bass, event_times, sample_rate)

    # Create progress bar for transient events
    with tqdm(total=len(event_times), desc="Processing transient events") as pbar:
        for time in event_times:
            # Determine event type based on audio features
            haptic_mode = determine_haptic_mode(audio_data, time, sample_rate, mode, harmonic, percussive, bass)
            if haptic_mode in ['transient', 'both']:
                event = create_event("HapticTransient", time, audio_data, sample_rate, split)
                pattern.append(event)
            if haptic_mode in ['continuous', 'both']:
                event = create_event("HapticContinuous", time, audio_data, sample_rate, split)
                pattern.append(event)
            pbar.update(1)

    # Add continuous events for bass and harmonic components
    add_continuous_events(pattern, audio_data, sample_rate, harmonic, bass, duration, split)

    ahap_data = {"Version": 1.0, "Pattern": pattern}
    return ahap_data

def create_event(event_type, time, audio_data, sample_rate, split):
    """
    Create an event with appropriate parameters based on event type and audio features.
    """
    intensity, sharpness = calculate_parameters(audio_data, time, sample_rate, split)
    event = {
        "Event": {
            "Time": float(time),
            "EventType": event_type,
            "EventParameters": [
                {"ParameterID": "HapticIntensity", "ParameterValue": float(intensity)},
                {"ParameterID": "HapticSharpness", "ParameterValue": float(sharpness)}
            ]
        }
    }
    if event_type == "HapticContinuous":
        event["Event"]["EventDuration"] = 0.05  # Adjust duration as needed
    return event

def determine_haptic_mode(audio_data, time, sample_rate, mode, harmonic, percussive, bass):
    """
    Determine whether to use transient, continuous, or both haptic modes based on audio features.
    """
    # Calculate RMS energy in a small window around the specified time
    window_size = int(sample_rate * 0.02)  # 20 ms window
    start_index = max(0, int((time - 0.01) * sample_rate))  # Start 10 ms before the specified time
    end_index = min(len(audio_data), start_index + window_size)
    energy = np.sqrt(np.mean(audio_data[start_index:end_index] ** 2))

    # Calculate sub-band energies using pre-computed harmonic, percussive, and bass components
    bass_energy = np.sqrt(np.mean(bass[start_index:end_index] ** 2))
    percussive_energy = np.sqrt(np.mean(percussive[start_index:end_index] ** 2))
    harmonic_energy = np.sqrt(np.mean(harmonic[start_index:end_index] ** 2))

    # Calculate spectral centroid in a small window around the specified time
    window_size = int(sample_rate * 0.05)  # 50 ms window
    start_index = max(0, int((time - 0.025) * sample_rate))  # Start 25 ms before the specified time
    end_index = min(len(audio_data), start_index + window_size)
    spectral_centroid = librosa.feature.spectral_centroid(
        y=audio_data[start_index:end_index], sr=sample_rate
    )

    # Calculate additional features
    zcr = librosa.feature.zero_crossing_rate(y=audio_data[start_index:end_index])
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_data[start_index:end_index], sr=sample_rate)
    mfccs = librosa.feature.mfcc(y=audio_data[start_index:end_index], sr=sample_rate, n_mfcc=13)

    # Get mean value of spectral centroid for comparison
    spectral_centroid_mean = np.mean(spectral_centroid)
    zcr_mean = np.mean(zcr)
    spectral_rolloff_mean = np.mean(spectral_rolloff)
    mfcc_mean = np.mean(mfccs, axis=1)

    # Adjust thresholds based on the mode
    if mode == 'sfx':
        transient_rms_threshold = 0.5
        continuous_rms_threshold = 0.2
        spectral_threshold = np.percentile(spectral_centroid, 90)
    else:  # music
        transient_rms_threshold = 0.2
        continuous_rms_threshold = 0.1
        spectral_threshold = np.percentile(spectral_centroid, 70)

    # Classify based on a combination of features
    if energy > transient_rms_threshold and spectral_centroid_mean > spectral_threshold:
        return 'transient'
    elif energy < continuous_rms_threshold:
        return 'continuous'
    else:
        return 'both'

def calculate_parameters(audio_data, time, sample_rate, split):
    # Calculate RMS energy in a small window around the specified time
    window_size = int(sample_rate * 0.02)  # 20 ms window
    start_index = max(0, int((time - 0.01) * sample_rate))  # Start 10 ms before the specified time
    end_index = min(len(audio_data), start_index + window_size)
    energy = np.sqrt(np.mean(audio_data[start_index:end_index] ** 2))

    # Calculate spectral centroid in a small window around the specified time
    window_size = int(sample_rate * 0.05)  # 50 ms window
    start_index = max(0, int((time - 0.025) * sample_rate))  # Start 25 ms before the specified time
    end_index = min(len(audio_data), start_index + window_size)
    spectral_centroid = librosa.feature.spectral_centroid(
        y=audio_data[start_index:end_index], sr=sample_rate
    )

    # Calculate sharpness based on the spectral centroid
    sharpness = np.mean(spectral_centroid)

    # Scale the energy to the range [0, 1]
    scaled_energy = np.clip(energy / np.max(audio_data), 0, 1)

    # Increase the overall intensity to add more "oomph"
    scaled_energy *= 3 ## By Siming
    scaled_energy = np.clip(scaled_energy, 0, 1)

    # Scale sharpness to a range that fits the haptic feedback parameters
    scaled_sharpness = np.clip(sharpness / np.max(spectral_centroid) , 0, 1) * 0

    # Adjust parameters based on split type
    if split == "vocal":
        scaled_energy *= 1.2
        scaled_sharpness *= 1.1
    elif split == "drums":
        scaled_energy *= 1.5
        scaled_sharpness *= 1.3
    elif split == "bass":
        scaled_energy *= 1.4
        scaled_sharpness *= 0.9
    elif split == "other":
        scaled_energy *= 1.3
        scaled_sharpness *= 1.2

    return scaled_energy, scaled_sharpness

def add_continuous_events(pattern, audio_data, sample_rate, harmonic, bass, duration, split):
    """
    Add continuous haptic events for bass and harmonic components.
    """
    time_step = 0.05  # Adjust time step for continuous events
    num_steps = int(duration / time_step)
    
    # Create progress bar for continuous events
    with tqdm(total=num_steps, desc="Processing continuous events") as pbar:
        for t in np.arange(0, duration, time_step):
            bass_energy = np.sqrt(np.mean(bass[int(t * sample_rate):int((t + time_step) * sample_rate)] ** 2))
            harmonic_energy = np.sqrt(np.mean(harmonic[int(t * sample_rate):int((t + time_step) * sample_rate)] ** 2))
            
            scaled_intensity = 3
            scaled_sharpness = 0

            # Calculate intensity and sharpness
            intensity = np.clip(bass_energy / np.max(bass), 0, 1) * scaled_intensity
            intensity = np.clip(intensity, 0, 1)
            sharpness = np.clip(harmonic_energy / np.max(harmonic), 0, 1) * scaled_sharpness
            
            event = {
                "Event": {
                    "Time": float(t),
                    "EventType": "HapticContinuous",
                    "EventDuration": time_step,
                    "EventParameters": [
                        {"ParameterID": "HapticIntensity", "ParameterValue": float(intensity)},
                        {"ParameterID": "HapticSharpness", "ParameterValue": float(sharpness)}
                    ]
                }
            }
            pattern.append(event)
            pbar.update(1)

def plot_audio_components(audio_data, harmonic, percussive, bass, event_times, sample_rate):
    """
    Plot the original audio waveform, harmonic, percussive, and bass components,
    and mark detected events on the waveform.
    """
    # Create a time axis for the waveform
    time_axis = np.arange(len(audio_data)) / sample_rate

    plt.figure(figsize=(14, 12))

    # Plot the original waveform
    plt.subplot(4, 1, 1)
    librosa.display.waveshow(audio_data, sr=sample_rate)
    plt.title("Original Waveform")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")

    # Plot the harmonic component
    plt.subplot(4, 1, 2)
    librosa.display.waveshow(harmonic, sr=sample_rate)
    plt.title("Harmonic Component")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")

    # Plot the percussive component
    plt.subplot(4, 1, 3)
    librosa.display.waveshow(percussive, sr=sample_rate)
    plt.title("Percussive Component")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")

    # Plot the bass component
    plt.subplot(4, 1, 4)
    librosa.display.waveshow(bass, sr=sample_rate)
    plt.title("Bass Component")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")

    # Mark the event times on the original waveform
    for event_time in event_times:
        plt.subplot(4, 1, 1)
        plt.axvline(x=event_time, color='red', linestyle='--', label='Event Time' if event_time == event_times[0] else "")
    plt.legend()

    plt.tight_layout()
    plt.show()



## Script for Data Processing

In [2]:
import os
import pandas as pd
from openai import OpenAI
import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv
from pydub import AudioSegment
from io import BytesIO
import re
import time

# Load environment variables from .env file
load_dotenv()

def process_meditation_script_from_csv(file_path, output_path, voice_name="en-US-NancyNeural", prosody_rate="-15.00%"):
    """
    Reads a CSV file, processes the `original_content` column to generate SSML content,
    and saves the result to a new CSV file with an additional column `ssml_content`.

    Args:
    - file_path (str): Path to the input CSV file.
    - output_path (str): Path to save the output CSV file.
    - voice_name (str): The name of the Azure Neural voice to use.
    - prosody_rate (str): The prosody rate adjustment for the voice.
    """
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)

    # Function to convert a single chunk into SSML
    def process_chunk_to_ssml(chunk):
        # Replace periods with 2-second breaks
        chunk = re.sub(r"\.", r'.<break time="2s"/>', chunk)
        # Replace commas with 1-second breaks
        chunk = re.sub(r",", r',<break time="1s"/>', chunk)
        # Replace [x's] with x-second breaks
        chunk = re.sub(r"\[(\d+)s\]", r'<break time="\1s"/>', chunk)
        # Wrap with SSML structure
        return f"""
<speak xmlns="http://www.w3.org/2001/10/synthesis" 
       xmlns:mstts="http://www.w3.org/2001/mstts" 
       xmlns:emo="http://www.w3.org/2009/10/emotionml" 
       version="1.0" xml:lang="en-US">
  <voice name="{voice_name}">
    <mstts:express-as style="whispering">
      <prosody rate="{prosody_rate}">
        {chunk}
      </prosody>
    </mstts:express-as>
  </voice>
</speak>
        """.strip()

    # Process the `original_content` column
    df['ssml_content'] = df['original_content'].apply(process_chunk_to_ssml)

    # Save the DataFrame with the new column to a new CSV file
    df.to_csv(output_path, index=False)

    print(f"Processed file saved to: {output_path}")

In [3]:
# Example usage
process_meditation_script_from_csv(
    file_path="audio_content_database.csv",
    output_path="ssml_content_database.csv",
    voice_name="en-US-NancyNeural",
    prosody_rate="-15.00%"
)

Processed file saved to: ssml_content_database.csv


## Genearte AI audio Along with Haptics and Soundscaping

In [4]:
import pandas as pd
from dotenv import load_dotenv
import random
from pydub import AudioSegment
from io import BytesIO
import azure.cognitiveservices.speech as speechsdk
import os

load_dotenv()

seed = 17

def synthesize_text_to_audio(ssml_text, speech_config):
    """
    Synthesizes SSML text to an audio segment using Azure TTS.
    """
    audio_config = speechsdk.audio.PullAudioOutputStream()
    print("Attributes of audio_config: ", vars(audio_config))
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    # Synthesize the SSML
    synthesis_result = synthesizer.speak_ssml_async(ssml_text).get()

    if synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("SynthesizingAudioCompleted")
        audio_data = synthesis_result.audio_data
        return AudioSegment.from_file(BytesIO(audio_data), format="wav")
    elif synthesis_result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = synthesis_result.cancellation_details
        print(f"Speech synthesis canceled: {cancellation_details.reason}")
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {cancellation_details.error_details}")
        return None

def overlay_background_sound(speech_audio, sound_library_path, background_tag, delay = 1000, volume=-15):
    """
    Overlays a background sound onto a speech audio segment.
    """
    # Check for both .mp3 and .wav files
    background_mp3 = os.path.join(sound_library_path, f"{background_tag}.mp3")
    background_wav = os.path.join(sound_library_path, f"{background_tag}.wav")
    
    # Determine which file exists
    if os.path.exists(background_mp3):
        background_file = background_mp3
    elif os.path.exists(background_wav):
        background_file = background_wav
    else:
        print(f"Background file {background_tag} not found in {sound_library_path}.")
        return speech_audio, None
    
    # Load the background audio and adjust its volume
    background_audio = AudioSegment.from_file(background_file).apply_gain(volume)
    
    # Add silence to the beginning of the background audio (one-time delay)
    delayed_background = AudioSegment.silent(duration=delay) + background_audio

    # Extend the background audio to match the speech audio's length
    loops_needed = (len(speech_audio) - len(delayed_background)) // len(background_audio) + 1
    extended_background = delayed_background + (background_audio * loops_needed)
    
    # Trim the background to match the length of the speech audio (no looping)
    final_background = extended_background[:len(speech_audio)]

    return speech_audio.overlay(final_background), final_background

def generate_ahap_file(background_file, output_dir="ahap_outputs"):
    """
    Generates an AHAP file from the provided background audio file.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    ahap_output_file = os.path.join(output_dir, os.path.splitext(os.path.basename(background_file))[0] + ".ahap")
    convert_wav_to_ahap(background_file, output_dir, "sfx", "none")
    print(f"AHAP file saved as '{ahap_output_file}'.")
    return ahap_output_file

def process_csv_and_generate_audio_with_ahap(csv_file_path,
                                             sound_library_path, 
                                             target_function, 
                                             target_scenario, 
                                             background_delay = 5000):
    """
    Processes a CSV file containing SSML text and sound tags, synthesizes audio,
    adds background sounds, generates AHAP files, and combines the results.
    """
    ## Sound library append
    sound_library_path = os.path.join(sound_library_path, target_scenario)

    # Load the CSV
    data = pd.read_csv(csv_file_path, encoding='latin1')
    # Randomly select slices
    # Filter the data for the target function
    data = data[data['function'] == target_function]
    data = data[data['scenario'] == target_scenario]

    # Select the slices for each section (entry, body, end)
    entry_slices = data[data['position'].str.contains('entry', na=False)].sort_values(by='position')
    body_slices =  data[data['position'].str.contains('body', na=False)].sort_values(by='position')
    end_slice = data[data['position'] == 'end']

    # Combine slices into one dataframe
    all_slices = pd.concat([entry_slices, body_slices, end_slice])

    if all_slices.empty:
        print("No slices found for the specified function.")
        return None

    # Create output directories
    output_dir = f"{target_function}/{target_scenario}"
    os.makedirs(output_dir, exist_ok=True)
    
    # Initialize Azure TTS config
    speech_config = speechsdk.SpeechConfig(
        subscription=os.getenv("SPEECH_KEY"),
        region=os.getenv("SPEECH_REGION")
    )
    ## Default is 48Khz with the highest quality
    speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Riff48Khz16BitMonoPcm)

    # Process each slice
    for index, row in all_slices.iterrows():
        slice_name = f"{row['position']}"
        ssml_text = row['ssml_content']
        setting_tag = row['setting_tag']
        event_tag = row['event_tag']

        print(f"Processing {slice_name} with setting tag '{setting_tag}' and event tag {event_tag}")

        # Synthesize SSML text to audio
        speech_audio = synthesize_text_to_audio(ssml_text, speech_config)
        if speech_audio is None:
            continue

        # Adjust background and incorporate event_tag
        if pd.notna(event_tag):
            print(f"Event '{event_tag}' detected. Lowering background volume and adding event.")
            # Overlay background (sound_tag) and event sound
            speech_audio, background_sound = overlay_background_sound(speech_audio, sound_library_path, event_tag, background_delay ,volume=-5)
            speech_audio, _ = overlay_background_sound(speech_audio, sound_library_path, setting_tag, background_delay, volume=-5)
        else:
            print("No event detected. Using default background.")
            # Default background overlay
            speech_audio, background_sound = overlay_background_sound(speech_audio, sound_library_path, setting_tag, background_delay,volume=-5)


        # Export audio file
        audio_output_path = os.path.join(output_dir, f"{slice_name}.wav")
        speech_audio.export(audio_output_path, format="wav")
        print(f"Audio file saved as '{audio_output_path}'.")

        # Export Background file 
        background_name = slice_name + "_background"
        background_output_path = os.path.join(output_dir, f"{background_name}.wav")
        background_sound.export(background_output_path, format = "wav")
        print(f"Background file saved as '{background_output_path}'.")

        # Generate AHAP file
        ahap_output_path = generate_ahap_file(background_output_path, output_dir=output_dir)
        print(f"AHAP file saved as '{ahap_output_path}'.")

    print("All slices processed successfully.")
    return output_dir

In [None]:
process_csv_and_generate_audio_with_ahap('ssml_content_database.csv', 
                                         '../../sound_library',
                                         target_function="meditate", 
                                         target_scenario="zengarden", 
                                         background_delay= 0)

Processing entry1 with setting tag 'leaves' and event tag walk_on_gravel
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x34552e390>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'walk_on_gravel' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/zengarden/entry1.wav'.
Background file saved as 'meditate/zengarden/entry1_background.wav'.


Processing transient events: 100%|██████████| 702/702 [00:03<00:00, 200.02it/s]
Processing continuous events: 1260it [00:00, 1719.51it/s]                          


AHAP files generated successfully in 14.24 seconds.
Generated files:
 - meditate/zengarden/entry1_background.ahap
AHAP file saved as 'meditate/zengarden/entry1_background.ahap'.
AHAP file saved as 'meditate/zengarden/entry1_background.ahap'.
Processing entry2 with setting tag 'zengarden' and event tag nan
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x329889210>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
No event detected. Using default background.
Audio file saved as 'meditate/zengarden/entry2.wav'.
Background file saved as 'meditate/zengarden/entry2_background.wav'.


Processing transient events: 100%|██████████| 794/794 [00:03<00:00, 249.23it/s]
Processing continuous events: 3422it [00:04, 716.37it/s]                          


AHAP files generated successfully in 36.70 seconds.
Generated files:
 - meditate/zengarden/entry2_background.ahap
AHAP file saved as 'meditate/zengarden/entry2_background.ahap'.
AHAP file saved as 'meditate/zengarden/entry2_background.ahap'.
Processing body1 with setting tag 'zengarden' and event tag zen_fountain_ambience
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x329ac0ed0>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'zen_fountain_ambience' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/zengarden/body1.wav'.
Background file saved as 'meditate/zengarden/body1_background.wav'.


Processing transient events: 100%|██████████| 251/251 [00:01<00:00, 157.53it/s]
Processing continuous events: 7284it [00:19, 381.19it/s]                          


AHAP files generated successfully in 85.44 seconds.
Generated files:
 - meditate/zengarden/body1_background.ahap
AHAP file saved as 'meditate/zengarden/body1_background.ahap'.
AHAP file saved as 'meditate/zengarden/body1_background.ahap'.
Processing body2 with setting tag 'zengarden' and event tag singing_bowl
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x328b86ed0>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'singing_bowl' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/zengarden/body2.wav'.
Background file saved as 'meditate/zengarden/body2_background.wav'.


## generate all functions and scenarios

In [None]:
scenarios = ["beach", "forest", "zengarden"]
functions = ["meditate", "relax", "sleep", "oneminutereset"]
for function in functions:
    for scenario in scenarios:
        if function == "beach" and scenario == "meditate":
            continue
        process_csv_and_generate_audio_with_ahap('ssml_content_database.csv', 
                                                 '../../sound_library',
                                                 target_function=function, 
                                                 target_scenario=scenario, 
                                                 background_delay= 0)

Processing entry1 with setting tag 'grass' and event tag nan
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x345c0cf50>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
No event detected. Using default background.
Audio file saved as 'meditate/beach/entry1.wav'.
Background file saved as 'meditate/beach/entry1_background.wav'.


Processing transient events: 100%|██████████| 322/322 [00:00<00:00, 325.38it/s]
Processing continuous events: 875it [00:00, 2568.40it/s]                         


AHAP files generated successfully in 8.26 seconds.
Generated files:
 - meditate/beach/entry1_background.ahap
AHAP file saved as 'meditate/beach/entry1_background.ahap'.
AHAP file saved as 'meditate/beach/entry1_background.ahap'.
Processing entry2 with setting tag 'seagull' and event tag wave1
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x345b05ed0>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'wave1' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/beach/entry2.wav'.
Background file saved as 'meditate/beach/entry2_background.wav'.


Processing transient events: 100%|██████████| 341/341 [00:01<00:00, 271.85it/s]
Processing continuous events: 2378it [00:02, 1031.70it/s]                          


AHAP files generated successfully in 22.45 seconds.
Generated files:
 - meditate/beach/entry2_background.ahap
AHAP file saved as 'meditate/beach/entry2_background.ahap'.
AHAP file saved as 'meditate/beach/entry2_background.ahap'.
Processing body1 with setting tag 'seagull' and event tag wave1
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x32c035d10>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'wave1' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/beach/body1.wav'.
Background file saved as 'meditate/beach/body1_background.wav'.


Processing transient events: 100%|██████████| 880/880 [00:03<00:00, 221.33it/s]
Processing continuous events: 6276it [00:15, 414.44it/s]                          


AHAP files generated successfully in 73.03 seconds.
Generated files:
 - meditate/beach/body1_background.ahap
AHAP file saved as 'meditate/beach/body1_background.ahap'.
AHAP file saved as 'meditate/beach/body1_background.ahap'.
Processing body2 with setting tag 'wave1' and event tag bird
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x1239b2350>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'bird' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/beach/body2.wav'.
Background file saved as 'meditate/beach/body2_background.wav'.


Processing transient events: 100%|██████████| 2181/2181 [00:16<00:00, 135.69it/s]
Processing continuous events: 9565it [00:38, 250.39it/s]                          


AHAP files generated successfully in 140.00 seconds.
Generated files:
 - meditate/beach/body2_background.ahap
AHAP file saved as 'meditate/beach/body2_background.ahap'.
AHAP file saved as 'meditate/beach/body2_background.ahap'.
Processing body3 with setting tag 'wave1' and event tag campfire
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x32c7ab9d0>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'campfire' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/beach/body3.wav'.
Background file saved as 'meditate/beach/body3_background.wav'.


Processing transient events: 100%|██████████| 2919/2919 [00:13<00:00, 213.17it/s]
Processing continuous events: 6140it [00:16, 379.41it/s]                          


AHAP files generated successfully in 82.33 seconds.
Generated files:
 - meditate/beach/body3_background.ahap
AHAP file saved as 'meditate/beach/body3_background.ahap'.
AHAP file saved as 'meditate/beach/body3_background.ahap'.
Processing end with setting tag 'seagull' and event tag grass
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x1682a7650>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'grass' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/beach/end.wav'.
Background file saved as 'meditate/beach/end_background.wav'.


Processing transient events: 100%|██████████| 491/491 [00:01<00:00, 291.71it/s]
Processing continuous events: 1347it [00:00, 1459.21it/s]                          


AHAP files generated successfully in 13.14 seconds.
Generated files:
 - meditate/beach/end_background.ahap
AHAP file saved as 'meditate/beach/end_background.ahap'.
AHAP file saved as 'meditate/beach/end_background.ahap'.
All slices processed successfully.
Processing entry1 with setting tag 'bird' and event tag grass
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x34fc48a50>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'grass' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/forest/entry1.wav'.
Background file saved as 'meditate/forest/entry1_background.wav'.


Processing transient events: 100%|██████████| 399/399 [00:01<00:00, 299.97it/s]
Processing continuous events: 1062it [00:00, 1851.59it/s]                         


AHAP files generated successfully in 10.18 seconds.
Generated files:
 - meditate/forest/entry1_background.ahap
AHAP file saved as 'meditate/forest/entry1_background.ahap'.
AHAP file saved as 'meditate/forest/entry1_background.ahap'.
Processing entry2 with setting tag 'bird' and event tag nan
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x1680d7e50>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
No event detected. Using default background.
Audio file saved as 'meditate/forest/entry2.wav'.
Background file saved as 'meditate/forest/entry2_background.wav'.


Processing transient events: 100%|██████████| 516/516 [00:01<00:00, 266.08it/s]
Processing continuous events: 2014it [00:01, 1051.95it/s]                          


AHAP files generated successfully in 20.32 seconds.
Generated files:
 - meditate/forest/entry2_background.ahap
AHAP file saved as 'meditate/forest/entry2_background.ahap'.
AHAP file saved as 'meditate/forest/entry2_background.ahap'.
Processing body1 with setting tag 'bird' and event tag water_stream
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x32b45b590>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'water_stream' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/forest/body1.wav'.
Background file saved as 'meditate/forest/body1_background.wav'.


Processing transient events: 100%|██████████| 108/108 [00:00<00:00, 224.81it/s]
Processing continuous events: 6595it [00:16, 408.09it/s]                          


AHAP files generated successfully in 75.10 seconds.
Generated files:
 - meditate/forest/body1_background.ahap
AHAP file saved as 'meditate/forest/body1_background.ahap'.
AHAP file saved as 'meditate/forest/body1_background.ahap'.
Processing body2 with setting tag 'bird' and event tag leaves
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x3288b6b10>}
SynthesizingAudioCompleted
Info: on_underlying_io_bytes_received: Close frame received
Info: on_underlying_io_bytes_received: closing underlying io.
Info: on_underlying_io_close_complete: uws_state: 6.
Event 'leaves' detected. Lowering background volume and adding event.
Audio file saved as 'meditate/forest/body2.wav'.
Background file saved as 'meditate/forest/body2_background.wav'.


In [19]:
os.getenv("SPEECH_KEY")

'85XkVYuiMQ3hK1shqTMFVqArxcF7kYFyifxNLtt3nZscm7ofCP3QJQQJ99AKAC1i4TkXJ3w3AAAYACOGFPFv'

In [18]:
load_dotenv()

True