## DeepSeek for Data Processing

In [4]:
import os
from openai import OpenAI
import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv
from pydub import AudioSegment
from io import BytesIO
import re
import time

# Load environment variables from .env file
load_dotenv()

def process_meditation_script_from_file(file_path):
    """
    Process a meditation script from a text file, splitting it by "\n\n",
    and converting each section to SSML format.
    """
    # Read the text from the file
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    
    # Split the content into chunks based on double newlines
    script_chunks = content.split("\n\n")
    
    # Function to process individual chunks using LLM
    def process_chunk(chunk):
        api_key = os.getenv("DEEPSEEK_API_KEY")
        base_url = os.getenv("DEEPSEEK_BASE_URL")

        # Initialize the OpenAI client
        client = OpenAI(api_key=api_key, base_url=base_url)
        prompt = f"""
        Convert the following meditation script into one paragraph and then transform it into SSML format with appropriate breaks: 
        {chunk}
        """
        system_message = f"""
        You are a meditation expert that converts text to SSML, your response output should be raw SSML start wtih <speak> end with <\speak>.
        The requirements are: 
        1. There should be 1-second break at every period.
        2. There will be 0.5-second break at every comma.
        3. When it comes to seconds indicated in square brackets in the text, add break with indicated seconds. For example, there should be 30-second break at [30s] in the text.
        """
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt},
            ],
            stream=False
        )
        ssml_output = response.choices[0].message.content
        return ssml_output
    
    # Process each chunk and collect SSML outputs
    ssml_outputs = [process_chunk(chunk) for chunk in script_chunks if chunk.strip()]
    
    return ssml_outputs



def process_meditation_script_from_file_pure_code(file_path, voice_name="en-US-AriaNeural", prosody_rate="-15.00%"):
    """
    Processes a meditation script from a text file, splits it by "\n\n",
    and converts each section to SSML format with specified prosody and voice settings.

    Args:
    - file_path (str): Path to the text file containing the meditation script.
    - voice_name (str): The name of the Azure Neural voice to use.
    - prosody_rate (str): The prosody rate adjustment for the voice.

    Returns:
    - str: Combined SSML text.
    """
    # Read the text from the file
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Split the content into chunks based on double newlines
    script_chunks = content.split("\n\n")

    # Function to convert a single chunk into SSML
    def process_chunk_to_ssml(chunk):
        # Replace periods and commas with SSML breaks
        # Add a 1-second break after periods
        chunk = re.sub(r"(\.)", r'\1<break time="2s"/>', chunk)
        # Add a 0.5-second break after commas
        chunk = re.sub(r"(,)", r'\1<break time="1s"/>', chunk)
        # Wrap with SSML structure
        return f"""
<speak xmlns="http://www.w3.org/2001/10/synthesis" 
       xmlns:mstts="http://www.w3.org/2001/mstts" 
       xmlns:emo="http://www.w3.org/2009/10/emotionml" 
       version="1.0" xml:lang="en-US">
  <voice name="{voice_name}">
    <s />
    <mstts:express-as style="whispering">
      <prosody rate="{prosody_rate}">
        {chunk}
      </prosody>
    </mstts:express-as>
  </voice>
</speak>
        """.strip()

    # Process each chunk into SSML
    ssml_chunks = [process_chunk_to_ssml(chunk.strip()) for chunk in script_chunks if chunk.strip()]

    return ssml_chunks


def save_ssml_list_to_file(ssml_list, file_path="combined_output.ssml"):
    """
    Saves a list of SSML texts into one file.

    Args:
    - ssml_list (list of str): A list of SSML strings to combine and save.
    - file_path (str): The path to the file where the combined SSML content will be saved.

    Returns:
    - str: Confirmation message with the file path.
    """
    # Combine all SSML texts into one string, separated by a newline for readability
    combined_ssml = "\n\n".join(ssml_list)

    # Save the combined SSML content to the specified file
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(combined_ssml)

    print(f"All SSML texts have been saved to '{file_path}'.")
    return file_path


In [5]:
# Example usage
# file_path = "slices_MVP.txt"
# ssml_outputs = process_meditation_script_from_file(file_path)
# ssml_path = save_ssml_list_to_file(ssml_outputs)

# Example usage
file_path = "slices_MVP.txt"
ssml_outputs = process_meditation_script_from_file_pure_code(file_path)
ssml_path = save_ssml_list_to_file(ssml_outputs, "combined_output_purecode.ssml")

All SSML texts have been saved to 'combined_output_purecode.ssml'.


## Genearte AI audio Along with Haptics and Soundscaping

In [18]:
import pandas as pd
import random
from pydub import AudioSegment
from io import BytesIO
import azure.cognitiveservices.speech as speechsdk
import os
from AHAPpy import generate_ahap as ga

seed = 17

def synthesize_text_to_audio(ssml_text, speech_config):
    """
    Synthesizes SSML text to an audio segment using Azure TTS.
    """
    audio_config = speechsdk.audio.PullAudioOutputStream()
    print("Attributes of audio_config: ", vars(audio_config))
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    # Synthesize the SSML
    synthesis_result = synthesizer.speak_ssml_async(ssml_text).get()

    if synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("SynthesizingAudioCompleted")
        audio_data = synthesis_result.audio_data
        return AudioSegment.from_file(BytesIO(audio_data), format="wav")
    elif synthesis_result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = synthesis_result.cancellation_details
        print(f"Speech synthesis canceled: {cancellation_details.reason}")
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {cancellation_details.error_details}")
        return None

def overlay_background_sound(speech_audio, background_tag, delay = 1000, volume=-15):
    """
    Overlays a background sound onto a speech audio segment.
    """
    background_file = f"{background_tag}.mp3"
    if os.path.exists(background_file):
        # Load the background audio and adjust its volume
        background_audio = AudioSegment.from_file(background_file).apply_gain(volume)
        
        # Add silence to the beginning of the background audio (one-time delay)
        delayed_background = AudioSegment.silent(duration=delay) + background_audio

        # Extend the background audio to match the speech audio's length
        loops_needed = (len(speech_audio) - len(delayed_background)) // len(background_audio) + 1
        extended_background = delayed_background + (background_audio * loops_needed)
        
        # Trim the background to match the length of the speech audio (no looping)
        final_background = extended_background[:len(speech_audio)]

        return speech_audio.overlay(final_background), final_background
    else:
        print(f"Background file {background_file} not found.")
        return speech_audio, None

def generate_ahap(background_file, output_dir="ahap_outputs"):
    """
    Generates an AHAP file from the provided background audio file.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    ahap_output_file = os.path.join(output_dir, os.path.splitext(os.path.basename(background_file))[0] + ".ahap")
    ga.convert_wav_to_ahap(background_file, output_dir, "sfx", "none")
    print(f"AHAP file saved as '{ahap_output_file}'.")
    return ahap_output_file

def process_csv_and_generate_audio_with_ahap(csv_file_path, num_slices=5, section_breaks=5000, background_delay = 5000, output_file="final_audio.wav"):
    """
    Processes a CSV file containing SSML text and sound tags, synthesizes audio,
    adds background sounds, generates AHAP files, and combines the results.
    """
    # Load the CSV
    data = pd.read_csv(csv_file_path, encoding='latin1')

    # Randomly select slices
    #selected_slices = data.sample(n=num_slices, random_state=seed)

    # Initialize Azure TTS config
    speech_config = speechsdk.SpeechConfig(
        subscription=os.getenv("SPEECH_KEY"),
        region=os.getenv("SPEECH_REGION")
    )

    # Initialize the combined audio
    combined_background = AudioSegment.silent(duration=0)
    combined_audio = AudioSegment.silent(duration=0)
    audio_break = AudioSegment.silent(duration=section_breaks)

    # AHAP generation directory
    # ahap_files = []

    # Process each slice
    for index, row in data.iterrows():
        print("Index: ", index)
        if index > 0:
            continue
        ssml_text = row['ssml_content']
        sound_tag = row['sound_tag']
        print(f"Processing slice with tag '{sound_tag}'...")

        # Synthesize SSML text to audio
        speech_audio = synthesize_text_to_audio(ssml_text, speech_config)
        if speech_audio is None:
            continue 

        if type(sound_tag) == str:
            # Overlay background sound
            speech_audio, background_audio = overlay_background_sound(speech_audio, sound_tag, delay = background_delay)
        
        else:
            # If no background sound, use silence for the background
            background_audio = AudioSegment.silent(duration=len(speech_audio))

        # Add the background audio to the combined background track
        combined_background += background_audio
        combined_background += audio_break

        # Append to combined audio
        combined_audio += speech_audio
        combined_audio += audio_break
        
    # Export the merged background audio
    merged_background_file = f"background_{output_file}"
    combined_background.export(merged_background_file, format="wav")

    # Export the final combined audio
    combined_audio.export(output_file, format="wav")
    print(f"Final audio file saved as '{output_file}'.")

    # Generate a single AHAP file for the merged background
    ahap_output_file = f"ahap_{output_file}"
    generate_ahap(merged_background_file)
    print(f"Merged AHAP file saved as '{ahap_output_file}'.")

    return output_file

In [19]:
process_csv_and_generate_audio_with_ahap('mvp_script.csv', section_breaks= 0, background_delay= 5000)

Index:  0
Processing slice with tag 'grass'...
Attributes of audio_config:  {'_AudioOutputStream__handle': <azure.cognitiveservices.speech.interop._Handle object at 0x1308ce590>}
SynthesizingAudioCompleted
Index:  1
Index:  2
Index:  3
Index:  4
Final audio file saved as 'final_audio.wav'.


Processing transient events: 100%|███████████| 175/175 [00:01<00:00, 109.45it/s]
Processing continuous events: 432it [00:00, 549.99it/s]                         


AHAP files generated successfully in 10.48 seconds.
Generated files:
 - ahap_outputs/background_final_audio_combined.ahap
AHAP file saved as 'ahap_outputs/background_final_audio.ahap'.
Merged AHAP file saved as 'ahap_final_audio.wav'.


'final_audio.wav'