In [1]:
##Libs
import os
from google.cloud import texttospeech
import re
import requests
from dotenv import load_dotenv

##Modules
from reader import ebook

In [2]:
# Few-shot SSML sample that msg() interpolates into the system prompt. It shows
# the model the <speak>/<prosody>/<break>/<emphasis> markup expected in replies.
example=''' 
<speak>
    <prosody rate="slow" pitch="+1st">Mother died today.</prosody>
    <break time="700ms"/>
    <prosody rate="medium">Or, maybe, yesterday; I can’t be sure.</prosody>
    <break time="1s"/>

    <prosody pitch="-1st">
        The telegram from the Home says: <emphasis level="strong">YOUR MOTHER PASSED AWAY. FUNERAL TOMORROW. DEEP SYMPATHY.</emphasis>
    </prosody>
    <break time="1s"/>
    <prosody rate="medium">Which leaves the matter doubtful; it could have been yesterday.</prosody>
    <break time="1s"/>

    The Home for Aged Persons is at Marengo, some fifty miles from Algiers.
    <break time="500ms"/>
    <prosody rate="slow">With the two o’clock bus, I should get there well before nightfall.</prosody>
    <break time="700ms"/>
    Then I can spend the night there, keeping the usual vigil beside the body, and be back here by tomorrow evening.
    <break time="1s"/>

    <prosody pitch="-2st">I have fixed up with my employer for two days’ leave; obviously, under the circumstances, he couldn’t refuse.</prosody>
    <break time="1s"/>
    <prosody pitch="+2st">Still, I had an idea he looked annoyed, and I said, without thinking: <emphasis level="moderate">“Sorry, sir, but it’s not my fault, you know.”</emphasis></prosody>
    <break time="1s"/>

    Afterwards, it struck me I needn’t have said that.
    <break time="700ms"/>
    <prosody rate="medium">I had no reason to excuse myself; it was up to him to express his sympathy and so forth.</prosody>
    <break time="1s"/>
    Probably he will do so the day after tomorrow, when he sees me in black.
    <break time="700ms"/>
    For the present, it’s almost as if Mother weren’t really dead.
    <break time="700ms"/>
    The funeral will bring it home to me, put an official seal on it, so to speak.
    <break time="1.5s"/>

    <prosody pitch="-1st">I took the two-o’clock bus. It was a blazing hot afternoon.</prosody>
    <break time="1s"/>
    I’d lunched, as usual, at Céleste’s restaurant.
    <break time="500ms"/>
    Everyone was most kind, and Céleste said to me, <emphasis level="strong">“There’s no one like a mother.”</emphasis>
    <break time="1s"/>
    When I left, they came with me to the door.
    <break time="500ms"/>
    It was something of a rush, getting away, as at the last moment I had to call in at Emmanuel’s place to borrow his black tie and mourning band.
    <break time="700ms"/>

    I had to run to catch the bus.
    <break time="500ms"/>
    I suppose it was my hurrying like that, what with the glare off the road and from the sky, the reek of gasoline, and the jolts, that made me feel so drowsy.
    <break time="1s"/>
    Anyhow, I slept most of the way.
    <break time="700ms"/>
    When I woke, I was leaning against a soldier; he grinned and asked me if I’d come from a long way off, and I just nodded, to cut things short.
    <break time="1s"/>
    I wasn’t in a mood for talking.
    <break time="1.5s"/>

    <prosody pitch="+1st">The Home is a little over a mile from the village.</prosody>
    <break time="700ms"/>
    I went there on foot. I asked to be allowed to see Mother at once, but the doorkeeper told me I must see the warden first.
    <break time="1s"/>
    He wasn’t free, and I had to wait a bit.
    <break time="700ms"/>
    The doorkeeper chatted with me while I waited; then he led me to the office.
    <break time="1.5s"/>

    The warden was a very small man, with gray hair, and a Legion of Honor rosette in his buttonhole.
    <break time="500ms"/>
    He gave me a long look with his watery blue eyes.
    <break time="700ms"/>
    Then we shook hands, and he held mine so long that I began to feel embarrassed.
    <break time="1.5s"/>
</speak>
'''

In [3]:
# Chat-completions route of the hosted Mistral-7B-Instruct model.
url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3/v1/chat/completions"

# Read the .env file into the environment, then pick up the API token.
# os.getenv returns None when HF_API is unset, which yields "Bearer None".
load_dotenv()
API = os.getenv("HF_API")

# Headers sent with every request to `url`.
headers = {"Authorization": f"Bearer {API}", "Content-Type": "application/json"}

def msg(text: str) -> list:
    """Build the chat message list for an SSML conversion request.

    Args:
        text: Plain text the model should convert to SSML.

    Returns:
        A two-element list: a system message holding the instructions plus the
        module-level ``example`` SSML sample, followed by a user message whose
        content is ``text``.
    """
    message = [
        {
            "role": "system",
            # The label introducing the sample was garbled by a stray emoji;
            # it is spelled out as "example" so the prompt reads as intended.
            "content": f'''(Note-Only output SSML format).Convert given text engaging SSML format for Text-to-Speech.
            example -> {example}
            '''
        },
        {
            "role": "user",
            "content": text
        },
    ]
    return message

In [4]:
def get_ssml(txt:str, timeout: float = 120.0) -> "str | None":
    """Ask the chat-completions endpoint to convert plain text into SSML.

    Args:
        txt: Plain text to convert; it becomes the user message built by msg().
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout, requests can block indefinitely.

    Returns:
        The assistant message content on success, or None when the request
        fails, the server answers with a non-200 status, or the payload does
        not have the expected ``choices[0].message.content`` shape. Failures
        are reported with print(), matching the rest of the notebook.
    """
    data = {
        "messages": msg(txt),
        "temperature": 0.7,
        "stream": False,
        "max_tokens":10000
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same print-and-None path
        # as HTTP errors instead of aborting the cell with a traceback.
        print(f"Error: request failed, {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        # A 200 reply with an invalid or unexpected JSON body.
        print(f"Error: unexpected response payload, {exc}")
        return None

In [5]:
# Service-account key file for Google Cloud, exported through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable for this process.
service_account_json = "./exalted-skein-446217-e2-e83f57244ce8.json"
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=service_account_json)

def synthesize_speech(text:str,chapter:str):
    """Render SSML to an MP3 file with Google Cloud Text-to-Speech.

    Args:
        text: SSML markup; it is sent as ``ssml=`` input, not as plain text.
        chapter: Base name of the output file. The audio is written to
            ``./output/audio/<chapter>.mp3``.

    Returns:
        None. Synthesis and file errors are printed rather than raised, so a
        failure does not abort the notebook.
    """
    # get_ssml() returns None on failure; skip the API call in that case.
    if not text:
        print("An error occurred: no SSML text to synthesize")
        return

    # Initialize the Text-to-Speech client
    client = texttospeech.TextToSpeechClient()

    # Set the SSML input to be synthesized
    input_text = texttospeech.SynthesisInput(ssml=text)

    # Build the voice request. The language code matches the locale of the
    # named voice (en-GB-Wavenet-B); the original paired it with "en-US".
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-GB",  # Language code
        name="en-GB-Wavenet-B",  # Voice name
        ssml_gender=texttospeech.SsmlVoiceGender.MALE,  # Gender
    )

    # Specify the type of audio file you want to receive
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,# Audio format
        effects_profile_id=["telephony-class-application"],
    )

    output_path = f"./output/audio/{chapter}.mp3"

    try:
        # Perform the text-to-speech request
        response = client.synthesize_speech(
            input=input_text, voice=voice, audio_config=audio_config
        )

        # Create the target directory on first use so open() cannot fail on it.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # Save the response audio to an MP3 file
        with open(output_path, "wb") as out:
            out.write(response.audio_content)
        # Report the path actually written (the old message said 'output.mp3').
        print(f"Audio content written to '{output_path}'")
    except Exception as e:
        print(f"An error occurred: {e}")




In [6]:
def clean_text(raw_text):
    """Reduce raw chapter text to a single-line, plain string.

    Typographic apostrophes are converted to ASCII ``'`` first. Then every
    character that is not a word character, whitespace, or one of ``. , ! ? ' -``
    is removed, whitespace runs collapse to a single space, and the result is
    stripped.

    Args:
        raw_text: Text to clean. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned string.
    """
    if not raw_text:
        return ""

    # Normalise curly apostrophes before filtering; otherwise the filter
    # deletes them and contractions are mangled ("can’t" -> "cant").
    normalized = raw_text.replace("\u2019", "'").replace("\u2018", "'")

    # Remove unwanted characters, keeping only word characters, whitespace
    # and the punctuation . , ! ? ' -
    cleaned_text = re.sub(r"[^\w\s.,!?'-]", "", normalized)

    # Replace multiple spaces and new lines with a single space
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)

    # Strip leading and trailing whitespace
    return cleaned_text.strip()

In [7]:
# Build the project-local `ebook` reader for the EPUB and fetch its chapters.
book=ebook("./books/LP.epub")
chapters = book.get_chapters()
# Transpose the chapter entries into two parallel tuples, presumably
# (title, text) pairs. TODO confirm against reader.ebook.get_chapters().
title,text=zip(*chapters)
# Clean the entry at index 1 and keep only its first 1000 characters.
# NOTE(review): `input` shadows the built-in input() for the rest of the notebook.
input=clean_text(text[1])[:1000]


In [8]:
# Convert the cleaned excerpt to SSML; the result is None if the request failed.
final_input = get_ssml(txt=input)

In [9]:
# Show the SSML returned by get_ssml() as the cell output.
final_input

'<speak>\n    <prosody rate="medium">So I had to choose another profession.</prosody>\n    <break time="700ms"/>\n    <prosody rate="slow">I learned to fly planes.</prosody>\n    <break time="1s"/>\n    <prosody pitch="+1st">I flew all over the world.</prosody>\n    <break time="1s"/>\n    <prosody rate="medium">And geography, that\'s right, served me well.</prosody>\n    <break time="1s"/>\n    <prosody rate="slow">I knew how to recognize, at first glance, China, or Arizona.</prosody>\n    <break time="1s"/>\n    <prosody rate="medium">It is useful if you have gone astray during the night.</prosody>\n    <break time="1s"/>\n    <prosody pitch="-1st">I have had, in the course of my life, a lot of contact with many serious people.</prosody>\n    <break time="1s"/>\n    <prosody rate="medium">I have lived among the grown-ups.</prosody>\n    <break time="1s"/>\n    <prosody rate="slow">I saw them up close.</prosody>\n    <break time="1s"/>\n    <prosody rate="medium">It did not really imp

In [92]:
# Manual override: rebinds final_input to a fixed SSML sample that uses
# <say-as>, <break>, <audio>, <sub>, <p> and <s> elements.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours, so whether it ran before synthesize_speech() is unclear. Confirm.
final_input='''
<speak>
  Here are <say-as interpret-as="characters">SSML</say-as> samples.
  I can pause <break time="3s"/>.
  I can play a sound
  <audio src="https://www.example.com/MY_MP3_FILE.mp3">didn't get your MP3 audio file</audio>.
  I can speak in cardinals. Your number is <say-as interpret-as="cardinal">10</say-as>.
  Or I can speak in ordinals. You are <say-as interpret-as="ordinal">10</say-as> in line.
  Or I can even speak in digits. The digits for ten are <say-as interpret-as="characters">10</say-as>.
  I can also substitute phrases, like the <sub alias="World Wide Web Consortium">W3C</sub>.
  Finally, I can speak a paragraph with two sentences.
  <p><s>This is sentence one.</s><s>This is sentence two.</s></p>
</speak>
'''

In [10]:
# Synthesize the current SSML into ./output/audio/chapter_1_Custom.mp3.
synthesize_speech(text=final_input, chapter="chapter_1_Custom")

Audio content written to 'output.mp3'


In [107]:
# NOTE(review): mid-notebook import; consider moving it to the imports cell.
from gtts import gTTS

# Input text: the cleaned excerpt. This rebinds `text`, which an earlier cell
# bound to the tuple unpacked from the chapters.
text = input

# Create gTTS object for English at normal speed (slow=False)
tts = gTTS(text=text, lang='en', slow=False)

# Save as an MP3 file named output.mp3 (relative path)
tts.save("output.mp3")

print("Audio file has been saved as output.mp3")


Audio file has been saved as output.mp3


In [91]:
# Show the cleaned excerpt; `input` here is the notebook variable, not the built-in.
input

"So I had to choose another profession. I learned to fly planes. I flew all over the world. And geography, that's right, served me well. I knew how to recognize, at first glance, China, or Arizona. It is useful if you have gone astray during the night. I have had, in the course of my life, a lot of contact with many serious people. I have lived among the grown-ups. I saw them up close. It did not really improve my opinion of them. When I met one that seemed to me a little lucid, I had them experience my drawing number 1, which I had preserved. I wanted to know if they could come to a real understanding. But they always replied It's a hat. After that I spoke to them neither of boa constrictors, nor of virgin forests, nor of stars. I put myself within their reach. I talked about bridge, golf, politics, and ties. And the grown-ups were glad to know such a reasonable man. II So I lived alone, with no one to talk to, until a breakdown in the Sahara desert six years ago. Something had broken