# Text-to-Speech using IBM Watson

**Documentation**: [IBM Watson Text to Speech – languages and voices](https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices)

# **0. Installing Dependencies**

In [2]:
!pip install ibm_watson

Collecting ibm_watson
  Downloading ibm-watson-8.0.0.tar.gz (398 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/398.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/398.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━[0m [32m337.9/398.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m398.3/398.3 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ibm-cloud-sdk-core==3.*,>=3.3.6 (from ibm_watson)
  Downloading ibm-cloud-sdk-core-3.20.0.tar.gz (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[

# **1. Authenticate**

In [3]:
import os

from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import TextToSpeechV1

In [4]:
# Credentials are taken from the environment so that real secrets never have to
# be typed into (and saved or committed with) the notebook:
#   TEXT_TO_SPEECH_APIKEY - IAM API key of the Text to Speech service instance
#   TEXT_TO_SPEECH_URL    - service URL of that instance
# When a variable is not set, the original placeholder string is kept, so the
# cell still runs and the placeholder can be replaced by hand as before.
api_key = os.environ.get("TEXT_TO_SPEECH_APIKEY", "YOUR API KEY")
service_url = os.environ.get("TEXT_TO_SPEECH_URL", "SERVICE URL")

In [6]:
# Set up the Text to Speech client: wrap the API key in an IAM authenticator,
# create the client with that authenticator, then point the client at the
# service URL configured above. `tts` is the client used by all later cells.
authenticator = IAMAuthenticator(api_key)
tts = TextToSpeechV1(authenticator=authenticator)
tts.set_service_url(service_url)

# **2. Converting with a basic language model**

In [11]:
# Synthesize a short English sentence to MP3 with the Allison expressive voice.
# The request is made BEFORE the output file is opened: opening './speech.mp3'
# in 'wb' mode truncates it immediately, so if the request failed inside the
# `with` block an empty (or clobbered) file would be left behind.
res = tts.synthesize('Hello in this tutorial i will be using the IBM watson texttospeech model', accept='audio/mp3', voice='en-US_AllisonExpressive').get_result()
with open('./speech.mp3','wb') as audio_file:
  # res.content is the audio payload returned by the service.
  audio_file.write(res.content)

# **3. Reading from a file**

In [13]:
# Load the speech as a list of lines; each entry keeps its trailing newline,
# which the next cell removes.
with open('Churchill.txt', mode='r') as f:
  text = list(f)

In [15]:
# Remove the newline characters from every entry of `text`.
text = list(map(lambda line: line.replace('\n', ''), text))

In [16]:
# Merge the cleaned lines into the single string that is sent for synthesis.
# Lines are joined with a space: joining with '' glues the last word of each
# line onto the first word of the next ("...on the" + "beaches" -> "thebeaches"),
# which the voice then reads as one word.
# The isinstance guard keeps the cell idempotent: if it is re-run after `text`
# has already been flattened, the string is left as it is instead of having a
# space inserted between every character.
if not isinstance(text, str):
  text = ' '.join(str(line) for line in text)

In [17]:
# Synthesize the text read from the file with the British English James voice.
# The request is made BEFORE the output file is opened: opening the file in
# 'wb' mode truncates it immediately, so a failed request inside the `with`
# block would leave an empty (or clobbered) './churchill_audio.mp3' behind.
# NOTE(review): the service documents a size limit on the input text of a
# single synthesize request - confirm the speech fits, or split it into chunks.
res = tts.synthesize(text, accept='audio/mp3', voice='en-GB_JamesV3Voice').get_result()
with open('./churchill_audio.mp3', 'wb') as audio_file:
  # res.content is the audio payload returned by the service.
  audio_file.write(res.content)

# **4. Converting with a different language model**

**Effects of adding punctuation**

In [19]:
import json

# Ask the service for the voices it offers and pretty-print the listing.
voices = tts.list_voices().get_result()
# ensure_ascii=False keeps accented characters in the voice descriptions
# (e.g. "español") readable instead of printing them as escape sequences.
print(json.dumps(voices, indent=2, ensure_ascii=False))

{
  "voices": [
    {
      "name": "de-DE_BirgitVoice",
      "language": "de-DE",
      "gender": "female",
      "description": "Birgit: Standard German (Standarddeutsch) female voice.",
      "customizable": true,
      "supported_features": {
        "custom_pronunciation": true,
        "voice_transformation": false
      },
      "url": "https://api.au-syd.text-to-speech.watson.cloud.ibm.com/instances/c7c01ed9-9c61-4261-9c1a-1e234e60e429/v1/voices/de-DE_BirgitVoice"
    },
    {
      "name": "es-LA_DanielaExpressive",
      "language": "es-LA",
      "gender": "female",
      "description": "Daniela: Latin American Spanish American (espa\\xF1ol latinoamericano) female voice. Dnn E2E technology with conversational and expressive capabilities.",
      "customizable": true,
      "supported_features": {
        "custom_pronunciation": true,
        "voice_transformation": false
      },
      "url": "https://api.au-syd.text-to-speech.watson.cloud.ibm.com/instances/c7c01ed9-9c61-42

In [20]:
# "Frère Jacques" - French lullaby. The eight-line verse is repeated twice and
# the song closes with one extra pair of "Ding, ding, dong" lines. Every line
# after the first is preceded by four spaces, exactly as in the text that is
# sent for synthesis.
frere = "\n    ".join(
    2 * [
        "Frère Jacques",
        "Frère Jacques",
        "Dormez-vous?",
        "Dormez-vous?",
        "Sonnez les matines",
        "Sonnez les matines",
        "Ding, ding, dong",
        "Ding, ding, dong",
    ]
    + 2 * ["Ding, ding, dong"]
)

In [21]:
# Synthesize the French lullaby with a French voice.
# The request is made BEFORE the output file is opened: opening './frere.mp3'
# in 'wb' mode truncates it immediately, so a failed request inside the `with`
# block would leave an empty (or clobbered) file behind.
res = tts.synthesize(frere, accept='audio/mp3', voice='fr-FR_ReneeV3Voice').get_result()
with open('./frere.mp3','wb') as audio_file:
  # res.content is the audio payload returned by the service.
  audio_file.write(res.content)