# Inference examples

In [None]:
# Do not forget to install all dependencies first:
!pip install -Uqq WhisperSpeech

In [None]:
def is_colab():
    """Return True when the `google.colab` package can be imported.

    A successful import is treated as the sign that the notebook is running
    inside Google Colab; a failed import returns False.
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        # Only import failures mean "not Colab". A bare `except` would also
        # swallow KeyboardInterrupt / SystemExit, which must propagate.
        return False
    return True

import torch
# Fail early, with a hint on how to get a GPU, when no CUDA device is available.
# RuntimeError is raised instead of a raw BaseException: BaseException is meant
# for system-exiting exceptions and is not caught by `except Exception` handlers.
if not torch.cuda.is_available():
    if is_colab():
        raise RuntimeError("Please change the runtime type to GPU. In the menu: Runtime -> Change runtime type (the free T4 instance is enough)")
    else:
        raise RuntimeError("Currently the example notebook requires CUDA, make sure you are running this on a machine with a GPU.")

In [None]:
# Load IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to library source files are picked up live.
%load_ext autoreload
%autoreload 2

In [None]:
import torch
import torch.nn.functional as F

from IPython.display import Markdown, HTML

## The whole pipeline

In [None]:
# check "7. Pipeline.ipynb"
from whisperspeech.pipeline import Pipeline

In [None]:
# let's start with the fast SD S2A model
# (selected through `s2a_ref`; judging by its name this is a tiny English+Polish
# checkpoint -- TODO confirm against the model hub)
pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-tiny-en+pl.model')



In [None]:
# Generation is slow right now because the inference code is not very optimized yet,
# but even without that crucial optimization it is still faster than real-time on an RTX 4090.
# NOTE(review): "Lion" and "Juwels" in the prompt look like deliberate spellings
# chosen for pronunciation -- confirm before "correcting" them.
pipe.generate_to_notebook("""
This is the first demo of Whisper Speech, a fully open source text-to-speech model trained by Collabora and Lion on the Juwels supercomputer.
""")

In [None]:
# The model also knows how to speak Polish: pass the language code via `lang`.
pipe.generate_to_notebook("""
To jest pierwszy test naszego modelu. Pozdrawiamy serdecznie.
""", lang='pl')

In [None]:
# Languages can be mixed inside one sentence (e.g. for borrowed words):
# the text is passed as a list of fragments with one `lang` entry per fragment.
stoks = pipe.t2s.generate(
    [
        "To jest pierwszy test wielojęzycznego ",
        " Whisper Speech ",
        ", modelu zamieniającego tekst na mowę, który Collabora i Laion nauczyli na superkomputerze",
        " Jewels.",
    ],
    lang=['pl', 'en', 'pl', 'en'],
)
# Feed the result of t2s.generate, together with the default speaker, through
# s2a.generate and hand that output to the vocoder for display in the notebook.
pipe.vocoder.decode_to_notebook(
    pipe.s2a.generate(
        stoks,
        pipe.default_speaker.unsqueeze(0),
    )
)

In [None]:
# Same fragment-wise language switching, this time English followed by Polish.
# `cps` presumably controls the speaking rate (characters per second) -- TODO confirm.
stoks = pipe.t2s.generate(
    [
        "I love to eat eastern european food! Especially ",
        "pierogi i bigos.",
    ],
    lang=['en', 'pl'],
    cps=11,
)
pipe.vocoder.decode_to_notebook(
    pipe.s2a.generate(
        stoks,
        pipe.default_speaker.unsqueeze(0),
    )
)

## Voice cloning

In [None]:
# We can pass a reference audio file via `speaker` to get zero-shot voice cloning.
# The path is relative, so the file must be reachable from the current working directory.
pipe.generate_to_notebook("""
This is the first demo of Whisper Speech, a fully open source text-to-speech model trained by Collabora and Lion on the Juwels supercomputer.
""", lang='en', speaker='nbs/EASY Photo Transfer to Wood ｜ LASER PRINT on Wood Using Mailing Labels, Fast, Reusable, Cheap DIY [h7FKdW5ndLw].opus')

In [None]:
# Voice cloning works even better (but slower) with the HQ model.
# Pipeline() is called without arguments here, so it uses its defaults; this
# rebinds `pipe`, so every cell run after this one uses the new pipeline.
pipe = Pipeline()

In [None]:
# Repeat the zero-shot voice cloning example with the same reference audio file,
# this time using the pipeline most recently assigned to `pipe`.
pipe.generate_to_notebook("""
This is the first demo of Whisper Speech, a fully open source text-to-speech model trained by Collabora and Lion on the Juwels supercomputer.
""", lang='en', speaker='nbs/EASY Photo Transfer to Wood ｜ LASER PRINT on Wood Using Mailing Labels, Fast, Reusable, Cheap DIY [h7FKdW5ndLw].opus')