Skip to content

Commit

Permalink
Use libRHVoice.so instead of RHVoice-test
Browse files Browse the repository at this point in the history
Response time reduced by 3-6x
  • Loading branch information
Aculeasis committed Sep 15, 2018
1 parent b2f8b94 commit 1db7fa3
Show file tree
Hide file tree
Showing 7 changed files with 639 additions and 21 deletions.
5 changes: 3 additions & 2 deletions Dockerfile.amd64
Expand Up @@ -30,8 +30,9 @@ ENV LC_ALL ru_RU.UTF-8
ENV LANG ru_RU.UTF-8
ENV LANGUAGE ru_RU.UTF-8

ADD entrypoint.sh /opt/entrypoint.sh
ADD app.py /opt/app.py
COPY entrypoint.sh /opt/entrypoint.sh
COPY app.py /opt/app.py
COPY rhvoice_proxy/rhvoice*.py /opt/rhvoice_proxy/

EXPOSE 8080/tcp

Expand Down
5 changes: 3 additions & 2 deletions Dockerfile.arm32v7
Expand Up @@ -30,8 +30,9 @@ ENV LC_ALL ru_RU.UTF-8
ENV LANG ru_RU.UTF-8
ENV LANGUAGE ru_RU.UTF-8

ADD entrypoint.sh /opt/entrypoint.sh
ADD app.py /opt/app.py
COPY entrypoint.sh /opt/entrypoint.sh
COPY app.py /opt/app.py
COPY rhvoice_proxy/rhvoice*.py /opt/rhvoice_proxy/

EXPOSE 8080/tcp

Expand Down
5 changes: 3 additions & 2 deletions Dockerfile.arm64v8
Expand Up @@ -30,8 +30,9 @@ ENV LC_ALL ru_RU.UTF-8
ENV LANG ru_RU.UTF-8
ENV LANGUAGE ru_RU.UTF-8

ADD entrypoint.sh /opt/entrypoint.sh
ADD app.py /opt/app.py
COPY entrypoint.sh /opt/entrypoint.sh
COPY app.py /opt/app.py
COPY rhvoice_proxy/rhvoice*.py /opt/rhvoice_proxy/

EXPOSE 8080/tcp

Expand Down
22 changes: 7 additions & 15 deletions app.py
@@ -1,11 +1,11 @@
#!/usr/bin/env python3

import subprocess
from shlex import quote
from urllib import parse

from flask import Flask, request, make_response, Response, stream_with_context

from rhvoice_proxy.rhvoice import TTS
from tools.preprocessing.text_prepare import text_prepare

SUPPORT_VOICES = {
Expand All @@ -19,25 +19,18 @@
}
DEFAULT_VOICE = 'anna'

FORMATS = {
'mp3': ['echo {text} | RHVoice-test -p {voice} -o - | lame -h -V 4 -t - -', 'audio/mpeg'],
'wav': ['echo {text} | RHVoice-test -p {voice} -o -', 'audio/wav'],
'opus': ['echo {text} | RHVoice-test -p {voice} -o - | opusenc --ignorelength - -', 'audio/ogg'],
}
FORMATS = {'mp3': 'audio/mpeg', 'wav': 'audio/wav', 'opus': 'audio/ogg'}
DEFAULT_FORMAT = 'mp3'

app = Flask(__name__, static_url_path='')
tts = TTS()


@app.route('/say')
def say():
def stream_():
with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True).stdout as fp:
while fp.readable():
data = fp.read(2048)
if not data:
break
yield data
for chunk in tts.say(text, voice, format_):
yield chunk

text = request.args.get('text', '')
voice = request.args.get('voice', DEFAULT_VOICE)
Expand All @@ -51,9 +44,8 @@ def stream_():
return make_response('Unset \'text\'.', 400)

text = quote(text_prepare(parse.unquote(text).replace('\r\n', ' ').replace('\n', ' ')))
cmd = FORMATS[format_][0].format(text=text, voice=voice), FORMATS[format_][1]
return Response(stream_with_context(stream_()), mimetype=FORMATS[format_][1])
return Response(stream_with_context(stream_()), mimetype=FORMATS[format_])


if __name__ == "__main__":
app.run(host='0.0.0.0', port=8080)
app.run(host='0.0.0.0', port=8080, threaded=False)
10 changes: 10 additions & 0 deletions rhvoice_proxy/.gitignore
@@ -0,0 +1,10 @@
.idea
0.mp3
0.wav
1.wav
2.wav
__pycache__
mp3.mp3
opus.ogg
wav.wav

244 changes: 244 additions & 0 deletions rhvoice_proxy/rhvoice.py
@@ -0,0 +1,244 @@
#!/usr/bin/env python3

import queue
import shutil
import subprocess
import threading
import wave
from ctypes import string_at

if __name__ == '__main__':
import rhvoice_proxy
else:
from rhvoice_proxy import rhvoice_proxy


class FakeFile(queue.Queue):
    """Write-only, file-like adapter that turns written chunks into queue items.

    It is handed to ``wave.Wave_write`` in place of a real file: every
    non-empty ``write`` becomes one queue item that a reader can pull with
    ``read``.  An empty bytes object in the queue marks the end of the stream.
    """

    def __init__(self):
        super().__init__()
        self._pos = 0           # number of bytes accepted so far
        self._seeking = False   # True while positioned away from the end
        self._open = True       # False once the end marker has been read

    def seek(self, pos, *_):
        """Remember whether the caller moved away from the current end."""
        # Ignore wave's attempts to go back and patch the header: anything
        # written while "seeking" is dropped by write().
        self._seeking = pos != self._pos

    def tell(self, *_):
        """Return the number of bytes accepted so far."""
        return self._pos

    def write(self, data):
        """Queue ``data`` unless it is empty or a seek is in effect.

        Always returns ``len(data)``, even when the data is discarded.
        """
        size = len(data)
        if self._seeking or not size:
            return size
        self.put_nowait(data)
        self._pos += size
        return size

    def read(self, *_):
        """Block for the next chunk; return ``b''`` at and after end of stream."""
        if self._open:
            chunk = self.get()
            # An empty chunk is the end marker; later reads return b'' at once.
            self._open = bool(chunk)
            return chunk
        return b''

    def end(self):
        """Queue the end-of-stream marker unless the stream was already read out."""
        if not self._open:
            return
        self.put_nowait(b'')

    def close(self):
        """No-op, present for file-object compatibility."""

    def flush(self):
        """No-op, present for file-object compatibility."""


def _cmd_init():
    """Return the encoder command line for every format whose encoder is installed.

    The result maps a format name ('mp3', 'opus') to an argv list.  A format
    whose executable is not found on PATH is left out and a hint naming the
    apt package is printed instead.
    """
    # (format, executable, apt package, argv)
    encoders = (
        ('mp3', 'lame', 'lame',
         ['lame', '-htv', '--silent', '-', '-']),
        ('opus', 'opusenc', 'opus-tools',
         ['opusenc', '--quiet', '--discard-comments', '--ignorelength', '-', '-']),
    )
    available = {}
    for fmt, binary, package, argv in encoders:
        if not shutil.which(binary):
            print('Disable {} support - {} not found. Use apt install {}'.format(fmt, binary, package))
            continue
        available[fmt] = argv
    return available


class TTS(threading.Thread):
    """Worker thread that synthesizes speech through libRHVoice and streams it.

    Requests are queued by ``say`` and processed one at a time in ``run``.
    Audio delivered to ``_speech_callback`` is wrapped in a WAV stream (via
    ``wave`` writing into a ``FakeFile``) and, for formats listed in
    ``self._CMD``, piped through an external encoder process.
    """

    BUFF_SIZE = 1024  # not referenced by the code in this class
    SAMPLE_SIZE = 2   # bytes per sample: used as the WAV sample width

    def __init__(self, lib_path=None, data_path=None, resources=None):
        """Load the library, create the engine and start the worker thread.

        ``lib_path``, ``data_path`` and ``resources`` are passed through to
        ``rhvoice_proxy`` unchanged.
        """
        super().__init__()
        self._CMD = _cmd_init()
        # Set by the worker once output for the current request can be read.
        self._wait = threading.Event()
        # Used as a "free" flag: set while no request is being served.
        self._lock = threading.Event()
        self._lock.set()
        self._queue = queue.Queue()
        self._sample_rate = 24000
        self._format = 'wav'
        rhvoice_proxy.load_tts_library(lib_path)
        api = rhvoice_proxy.__version__
        ver = rhvoice_proxy.get_rhvoice_version()
        if api != ver:
            print('Warning! API version ({}) different of library version ({})'.format(api, ver))
        self._engine = rhvoice_proxy.get_engine(self._speech_callback, self._sr_callback, resources, data_path)
        self._popen = None
        self._file = None
        self._wave = None
        self._work = True
        self._processing = False
        self.start()

    def _popen_open(self):
        """Start the encoder process for the current format."""
        self._popen_close()
        self._popen = subprocess.Popen(
            self._CMD.get(self._format),
            # stderr is never read anywhere, so do not pipe it: an undrained
            # pipe could fill up and stall the encoder.
            stderr=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE
        )

    def _file_open(self):
        """Replace the current FakeFile with a fresh one."""
        self._file_close()
        self._file = FakeFile()

    def _wave_open(self):
        """Start a new mono WAV stream written into a fresh FakeFile."""
        self._wave_close()
        self._file_open()
        self._wave = wave.Wave_write(self._file)
        self._wave.setnchannels(1)
        self._wave.setsampwidth(self.SAMPLE_SIZE)
        self._wave.setframerate(self._sample_rate)

    def _wave_close(self):
        """Close the WAV writer, the FakeFile and the encoder process, if any."""
        if self._wave:
            self._wave.close()
            self._wave = None
        self._file_close()
        self._popen_close()

    def _file_close(self):
        """Drop the current FakeFile, if any."""
        if self._file:
            self._file.close()
            self._file = None

    def _popen_close(self):
        """Kill the encoder process, if any, and release its resources."""
        if self._popen:
            self._popen.kill()
            for pipe in (self._popen.stdin, self._popen.stdout):
                try:
                    pipe.close()
                except OSError:
                    # Flushing buffered input into a killed process fails;
                    # nothing is left to deliver, so this is safe to ignore.
                    pass
            # Reap the child so it does not linger as a zombie.
            self._popen.wait()
            self._popen = None

    def join(self, timeout=None):
        """Ask the worker to stop and wait for it.

        Clears ``_work`` (the value returned by ``_speech_callback``) and
        queues the ``None`` sentinel that makes ``run`` exit.
        """
        self._work = False
        self._queue.put_nowait(None)
        super().join(timeout)

    def _speech_callback(self, samples, count, *_):
        """Receive ``count`` samples and push them into the output pipeline.

        ``count * SAMPLE_SIZE`` bytes are read from the ``samples`` pointer.
        Returns ``self._work``; presumably a false value tells the library to
        stop synthesizing - TODO confirm against rhvoice_proxy.
        """
        frames = string_at(samples, count * self.SAMPLE_SIZE)
        if not self._wave:
            # First chunk of a request: set up the stream and wake the reader.
            self._wave_open()
            # TODO: compute the header ourselves and drop wave
            # noinspection PyProtectedMember
            self._wave._write_header(0xFFFFFFF)  # Set an 'infinite' file length
            self._wave.writeframesraw(frames)
            if self._format in self._CMD:
                self._popen_open()
                self._in_out()
            self._wait.set()
        else:
            self._wave.writeframesraw(frames)
            if self._popen:
                self._in_out()
        return self._work

    def _in_out(self):
        """Move one queued chunk from the FakeFile into the encoder's stdin.

        Returns False once the end-of-stream marker has been read.
        """
        data = self._file.read()
        if data:
            self._popen.stdin.write(data)
            return True
        return False

    def _sr_callback(self, rate, *_):
        """Remember the sample rate reported by the engine; always returns True."""
        self._sample_rate = rate
        return True

    def say(self, text, voice='anna', format_='mp3', buff=1024):
        """Synthesize ``text`` and yield the encoded audio in chunks.

        ``buff`` is the read size used for encoder output.  Raises
        ``RuntimeError`` (on first iteration) for a format that is neither
        'wav' nor available in ``self._CMD``.  Yields nothing when the
        synthesizer produces no audio.
        """
        if format_ != 'wav' and format_ not in self._CMD:
            raise RuntimeError('Unsupported format: {}'.format(format_))
        self._lock.wait(3600)
        self._lock.clear()
        try:
            self._queue.put_nowait([text, voice, format_])
            self._wait.wait(3600)
            self._wait.clear()
            try:
                yield from self._iter_me(buff)
            finally:
                # If the consumer stopped early, read out the rest so the
                # worker is not left blocked on, or still writing to, the
                # state that is torn down below.  After a normal finish this
                # loop ends immediately.
                for _ in self._iter_me(buff):
                    pass
        finally:
            # Always clean up and mark the instance free again; otherwise an
            # abandoned generator would block the next call for an hour.
            self._wave_close()
            self._lock.set()

    def to_file(self, filename, text, voice='anna', format_='mp3'):
        """Synthesize ``text`` and write the audio to ``filename``."""
        with open(filename, 'wb') as fp:
            for chunk in self.say(text, voice, format_):
                fp.write(chunk)

    def _iter_me(self, buff):
        """Yield output chunks until the stream is empty and the worker is idle."""
        while True:
            if self._popen:
                chunk = self._popen.stdout.read(buff)
            elif self._file is not None:
                chunk = self._file.read()
            else:
                # No audio was produced, so there is nothing to read from.
                chunk = b''
            if chunk:
                yield chunk
            elif not self._processing:
                break

    def _generate(self, text, voice, format_):
        """Run one synthesis request on the worker thread."""
        self._format = format_
        synth_params = rhvoice_proxy.get_synth_params(voice)
        self._processing = True
        rhvoice_proxy.speak_generate(text, synth_params, self._engine)
        # _wave is only created by _speech_callback, so it tells whether any
        # audio was delivered for this request.
        produced = self._wave is not None
        if produced:
            self._wave.close()
            self._wave = None
            self._file.end()

        if self._popen:
            # Feed whatever is still queued to the encoder, then signal EOF.
            while self._in_out():
                pass
            self._popen.stdin.close()
            try:
                self._popen.wait(5)
            except subprocess.TimeoutExpired:
                # Best effort: _popen_close() kills the process later.
                pass
        self._processing = False
        if not produced:
            # The callback never fired, so nobody woke the caller of say().
            self._wait.set()

    def run(self):
        """Process queued requests until the ``None`` sentinel arrives."""
        while True:
            data = self._queue.get()
            if data is None:
                break
            self._generate(*data)


def main():
    """Demo: synthesize one phrase into mp3, opus and wav files and print timings."""
    import time
    names = ['mp3.mp3', 'opus.ogg', 'wav.wav']
    text = 'Я умею сохранять свой голос в {}'
    voice = 'anna'
    w_time = time.time()
    tts = TTS()
    try:
        print('Init time: {}'.format(time.time() - w_time))
        print()
        for name in names:
            # The part before the first dot doubles as the format name.
            format_ = name.split('.', 1)[0]
            w_time = time.time()
            tts.to_file(name, text.format(format_), voice, format_)
            w_time = time.time() - w_time
            print('File {} created in {} sec.'.format(name, w_time))
    finally:
        # The worker is a non-daemon thread blocked on its queue; without
        # join() the process would never exit.
        tts.join()


if __name__ == '__main__':
    main()

0 comments on commit 1db7fa3

Please sign in to comment.