Skip to content

Commit

Permalink
0.0.3 language hotfix
Browse files Browse the repository at this point in the history
  • Loading branch information
CypherousSkies committed Oct 5, 2021
1 parent 4b28c2c commit a1d8c27
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 23 deletions.
19 changes: 15 additions & 4 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions r4l/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Per-language resources, keyed by the language code the user selects.
# Each value is a 3-item list:
#   [0] TTS model name
#   [1] vocoder model name, or None when no separate vocoder is listed
#   [2] three-letter language code (presumably the Tesseract/OCR language
#       name handed to ocrmypdf -- confirm against r4l/util/text.py)
lang_dict = {
    'en': [
        'tts_models/en/ljspeech/tacotron2-DDC',
        'vocoder_models/en/ljspeech/hifigan_v2',
        'eng',
    ],
    'fr': [
        'tts_models/fr/mai/tacotron2-DDC',
        'vocoder_models/universal/libri-tts/fullband-melgan',
        'fra',
    ],
    'es': [
        'tts_models/es/mai/tacotron2-DDC',
        'vocoder_models/universal/libri-tts/fullband-melgan',
        'spa',
    ],
    'de': [
        'tts_models/de/thorsten/tacotron2-DCA',
        'vocoder_models/de/thorsten/fullband-melgan',
        'deu',
    ],
    'ja': [
        'tts_models/ja/kokoro/tacotron2-DDC',
        'vocoder_models/ja/kokoro/hifigan_v1',
        'jpn',
    ],
    'nl': [
        'tts_models/nl/mai/tacotron2-DDC',
        'vocoder_models/nl/mai/parallel-wavegan',
        'nld',
    ],
    # Both Chinese variants share one TTS model and differ only in the last field.
    'zh_sim': [
        'tts_models/zh-CN/baker/tacotron2-DDC-GST',
        None,
        'chi_sim',
    ],
    'zh_tra': [
        'tts_models/zh-CN/baker/tacotron2-DDC-GST',
        None,
        'chi_tra',
    ],
}
7 changes: 4 additions & 3 deletions r4l/bin/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import sys

from r4l.util.text import TextProcessor
from r4l.util.reader import Reader, models_dict
from r4l.util.reader import Reader
from r4l import lang_dict
import os
import time
import csv
Expand All @@ -28,7 +29,7 @@ def get_ext(filename):

def get_texts(sesspath, lang, force_english):
wordcount = 0
tp = TextProcessor(sc_langs=lang)
tp = TextProcessor(langs=lang)
files = [f for f in os.listdir(sesspath) if get_ext(f) in ['pdf', 'txt', 'muse']]
texts = [[] for _ in files]
print(f"> Reading {files}")
Expand Down Expand Up @@ -86,7 +87,7 @@ def main():
)
args = parser.parse_args()
if args.list_langs:
print(models_dict.keys())
print(list(lang_dict.keys()))
sys.exit()
if not os.path.isdir(args.in_path):
print("input path must exist and contain files!")
Expand Down
13 changes: 2 additions & 11 deletions r4l/util/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,7 @@
import numpy as np
from pathlib import Path
from r4l.util.text import split_into_sentences

models_dict = {
'en': ['tts_models/en/ljspeech/tacotron2-DDC', 'vocoder_models/en/ljspeech/hifigan_v2'],
'fr': ['tts_models/fr/mai/tacotron2-DDC', 'vocoder_models/universal/libri-tts/fullband-melgan'],
'es': ['tts_models/es/mai/tacotron2-DDC', 'vocoder_models/universal/libri-tts/fullband-melgan'],
'de': ['tts_models/de/thorsten/tacotron2-DCA', 'vocoder_models/de/thorsten/fullband-melgan'],
'ja': ['tts_models/ja/kokoro/tacotron2-DDC', 'vocoder_models/ja/kokoro/hifigan_v1'],
'nl': ['tts_models/nl/mai/tacotron2-DDC', 'vocoder_models/nl/mai/parallel-wavegan'],
'zh': ['tts_models/zh-CN/baker/tacotron2-DDC-GST', None]
}
from r4l import lang_dict

# TODO: figure out how to load TTS's .models.json

Expand All @@ -22,7 +13,7 @@
class Reader:
def __init__(self, outpath, lang='en', tts_name=None, voc_name=None):
self.outpath = outpath
model_name, vocoder_name = models_dict[lang]
model_name, vocoder_name, _ = lang_dict[lang]
if tts_name is not None:
model_name = tts_name
if voc_name is not None:
Expand Down
9 changes: 5 additions & 4 deletions r4l/util/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from difflib import SequenceMatcher
from ocrmypdf import ocr
import os
from r4l import lang_dict

def only_english(text):
import nltk
Expand Down Expand Up @@ -38,12 +39,12 @@ def split_into_sentences(string):
spec = dict((re.escape(k), v) for k, v in spec.items())
spec_re = re.compile("|".join(spec.keys()))


class TextProcessor:
def __init__(self, bert_model="distilbert-base-multilingual-cased", langs=["en", "fr"]):
    """Set up the BERT tokenizer/model, the spell checker and the OCR language list.

    :param bert_model: model name given to ``from_pretrained`` for both the
        tokenizer and the masked-LM model.
    :param langs: one ``lang_dict`` key, or a list of keys (e.g. ``["en", "fr"]``).
    :raises KeyError: if a requested language is not a key of ``lang_dict``.
    """
    # Accept a single code as well as a list; copying also keeps the shared
    # default list from being aliased by the instance.
    langs = [langs] if isinstance(langs, str) else list(langs)
    # Resolve the OCR codes first so an unknown language fails before the
    # models are loaded. lang_dict values are
    # [tts model, vocoder model, ocr code], so the OCR code is at index 2
    # and each language has to be looked up on its own.
    self.lang = [lang_dict[code][2] for code in langs]
    self.tokenizer = AutoTokenizer.from_pretrained(bert_model)
    self.model = AutoModelForMaskedLM.from_pretrained(bert_model)
    self.sc = SpellChecker(distance=1, language=langs)
    print("> BERT initialized")

# get and correct text
Expand All @@ -69,7 +70,7 @@ def _load(self, filename, sesspath, force):
if not os.path.isdir(sesspath + "tmp/"):
os.mkdir(sesspath + "tmp/")
ocr(sesspath + filename, sesspath + "tmp/tmp.pdf", sidecar=tpath, redo_ocr=(not force), deskew=force,
rotate_pages=force, remove_background=force, clean=force, force_ocr=force)
rotate_pages=force, remove_background=force, clean=force, force_ocr=force,language=self.lang)
with open(tpath, "r") as txt:
text = txt.read()
print("> OCR complete")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
readme = f.read()
setup(
name='reading4listeners',
version='0.0.2',
version='0.0.3',
packages=['r4l'],
url='https://github.com/CypherousSkies/reading-for-listeners',
project_urls={
Expand Down

0 comments on commit a1d8c27

Please sign in to comment.