From ea7179b91545e6f21f508e7dc30c8a2263f202f9 Mon Sep 17 00:00:00 2001 From: CypherousSkies <5472563+CypherousSkies@users.noreply.github.com> Date: Mon, 4 Oct 2021 19:25:45 -0400 Subject: [PATCH] 0.0.2 release! --- .idea/workspace.xml | 45 ++++++++++++++++++++++++++++++++++---- r4l/bin/cli.py | 53 +++++++++++++++++++++++++++++++++------------ setup.py | 2 +- time_data.csv | 1 + 4 files changed, 82 insertions(+), 19 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index f6d1531..1d265d8 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,9 +1,10 @@ - + - + + + + + + + + + + + + @@ -68,7 +97,14 @@ - @@ -86,6 +122,7 @@ - \ No newline at end of file diff --git a/r4l/bin/cli.py b/r4l/bin/cli.py index 95e319c..9ea5707 100644 --- a/r4l/bin/cli.py +++ b/r4l/bin/cli.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- import argparse +import sys + from r4l.util.text import TextProcessor -from r4l.util.reader import Reader +from r4l.util.reader import Reader, models_dict import os import time import csv @@ -11,14 +13,22 @@ os.environ["TOKENIZERS_PARALLELISM"] = "False" tag_remover = re.compile('<.*?>') +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + if v.lower() in ("no", "false", "f", "n", "0"): + return False + raise argparse.ArgumentTypeError("Boolean value expected.") def get_ext(filename): return filename.split(".")[-1] -def get_texts(sesspath, force_english): +def get_texts(sesspath, lang, force_english): wordcount = 0 - tp = TextProcessor() + tp = TextProcessor(sc_langs=lang) files = [f for f in os.listdir(sesspath) if get_ext(f) in ['pdf', 'txt', 'muse']] texts = [[] for _ in files] print(f"> Reading {files}") @@ -43,8 +53,8 @@ def get_texts(sesspath, force_english): return texts, files, wordcount -def read_texts(texts, files, outpath): - reader = Reader(outpath, lang='en') +def read_texts(texts, files, outpath, lang): + reader = Reader(outpath, lang=lang) for text, name in zip(texts, files): reader.tts(text, name) return @@ -56,27 +66,42 @@ def main(): """In the interests of user-friendliness, this cli will be kept pretty bare-bones""" """ Basic usage: - $ ./r4l/bin/cli.py [--in_path in/] [--out_path out/] - will convert (english language) pdfs in the folder "in/" and output mp3s to the folder "out/" + $ r4l [--in_path in/] [--out_path out/] [--lang "en"] + Converts pdfs, txts, muses in the folder "in/" and output mp3s to the folder "out/" with the primary language set to "en" + List languages: + $ r4l --list_languages + Lists available languages (Warning! Not tested on non-latin scripts!) """ ) - parser.add_argument("--in_path", type=str, default="in/", help="Path containing pdfs to be converted") - parser.add_argument("--out_path", type=str, default="out/", help="Output path") + parser.add_argument("--in_path", type=str, default="in/", help="Path containing files to be converted.") + parser.add_argument("--out_path", type=str, default="out/", help="Output path.") + parser.add_argument("--lang", type=str, default="en", help="Two-letter language code.") + parser.add_argument( + "--list_langs", + type=str2bool, + nargs="?", + const=True, + default=False, + help="list available languages.", + ) args = parser.parse_args() + if args.list_langs: + print(models_dict.keys()) + sys.exit() if not os.path.isdir(args.in_path): print("input path must exist and contain files!") parser.parse_args(["-h"]) if not os.path.isdir(args.out_path): os.mkdir(args.out_path) - run(args.in_path, args.out_path) + run(args.in_path, args.out_path, args.lang) return -def run(in_path, out_path): +def run(in_path, out_path, lang): start_time = time.time() force_english: bool = False - texts, files, wordcount = get_texts(in_path, force_english) - read_texts(texts, files, out_path) + texts, files, wordcount = get_texts(in_path, lang, force_english) + read_texts(texts, files, out_path, lang) time_taken = time.time() - start_time with open('time_data.csv', 'a') as f: writer = csv.writer(f) @@ -86,4 +111,4 @@ def run(in_path, out_path): if __name__ == "__main__": - run("in/", "out/") + run("in/", "out/", "en") diff --git a/setup.py b/setup.py index 08bfcbb..b6e6cd5 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ readme = f.read() setup( name='reading4listeners', - version='0.0.1', + version='0.0.2', packages=['r4l'], url='https://github.com/CypherousSkies/reading-for-listeners', license=legal, diff --git a/time_data.csv b/time_data.csv index 5f73b8e..6f04beb 100644 --- a/time_data.csv +++ b/time_data.csv @@ -5,3 +5,4 @@ 544,101.4726173877716 544,69.36171174049377 544,95.81861162185669 +544,83.5948269367218