diff --git a/audiobook/config.py b/audiobook/config.py index 972f5e9..aaee27f 100644 --- a/audiobook/config.py +++ b/audiobook/config.py @@ -1,4 +1,4 @@ -supported_file_types = (".pdf", ".txt", ".epub") +supported_file_types = (".pdf", ".txt", ".epub", ".docx", ".doc") speed_dict = { "slow": 100, "normal": 150, diff --git a/audiobook/main.py b/audiobook/main.py index 1519923..4585f88 100644 --- a/audiobook/main.py +++ b/audiobook/main.py @@ -13,6 +13,7 @@ from audiobook.utils import mobi_to_json from audiobook.utils import epub_to_json from audiobook.utils import html_to_json +from audiobook.utils import docs_to_json from audiobook.config import speed_dict @@ -82,6 +83,8 @@ def create_json_book(self, input_book_path, password=None): json_book, metadata = mobi_to_json(input_book_path) elif input_book_path.startswith("http"): json_book, metadata = html_to_json(input_book_path) + elif input_book_path.endswith((".docx", ".doc")): + json_book, metadata = docs_to_json(input_book_path) write_json_file(json_book, os.path.join(BOOK_DIR, json_filename)) diff --git a/audiobook/utils.py b/audiobook/utils.py index fcca5f3..b368efc 100644 --- a/audiobook/utils.py +++ b/audiobook/utils.py @@ -1,5 +1,7 @@ import re import os + +import docx2txt import mobi import json import PyPDF2 @@ -110,10 +112,19 @@ def txt_to_json(input_book_path): metadata["book_name"] = book_name return json_book, metadata - def docs_to_json(input_book_path): """ sub method to create json book from docs file """ - pass + metadata = {} + json_book = {} + book_name = os.path.basename(input_book_path).split(".")[0] + book_data = docx2txt.process(input_book_path) + for i in range(0, len(book_data), 2000): + page_num = i // 2000 + json_book[str(page_num)] = book_data[i:i + 2000] + + metadata["pages"] = len(json_book) + metadata["book_name"] = book_name + return json_book, metadata def epub_to_json(input_book_path): metadata = {} diff --git a/requirements.txt b/requirements.txt index b2202bc..5789c19 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,6 @@ ebooklib==0.17.1 beautifulsoup4==4.11.1 html2text==2020.1.16 mobi==0.3.3 +docx2txt>=0.8 +requests>=2.28.1 +tqdm>=4.64.1