From b621878c053fc1c76b7d780d7963ff6b87c2630d Mon Sep 17 00:00:00 2001 From: sco1 Date: Mon, 19 Mar 2018 23:36:51 -0400 Subject: [PATCH] Add basic check for toolbox documentation with denied access See #2 --- MATLABfcnscrape.py | 8 ++++++-- README.md | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/MATLABfcnscrape.py b/MATLABfcnscrape.py index de7eed5..877a143 100644 --- a/MATLABfcnscrape.py +++ b/MATLABfcnscrape.py @@ -40,7 +40,7 @@ def scrapedocpage(URL): Object methods (foo.bar) and comments (leading %) are excluded - Returns a list of function name strings + Returns a list of function name strings, or an empty list if none are found (e.g. no permission for toolbox) """ r = requests.get(URL, timeout=2) soup = BeautifulSoup(r.content, 'html.parser') @@ -136,7 +136,11 @@ def helpURLbuilder(shortlink, prefix="https://www.mathworks.com/help/", suffix=" for toolbox, URL in toolboxdict.items(): try: fcnlist = scrapedocpage(URL) - writeToolboxJSON(fcnlist, toolbox, outpath) + if len(fcnlist) == 0: + # No functions found, most likely because permission for the toolbox documentation is denied + logging.info(f"Permission to view documentation for '{toolbox}' has been denied: {URL}") + else: + writeToolboxJSON(fcnlist, toolbox, outpath) except (requests.exceptions.Timeout, requests.exceptions.ConnectionError): # TODO: Add a retry pipeline, verbosity of exception logging.info(f"Unable to access online docs for '{toolbox}': '{URL}'") diff --git a/README.md b/README.md index 649be78..b437a86 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,11 @@ # MATLABfcnscrape Scrape MATLAB's documentation for all valid function names and output to JSON files for external use -A JSON file is output per toolbox +A JSON file is output per toolbox. All unique functions are also consolidated into a single JSON. ## Notes -Object methods (e.g. 
`cdflib.close`) are ignored for the JSON output \ No newline at end of file +* Only those toolboxes under the 'MATLAB Family' are considered at this time: https://www.mathworks.com/help/index.html +* Several toolboxes are inaccessible to some users due to licensing restrictions + * See [this issue](https://github.com/StackOverflowMATLABchat/MATLABfcnscrape/issues/2) for an up-to-date list + * Pull requests for these toolboxes are welcome +* Object methods (e.g. `cdflib.close`) are ignored for the JSON output \ No newline at end of file