Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit 52e7949

Browse files
committed
Fix function parsing for very old versions
R2016a and R2015b do not wrap function names in `<code>` tags, so they need to be found using a different method.
1 parent 623602a commit 52e7949

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

src/MATLABfcnscrape.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -230,12 +230,17 @@ def filter_functions(function_list: t.List[str], function_blacklist: t.List[str]
230230
return filtered_functions
231231

232232

233-
def _scrape_doc_page_html(url: str) -> t.List[str]:
233+
def _scrape_doc_page_html(url: str, release: str) -> t.List[str]:
234234
"""Scrape the toolbox function list for a MATLAB release with static documentation serving."""
235235
r = httpx.get(url, timeout=2)
236236
soup = BeautifulSoup(r.content, "html.parser")
237237

238-
functions = soup.findAll("code")
238+
if release in NON_CODE_FCN:
239+
# The very old releases do not wrap function names in code tags
240+
functions = soup.findAll("td", {"class": {"term"}})
241+
else:
242+
functions = soup.findAll("code")
243+
239244
return [function.text for function in functions]
240245

241246

@@ -266,7 +271,7 @@ def scrape_doc_page(url: str, release: str) -> t.List[str]:
266271
Returns a list of function name strings, or an empty list if none are found (e.g. no permission)
267272
"""
268273
if release in LEGACY_FN_LIST_RELEASES:
269-
raw_functions = _scrape_doc_page_html(url)
274+
raw_functions = _scrape_doc_page_html(url, release)
270275
else:
271276
raw_functions = _scrape_doc_page_browser(url)
272277

0 commit comments

Comments
 (0)