Skip to content

Commit

Permalink
Fixed Scraping English on new Forvo Pages
Browse files: browse the repository at this point in the history
  • Loading branch information
Rascalov committed Jul 18, 2022
1 parent 732c580 commit 2dbe457
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions bs4Scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,13 @@ def lookup_word(word, languageCode, automatic=False):
         return audioList # no results for that language-code
     speachSections = forvoPage.select_one("div#language-container-" + languageCode)
     audioListUl = speachSections.select_one("ul#pronunciations-list-" + languageCode)
+
     if(audioListUl == None or len(audioListUl.findChildren(recursive=False)) == 0):
-        print("FORVO: Word not found (Language Container exists, but audio not found)")
-        return audioList
-    if(languageCode == "en"):
-        audioListLis = forvoPage.select("li[class*=en_]")
+        if(languageCode == "en"):
+            audioListLis = speachSections.select("li.pronunciation")
+        else:
+            print("FORVO: Word not found (Language Container exists, but audio not found)")
+            return audioList
     else:
         audioListLis = audioListUl.find_all("li",attrs={'class': "pronunciation"} )
     #forvo_audio bs4-ify
Expand Down

0 comments on commit 2dbe457

Please sign in to comment.