OTW(AO3) Support Paginated Series
JimmXinu committed Apr 2, 2024
1 parent 8eecd0a commit 704ea89
Showing 1 changed file with 25 additions and 1 deletion.
26 changes: 25 additions & 1 deletion fanficfare/adapters/base_otw_adapter.py
@@ -619,9 +619,33 @@ def get_series_from_page(self,url,data,normalize=False):
         ## up to date with future changes.
         m = re.match(self.getSiteURLPattern().replace('/works/','/series/'),url)
         if m:
+            seriesid = m.group('id')
             soup = self.make_soup(data)
             retval = {}
-            retval['urllist']=[ 'https://'+self.host+a['href'] for a in soup.select('h4.heading a:first-child') ]
+            urllist = []
+            ## Series pagination can show '...' and omit links for
+            ## some pages.  Also, the page for the given URL, eg
+            ## /series/99999?page=3, will *not* be in the list.
+            pageparam = '?page='
+            pageas = soup.select("ol.pagination li a")
+            if pageas:
+                pageurls = [ a['href'] for a in pageas ]
+                if pageparam in url:
+                    pageurls.append(url)
+                logger.debug(pageurls)
+                ## Need to find the largest page number, including url.
+                maxpagenum = max([ int(x[x.index(pageparam)+len(pageparam):]) for x in pageurls ])
+                logger.debug(maxpagenum)
+                for j in range(1,maxpagenum+1):
+                    pageurl = 'https://' + self.getSiteDomain() + '/series/' + seriesid + pageparam + unicode(j)
+                    logger.debug(pageurl)
+                    pagesoup = self.make_soup(self.get_request(pageurl))
+                    urllist.extend([ 'https://'+self.host+a['href'] for a in pagesoup.select('h4.heading a:first-child') ])
+            logger.debug(urllist)
+            if urllist:
+                retval['urllist']=urllist
+            else:
+                retval['urllist']=[ 'https://'+self.host+a['href'] for a in soup.select('h4.heading a:first-child') ]
             retval['name']=stripHTML(soup.select_one("h2.heading"))
             desc=soup.select_one("div.wrapper dd blockquote.userstuff")
             if desc:
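
The new logic reads on its own: find the highest ?page= number among the pagination links (remembering that AO3 does not link a page to itself), then fetch every page and collect the work URLs; if the pagination parse comes up empty, it falls back to the original single-page behaviour. Below is a minimal standalone sketch of that technique, assuming BeautifulSoup and a hypothetical fetch(url) helper in place of the adapter's make_soup()/get_request(); it is an illustration, not FanFicFare's API. (In the commit itself, unicode() comes from the project's py2/py3 compatibility shim; plain str works on Python 3, as below.)

    # Minimal sketch, not FanFicFare's API: collect work URLs from
    # every page of an AO3 series listing.  fetch(url) -> HTML text
    # is an assumed helper.
    from bs4 import BeautifulSoup

    PAGEPARAM = '?page='

    def collect_series_work_urls(fetch, host, seriesid, url, data):
        soup = BeautifulSoup(data, 'html.parser')

        def work_links(s):
            # Work links on a series page: first anchor of each h4.heading.
            return ['https://' + host + a['href']
                    for a in s.select('h4.heading a:first-child')]

        pageas = soup.select('ol.pagination li a')
        if not pageas:
            # No pagination bar: a single-page series.
            return work_links(soup)
        pageurls = [a['href'] for a in pageas]
        if PAGEPARAM in url:
            # AO3 does not link the current page to itself, so count it too.
            pageurls.append(url)
        # Highest ?page= number seen across the pagination links.
        maxpagenum = max(int(x[x.index(PAGEPARAM) + len(PAGEPARAM):])
                         for x in pageurls)
        urllist = []
        for j in range(1, maxpagenum + 1):
            pageurl = 'https://%s/series/%s%s%d' % (host, seriesid, PAGEPARAM, j)
            urllist.extend(work_links(BeautifulSoup(fetch(pageurl), 'html.parser')))
        return urllist

Appending the starting URL matters when, e.g., the user begins from /series/99999?page=3 of a three-page series: the pagination bar then only links pages 1 and 2, and without counting the starting URL the last page would be missed.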
