Skip to content

Commit

Permalink
fix(book): ignore ads in introduction
Browse files: browse the repository at this point in the history
  • Loading branch information
NateScarlet committed Nov 29, 2022
1 parent 4183f1d commit 910cfeb
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pkg/book/__snapshots__/TestBook_Fetch.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"ID": "1004608738",
"Introduction": "在破败中崛起,在寂灭中复苏。\n沧海成尘,雷电枯竭,那一缕幽雾又一次临近大地,世间的枷锁被打开了,一个全新的世界就此揭开神秘的一角……",
"LastUpdated": {
- "$Time": "2021-05-01 10:10:55 +0800 CST"
+ "$Time": "2022-11-26 20:42:24 +0800 CST"
},
"MonthRecommendCount": "*count*",
"MonthTicketCount": "*count*",
Expand Down
17 changes: 13 additions & 4 deletions pkg/book/book.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/NateScarlet/qidian/pkg/client"
"github.com/NateScarlet/qidian/pkg/util"
"github.com/PuerkitoBio/goquery"
+ "golang.org/x/net/html"
)

// Book model
Expand Down Expand Up @@ -135,10 +136,18 @@ func (b *Book) Fetch(ctx context.Context) (err error) {

// Introduction
b.Summary = infoElem.Find(".intro").Text()
- b.Introduction, ok = doc.Find("meta[property=\"og:description\"]").First().Attr("content")
- if !ok {
- err = fmt.Errorf("'og:description' meta tag not found")
- return
+ for _, p := range doc.Find(".book-intro > p").Nodes {
+ for n := p.FirstChild; n != nil; n = n.NextSibling {
+ switch n.Type {
+ case html.TextNode:
+ b.Introduction += n.Data
+ case html.ElementNode:
+ if n.Data == "br" {
+ b.Introduction += "\n"
+ }
+ }
+ }
+ b.Introduction += "\n"
}
b.Introduction = strings.TrimSpace(b.Introduction)

Expand Down

0 comments on commit 910cfeb

Please sign in to comment.