Skip to content

Commit

Permalink
Merge pull request #149 from Lukas0907/derstandard
Browse files (browse the repository at this point in the history)
 derStandard.at: Support for featured articles.
  • Loading branch information
Lukas0907 committed Aug 12, 2018
2 parents 8539176 + 7114c88 commit d918a7e
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion feeds/spiders/derstandard_at.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -82,7 +82,11 @@ def _parse_article(self, response):
".sequence-number",
]
change_tags = {"#media-list li": "div", "#media-list": "div"}
replace_regex = {r'<img[^>]+data-zoom-src="([^"]+)"': r'<img src="\1"'}
replace_regex = {
# data-zoom-src is only valid if it starts with //images.derstandard.at.
r'<img[^>]+data-zoom-src="(//images.derstandard.at/[^"]+)"':
r'<img src="\1"'
}
replace_elems = {
".embedded-posting": "<p><em>Hinweis: Das eingebettete Posting ist nur "
+ "im Artikel verfügbar.</em></p>"
Expand All @@ -106,6 +110,15 @@ def _parse_article(self, response):
"https://{}/userprofil/bloggingdelivery/blogeintrag?godotid={}"
).format(self.name, blog_id)
yield scrapy.Request(url, self._parse_blog_article, meta={"il": il})
elif response.css("#feature-content"):
cover_photo = (
response.css("#feature-cover-photo::attr(style)").
re_first('\((.*)\)')
)
il.add_value("content_html", '<img src="{}">'.format(cover_photo))
il.add_css("content_html", "#feature-cover-title h2")
il.add_css("content_html", "#feature-content > .copytext")
yield il.load_item()
else:
il.add_css("content_html", "#content-aside")
il.add_css("content_html", "#objectContent > .copytext")
Expand Down

0 comments on commit d918a7e

Please sign in to comment.