Skip to content

Commit

Permalink
Merge pull request #105 from Lukas0907/dev/next
Browse files Browse the repository at this point in the history
Übermedien.de: Remove Blendle support.
  • Loading branch information
nblock committed Sep 27, 2017
2 parents 9ce7d57 + 4a4057b commit 48d5562
Show file tree
Hide file tree
Showing 4 changed files with 1 addition and 183 deletions.
9 changes: 0 additions & 9 deletions docs/spiders/uebermedien.com.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,4 @@ Add ``uebermedien.com`` to the list of spiders:
spiders =
uebermedien.com
Übermedien_ has a paywall for certain articles. If you want to crawl paid
articles, please provide your Blendle ``username`` and ``password``.

.. code-block:: ini

   [uebermedien.de]
   username =
   password =

.. _Übermedien: http://www.uebermedien.de
7 changes: 0 additions & 7 deletions feeds.cfg.dist
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,6 @@ useragent = feeds (+https://github.com/nblock/feeds)
## Location of your library that uses biblioweb.at.
# location =

#[uebermedien.de]
## uebermedien.de has a paywall for certain articles.
## If you want to crawl paid articles, please provide your Blendle username
## and password.
# username =
# password =

#[facebook.com]
## App id and app secret for your app.
## Can be gathered by creating your own app at https://developers.facebook.com/.
Expand Down
129 changes: 0 additions & 129 deletions feeds/spiders/blendle.py

This file was deleted.

39 changes: 1 addition & 38 deletions feeds/spiders/uebermedien_de.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

from feeds.loaders import FeedEntryItemLoader
from feeds.spiders import FeedsXMLFeedSpider
from feeds.spiders.blendle import BlendleAuthenticationError
from feeds.spiders.blendle import BlendleSession


class UebermedienDeSpider(FeedsXMLFeedSpider):
Expand All @@ -17,23 +15,6 @@ class UebermedienDeSpider(FeedsXMLFeedSpider):
_title = 'uebermedien.de'
_subtitle = 'Medien besser kritisieren.'

def parse(self, response):
# Try to login to Blendle.
self._blendle_session = BlendleSession(spider=self,
provider='uebermedien')
try:
# Continue with parsing the feed after trying to log in.
yield self._blendle_session.login(
callback=(
# Continue with parsing the feed after logging in.
lambda: super(UebermedienDeSpider, self).parse(response)
)
)
except BlendleAuthenticationError as ex:
# No username or password given.
self.logger.info(str(ex))
yield from super().parse(response)

def parse_node(self, response, node):
il = FeedEntryItemLoader(response=response,
base_url='http://{}'.format(self.name),
Expand All @@ -56,25 +37,7 @@ def parse_node(self, response, node):
return scrapy.Request(link, self._parse_article, meta={'il': il})

def _parse_article(self, response):
if response.css('.entry_blendle_text'):
self.logger.debug('Article {} is paywalled'.format(response.url))
response.meta['il'].add_value('category', 'paywalled')
callback_url = response.css(
'.pwb-item::attr(data-purchase-callback-url)').extract_first()
item_jwt = (
response.css('.pwb-item::attr(data-item-jwt)').extract_first()
)
return self._blendle_session.parse_article(
response=response, item_jwt=item_jwt,
callback_url=callback_url,
callback=self._parse_article_text)
else:
# Not paywalled.
return self._parse_article_text(response)

def _parse_article_text(self, response):
remove_elems = ['iframe', '.blendlebutton__hide__post', '.pwb-item',
'.uebermedien_slogan', '.pwb-subscription']
remove_elems = ['iframe', 'script']
il = FeedEntryItemLoader(response=response,
parent=response.meta['il'],
remove_elems=remove_elems,
Expand Down

0 comments on commit 48d5562

Please sign in to comment.