Skip to content

Commit

Permalink
Merge pull request #145 from Lukas0907/oe1
Browse files: browse the repository at this point in the history
Oe1
  • Loading branch information
Lukas0907 committed Aug 7, 2018
2 parents f196905 + cea8c1e commit 9a0ab8b
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions feeds/spiders/oe1_orf_at.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,32 +8,29 @@

class Oe1OrfAtSpider(FeedsSpider):
name = "oe1.orf.at"
allowed_domains = ["audioapi.orf.at"]
allowed_domains = ["audioapi.orf.at", name]
start_urls = ["https://audioapi.orf.at/oe1/api/json/current/broadcasts"]

_title = "oe1.ORF.at"
_subtitle = "Ö1 Webradio"
_link = "https://oe1.orf.at"
_timezone = "Europe/Vienna"
_logo = "https://{}/static/img/logo_oe1.png".format(name)

def parse(self, response):
    """Schedule a detail request for every already-recorded broadcast.

    The response body is decoded as JSON and treated as a list of day
    entries, each carrying a ``broadcasts`` list. Only the last two day
    entries are considered (presumably the most recent days -- the
    ordering of the API result is not visible here).

    Yields:
        One ``scrapy.Request`` per broadcast whose ``state`` is ``"C"``,
        targeting the broadcast's ``href`` with ``_parse_broadcast`` as
        the callback.
    """
    for day in json.loads(response.text)[-2:]:
        for broadcast in day["broadcasts"]:
            # Only parse if already recorded (i.e. not live/in the future).
            if broadcast["state"] == "C":
                # No per-day meta is attached: the broadcast callback
                # reads the day from the broadcast JSON ("broadcastDay").
                yield scrapy.Request(broadcast["href"], self._parse_broadcast)

def parse_broadcast(self, response):
def _parse_broadcast(self, response):
broadcast = json.loads(response.text)
il = FeedEntryItemLoader(
response=response, timezone=self._timezone, dayfirst=False
)
link = "https://{}/programm/{}/{}".format(
self.name, response.meta["oe1_day"], broadcast["programKey"]
link = "https://{}/player/{}/{}".format(
self.name, broadcast["broadcastDay"], broadcast["programKey"]
)
il.add_value("link", link)
il.add_value("title", broadcast["programTitle"])
Expand All @@ -54,4 +51,15 @@ def parse_broadcast(self, response):
il.add_value("content_html", "<h3>{}</h3>".format(item["title"]))
il.add_value("content_html", item.get("description"))
il.add_value("content_html", broadcast["description"])
il.add_value("category", broadcast["tags"])
if "no_canonical_url" not in broadcast["url"]:
yield scrapy.Request(
broadcast["url"], self._parse_show, dont_filter=True, meta={"il": il},
)
else:
yield il.load_item()

def _parse_show(self, response):
    """Add the show page's category to the entry and emit the item.

    The loader handed over in ``response.meta["il"]`` is used as the
    parent of a new loader bound to this response. The text selected by
    ``.asideBlock:first-child h2::text`` is added as a ``category``
    value before the item is loaded and yielded.
    """
    parent_loader = response.meta["il"]
    show_loader = FeedEntryItemLoader(response=response, parent=parent_loader)
    show_loader.add_css("category", ".asideBlock:first-child h2::text")
    yield show_loader.load_item()

0 comments on commit 9a0ab8b

Please sign in to comment.