Merge pull request #134 from Lukas0907/fixes
Migrate all http:// URLs to https://.
Lukas0907 committed Jul 24, 2018
2 parents 0af56d0 + e12289a commit ca248b3
Showing 14 changed files with 28 additions and 28 deletions.
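
The change itself is mechanical: every http:// URL literal in the spider modules becomes https://. The sketch below is one hypothetical way such a bulk rewrite could be scripted; this commit records only the result, not the tool used, and the helper assumes the feeds/spiders/ layout shown in the diff.

    #!/usr/bin/env python3
    """Rewrite http:// URL literals to https:// in the spider modules.

    A hypothetical helper, not part of this commit; it reproduces the
    kind of bulk edit recorded in the diff below.
    """
    import pathlib
    import re

    # Only touch double-quoted string literals that start a URL,
    # which covers every change in this diff.
    HTTP_URL = re.compile(r'"http://')

    def migrate(root: str = "feeds/spiders") -> None:
        for path in pathlib.Path(root).glob("*.py"):
            text = path.read_text(encoding="utf-8")
            new_text = HTTP_URL.sub('"https://', text)
            if new_text != text:
                path.write_text(new_text, encoding="utf-8")
                print("rewrote", path)

    if __name__ == "__main__":
        migrate()
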
2 changes: 1 addition & 1 deletion feeds/spiders/__init__.py
@@ -21,7 +21,7 @@ def generate_feed_header(
         il.add_value("title", _title)
         il.add_value("subtitle", subtitle or getattr(self, "_subtitle", None))
         il.add_value(
-            "link", link or getattr(self, "_link", "http://www.{}".format(self.name))
+            "link", link or getattr(self, "_link", "https://www.{}".format(self.name))
         )
         il.add_value("path", path or getattr(self, "_path", None))
         il.add_value(
2 changes: 1 addition & 1 deletion feeds/spiders/ak_ciando_com.py
@@ -11,7 +11,7 @@ class AkCiandoComSpider(FeedsSpider):
     name = "ak.ciando.com"
     allowed_domains = ["ak.ciando.com"]
     start_urls = [
-        "http://ak.ciando.com/shop/index.cfm?fuseaction=cat_overview&cat_ID=0"
+        "https://ak.ciando.com/shop/index.cfm?fuseaction=cat_overview&cat_ID=0"
         "&cat_nav=0&more_new=1&rows=100&intStartRow=1"
     ]

4 changes: 2 additions & 2 deletions feeds/spiders/atv_at.py
@@ -10,7 +10,7 @@
 class AtvAtSpider(FeedsSpider):
     name = "atv.at"
     allowed_domains = ["atv.at"]
-    start_urls = ["http://atv.at/mediathek/neue-folgen/"]
+    start_urls = ["https://atv.at/mediathek/neue-folgen/"]
     custom_settings = {
         # The site is really shitty, don't overwhelm it with more requests.
         "CONCURRENT_REQUESTS": 1
@@ -41,7 +41,7 @@ def parse_program(self, response):
         data = data["config"]["initial_video"]["parts"][0]["tracking"]["nurago"]
         il = FeedEntryItemLoader(
             response=response,
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
             timezone=self._timezone,
             dayfirst=True,
         )
6 changes: 3 additions & 3 deletions feeds/spiders/biblioweb_at.py
@@ -18,9 +18,9 @@ def start_requests(self):
         self._path = self._library
         self._title = "Bibliothek {}".format(self._library.title())
         self._subtitle = "Neue Titel in der {}".format(self._title)
-        self._link = "http://www.biblioweb.at/{}/".format(self._library)
+        self._link = "https://www.biblioweb.at/{}/".format(self._library)
         yield scrapy.Request(
-            "http://www.biblioweb.at/{}/start.asp".format(self._library),
+            "https://www.biblioweb.at/{}/start.asp".format(self._library),
             callback=self.parse,
             meta={"dont_cache": True},
         )
@@ -37,7 +37,7 @@ def parse(self, response):
         # ASP cookie. Without this cookie the requests to webopac123 (!) are
         # ignored and will be redirected to the "login" page.
         yield scrapy.Request(
-            "http://www.biblioweb.at/webopac123/webopac.asp"
+            "https://www.biblioweb.at/webopac123/webopac.asp"
             "?kat=1&content=show_new&seit={}&order_by=Sachtitel".format(self._days),
             callback=self.parse_overview_page,
             meta={"dont_cache": True},
2 changes: 1 addition & 1 deletion feeds/spiders/cbird_at.py
@@ -8,7 +8,7 @@
 class CbirdAtSpider(FeedsCrawlSpider):
     name = "cbird.at"
     allowed_domains = ["cbird.at"]
-    start_urls = ["http://cbird.at/hilfe/neu/", "http://cbird.at/impressum"]
+    start_urls = ["https://cbird.at/hilfe/neu/", "https://cbird.at/impressum"]
     rules = (Rule(LinkExtractor(allow=("hilfe/neu/(\d+)",)), callback="parse_item"),)

     _title = "Neue cbird Versionen"
6 changes: 3 additions & 3 deletions feeds/spiders/falter_at.py
@@ -78,7 +78,7 @@ def parse_wwei(self, response):
         review = entry["review"]
         il = FeedEntryItemLoader(
             response=response,
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
             timezone=self._timezone,
             dayfirst=False,
         )
@@ -122,7 +122,7 @@ def parse_archive_search(self, response):
         for i, item in enumerate(json.loads(response.text)["result"]["hits"]):
             il = FeedEntryItemLoader(
                 response=response,
-                base_url="http://{}".format(self.name),
+                base_url="https://{}".format(self.name),
                 timezone=self._timezone,
             )
             il.add_value("path", "magazine")
@@ -148,7 +148,7 @@ def parse_item_text(self, response):
             response=response,
             parent=response.meta["il"],
             remove_elems=remove_elems,
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
         )
         content = response.xpath("//article").extract_first()
         if "Lesen Sie diesen Artikel in voller Länge" in content:
6 changes: 3 additions & 3 deletions feeds/spiders/konsument_at.py
@@ -7,7 +7,7 @@
 class KonsumentAtSpider(FeedsSpider):
     name = "konsument.at"
     allowed_domains = ["konsument.at"]
-    start_urls = ["http://www.konsument.at/page/das-aktuelle-heft"]
+    start_urls = ["https://www.konsument.at/page/das-aktuelle-heft"]

     _title = "KONSUMENT.AT"
     _subtitle = "Objektiv, unbestechlich, keine Werbung"
@@ -49,7 +49,7 @@ def _parse_article_url(self, response):
         il = FeedEntryItemLoader(
             response=response,
             timezone=self._timezone,
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
             dayfirst=True,
             remove_elems=remove_elems,
         )
@@ -78,7 +78,7 @@ def _parse_article(self, response):
         il = FeedEntryItemLoader(
             response=response,
             parent=response.meta["il"],
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
             remove_elems=remove_elems,
         )
         il.add_css("content_html", "#page")
2 changes: 1 addition & 1 deletion feeds/spiders/nachrichten_at.py
@@ -11,7 +11,7 @@ class NachrichtenAtSpider(FeedsXMLFeedSpider):
     _title = "Oberösterreichische Nachrichten"
     _subtitle = "OÖN"
     _link = "https://www.{}".format(name)
-    _icon = "http://static1.{}.at/oonup/images/apple-touch-icon.png".format(name)
+    _icon = "https://static1.{}.at/oonup/images/apple-touch-icon.png".format(name)
     _logo = "https://www.{}/pics/webapp/touchicon_180x180.png".format(name)
     _timezone = "Europe/Vienna"

4 changes: 2 additions & 2 deletions feeds/spiders/oe1_orf_at.py
@@ -13,7 +13,7 @@ class Oe1OrfAtSpider(FeedsSpider):

     _title = "oe1.ORF.at"
     _subtitle = "Ö1 Webradio"
-    _link = "http://oe1.orf.at"
+    _link = "https://oe1.orf.at"
     _timezone = "Europe/Vienna"

     def parse(self, response):
@@ -39,7 +39,7 @@ def parse_broadcast(self, response):
         il.add_value("title", broadcast["programTitle"])
         il.add_value("title", broadcast["title"])
         if broadcast.get("streams"):
-            stream = "http://loopstream01.apa.at/?channel=oe1&id={}".format(
+            stream = "https://loopstream01.apa.at/?channel=oe1&id={}".format(
                 broadcast["streams"][0]["loopStreamId"]
             )
             il.add_value("enclosure_iri", stream)
4 changes: 2 additions & 2 deletions feeds/spiders/orf_at.py
@@ -71,7 +71,7 @@ def feed_headers(self):
             channel_url = "{}.ORF.at".format(channel)
             yield self.generate_feed_header(
                 title=channel_url,
-                link="http://{}".format(channel_url.lower()),
+                link="https://{}".format(channel_url.lower()),
                 path=channel,
                 logo=self._get_logo(channel),
             )
@@ -97,7 +97,7 @@ def parse_node(self, response, node):
         for link in links:
             if any(
                 link.startswith(url)
-                for url in ["https://debatte.orf.at", "http://iptv.orf.at"]
+                for url in ["https://debatte.orf.at", "https://iptv.orf.at"]
             ):
                 self.logger.debug("Ignoring link to '{}'".format(link))
             else:
6 changes: 3 additions & 3 deletions feeds/spiders/puls4_com.py
@@ -10,11 +10,11 @@
 class Pusl4ComSpider(FeedsSpider):
     name = "puls4.com"
     allowed_domains = ["puls4.com"]
-    start_urls = ["http://www.puls4.com/api/json-fe/page/sendungen"]
+    start_urls = ["https://www.puls4.com/api/json-fe/page/sendungen"]

     _timezone = "Europe/Vienna"
     _icon = (
-        "http://www.puls4.com/bundles/wundermanpuls4/images/" + "favicon/favicon.png"
+        "https://www.puls4.com/bundles/wundermanpuls4/images/" + "favicon/favicon.png"
     )

     def parse(self, response):
@@ -55,7 +55,7 @@ def _parse_show(self, response):
     def _parse_episode(self, response):
         il = FeedEntryItemLoader(
             response=response,
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
             timezone=self._timezone,
             dayfirst=True,
         )
2 changes: 1 addition & 1 deletion feeds/spiders/tvthek_orf_at.py
@@ -16,7 +16,7 @@ class TvthekOrfAtSpider(FeedsSpider):

     _title = "TVthek.ORF.at"
     _subtitle = "ORF TVTHEK"
-    _link = "http://tvthek.orf.at"
+    _link = "https://tvthek.orf.at"
     _timezone = "Europe/Vienna"

     def start_requests(self):
4 changes: 2 additions & 2 deletions feeds/spiders/uebermedien_de.py
@@ -73,7 +73,7 @@ def _set_steady_token(self, response):

     def parse_node(self, response, node):
         il = FeedEntryItemLoader(
-            response=response, base_url="http://{}".format(self.name), dayfirst=True
+            response=response, base_url="https://{}".format(self.name), dayfirst=True
         )
         il.add_value("updated", node.xpath("//pubDate/text()").extract_first())
         il.add_value("author_name", node.xpath("//dc:creator/text()").extract_first())
@@ -100,7 +100,7 @@ def _parse_article(self, response):
             response=response,
             parent=response.meta["il"],
             remove_elems=remove_elems,
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
         )
         il.add_css("content_html", ".entry-content")
         return il.load_item()
6 changes: 3 additions & 3 deletions feeds/spiders/wienerlinien_at.py
@@ -16,7 +16,7 @@ class WienerLinienAtSpider(FeedsSpider):
         }
     }
     start_urls = [
-        "http://www.wienerlinien.at/eportal3/ep/scrollingListView.do?"
+        "https://www.wienerlinien.at/eportal3/ep/scrollingListView.do?"
         "scrolling=true&startIndex=0&channelId=-47186&programId=74577"
     ]

@@ -33,7 +33,7 @@ def parse(self, response):
             response=response,
             timezone=self._timezone,
             ignoretz=True,
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
         )
         link = response.urljoin(item.css("a::attr(href)").extract_first())
         il.add_value("link", link)
@@ -49,7 +49,7 @@ def parse_item(self, response):
             parent=response.meta["il"],
             remove_elems=remove_elems,
             change_tags=change_tags,
-            base_url="http://{}".format(self.name),
+            base_url="https://{}".format(self.name),
         )
         il.add_xpath("content_html", '//div[@id="main-inner"]')
         yield il.load_item()

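Flipping the scheme wholesale only works if every touched host actually answers over HTTPS. A minimal spot check, assuming the third-party requests package (not a dependency shown in this commit), could confirm that before the spiders rely on the new URLs; the URL list here is a small sample from the diff above, not exhaustive.

    import requests

    # Sample of hosts touched by this commit; a HEAD request is enough
    # to confirm that each one actually serves HTTPS.
    URLS = [
        "https://www.biblioweb.at/",
        "https://atv.at/mediathek/neue-folgen/",
        "https://tvthek.orf.at",
    ]

    for url in URLS:
        try:
            response = requests.head(url, allow_redirects=True, timeout=10)
            print(url, "->", response.status_code)
        except requests.RequestException as exc:
            print(url, "failed:", exc)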