Skip to content

Commit

Permalink
Fix torznab/jackett and other providers' parsing with parse_rss_item.
Browse files Browse the repository at this point in the history
Signed-off-by: miigotu <miigotu@gmail.com>
  • Loading branch information
miigotu committed Sep 4, 2022
1 parent 24102cf commit 5737f5e
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 27 deletions.
4 changes: 3 additions & 1 deletion sickchill/oldbeard/providers/newznab.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def get_newznab_categories(self, just_caps=False):
logger.debug(error_string)
return False, return_categories, error_string

self.torznab = self.check_torznab(html)
self.caps = html.find("searching")
if just_caps:
return True, return_categories, "Just checking caps!"
Expand Down Expand Up @@ -307,6 +308,7 @@ def search(self, search_strings, age=0, ep_obj=None):
data = self.get_url(urljoin(self.url, "api"), params=search_params, returns="text")

if not data:
logger.debug("No data was returned from the provider")
break

with BS4Parser(data, language="xml") as html:
Expand All @@ -317,7 +319,7 @@ def search(self, search_strings, age=0, ep_obj=None):

for item in html("item"):
try:
result = self.parse_feed_item(item, self.url, self.torznab)
result = self.parse_feed_item(item, self.url)
if result:
items.append(result)
except Exception:
Expand Down
2 changes: 1 addition & 1 deletion sickchill/oldbeard/providers/nyaa.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def search(self, search_strings, age=0, ep_obj=None):
with BS4Parser(data, language="xml") as html:
for item in html.find_all("item"):
try:
result = self.parse_feed_item(item, self.url, True, size_units=self.size_units)
result = self.parse_feed_item(item, self.url, size_units=self.size_units)
if result:
if result["seeders"] < self.minseed or result["leechers"] < self.minleech:
if mode != "RSS":
Expand Down
67 changes: 43 additions & 24 deletions sickchill/oldbeard/tvcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,30 @@ class RSSTorrentMixin:

@classmethod
def check_link(cls, link, url):
return urlparse(link).netloc == urlparse(url).netloc or validators.url(link) == True
return urlparse(link).netloc == urlparse(url).netloc or validators.url(link) == True or link.startswith("magnet")

@classmethod
def parse_feed_item(cls, item, url, torznab=False, size_units=None):
def parse_feed_item(cls, item, url, size_units=None):
title = item.title.get_text(strip=True)
found_urls = set()
download_url = None
if item.link:
if cls.check_link(item.link.get_text(strip=True), url):
download_url = item.link.get_text(strip=True)
elif cls.check_link(item.link.next.strip(), url):
download_url = item.link.next.strip()
found_urls.add(item.link.get_text(strip=True))
if item.link.next.strip():
found_urls.add(item.link.next.strip())
if item.enclosure and item.enclosure.get("url", "").strip():
found_urls.add(item.enclosure.get("url", "").strip())

if not download_url and item.enclosure and cls.check_link(item.enclosure.get("url", "").strip(), url):
download_url = item.enclosure.get("url", "").strip()
for found_url in found_urls:
if cls.check_link(found_url, url):
download_url = found_url

if not (title and download_url):
logger.debug(f"Skipping result {title}, {found_urls}")
logger.debug(f"{item}")
return

seeders = leechers = None
seeders = leechers = item_size = None

regex = "^.*(?P<guid>[{]?[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}[}]?).*$"
info_hash = item.infoHash or item.guid
Expand All @@ -66,27 +71,41 @@ def parse_feed_item(cls, item, url, torznab=False, size_units=None):
else:
info_hash = info_hash.get_text(strip=True)

if item.size and item.seeders and item.leechers:
torznab = True
item_size = item.size.get_text(strip=True) or -1
if item.seeders:
seeders = try_int(item.seeders.get_text(strip=True))
if item.peers:
leechers = try_int(item.peers.get_text(strip=True))
elif item.leechers:
leechers = try_int(item.leechers.get_text(strip=True))
logger.debug(item_size)
else:
if "gingadaddy" in url:
size_regex = re.search(r"\d*.?\d* [KMGT]B", str(item.description))
item_size = size_regex.group() if size_regex else -1
else:
item_size = item.size.get_text(strip=True) if item.size else -1

for attr in item.find_all(["newznab:attr", "torznab:attr"]):
item_size = attr["value"] if attr["name"] == "size" else item_size
seeders = try_int(attr["value"]) if attr["name"] == "seeders" else seeders
leechers = try_int(attr["value"]) if attr["name"] == "peers" else leechers
for attr in item.find_all(["newznab:attr", "torznab:attr"]):
item_size = attr["value"] if attr["name"] == "size" else item_size
seeders = try_int(attr["value"]) if attr["name"] == "seeders" else seeders
leechers = try_int(attr["value"]) if attr["name"] == "peers" else leechers
info_hash = attr["value"] if attr["name"] == "infoHash" else info_hash
# download_url = attr["value"] if attr["name"] == "magneturl" else download_url
# genre = attr["value"] if attr["name"] == "genre" else genre

# Multiple values possible for category
# category = attr["value"] if attr["name"] == "category" else category

if item.size:
item_size = item.size.get_text(strip=True) or -1
elif "gingadaddy" in url:
size_regex = re.search(r"\d*.?\d* [KMGT]B", str(item.description))
item_size = size_regex.group() if size_regex else -1

torznab = any([item.seeders, item.leechers, item.peers, download_url.endswith("torrent"), download_url.startswith("magnet")])

if not item_size or (torznab and (seeders is None or leechers is None)):
logger.debug(f"Did not find size|seeders|leechers: {item_size}, {seeders}, {leechers}")
logger.debug(f"{item}")
return

if torznab and seeders == 0:
# TODO: Implement minseed/minleech for torznab/jackett
logger.debug(f"Skipping torznab result {title} because there are no seeders.")

size = convert_size(item_size, units=size_units) or -1

return {"title": title, "link": download_url, "size": size, "seeders": seeders, "leechers": leechers, "hash": info_hash}
Expand All @@ -113,7 +132,7 @@ def getFeed(cls, url, params=None, request_hook=None, size_units=None):
with BS4Parser(data, language="xml") as feed:
for item in feed("item"):
try:
result = cls.parse_feed_item(item, url, cls.check_torznab(feed), size_units=size_units)
result = cls.parse_feed_item(item, url, size_units=size_units)
if result:
items.append(result)
except Exception as error:
Expand Down
2 changes: 1 addition & 1 deletion sickchill/providers/nzb/NZBProvider.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def is_active(self):

def _get_result(self, episodes):
result = NZBSearchResult(episodes)
if self.torznab or result.url.startswith("magnet"):
if self.torznab or result.url.startswith("magnet") or result.url.endswith("torrent"):
result.resultType = GenericProvider.TORRENT

return result
Expand Down

0 comments on commit 5737f5e

Please sign in to comment.