
Commit

Merge 6cf356c into 76525cc
pkoutsias committed Mar 1, 2019
2 parents 76525cc + 6cf356c commit d145865
Showing 16 changed files with 5,599 additions and 3 deletions.
5 changes: 5 additions & 0 deletions docs/api/providers.rst
@@ -32,6 +32,11 @@ Shooter
.. automodule:: subliminal.providers.shooter
:private-members:

Subs4Free
---------
.. automodule:: subliminal.providers.subs4free
:private-members:

TheSubDB
--------
.. automodule:: subliminal.providers.thesubdb
1 change: 1 addition & 0 deletions docs/user/how_it_works.rst
@@ -12,6 +12,7 @@ subtitles. Current supported providers are:
* OpenSubtitles
* Podnapisi
* Shooter
* Subs4Free
* TheSubDB
* TvSubtitles

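For context, a minimal sketch of exercising the newly listed provider through subliminal's high-level API; the video path below is hypothetical, and restricting providers to ['subs4free'] simply forces the search through the new provider.

# Hedged usage sketch: search with only the new provider enabled.
from babelfish import Language
from subliminal import download_best_subtitles, region, save_subtitles, scan_video

# configure the cache once per process (any dogpile.cache backend works)
region.configure('dogpile.cache.dbm', arguments={'filename': 'cachefile.dbm'})

video = scan_video('/path/to/Man.on.Fire.2004.720p.BluRay.x264.mkv')
subtitles = download_best_subtitles([video], {Language('ell')}, providers=['subs4free'])
save_subtitles(video, subtitles[video])
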
1 change: 1 addition & 0 deletions setup.py
@@ -73,6 +73,7 @@ def find_version(*file_paths):
'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'shooter = subliminal.providers.shooter:ShooterProvider',
'subs4free = subliminal.providers.subs4free:Subs4FreeProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
],
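As a quick sanity check of the registration above, a hedged sketch using the standard-library importlib.metadata (Python 3.10+ for the group keyword) rather than pkg_resources to confirm the entry point resolves once the package is installed:

# Hedged check that the 'subs4free' entry point is discoverable after install.
from importlib.metadata import entry_points

for ep in entry_points(group='subliminal.providers'):
    if ep.name == 'subs4free':
        provider_cls = ep.load()  # subliminal.providers.subs4free.Subs4FreeProvider
        print(ep.value, provider_cls)
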
1 change: 1 addition & 0 deletions subliminal/extensions.py
@@ -95,6 +95,7 @@ def unregister(self, entry_point):
'opensubtitlesvip = subliminal.providers.opensubtitles:OpenSubtitlesVipProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'shooter = subliminal.providers.shooter:ShooterProvider',
'subs4free = subliminal.providers.subs4free:Subs4FreeProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
])
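The same name is what resolves at runtime through subliminal's provider_manager; a minimal sketch, assuming the stevedore-style item lookup the extension manager exposes:

# Hedged sketch: look up the registered provider class by name.
from subliminal.extensions import provider_manager

provider_cls = provider_manager['subs4free'].plugin
print(provider_cls)  # expected: subliminal.providers.subs4free.Subs4FreeProvider
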
272 changes: 272 additions & 0 deletions subliminal/providers/subs4free.py
@@ -0,0 +1,272 @@
# -*- coding: utf-8 -*-
import io
import logging
import os
import random
import re
import zipfile

import rarfile
from babelfish import Language
from guessit import guessit
from requests import Session

from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..matches import guess_matches
from ..subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending
from ..utils import sanitize
from ..video import Movie

logger = logging.getLogger(__name__)

year_re = re.compile(r'^\((\d{4})\)$')


class Subs4FreeSubtitle(Subtitle):
"""Subs4Free Subtitle."""
provider_name = 'subs4free'

def __init__(self, language, page_link, title, year, version, download_link):
super(Subs4FreeSubtitle, self).__init__(language, page_link=page_link)
self.title = title
self.year = year
self.version = version
self.download_link = download_link
self.hearing_impaired = None
self.encoding = 'utf8'

@property
def id(self):
return self.download_link

@property
def info(self):
return self.version or self.download_link

def get_matches(self, video):
matches = guess_matches(video, {
'title': self.title,
'year': self.year,
'release_group': self.version
})

# other properties
matches |= guess_matches(video, guessit(self.version, {'type': 'movie'}), partial=True)

return matches


class Subs4FreeProvider(Provider):
"""Subs4Free Provider."""
languages = {Language.fromalpha2(l) for l in {'el', 'en'}}
video_types = (Movie,)
server_url = 'https://www.sf4-industry.com'
download_url = '/getSub.html'
search_url = '/search_report.php?search={}&searchType=1'
anti_block_1 = 'https://images.subs4free.info/favicon.ico'
anti_block_2 = 'https://www.subs4series.com/includes/anti-block-layover.php?launch=1'
anti_block_3 = 'https://www.subs4series.com/includes/anti-block.php'
subtitle_class = Subs4FreeSubtitle

def __init__(self):
self.session = None

def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

def terminate(self):
self.session.close()

def get_show_links(self, title, year=None):
"""Get the matching show links for `title` and `year`.
First search in the result of :meth:`_get_show_suggestions`.
:param title: show title.
:param year: year of the show, if any.
:type year: int
:return: the show links, if found.
:rtype: list of str
"""
title = sanitize(title)
suggestions = self._get_suggestions(title)

show_links = []
for suggestion in suggestions:
show_title = sanitize(suggestion['title'])

if show_title == title or (year and show_title == '{title} {year:d}'.format(title=title, year=year)):
logger.debug('Getting show id')
show_links.append(suggestion['link'].split('?p=')[-1])

return show_links

@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
def _get_suggestions(self, title):
"""Search the show or movie id from the `title` and `year`.
:param str title: title of the show.
:return: the show suggestions found.
:rtype: list of dict
"""
# make the search
logger.info('Searching show ids with %r', title)
r = self.session.get(self.server_url + self.search_url.format(title),
headers={'Referer': self.server_url}, timeout=10)
r.raise_for_status()

if not r.content:
logger.debug('No data returned from provider')
return []

soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
suggestions = [{'link': l.attrs['value'], 'title': l.text}
for l in soup.select('select[name="Mov_sel"] > option[value]')]
logger.debug('Found suggestions: %r', suggestions)

return suggestions

def query(self, movie_id, title, year):
# get the subtitle list for the given movie id
logger.info('Getting the subtitle list of show id %s', movie_id)
if movie_id:
page_link = self.server_url + '/' + movie_id
else:
page_link = self.server_url + self.search_url.format(' '.join([title, str(year)]))

r = self.session.get(page_link, timeout=10)
r.raise_for_status()

if not r.content:
logger.debug('No data returned from provider')
return []

soup = ParserBeautifulSoup(r.content, ['html.parser'])

year = None
year_element = soup.select_one('td#dates_header > table div')
matches = False
if year_element:
matches = year_re.match(str(year_element.contents[2]).strip())
if matches:
year = int(matches.group(1))

title_tag = soup.select_one('td#dates_header > table u')
show_title = str(title_tag.contents[0]).strip() if title_tag else None

subtitles = []
# loop over episode rows
for subs_tag in soup.select('table .seeDark,.seeMedium'):
# read common info
version = subs_tag.find('b').text
download_link = self.server_url + subs_tag.find('a')['href']
language = Language.fromalpha2(subs_tag.find('img')['src'].split('/')[-1].split('.')[0])

subtitle = self.subtitle_class(language, page_link, show_title, year, version, download_link)

logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)

return subtitles

def list_subtitles(self, video, languages):
# lookup show_id
titles = [video.title] + video.alternative_titles if isinstance(video, Movie) else []

show_links = None
for title in titles:
show_links = self.get_show_links(title, video.year)
if show_links:
break

subtitles = []
# query for subtitles with the show_id
if show_links:
for show_link in show_links:
subtitles += [s for s in self.query(show_link, video.title, video.year) if s.language in languages]
else:
subtitles += [s for s in self.query(None, video.title, video.year) if s.language in languages]

return subtitles

def download_subtitle(self, subtitle):
if isinstance(subtitle, Subs4FreeSubtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
r.raise_for_status()

if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return

soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
download_element = soup.select_one('input[name="id"]')
image_element = soup.select_one('input[type="image"]')
subtitle_id = download_element['value'] if download_element else None
width = int(str(image_element['width']).strip('px')) if image_element else 0
height = int(str(image_element['height']).strip('px')) if image_element else 0

if not subtitle_id:
logger.debug('Unable to download subtitle. No download link found')
return

self.apply_anti_block(subtitle)

download_url = self.server_url + self.download_url
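# the download form submits via an image button, so x/y emulate a click
# at a random point inside that image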
r = self.session.post(download_url, data={'id': subtitle_id, 'x': random.randint(0, width),
'y': random.randint(0, height)},
headers={'Referer': subtitle.download_link}, timeout=10)
r.raise_for_status()

if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return

archive = _get_archive(r.content)

subtitle_content = _get_subtitle_from_archive(archive) if archive else r.content

if subtitle_content:
subtitle.content = fix_line_ending(subtitle_content)
else:
logger.debug('Could not extract subtitle from %r', archive)

def apply_anti_block(self, subtitle):
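"""Fetch the provider's anti-block resources (favicon and anti-block scripts) before requesting the download, as a browser would."""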
self.session.get(self.anti_block_1, headers={'Referer': subtitle.download_link}, timeout=10)
self.session.get(self.anti_block_2, headers={'Referer': subtitle.download_link}, timeout=10)
self.session.get(self.anti_block_3, headers={'Referer': subtitle.download_link}, timeout=10)


def _get_archive(content):
# open the archive
archive_stream = io.BytesIO(content)
archive = None
if rarfile.is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive = rarfile.RarFile(archive_stream)
elif zipfile.is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)

return archive


def _get_subtitle_from_archive(archive):
for name in archive.namelist():
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue

# discard non-subtitle files
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue

return archive.read(name)

return None
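
For completeness, a hedged sketch of driving the provider directly rather than through the high-level API; the movie filename is made up, and the provider is used as a context manager so initialize()/terminate() run automatically.

# Hedged sketch: use Subs4FreeProvider on its own.
from babelfish import Language
from subliminal import region
from subliminal.providers.subs4free import Subs4FreeProvider
from subliminal.video import Movie

# the cached suggestion lookup needs a configured dogpile.cache region
region.configure('dogpile.cache.memory')

video = Movie('Man.on.Fire.2004.720p.BluRay.x264.mkv', 'Man on Fire', year=2004)

with Subs4FreeProvider() as provider:
    found = provider.list_subtitles(video, {Language('ell'), Language('eng')})
    if found:
        provider.download_subtitle(found[0])
        print(found[0].info, len(found[0].content or b''))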
