In [186]:
from lxml import html
import requests

# Search-URL path segment that selects the HD TV-show category.
CONST_HD_TVSHOW = '208'

# Search-URL path segment for each supported ordering (descending).
CONST_SORT_BY = {
    'seeds': '8',
    'upload_date': '3',
}

# Folders that are scanned for already-downloaded episodes.
# Mind the forward slashes, even on Windows.
TORRENTS_FOLDERS = [
    'D:/torrents/unsorted',
    'D:/torrents/tvseries',
]

# Title fragments of the shows being followed.
WATCHED_SERIES = [
    'expanse',
    'x-files',
    'flash',
]

# Page scraped by load_proxies() for proxy URLs, statuses and speeds.
PROXYBAY_URL = "https://proxybay.la/"

def load_proxies():
    """Scrape PROXYBAY_URL and pair up each proxy's URL, status and speed.

    Returns:
        The result of zipping three XPath result lists taken from the page:
        the ``href`` of every ``rel="nofollow"`` link, the ``alt`` text of
        every status image, and the text of every speed cell.
    """
    response = requests.get(PROXYBAY_URL)
    document = html.fromstring(response.content)

    def select(expression):
        # Evaluate one XPath expression against the downloaded page.
        return document.xpath(expression)

    return zip(
        select('//a[@rel="nofollow"]/@href'),
        select('//td[@class="status"]/img/@alt'),
        select('//td[@class="speed"]/text()')
    )

def get_fastest_proxies(proxies=None):
    """Return the URLs of the proxies that are up, ordered by their speed column.

    Args:
        proxies: optional iterable of ``(url, status, speed)`` tuples. When
            omitted, the list is scraped with load_proxies().

    Returns:
        A list of URLs. Only entries whose status is ``'up'`` are kept, they
        are ordered by the speed value, and the first two entries of that
        ordering are dropped.
    """
    if proxies is None:
        proxies = load_proxies()

    # Build real lists instead of relying on filter()/map() returning lists,
    # so sorting and slicing work for any iterable on Python 2 and 3.
    # NOTE(review): the speed is compared exactly as scraped (text), so the
    # ordering is lexicographic - confirm the page's speed format.
    live = sorted((p for p in proxies if p[1] == 'up'), key=lambda p: p[2])

    return [p[0] for p in live[2:]]  # throw out the first two, just in case

    # it's actually third fastest, but oh well
def get_fastest_proxy():
    """Return the first URL produced by get_fastest_proxies().

    That list already skips the first two entries of its ordering, so this
    is the third entry of the speed ordering rather than the first.
    """
    candidates = get_fastest_proxies()
    return candidates[0]

def load_tpb_video(title, sort_by):
    """Search the HD TV-show category and scrape the result rows.

    Args:
        title: search text placed in the URL path.
        sort_by: key of CONST_SORT_BY selecting the ordering.

    Returns:
        The zipped (torrent name, description, magnet link) triples.
    """
    # Hard-coded mirror; get_fastest_proxy() is the disabled alternative.
    base_url = 'https://tpb.proxyduck.net'#get_fastest_proxy()
    query = '/'.join(['/search', title, '0', CONST_SORT_BY[sort_by], CONST_HD_TVSHOW])

    # Plain form first, e.g. https://thepiratebay.se/search/x-files/0/3/208
    page = requests.get(base_url + query)
    if page.status_code == 404:
        # On a 404 retry through the "?load=" form,
        # e.g. https://tpb.proxyduck.net/?load=/search/24/0/3/208
        page = requests.get(base_url + '/?load=' + query)

    tree = html.fromstring(page.content)
    columns = [
        tree.xpath('//a[@class="detLink"]/text()'),
        tree.xpath('//font[@class="detDesc"]/text()'),
        tree.xpath('//a[@title="Download this torrent using magnet"]/@href'),
    ]
    return zip(*columns)

from IPython.display import HTML    

def get_links_for_episode(title, sort_by):
    """Returns a list of HTML links that pop up when searching for $title$.

    Args:
        title: search text handed to load_tpb_video().
        sort_by: key of CONST_SORT_BY selecting the ordering.

    Returns:
        A list of ``<a href=...>name, description</a><br>`` strings, one per
        search result. The scraped text and the magnet link are escaped, so a
        result containing ``<``, ``&`` or quotes cannot break or inject markup.
    """
    from xml.sax.saxutils import escape, quoteattr

    htmls = []
    for name, desc, magnet in load_tpb_video(title, sort_by):
        # desc[:-10] cuts the ", ULed By" part from the end, because the
        # uploader name is not scraped anyway.
        label = escape(name + ", " + desc[:-10])
        # quoteattr() returns the value already wrapped in suitable quotes.
        htmls.append('<a href=' + quoteattr(magnet) + '>' + label + '</a><br>')

    return htmls

def join_all_links(htmls):
    """Concatenate every HTML snippet in ``htmls`` into one string.

    Args:
        htmls: iterable of HTML strings.

    Returns:
        The snippets joined without a separator ("" for an empty input).
    """
    # The joined string has to be returned; it used to be built and dropped.
    return "".join(htmls)
    
def join_top_links(htmls):
    """Concatenate at most the first three snippets of ``htmls``.

    A falsy ``htmls`` (empty list, None) gives the empty string.
    """
    return "".join(htmls[:3]) if htmls else ""

# Checking the most recent episodes in the torrents folders
import re, string, os

def list_downloaded_episodes(series, folders=None):
    """List the episodes of ``series`` found as file names in the torrent folders.

    A file counts when its name contains ``series`` (case-insensitively) and
    an ``SxxEyy`` tag with two-digit season and episode numbers (also
    case-insensitive, so ``s01e09`` is recognised as well).

    Args:
        series: title fragment to look for in the file names.
        folders: optional list of directories to scan; defaults to
            TORRENTS_FOLDERS.

    Returns:
        A list of dicts ``{'S': 'xx', 'E': 'yy'}`` sorted by season and then
        episode, so the last element is the most recent one. os.listdir()
        gives no ordering guarantee, hence the explicit sort.

    Raises:
        OSError: if one of the folders cannot be listed.
    """
    if folders is None:
        folders = TORRENTS_FOLDERS

    needle = series.lower()
    tag = re.compile(r'S(?P<S>\d\d)E(?P<E>\d\d)', re.IGNORECASE)

    eps = []
    for folder in folders:
        for name in os.listdir(folder):
            if needle not in name.lower():
                continue
            match = tag.search(name)
            if match:
                eps.append({ 'S': match.group('S')
                           , 'E': match.group('E') })

    # Both fields are fixed-width digit strings, so text order == numeric order.
    eps.sort(key=lambda ep: (ep['S'], ep['E']))
    return eps # that's a list of dicts!

def last_downloaded_episode(series):
    """Return the highest season/episode of ``series`` found on disk.

    Args:
        series: title fragment handed to list_downloaded_episodes().

    Returns:
        The ``{'S': ..., 'E': ...}`` dict with the greatest (season, episode)
        pair, or ``{'S': '01', 'E': '00'}`` when nothing has been downloaded,
        so that the "next" episode becomes the first one of season 1.
    """
    # Scan the folders only once; the listing is a directory walk.
    episodes = list_downloaded_episodes(series)
    if not episodes:
        return { 'S': '01'
               , 'E': '00' }
    # Pick by value instead of by position: the listing order is not
    # guaranteed to be chronological.
    return max(episodes, key=lambda ep: (int(ep['S']), int(ep['E'])))

def episode_to_str(e):
    """Format an episode dict as an ``SxxEyy`` tag.

    Args:
        e: dict with keys ``"S"`` and ``"E"``; the values may be ints or
            strings and are zero-padded to at least two characters.

    Returns:
        A string such as ``'S01E09'``. ``e`` itself is left untouched.
    """
    # Work on converted copies: the input dict is not modified, and int
    # values of any width are turned into text before concatenation.
    season = str(e["S"]).zfill(2)
    number = str(e["E"]).zfill(2)
    return 'S' + season + 'E' + number

import json
from itertools import dropwhile

def next_episode(title, current_episode):
    """Uses tvmaze api to look up the episode following ``current_episode``.

    API description: http://www.tvmaze.com/api

    Args:
        title: show title used for the single-show search.
        current_episode: dict with keys ``'S'`` and ``'E'`` (int-convertible).

    Returns:
        A dict ``{'S': str, 'E': str}`` for the first listed episode whose
        (season, number) is greater than the current one, or None when there
        is no such episode.
    """
    # Unpacking the episode dict
    current = (int(current_episode['S']), int(current_episode['E']))

    # Let requests build the query string so that titles containing spaces,
    # '&', '#', ... are encoded instead of corrupting the URL.
    show = requests.get('http://api.tvmaze.com/singlesearch/shows',
                        params={'q': title}).json()
    episodes = requests.get(
        'http://api.tvmaze.com/shows/' + str(show["id"]) + '/episodes').json()

    # First episode in listing order that lies after the current one.
    upcoming = next(
        (ep for ep in episodes if (ep["season"], ep["number"]) > current),
        None)

    if not upcoming:
        return None

    return { 'S': str(upcoming["season"])
           , 'E': str(upcoming["number"]) }

def pprq(dict):
    """Print every episode dict in ``dict`` as an ``SxxEyy`` tag, one per line.

    Args:
        dict: iterable of episode dicts accepted by episode_to_str(). The
            name shadows the builtin but is kept so keyword callers still work.
    """
    for d in dict:
        # Call form of print: valid on both Python 2 and Python 3.
        print(episode_to_str(d))

def check_for_next_episode(series):
    """Search for torrents of the episode that follows the last downloaded one.

    Returns:
        A list of HTML link strings, or an empty list when next_episode()
        reports no further episode.
    """
    upcoming = next_episode(series, last_downloaded_episode(series))
    if not upcoming:
        return []

    search_terms = series + ' ' + episode_to_str(upcoming)
    newest_first = get_links_for_episode(search_terms, 'upload_date')
    if len(newest_first) <= 10:
        return newest_first

    # Many results (meaning it's popular and old enough): sort by seeds instead.
    return get_links_for_episode(search_terms, 'seeds')

def check_watched_series():
    """Build one HTML fragment with the top links for every watched series.

    Returns:
        A string holding, for each entry of WATCHED_SERIES that produced
        links, a large title line followed by its top links; "" when nothing
        was found.
    """
    sections = []
    for s in WATCHED_SERIES:
        # Look the series up once and reuse the result; a second call would
        # repeat every API request and page scrape.
        links = join_top_links(check_for_next_episode(s))
        if links:
            sections.append("<font size=+2>" + s.title() + "</font><br>"
                            + links
                            + "<br>")
    return "".join(sections)

# Wrap the collected links in IPython's HTML object so the cell renders them.
HTML(check_watched_series())

In [1]:
import PTN

# Example: split a release name into its parts (title, season, episode,
# quality, ...); the recorded cell output below shows the resulting dict.
PTN.parse('The.Expanse.S01E09.720p.HDTV.x264-KILLERS[ettv]')

{'codec': 'x264',
 'episode': 9,
 'group': 'KILLERS[ettv]',
 'quality': 'HDTV',
 'resolution': '720p',
 'season': 1,
 'title': 'The Expanse'}

In [7]:
import subtitle_downloader
        
# NOTE(review): presumably fetches subtitles for the given video file name -
# confirm against the subtitle_downloader module, which is not visible here.
subtitle_downloader.sub_downloader('The.Expanse.S01E09.720p.HDTV.x264-KILLERS[ettv].mkv')

In [141]:
import json
from itertools import dropwhile

def lookup_next_episode(title, season, episode):
    """Look up, via the tvmaze API, the episode after ``season``/``episode``.

    Args:
        title: show title used for the single-show search.
        season: current season number (int).
        episode: current episode number (int).

    Returns:
        A tag such as ``'s3e1'`` for the first listed episode whose
        (season, number) is greater than the given pair.

    Raises:
        StopIteration: when no later episode is listed.
    """
    # Let requests encode the title into the query string.
    show = requests.get('http://api.tvmaze.com/singlesearch/shows',
                        params={'q': title}).json()
    episodes = requests.get(
        'http://api.tvmaze.com/shows/' + str(show["id"]) + '/episodes').json()

    # Builtin next() instead of the Python-2-only iterator method .next().
    e = next(ep for ep in episodes
             if (ep["season"], ep["number"]) > (season, episode))
    return "s" + str(e["season"]) + "e" + str(e["number"])

# Example: the episode after season 2, episode 25 (recorded output: 's3e1').
lookup_next_episode("x-files", 2, 25)

's3e1'

In [136]:
# Tuples compare element by element, so (season, episode) pairs order
# chronologically. The whole comparison is parenthesised so that it is the
# value being printed on Python 3 as well as on Python 2.
print((1, 24) < (2, 1))

True


In [114]:
import json

# Fetch the single-show search result for "x-files" and print its tvmaze id.
p = requests.get('http://api.tvmaze.com/singlesearch/shows?q=x-files').content
print(json.loads(p)["id"])

def lookup_last_episode(title):
    """Return the air date of the last episode tvmaze lists for ``title``.

    Args:
        title: show title used for the single-show search.

    Returns:
        The ``"airdate"`` field of the final entry of the episode list.

    Raises:
        IndexError: when the episode list is empty.
    """
    # Let requests encode the title into the query string.
    show = requests.get('http://api.tvmaze.com/singlesearch/shows',
                        params={'q': title}).json()
    episodes = requests.get(
        'http://api.tvmaze.com/shows/' + str(show["id"]) + '/episodes').json()
    return episodes[-1]["airdate"]
    
#lookup_last_episode("x-files")

import time
from time import mktime
from datetime import datetime

# Parse the last listed air date and check whether it lies in the future.
d1 = time.strptime(lookup_last_episode("x-files"), "%Y-%m-%d")
d2 = datetime.now()

# struct_time -> timestamp -> datetime, so both sides are datetime objects.
print(d2 < datetime.fromtimestamp(mktime(d1)))

430
True


In [97]:
import json

# Pretty-print the raw episode list of show 430 to inspect its fields.
p = requests.get('http://api.tvmaze.com/shows/430/episodes').content
print(json.dumps(json.loads(p),
                 sort_keys=False,
                 indent=4,
                 separators=(',', ': ')))


[
    {
        "name": "Pilot",
        "airdate": "1993-09-10",
        "url": "http://www.tvmaze.com/episodes/40420/the-x-files-1x01-pilot",
        "season": 1,
        "image": {
            "medium": "http://tvmazecdn.com/uploads/images/medium_landscape/38/95040.jpg",
            "original": "http://tvmazecdn.com/uploads/images/original_untouched/38/95040.jpg"
        },
        "number": 1,
        "summary": "<p><span>Special Agent Dana Scully is partnered with \nSpecial Agent Fox Mulder to validate his work on a special project \ncalled The X-Files. While he is a believer in the paranormal, fueled by a\n lost memory where<span> his \nsister was abducted by aliens; she is a scientist and prefers to look \nfor rational, logical explanations. Their first case takes them to \nOregon, to investigate the unsolved deaths of several high school \nclassmates, which Mulder believes are linked to an alien abduction.</span></span></p>",
        "airtime": "21:00",
        "_links": {
    