In [3]:
import sys
import os
import urllib
import requests
import time
import datetime
import threading
import logging

In [4]:
def add_handler(logger, fh):
    logger.addHandler(fh)
    fh_fmt = logging.Formatter("%(message)s")
    fh.setFormatter(fh_fmt)


logger = logging.getLogger('SkyScanner')
add_handler(logger, logging.FileHandler('skyscranner.log', 'w'))
add_handler(logger, logging.StreamHandler())
logger.setLevel(logging.INFO)

In [5]:
SKYSCANNER_API_KEY = os.environ.get('SKYSCANNER_API_KEY', 'prtl6749387986743898559646983194')

In [6]:
def do_request(request_type, url, data):
    def get_method(request_type):
        return getattr(requests, request_type)
    
    headers = {'Cache-control': 'no-cache, no-store, must-revalidate'}
    
    if request_type != 'get':
        data['apikey'] = SKYSCANNER_API_KEY
        headers['content-type'] = 'application/x-www-form-urlencoded'
    else:
        url = add_apikey(url)
    
    logger.info("\t[REQ] %s" % (url))
    
    payload = urllib.urlencode(data, doseq=True)
    r = get_method(request_type)(url, data=payload, headers=headers)
    return r

def add_apikey(url):
    return url + '?apikey=' + SKYSCANNER_API_KEY

def parse_request(r):
    if r.status_code == 201:
        return do_request('get', r.headers['Location'], {})
    
    return r

In [7]:
def fetch_data(origin, destination, when, tries=3, fail_wait=60):
    current_try = 0
    while current_try < tries:
        data = {
            'cabinclass': 'Economy',
            'country': 'ES',
            'currency': 'EUR',
            'locale': 'en-GB',
            'locationSchema': 'iata',
            'originplace': origin,
            'destinationplace': destination,
            'outbounddate': when,
            'adults': '1',
            'children': '0',
            'infants': '0'
        }
            
        r = do_request('post', 'http://partners.api.skyscanner.net/apiservices/pricing/v1.0', data)
        
        r_poll = None
        pending_session = 0
        while (r_poll == None or r_poll.status_code == 304) and pending_session < 5:
            logger.info("\t[WAIT] Waiting %ds for session to be created" % (pending_session * 2 + 1))
            time.sleep(pending_session * 2 + 1) # Let session be created
            r_poll = parse_request(r)
            pending_session += 1
                    
        if r_poll.status_code == 200:
            return r_poll.text.encode('utf-8')
                
        current_try += 1
        logger.error("\t[FAIL] Reattempting in %d seconds (reason: %d)" % (fail_wait, r_poll.status_code))
        time.sleep(fail_wait)
        
    return None

In [8]:
def get_date(fmt, ts=None):
    ts = int(time.time()) if ts is None else int(ts)
    date = datetime.datetime.fromtimestamp(ts).strftime(fmt)
    return date

def fetch_and_save(origin, destination, when, tries=3, fail_wait=60):
    logger.info("[DO] %s to %s on %s" % (origin, destination, when))
    
    date = get_date('%Y-%m-%d-%H')
    filename = '%s_%s_%s_%s.json' % (origin, destination, date, when)
    filename = os.path.join('flights-data', filename)
    
    try:
        os.makedirs('flights-data')
    except:
        pass
    
    data = fetch_data(origin, destination, when, tries, fail_wait)
    if data is None:
        logger.error("[ERROR] %s to %s on %s" % (origin, destination, when))
        
    with open(filename, 'w') as fp:
        fp.write(data)
        
    logger.info("[SAVED] %s to %s on %s" % (origin, destination, when))
    
    
def wait_for(date, fmt, callback):
    def worker():
        self = threading.currentThread()
        logger.info('\n[WAIT] %s' % date)
        while get_date(fmt) != date:
            if self.stop:
                logger.error('[STOP] Cancel thread')
                return
            
            logger.info('+1')
            time.sleep(Fetcher.HOUR)
            
        callback()
    
    for thread in wait_for.threads:
        thread.stop = True
    
    t = threading.Thread(target=worker)
    t.stop = False
    t.start()
    wait_for.threads.append(t)
    
wait_for.threads = []

In [9]:
class Fetcher(object):
    HOUR = 60 * 60
    DAY = HOUR * 24
        
    def __init__(self, look_ahead_days, itineraries, start_time=None):
        start_time = int(time.time()) if start_time is None else int(start_time)
    
        self.LOOK_AHEAD = look_ahead_days * Fetcher.DAY
        self.TARGET_TIME = start_time + self.LOOK_AHEAD

        self.now = start_time
        self.start = self.now + self.LOOK_AHEAD
        self.target = self.TARGET_TIME
        self.itineraries = itineraries
        self.stop = False
        
        # Fix it up in case start_time is not now
        while self.now + self.LOOK_AHEAD + Fetcher.DAY - self.target >= self.LOOK_AHEAD:
            self.target += Fetcher.DAY
            
    def fetch(self, day_moment):
        if self.stop:
            return
        
        self.now = int(time.time())
        logger.info("-------------------------------------")
        logger.info("[UPDATE] Fetching day %d - %d" % ((self.now + self.LOOK_AHEAD + Fetcher.DAY - self.start) / Fetcher.DAY,
                                                        self.now))

        for ts in range(self.target, self.now + self.LOOK_AHEAD + Fetcher.DAY, Fetcher.DAY):
            for origin, destination in self.itineraries:
                fetch_and_save(origin, destination, get_date('%Y-%m-%d', ts))

        if self.now + self.LOOK_AHEAD + Fetcher.DAY - self.target >= self.LOOK_AHEAD:
            self.target += Fetcher.DAY

        # Next day
        if day_moment == 0: # 08 --> 12
            self.now += 4 * Fetcher.HOUR
        elif day_moment == 1: # 12 --> 16
            self.now += 4 * Fetcher.HOUR
        elif day_moment == 2: # 16 --> 21
            self.now += 5 * Fetcher.HOUR
        elif day_moment == 3: # 21 --> 08
            self.now += 11 * Fetcher.HOUR
            
        fmt = '%Y-%m-%d-%H'
        wait_for(get_date(fmt, self.now), fmt, lambda: self.fetch((day_moment + 1) % 4))

In [12]:
destinations = (
    'LCY', 'LHR', 'LGW', 'LTN', 'SEN', 'STN', # LONDRES
    'CDG', 'ORY', 'BVA', # Paris
    'MAD', # Madrid
    'ATH', # Atenas
    'FCO', 'CIA', # Atenas
    'BRU', 'CRL', # Bruselas
    'BER', 'SXF', # Berlin
    'DME', 'SVO', # Moscu
    'SFO', # San Francisco
    'JFK', # Nueva York
    'PEK', # Pekin
    'EZE', # Buenos Aires
    'GIG', # Rio de Janeiro
    'DEL', # Delphi
)

itineraries = (('BCN', dest) for dest in destinations)
fetcher = Fetcher(90, itineraries)

wait_for('2017-02-23-08', '%Y-%m-%d-%H', lambda: fetcher.fetch(0))


[WAIT] 2017-02-23-01
-------------------------------------
[UPDATE] Fetching day 1 - 1487808078
[DO] BCN to LCY on 2017-06-23
	[REQ] http://partners.api.skyscanner.net/apiservices/pricing/v1.0
	[WAIT] Waiting 1s for session to be created
	[REQ] http://partners.api.skyscanner.net/apiservices/pricing/uk1/v1.0/bdf10cd7573e4bd089deb4d5d531f25e_rrsqbjcb_06a13f0a788e803fcc56e78802891a26?apikey=prtl6749387986743898559646983194
[SAVED] BCN to LCY on 2017-06-23
[DO] BCN to LHR on 2017-06-23
	[REQ] http://partners.api.skyscanner.net/apiservices/pricing/v1.0
	[WAIT] Waiting 1s for session to be created
	[REQ] http://partners.api.skyscanner.net/apiservices/pricing/uk1/v1.0/a99cebadee1b4f9abbfa04e6c8899c4d_rrsqbjcb_06a13f0a788e803fcc56e78802891a26?apikey=prtl6749387986743898559646983194
[SAVED] BCN to LHR on 2017-06-23
[DO] BCN to LGW on 2017-06-23
	[REQ] http://partners.api.skyscanner.net/apiservices/pricing/v1.0
	[WAIT] Waiting 1s for session to be created
	[FAIL] Reattempting in 60 seconds (rea