In [1]:
import datetime
import time

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

In [2]:
import matplotlib.pyplot as plt

In [3]:
def create_request_url(start_date, end_date, origin='BOS', destination='HKG'):
    """Build a justfly.com round-trip flight search URL.

    The search is for one adult in Economy. By default the route is
    BOS -> HKG outbound and HKG -> BOS on the return leg.

    Parameters
    ----------
    start_date : sequence of three items
        (year, month, day) of the outbound leg; the items are joined as
        ``year-month-day`` in the query string.
    end_date : sequence of three items
        (year, month, day) of the return leg, formatted the same way.
    origin : str, optional
        Airport code used as ``seg0_from`` and ``seg1_to``.
    destination : str, optional
        Airport code used as ``seg0_to`` and ``seg1_from``.

    Returns
    -------
    str
        The complete search URL.
    """
    base_url = 'https://www.justfly.com/flight/search?'
    flight_options = 'num_adults=1&num_children=0&num_infants=0&num_infants_lap=0&seat_class=Economy'
    # Outbound leg: origin -> destination on start_date.
    seg0 = ('&seg0_date={}-{}-{}'.format(*start_date)
            + '&seg0_from={}&seg0_to={}'.format(origin, destination))
    # Return leg: the same two airports, swapped, on end_date.
    seg1 = ('&seg1_date={}-{}-{}'.format(*end_date)
            + '&seg1_from={}&seg1_to={}'.format(destination, origin))
    flight_type = '&type=roundtrip'
    request_url = base_url + flight_options + seg0 + seg1 + flight_type
    return request_url

In [7]:
# Single trial search. Dates are (year, month, day) string tuples, which
# create_request_url() joins as year-month-day in the query string.
start_date = ('2019', '04', '01')
end_date = ('2019', '04', '08')

url = create_request_url(start_date, end_date)

# Start up a Firefox session and load the search URL.
browser = webdriver.Firefox()
browser.get(url)
# Fixed 25-second pause before the next cell reads the page; presumably this
# gives the search results time to render -- TODO confirm it is long enough.
time.sleep(25)

In [14]:
# Collect every element carrying the 'total-price' class and parse its text
# into a float, after stripping thousands separators and the dollar sign.
# find_elements(By.CLASS_NAME, ...) is used because the older
# find_elements_by_class_name helper was removed in Selenium 4.3.
elements = browser.find_elements(By.CLASS_NAME, 'total-price')
prices = np.asarray([float(elm.text.replace(',', '').replace('$', '')) for elm in elements])

In [16]:
# Cheapest fare among the prices parsed from the page.
prices.min()

671.51

In [5]:
def date2tuple(datetime_obj):
    """Return [year, month, day] as zero-padded strings for a date-like object.

    Despite the name, the result is a list, e.g. ['2019', '04', '01'].
    """
    return [datetime_obj.strftime(directive) for directive in ("%Y", "%m", "%d")]

def gen_daterange(start_day, days):
    """Return a list of dates, one per offset in `days`, counted from `start_day`.

    Each entry is `start_day` shifted by that many days; the order of `days`
    is preserved.
    """
    shifted = []
    for offset in days:
        shifted.append(start_day + datetime.timedelta(days=offset))
    return shifted

def add_week(start_date):
    """Return the date exactly one week after `start_date`.

    `start_date` is a datetime object; the result has the same type.
    """
    return start_date + datetime.timedelta(weeks=1)

In [17]:
# Start up a Firefox session for the multi-date price scan.
# NOTE(review): an earlier cell also binds `browser` to a Firefox session and
# nothing visible here calls quit() on it before this rebinding -- confirm the
# first window is not left running.
browser = webdriver.Firefox()

In [18]:
# Scan departures starting February 1, 2019: one search per consecutive day
# for `advance_search_days` days.
start_dt = datetime.date(year=2019, month=2, day=1)
advance_search_days = 200
search_range = gen_daterange(start_dt, days=range(advance_search_days))

# Collect the cheapest price per departure date in a list
# (NaN when no price could be read for that date).
min_prices = []
for search_date in search_range:
    # Each search is a one-week round trip: return = departure + 7 days.
    vacation_start = search_date
    vacation_end = add_week(vacation_start)

    url = create_request_url(date2tuple(vacation_start), date2tuple(vacation_end))
    browser.get(url)
    # Fixed pause before reading the page; presumably lets the results
    # finish rendering -- TODO confirm 30 seconds is needed/sufficient.
    time.sleep(30)
    try:
        # find_elements(By.CLASS_NAME, ...) replaces find_elements_by_class_name,
        # which was removed in Selenium 4.3. With the old call the resulting
        # AttributeError was swallowed below and every date became NaN.
        elements = browser.find_elements(By.CLASS_NAME, 'total-price')
        prices = np.asarray([float(elm.text.replace(',', '').replace('$', '')) for elm in elements])
        min_prices.append(np.min(prices))
    except Exception:
        # Best effort: an empty result set, unparsable price text, or a
        # browser error records NaN for this date and the scan continues.
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still
        # stops the long-running loop.
        min_prices.append(np.nan)

    print(vacation_start, 'flight price: {}'.format(min_prices[-1]))

2019-02-01 flight price: 688.51
2019-02-02 flight price: 747.74
2019-02-03 flight price: 679.74
2019-02-04 flight price: 641.74
2019-02-05 flight price: 641.74
2019-02-06 flight price: 646.67
2019-02-07 flight price: 671.51
2019-02-08 flight price: 679.74
2019-02-09 flight price: 735.95
2019-02-10 flight price: 688.51
2019-02-11 flight price: 671.51
2019-02-12 flight price: 688.51
2019-02-13 flight price: 805.51
2019-02-14 flight price: 837.51
2019-02-15 flight price: 996.51
2019-02-16 flight price: 824.51
2019-02-17 flight price: 688.51
2019-02-18 flight price: 671.51
2019-02-19 flight price: 671.51
2019-02-20 flight price: 671.51
2019-02-21 flight price: 676.03
2019-02-22 flight price: 693.06
2019-02-23 flight price: 719.95
2019-02-24 flight price: 759.51
2019-02-25 flight price: 671.51
2019-02-26 flight price: 671.51
2019-02-27 flight price: 671.51
2019-02-28 flight price: 671.51
2019-03-01 flight price: 688.51
2019-03-02 flight price: 764.95
2019-03-03 flight price: 688.51
2019-03-