# Building a Basic Web Request

Using Kayak as a starting point as the URL seems buildable


  ------- BASE_URL ----------- {ORGIN-CODE}-{DEST-CODE}/{YYYY-MM-DD}?{QUERY}sort=bestflight_a 
  
https://www.kayak.co.uk/flights/LHR-DEL/2024-11-16?sort=bestflight_a

In [1]:
# https://www.kayak.co.uk/flights/LHR-DEL/2024-11-16?sort=price_a

# query parameters:
# stops: &fs=stops=0 = DIRECT, &fs=stops=-2 = DIRECT OR 1 STOP, &fs=stops=-0 = NOT DIRECT

# baggage: &fs=cfc=1 = a CABIN BAG   &fs=bfc=1 = HOLD BAG    &fs=cfc=1;bfc=1 = BOTH BAGS

# duration: &fs=legdur=-1200 -DURATION IN MINUTES NO LONGER THAN

# take off times: &fs=takeoff=1301,1839 TAKE OFF TIME FROM, TAKE OFF TIME TO

# sorting: sort=price_a = CHEAPEST    sort=bestflight_a = BEST FLIGHTS      sort=duration_a = DURATION



# https://www.kayak.co.uk/flights/LHR-BKK/2024-10-19/?sort=price_a&fs=cfc=1;takeoff=1200,1600;stops=-2

# looks like we need to add ; between the filters and the first one needs the &fs= then the rest ;filter= 

In [139]:
import datetime

today = datetime.date.today()

tomorrow = str(today + datetime.timedelta(days=1))
week = str(today + datetime.timedelta(days=7))
month = str(today + datetime.timedelta(days=31))

In [184]:
def get_stops(stops) -> str:
    if stops == 'one':
        return 'stops=-2'
    elif stops == 'direct':
        return 'stops=0'
    elif stops == 'any':
        return ''
    else: 
        raise ValueError("stops should be either: any, one, direct")
    

def get_baggage(cabin_bag=False, hold_bag=False) -> str:
    if cabin_bag and hold_bag:
        return 'cfc=1;bfc=1'
    elif cabin_bag:
        return 'cfc=1'
    elif hold_bag:
        return 'bfc=1'
    else: 
        return ''


def get_max_duration(max_duration=None) -> str:
    if max_duration:
        return f'legdur=-{max_duration*60}'
    else:
        return ''
    

def get_take_off_time(take_off) -> str:
    if take_off == None: 
        return ''
    elif len(take_off) == 2: 
        return f'takeoff={take_off[0]},{take_off[1]}'
    else:
        raise ValueError("Take off duration should be list of take off times: [0700, 2300]")
    

# Loop through any filters present
# Apply each function to its corresponding parameters
def append_search_filters(function_map):
    result = ""
    for func, params in function_map.items():
        if params is None:
            # Skip the function if params are None
            continue
        elif isinstance(params, tuple):
            # Ensure params is a tuple before unpacking
            output = func(*params)
        else:
            # Handle single parameter case
            output = func(params)

        # Append non-empty outputs
        if output != '':
            result += f'{output};'
    
    return result

In [187]:
BASE_URL = 'https://www.kayak.co.uk/flights'
SORTING = 'sort=price_a'
origin_code = 'MAN'
dest_code = 'GVA'

Example of a restrictive search

In [181]:
stops = 'direct' # or: one, direct
cabin_bag = False # or: True
hold_bag = False # or: True
max_duration = 3 # maximum number of hours
take_off = ['0600', '1300']

As this is to be used to find cheap flights more likely the search should look like

In [169]:
stops = 'any' # or: one, direct
cabin_bag = False # or: True
hold_bag = False # or: True
max_duration = None  # maximum number of hours
take_off = None

In [182]:
# Dictionary of functions and their corresponding parameters
# Needs to be in this order for KAYAK
# TODO: Ensure the order is correct. Without the correct order it doesnt work
function_map = {
    get_baggage: (cabin_bag, hold_bag),
    get_take_off_time: take_off,
    get_stops: stops,
    get_max_duration: max_duration,
}

filters = append_search_filters(function_map)

In [188]:
url = f"{BASE_URL}/{origin_code}-{dest_code}/{tomorrow}/?{SORTING}&fs={filters}"

In [189]:
url

'https://www.kayak.co.uk/flights/MAN-GVA/2024-10-19/?sort=price_a&fs=takeoff=0600,1300;stops=0;legdur=-180;'