# Flight Finder

In [1]:
import smtplib
import pandas as pd
from time import sleep
from datetime import datetime, timedelta
from random import randint
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service

In [2]:
# Location of the ChromeDriver binary handed to Selenium's Service.
chrome_driver_path = 'chromedriver.exe'

# Origin airports to search from, given as IATA codes.
airports = ['RIX', 'TBS']

# URL template; the placeholder is filled with the origin airport code.
kayak = "https://www.kayak.com/explore/{}-anywhere/"

# Search window: trips depart on or after from_date and return by to_date.
from_date = datetime(2022, 10, 1)
to_date = datetime(2022, 10, 6)

# Allowed trip length in days, inclusive at both ends.
duration_min = 4
duration_max = 8

In [3]:
class FlightLoader:
    """Collect destination cards from one explore-style page with Selenium.

    A loader is bound to a single URL. `get_flights()` opens a Chrome session,
    clicks the "show more" and zoom-out controls to reveal destinations,
    closes the session again, and returns what it read as a DataFrame.
    """

    def __init__(self, url, airport=None, load_n=3, range=3):
        """Store the scraping parameters; no browser is started here.

        Args:
            url: Page to open in `get_flights()`.
            airport: Optional origin label. When given, the price column of
                the result is named "Price from <airport>".
            load_n: Number of "show more" clicks per collection pass.
            range: Number of zoom-out passes performed after the initial one.
                (The name shadows the builtin only inside this method; it is
                kept so existing keyword callers continue to work.)
        """
        self.TIME = 0.25  # Seconds to wait after "show more" before reading the cards
        self.url = url
        self.airport = airport
        self.load_n = load_n
        self.range = range
        self.driver = None  # Set while get_flights() is running

    @staticmethod
    def _id_suffix_xpath(suffix):
        """Return an XPath matching any element whose id ends with `suffix`."""
        return f'//*[substring(@id, string-length(@id) - string-length("{suffix}") +1) = "{suffix}"]'

    @staticmethod
    def _parse_price(text):
        """Convert a card's price text to an int.

        The first six characters are dropped (presumably a "from $" prefix --
        confirm against the live page) and thousands separators are removed,
        so "from $1,234" parses as 1234 instead of raising ValueError.
        """
        return int(text[6:].replace(',', ''))

    def increase_time(self):
        """Lengthen the post-load wait by a quarter of a second."""
        self.TIME += 0.25

    def press_button(self, button, n_presses=1):
        """Click the element matching XPath `button` until `n_presses` clicks succeed.

        When locating or clicking fails, Ctrl+R is sent to the page body and
        the click is retried after a short pause.

        NOTE(review): failed attempts are not capped, so this keeps retrying
        for as long as the click keeps failing.
        """
        presses_done = 0
        while presses_done < n_presses:
            try:
                self.driver.find_element(By.XPATH, button).click()
                presses_done += 1
                sleep(0.1)
            except Exception:
                self.driver.find_element(By.TAG_NAME, "Body").send_keys(Keys.CONTROL + 'r')
                sleep(0.5)

    def load_more_destinations(self):
        """Click the "show more" button `load_n` times."""
        self.press_button(self._id_suffix_xpath("-showMoreButton"), n_presses=self.load_n)

    def zoom_out(self):
        """Send Ctrl+Home to the page body, then click the zoom-out control once."""
        self.driver.find_element(By.TAG_NAME, "Body").send_keys(Keys.CONTROL + Keys.HOME)
        self.press_button(self._id_suffix_xpath("-zoomControl-minusButton"))

    def add_flights(self):
        """Load more destinations, wait `TIME` seconds, and return each card's text."""
        self.load_more_destinations()
        sleep(self.TIME)
        return [x.text for x in self.driver.find_elements(By.XPATH, '//*[@class = "_iae _lc _ss"]')]

    def to_pandas(self, flight_list):
        """Turn parsed cards into a de-duplicated DataFrame.

        Args:
            flight_list: Rows of four strings in the order
                city, price, country, date range.

        Returns:
            DataFrame with columns Country, City, Price (or
            "Price from <airport>" when an airport label was given),
            Departure date, Arrival date.

        Raises:
            ValueError: If the rows do not have the expected shape or a price
                cannot be parsed. This is deliberately not suppressed: the
                notebook's scraping loop treats any exception as a signal to
                retry with a longer wait.
        """
        flights = pd.DataFrame(flight_list, columns=['City', 'Price', 'Country', 'Date'])
        flights['Price'] = flights['Price'].map(self._parse_price)
        flights[['Departure date', 'Arrival date']] = flights['Date'].str.split('-', expand=True)
        flights = flights[['Country', 'City', 'Price', 'Departure date', 'Arrival date']]
        if self.airport is not None:
            flights = flights.rename(columns={'Price': f"Price from {self.airport}"})
        return flights.drop_duplicates()

    def get_flights(self):
        """Open the page, collect cards over `range` zoom-out passes, and return them.

        The browser is always shut down before returning or raising, so
        repeated or failed calls do not leave Chrome sessions behind.
        """
        self.driver = webdriver.Chrome(service=Service(chrome_driver_path))
        try:
            self.driver.get(self.url)
            cards = self.add_flights()
            for _ in range(self.range):
                self.zoom_out()
                cards += self.add_flights()
        finally:
            # All card text is already plain strings, so the session can go.
            self.driver.quit()

        return self.to_pandas([card.split('\n') for card in cards if card != ''])

In [4]:
# Every (departure, return) pair that departs on a day in
# [from_date, to_date), lasts between duration_min and duration_max days
# (inclusive), and returns no later than to_date.
dates = [
    (departure, departure + timedelta(days=trip_days))
    for departure in (
        from_date + timedelta(days=offset)
        for offset in range((to_date - from_date).days)
    )
    for trip_days in range(duration_min, duration_max + 1)
    if departure + timedelta(days=trip_days) <= to_date
]
len(dates)

3

In [5]:
# Scrape every (airport, date pair) combination. A failed scrape is retried
# with a longer page-load wait, but only a bounded number of times so that a
# permanent problem (bad driver path, blocked page, changed markup) surfaces
# as an error instead of looping forever.
MAX_ATTEMPTS = 20

all_flights = []
for airport in airports:
    flights = []
    for s_date, e_date in dates:
        url = kayak.format(airport) + s_date.strftime("%Y%m%d") + ',' + e_date.strftime("%Y%m%d")
        loader = FlightLoader(url, airport)
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                flights.append(loader.get_flights())
            except Exception:
                if attempt == MAX_ATTEMPTS:
                    raise  # Out of retries: let the real error show
                loader.increase_time()
            else:
                break
        # Random pause between page loads
        sleep(randint(1, 3))
    # One de-duplicated frame per origin airport, in the order of `airports`
    all_flights.append(pd.concat(flights).drop_duplicates())

In [6]:
# Keep only destinations/dates found for every origin airport and rank them
# by the summed fare. Column names are derived from `airports`, so changing
# the configured airports (or their number) does not require editing this cell.
key_cols = ['Country', 'City', 'Departure date', 'Arrival date']
price_cols = [f"Price from {airport}" for airport in airports]

# The rename only matters for frames that still carry a plain 'Price' column;
# frames whose price column is already labelled per airport pass through as-is.
per_airport = [
    frame.rename({'Price': price_col}, axis=1)
    for frame, price_col in zip(all_flights, price_cols)
]

flight_list = per_airport[0]
for frame in per_airport[1:]:
    flight_list = flight_list.merge(frame, on=key_cols)  # inner join on trip identity

flight_list['Combined price'] = flight_list[price_cols].sum(axis=1)
flight_list = flight_list.sort_values('Combined price')[
    ['Country', 'City', *price_cols, 'Combined price', 'Departure date', 'Arrival date']
]

In [7]:
# Show the merged table; rows are ordered by ascending 'Combined price'.
flight_list

Unnamed: 0,Country,City,Price from RIX,Price from TBS,Combined price,Departure date,Arrival date
28,Belgium,Brussels,85,298,383,"Sat, Oct 1","Thu, Oct 6"
41,Norway,Oslo,57,343,400,"Sun, Oct 2","Thu, Oct 6"
30,United Kingdom,London,41,360,401,"Sat, Oct 1","Thu, Oct 6"
42,Belgium,Brussels,86,316,402,"Sun, Oct 2","Thu, Oct 6"
26,Norway,Oslo,61,348,409,"Sat, Oct 1","Thu, Oct 6"
0,Sweden,Stockholm,29,395,424,"Sat, Oct 1","Wed, Oct 5"
25,Poland,Warsaw,33,402,435,"Sat, Oct 1","Thu, Oct 6"
27,Denmark,Copenhagen,83,355,438,"Sat, Oct 1","Thu, Oct 6"
6,Denmark,Copenhagen,78,362,440,"Sat, Oct 1","Wed, Oct 5"
35,France,Paris,97,344,441,"Sat, Oct 1","Thu, Oct 6"
