# ✈️ Flight Price Tracker

A web scraping tool built with **Selenium** and **BeautifulSoup** that tracks round-trip flight prices, saves results to a CSV, and sends an email notification if prices fall below a defined threshold.

## ✅ Features
- Scrapes round-trip flight details: Airline, From, To, Departure, Arrival, Return details, and Price.
- Saves flight data in `data/flights_output.csv`.
- Sends email notification if the price falls below a threshold.

---


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time 
import pandas as pd

import os
from dotenv import load_dotenv
import smtplib
from email.mime.text import MIMEText


In [2]:
options = Options()
#options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--start-maximized")
options.add_argument("--disable-extensions")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")


service = Service()

driver = webdriver.Chrome(service=service, options=options)
driver.get("https://www.kayak.com/flights")
time.sleep(5)
print(driver.title)


Cheap Flights, Airline Tickets & Airfare Deals | KAYAK


In [3]:
departure_city = "New York"
destination_city = "London"
departure_date = "April 4, 2025"
return_date = "April 5, 2025"

# Fill departure city

In [4]:

departure_input = driver.find_element(By.XPATH, "//input[@aria-label='Flight origin input']")
departure_input

<selenium.webdriver.remote.webelement.WebElement (session="dafed1f2d9b9885af2db81240d7b5221", element="f.9725BBF047FD7C5AB898D95769BBD3D6.d.694925A91E4DC6BD11484F9DE45BEA5F.e.19")>

In [5]:
departure_input.click()
time.sleep(2)

In [6]:
departure_input.send_keys(Keys.CONTROL + "a")  # select all text
departure_input.send_keys(Keys.BACKSPACE)
departure_input.send_keys(Keys.BACKSPACE)
departure_input.send_keys(Keys.BACKSPACE)

In [7]:
departure_input.send_keys(departure_city)
time.sleep(2)

In [8]:
departure_input.send_keys(Keys.ENTER)

# Fill destination city

In [9]:
destination_input = driver.find_element(By.XPATH, "//input[@aria-label='Flight destination input']")
destination_input.click()
time.sleep(2)
destination_input.send_keys(Keys.CONTROL + "a")
destination_input.send_keys(Keys.BACKSPACE)
destination_input.send_keys(Keys.BACKSPACE)
destination_input.send_keys(Keys.BACKSPACE)
destination_input.send_keys(destination_city)
time.sleep(2)
destination_input.send_keys(Keys.ENTER)
time.sleep(5)

In [10]:
departure_date_xpath = f"//div[@role='button' and contains(@aria-label, '{departure_date}')]"

departure_date_element = driver.find_element(By.XPATH, departure_date_xpath)
departure_date_element.click()

In [None]:
return_date_xpath = f"//div[@role='button' and contains(@aria-label, '{return_date}')]"

return_date_element = driver.find_element(By.XPATH, return_date_xpath)
return_date_element.click()
time.sleep(2)

In [12]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

search_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Search']"))
)
search_button.click()
time.sleep(10)

Switching to the new tab, if redirected

In [13]:
# Store the current window handle
main_window = driver.current_window_handle

# After clicking search button, wait a second or two for new tab to open
time.sleep(2)

# Get all open windows/tabs
all_windows = driver.window_handles

# Switch to the new tab (the one that is not the main window)
for window in all_windows:
    if window != main_window:
        driver.switch_to.window(window)
        break

In [14]:
flights = driver.find_elements(By.XPATH, "//div[contains(@id,'flight-results-list-wrapper')]")

flights

[<selenium.webdriver.remote.webelement.WebElement (session="dafed1f2d9b9885af2db81240d7b5221", element="f.4640B8A7CAC376C3B3AC3A06D067C1CD.d.B554CC92BCD9D97CCA3A3227453A6B82.e.316")>]

In [15]:
len(flights)

1

## Loading more 5 times
 

In [16]:
flight_combos = driver.find_elements(By.XPATH, "//ol[contains(@class, 'hJSA-list')]")
len(flight_combos)

16

In [17]:
load_more_clicks = 5
for i in range(load_more_clicks):
    try:
        load_more = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//div[contains(text(),'Show more results')]"))
        )
        driver.execute_script("arguments[0].scrollIntoView();", load_more)
        time.sleep(1)
        load_more.click()
        print(f"Clicked Load More {i+1} times")
        time.sleep(5)  # Allow time for new results to load
    except:
        print("No more 'Show more results' button found.")
        break

Clicked Load More 1 times
Clicked Load More 2 times
Clicked Load More 3 times
No more 'Show more results' button found.


In [18]:
flight_results = driver.find_elements(By.XPATH, "//div[contains(@class, 'nrc6-wrapper')]")
len(flight_results)

32

## EXTRACTING DETAILS

In [19]:

for i,combo in enumerate(flight_results):
    
    flights = combo.find_elements(By.XPATH, ".//li[contains(@class,'hJSA-item')]")
    '''
    for i, el in enumerate(flights, start=1):
        html_code = el.get_attribute('outerHTML')
        soup = BeautifulSoup(html_code, 'html.parser')
        pretty_html = soup.prettify()
        print(f"\nElement {i}:\n{pretty_html}\n{'='*40}")
    '''
    print(i)
    price=combo.find_elements(By.XPATH, ".//div[contains(@class,'price-section')]//div[contains(@class, 'price-text')]")[0].text
    print(price)
    for flight in flights:
        # departure & arrival times
        times = flight.find_elements(
            By.XPATH, ".//div[contains(@class,'VY2U')]//span[contains(text(), 'am') or contains(text(), 'pm')]"
        )
        dep_time = times[0].text.strip().split('+')[0].strip()
        arr_time = times[1].text.strip().split('+')[0].strip()
        print(dep_time,arr_time)
        # airline
       
        airline = flight.find_element(
            By.XPATH, ".//div[contains(@class,'c_cgF') and not(.//span)]"
        ).text.strip()

        # duration
        duration = flight.find_element(
            By.XPATH, ".//div[contains(text(),'h') and contains(text(),'m')]"
        ).text.strip()

        # origin and destination airports
        airports = flight.find_elements(
            By.XPATH, ".//div[contains(@class,'EFvI')]//div[contains(@class,'c_cgF c_cgF-mod-variant-default')]"
        )
        origin = airports[0].get_attribute("title")
        destination = airports[1].get_attribute("title")
        
        '''for i, el in enumerate(airports, start=1):
            html_code = el.get_attribute('outerHTML')
            soup = BeautifulSoup(html_code, 'html.parser')
            pretty_html = soup.prettify()
            print(f"\nElement {i}:\n{pretty_html}\n{'='*40}")'''

        print(f"Departure: {dep_time}, Arrival: {arr_time}, Airline: {airline}, Duration: {duration}")
        print(f"From: {origin}  -->  To: {destination}\n")
       

       

0
$1,099
9:45 am 9:40 pm
Departure: 9:45 am, Arrival: 9:40 pm, Airline: British Airways, Duration: 6h 55m
From: New York John F Kennedy Intl  -->  To: London Heathrow

7:05 pm 10:00 pm
Departure: 7:05 pm, Arrival: 10:00 pm, Airline: British Airways, Duration: 7h 55m
From: London Heathrow  -->  To: New York John F Kennedy Intl

1
$930
8:10 am 8:00 pm
Departure: 8:10 am, Arrival: 8:00 pm, Airline: Delta, Duration: 6h 50m
From: New York John F Kennedy Intl  -->  To: London Heathrow

8:10 pm 11:00 pm
Departure: 8:10 pm, Arrival: 11:00 pm, Airline: Delta, Duration: 7h 50m
From: London Heathrow  -->  To: New York John F Kennedy Intl

2
$487
6:20 pm 6:20 am
Departure: 6:20 pm, Arrival: 6:20 am, Airline: Norse Atlantic UK, Duration: 7h 00m
From: New York John F Kennedy Intl  -->  To: London Gatwick

5:00 pm 1:15 pm
Departure: 5:00 pm, Arrival: 1:15 pm, Airline: Scandinavian Airlines, Duration: 25h 15m
From: London Heathrow  -->  To: Newark

3
$930
8:10 am 8:00 pm
Departure: 8:10 am, Arrival: 8

## Modifing web above scrapping code to save results in a csv file

In [20]:

flight_data = []
for i, combo in enumerate(flight_results, start=1):
    flights = combo.find_elements(By.XPATH, ".//li[contains(@class,'hJSA-item')]")
    price = combo.find_elements(By.XPATH, ".//div[contains(@class,'price-section')]//div[contains(@class, 'price-text')]")[0].text

    if len(flights) == 2:
        # Outbound
        flight_out = flights[0]
        out_times = flight_out.find_elements(
            By.XPATH, ".//div[contains(@class,'VY2U')]//span[contains(text(), 'am') or contains(text(), 'pm')]"
        )
        out_dep_time = out_times[0].text.strip().split('+')[0].strip()
        out_arr_time = out_times[1].text.strip().split('+')[0].strip()
        out_airline = flight_out.find_element(
            By.XPATH, ".//div[contains(@class,'c_cgF') and not(.//span)]"
        ).text.strip()
        out_airports = flight_out.find_elements(
            By.XPATH, ".//div[contains(@class,'EFvI')]//div[contains(@class,'c_cgF c_cgF-mod-variant-default')]"
        )
        out_origin = out_airports[0].get_attribute("title")
        out_destination = out_airports[1].get_attribute("title")

        # Return
        flight_ret = flights[1]
        ret_times = flight_ret.find_elements(
            By.XPATH, ".//div[contains(@class,'VY2U')]//span[contains(text(), 'am') or contains(text(), 'pm')]"
        )
        ret_dep_time = ret_times[0].text.strip().split('+')[0].strip()
        ret_arr_time = ret_times[1].text.strip().split('+')[0].strip()
        ret_airline = flight_ret.find_element(
            By.XPATH, ".//div[contains(@class,'c_cgF') and not(.//span)]"
        ).text.strip()
        ret_airports = flight_ret.find_elements(
            By.XPATH, ".//div[contains(@class,'EFvI')]//div[contains(@class,'c_cgF c_cgF-mod-variant-default')]"
        )
        ret_origin = ret_airports[0].get_attribute("title")
        ret_destination = ret_airports[1].get_attribute("title")

        # Append to data list
        flight_data.append({
            "Index": i,
            "Airline Name": out_airline,
            "From": out_origin,
            "To": out_destination,
            "Departure": out_dep_time,
            "Arrival": out_arr_time,
            "Return Airline Name": ret_airline,
            "Return From": ret_origin,
            "Return To": ret_destination,
            "Return Departure": ret_dep_time,
            "Return Arrival": ret_arr_time,
            "Price": price
        })

In [21]:
flight_data

[{'Index': 1,
  'Airline Name': 'British Airways',
  'From': 'New York John F Kennedy Intl',
  'To': 'London Heathrow',
  'Departure': '9:45 am',
  'Arrival': '9:40 pm',
  'Return Airline Name': 'British Airways',
  'Return From': 'London Heathrow',
  'Return To': 'New York John F Kennedy Intl',
  'Return Departure': '7:05 pm',
  'Return Arrival': '10:00 pm',
  'Price': '$1,099'},
 {'Index': 2,
  'Airline Name': 'Delta',
  'From': 'New York John F Kennedy Intl',
  'To': 'London Heathrow',
  'Departure': '8:10 am',
  'Arrival': '8:00 pm',
  'Return Airline Name': 'Delta',
  'Return From': 'London Heathrow',
  'Return To': 'New York John F Kennedy Intl',
  'Return Departure': '8:10 pm',
  'Return Arrival': '11:00 pm',
  'Price': '$930'},
 {'Index': 3,
  'Airline Name': 'Norse Atlantic UK',
  'From': 'New York John F Kennedy Intl',
  'To': 'London Gatwick',
  'Departure': '6:20 pm',
  'Arrival': '6:20 am',
  'Return Airline Name': 'Scandinavian Airlines',
  'Return From': 'London Heathrow

In [None]:
df = pd.DataFrame(flight_data)
df.to_csv("output/flights_output.csv", index=False)
print("✅ Saved to flights_output.csv!")

✅ Saved to flights_output.csv!


## MAIL tracker

Send you the list of all the flights with THEIR details, whose price is less than your set price, through mail

In [23]:
'''
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

def send_email(subject, body, to_email):
    from_email = "tarunsaxena1000@gmail.com"
    password = ""  # Use app password if using Gmail with 2FA

    msg = MIMEMultipart()
    msg["From"] = from_email
    msg["To"] = to_email
    msg["Subject"] = subject

    msg.attach(MIMEText(body, "plain"))

    try:
        server = smtplib.SMTP("smtp.gmail.com", 587)
        server.starttls()
        server.login(from_email, password)
        server.sendmail(from_email, to_email, msg.as_string())
        server.quit()
        print(f"Notification email sent to {to_email}")
    except Exception as e:
        print(f"Error sending email: {e}")'
'''

'\nimport smtplib\nfrom email.mime.text import MIMEText\nfrom email.mime.multipart import MIMEMultipart\n\ndef send_email(subject, body, to_email):\n    from_email = "tarunsaxena1000@gmail.com"\n    password = ""  # Use app password if using Gmail with 2FA\n\n    msg = MIMEMultipart()\n    msg["From"] = from_email\n    msg["To"] = to_email\n    msg["Subject"] = subject\n\n    msg.attach(MIMEText(body, "plain"))\n\n    try:\n        server = smtplib.SMTP("smtp.gmail.com", 587)\n        server.starttls()\n        server.login(from_email, password)\n        server.sendmail(from_email, to_email, msg.as_string())\n        server.quit()\n        print(f"Notification email sent to {to_email}")\n    except Exception as e:\n        print(f"Error sending email: {e}")\'\n'

In [24]:
'''send_email(
    subject="Flight Price Tracking Completed",
    body="Scraping completed successfully! Check the tracked_prices.csv file for the latest data.",
    to_email="tarunsaxena1000@gmail.co"
)'''

'send_email(\n    subject="Flight Price Tracking Completed",\n    body="Scraping completed successfully! Check the tracked_prices.csv file for the latest data.",\n    to_email="tarunsaxena1000@gmail.co"\n)'

In [25]:
load_dotenv()# This will load the variables from .env into environment variables

#EMAIL_ADDRESS = os.getenv("EMAIL_ADDRESS")
#EMAIL_APP_PASSWORD = os.getenv("EMAIL_APP_PASSWORD")
EMAIL_ADDRESS = "tarunsaxena1000@gmail.com"
EMAIL_APP_PASSWORD = os.getenv("EMAIL_APP_PASSWORD")

def send_email(subject, body, to_email):
    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"] = EMAIL_ADDRESS
    msg["To"] = to_email

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
            smtp.login(EMAIL_ADDRESS, EMAIL_APP_PASSWORD)
            smtp.send_message(msg)
        print("Email sent successfully.")
    except Exception as e:
        print(f"Error sending email: {e}")

def notify_if_price_below_threshold(csv_path, threshold_price, to_email):
    df = pd.read_csv(csv_path)
    cheap_flights = df[df['Price'].str.replace(',', '').str[1:].astype(int) < threshold_price]
    print(f"{len(cheap_flights)} flights found")
    if not cheap_flights.empty:
        email_body = f"🚀 {len(cheap_flights)} Flight deals found with set price less than ${threshold_price}:\n\n {driver.current_url} \n\n"
        i=0
        for index, row in cheap_flights.iterrows():
            i=i+1
            email_body += (f"{i}:\n\n "
                f"Airline Name: {row['Airline Name']} \n| {row['From']} ➡ {row['To']} | \n"
                f"Departure: {row['Departure']}-Arrival: {row['Arrival']}\n\n"
                f"Return:\n Airline Name: {row['Return Airline Name']} | {row['Return From']}(return) ➡ {row['Return To']}(return) | \n"
                f"Departure: {row['Return Departure']}- Arrival: {row['Return Arrival']}\n\n"
                f"💰 Price: {row['Price']}\n"
                f"{'-'*50}\n"
            )
        send_email(
            subject="🔥 Cheap Flight Alert",
            body=email_body,
            to_email=to_email
        )
    else:
        print(f"No flights found under {threshold_price}.")

In [28]:
#from utils.notifier import notify_if_price_below_threshold

# Example usage
notify_if_price_below_threshold(
    csv_path="output/flights_output.csv",
    threshold_price=1000,  # change this threshold as per your need
    to_email="tarunsaxena1000@gmail.com"
)

11 flights found


InvalidSessionIdException: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: chrome=134.0.6998.166)
Stacktrace:
0   chromedriver                        0x00000001033ab6c8 cxxbridge1$str$ptr + 2791212
1   chromedriver                        0x00000001033a3c9c cxxbridge1$str$ptr + 2759936
2   chromedriver                        0x0000000102ef5e30 cxxbridge1$string$len + 92928
3   chromedriver                        0x0000000102edef28 chromedriver + 192296
4   chromedriver                        0x0000000102f02800 cxxbridge1$string$len + 144592
5   chromedriver                        0x0000000102f65558 cxxbridge1$string$len + 549416
6   chromedriver                        0x0000000102f7e0f8 cxxbridge1$string$len + 650696
7   chromedriver                        0x0000000102f312fc cxxbridge1$string$len + 335820
8   chromedriver                        0x00000001033706c4 cxxbridge1$str$ptr + 2549544
9   chromedriver                        0x0000000103373988 cxxbridge1$str$ptr + 2562540
10  chromedriver                        0x000000010335071c cxxbridge1$str$ptr + 2418560
11  chromedriver                        0x00000001033741e8 cxxbridge1$str$ptr + 2564684
12  chromedriver                        0x0000000103341750 cxxbridge1$str$ptr + 2357172
13  chromedriver                        0x0000000103393f58 cxxbridge1$str$ptr + 2695100
14  chromedriver                        0x00000001033940e0 cxxbridge1$str$ptr + 2695492
15  chromedriver                        0x00000001033a3910 cxxbridge1$str$ptr + 2759028
16  libsystem_pthread.dylib             0x000000018cd432e4 _pthread_start + 136
17  libsystem_pthread.dylib             0x000000018cd3e0fc thread_start + 8
