# Flight Price Prediction

Anyone who has booked a flight ticket knows how unpredictably the prices vary. The cheapest 
available ticket on a given flight becomes more or less expensive over time. Airlines usually do this 
to maximize revenue, based on:
1. Time of purchase patterns (making sure last-minute purchases are expensive)
2. Keeping the flight as full as they want it (raising prices on a flight which is filling up in order 
to reduce sales and hold back inventory for those expensive last-minute 
purchases)
So, in this project we collect flight-fare data together with other features and 
use it to build a model that predicts flight fares.

# Import Libraries:

In [1]:
import pandas as pd
import numpy as np
import selenium


# Importing selenium webdriver
from selenium import webdriver 

# Importing the exceptions which need to be handled
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException

# Importing request
import requests

# Import regex
import re
import time

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Start a Chrome browser session driven by the 'chromedriver.exe' executable.
# NOTE(review): passing the driver path positionally is the older Selenium
# calling convention — confirm it matches the installed Selenium version.
driver = webdriver.Chrome('chromedriver.exe')

# Maximize the window
driver.maximize_window()

# Fixed two-second pause before the following cells drive the browser.
time.sleep(2)

# Scrape data from makemytrip.com

In [3]:
# Open the MakeMyTrip homepage.
# The address is kept free of surrounding whitespace so the driver is handed
# exactly the URL that is meant to be loaded.
url = 'https://www.makemytrip.com/'
driver.get(url)

# Fixed two-second pause to give the page time to load before it is queried.
time.sleep(2)

# Ignore Popups:

In [7]:
# Close the popup button
# Best-effort dismissal: find_element_by_xpath raises NoSuchElementException
# when nothing matches the XPath, in which case there is nothing to close and
# only a note is printed.
try:
    X_button = driver.find_element_by_xpath('//div[@id="root"]/div/div[2]/div/main/div[9]/span') 
    X_button.click()
except NoSuchElementException : 
    print('Ignore Popups')

In [8]:
# Ignore login page
# Click the header entry the author treats as the login button, if it can be
# located; when no element matches, report that and carry on.
# NOTE(review): this XPath anchors on li[@id="SW"], while the popup cell that
# follows anchors on div[@id="SW"]. The recorded run printed "No Login page",
# so confirm which tag actually carries that id.
try:
    login_button = driver.find_element_by_xpath('//li[@id="SW"]/div[1]/div[1]/ul/li[4]')
    login_button.click()
except NoSuchElementException : 
    print("No Login page")

No Login page


In [9]:
# Ignore the popup:
# Best-effort click on a label nested under the fourth header list item.
# The name `login_button` is reused here for that label element.
# A missing element is not an error: a note is printed and execution continues.
try:
    login_button = driver.find_element_by_xpath('//div[@id="SW"]/div[1]/div[1]/ul/li[4]/div[3]/div/div[1]/div/p/label')
    login_button.click()
except NoSuchElementException : 
    print("Ignore Popups")


Ignore Popups


# Entering the data:

In [10]:
# Entering location and date of journey:
# Type the origin city into the first field of the search form. The field is
# located by an absolute, position-based XPath, so it depends on the page
# layout staying unchanged.
search_boarding_at = driver.find_element_by_xpath('/html/body/div[1]/div/div[2]/div/div/div[2]/div[1]/div[1]/label')
search_boarding_at.send_keys("New Delhi")

# Type the destination city into the second field of the same form.
search_destination = driver.find_element_by_xpath('/html/body/div[1]/div/div[2]/div/div/div[2]/div[1]/div[2]/label')
search_destination.send_keys("Bangalore")

# Click the third field of the form.
# NOTE(review): presumably this opens the date picker used by the next cell — confirm.
search_date = driver.find_element_by_xpath('//div[@id="root"]/div/div[2]/div/div/div[2]/div[1]/div[3]/label/span')
search_date.click()


In [11]:
# Set journey date and travel class:
# Click one specific day cell, addressed purely by its position inside the
# calendar markup.
# NOTE(review): which calendar date this selects depends on the month being
# displayed when the cell runs — confirm it is the intended journey date.
set_journey_date = driver.find_element_by_xpath('//div[@id="root"]/div/div[2]/div/div/div[2]/div[1]/div[3]/div[1]/div/div/div/div[2]/div/div[2]/div[1]/div[3]/div[2]/div[3]/div/p[1]')
set_journey_date.click()

# Click the fifth field of the search form (travellers and class).
set_travell_class = driver.find_element_by_xpath('//div[@id="root"]/div/div[2]/div/div/div[2]/div[1]/div[5]/label/p[2]')
set_travell_class.click()

# Click the first entry of the second option list in the panel that opened.
# NOTE(review): presumably this is the travel-class choice — confirm which class it maps to.
select_class = driver.find_element_by_xpath('//div[@id="root"]/div/div[2]/div/div/div[2]/div[1]/div[5]/div[2]/div[1]/ul[2]/li[1]')
select_class.click()


In [12]:
# Passengers seat allocation:
# Click the second entry of the first option list in the travellers panel.
# NOTE(review): presumably this sets the passenger count — confirm the value it selects.
seat = driver.find_element_by_xpath('//div[@id="root"]/div/div[2]/div/div/div[2]/div[1]/div[5]/div[2]/div[1]/ul[1]/li[2]')
seat.click()

# Click the button in the same panel to apply the selection.
apply_key = driver.find_element_by_xpath('//div[@id="root"]/div/div[2]/div/div/div[2]/div[1]/div[5]/div[2]/div[2]/button')
apply_key.click()

In [13]:
# Click search button:
# Submit the search form via the link located by absolute XPath.
search_button = driver.find_element_by_xpath('/html/body/div/div/div[2]/div/div/div[2]/p/a')
search_button.click()

# Fixed four-second pause to give the results page time to load before the
# following cells query it.
time.sleep(4)


In [14]:
# Ignore the popup:
# Best-effort dismissal of an overlay on the results page. When no element
# matches the XPath, NoSuchElementException is caught and a note is printed
# instead of aborting the run.
try:
    popup = driver.find_element_by_xpath('//div[@id="root"]/div/div[2]/div[2]/div[2]/div/span')
    popup.click()
except NoSuchElementException : 
    print("Ignore popup ")

# Making lists of the scraped data:

In [33]:
# Collect one list per output column from the MakeMyTrip results page.
# Every column comes from its own XPath query, so the lists line up row by
# row only when each result card contains every field; the lists are built
# independently and nothing here enforces equal lengths.


def _text_or_placeholder(element):
    """Return the element's visible text, or "--" when there is none.

    Args:
        element: An object exposing a ``text`` attribute (here, the elements
            returned by ``driver.find_elements_by_xpath``).

    Returns:
        ``element.text`` when it is non-empty, otherwise the string "--".
    """
    # Test truthiness rather than `is None`: an element without visible text
    # yields an empty string, which an identity check against None never
    # catches, so the placeholder would otherwise never be stored.
    return element.text or "--"


# Scrape the flight name:
names = driver.find_elements_by_xpath('//p[@class = "boldFont blackText airlineName"]')
flight_name = [_text_or_placeholder(element) for element in names]

# Fixed pause between queries, kept from the original pacing of this cell.
time.sleep(3)

# Scrape the flight code:
codes = driver.find_elements_by_xpath('//p[@class ="fliCode"]')
flight_code = [_text_or_placeholder(element) for element in codes]

time.sleep(3)

# Scrape the departure time:
dep_time = driver.find_elements_by_xpath('//div[@class = "flightTimeSection flexOne timeInfoLeft"]')
departure_time = [_text_or_placeholder(element) for element in dep_time]

time.sleep(3)

# Scrape the arrival time:
av_time = driver.find_elements_by_xpath('//div[@class = "flightTimeSection flexOne timeInfoRight"]')
arrival_time = [_text_or_placeholder(element) for element in av_time]

time.sleep(3)

# Scrape the duration/total time taken:
total_time = driver.find_elements_by_xpath('//div[@class = "stop-info flexOne"]')
duration = [_text_or_placeholder(element) for element in total_time]

time.sleep(3)

# Scrape the cost of ticket:
ticket = driver.find_elements_by_xpath('//div[@class = "priceSection"]')
ticket_price = [_text_or_placeholder(element) for element in ticket]

time.sleep(3)

# Checking the length of list items:

In [34]:
# Print the number of scraped values per column, one count per line.
# All six counts must be equal for the lists to be combined into one table.
for column in (flight_name, flight_code, departure_time,
               arrival_time, duration, ticket_price):
    print(len(column))


18
18
18
18
18
18


# Making data frame:

In [35]:
# Assemble the scraped lists into a single table, one column per list.
# Column labels and their order are paired positionally with the lists below.
column_names = [
    "Airlines",
    "Airlines_Code",
    "Departure",
    "Arrival",
    "Duration of trip",
    "Ticket Price",
]
column_values = [
    flight_name,
    flight_code,
    departure_time,
    arrival_time,
    duration,
    ticket_price,
]
df = pd.DataFrame(dict(zip(column_names, column_values)))
df
    

Unnamed: 0,Airlines,Airlines_Code,Departure,Arrival,Duration of trip,Ticket Price
0,Go First,G8 113,05:50\nNew Delhi,08:35\nBengaluru,02 h 45 m\nNon stop,"₹ 9,418\nVIEW PRICES"
1,Go First,G8 119,20:30\nNew Delhi,23:25\nBengaluru,02 h 55 m\nNon stop,"₹ 9,418\nVIEW PRICES"
2,IndiGo,6E 5036,02:25\nNew Delhi,05:15\nBengaluru,02 h 50 m\nNon stop,"₹ 9,419\nVIEW PRICES"
3,IndiGo,6E 6612,06:00\nNew Delhi,08:35\nBengaluru,02 h 35 m\nNon stop,"₹ 9,419\nVIEW PRICES"
4,SpiceJet,SG 191,06:05\nNew Delhi,08:55\nBengaluru,02 h 50 m\nNon stop,"₹ 9,419\nVIEW PRICES"
5,IndiGo,6E 5009,06:55\nNew Delhi,09:50\nBengaluru,02 h 55 m\nNon stop,"₹ 9,419\nVIEW PRICES"
6,AirAsia,I5 740,08:10\nNew Delhi,10:40\nBengaluru,02 h 30 m\nNon stop,"₹ 9,419\nVIEW PRICES"
7,IndiGo,6E 308,08:20\nNew Delhi,11:00\nBengaluru,02 h 40 m\nNon stop,"₹ 9,419\nVIEW PRICES"
8,IndiGo,6E 6827,09:15\nNew Delhi,11:55\nBengaluru,02 h 40 m\nNon stop,"₹ 9,419\nVIEW PRICES"
9,AirAsia,I5 1529,09:35\nNew Delhi,12:25\nBengaluru,02 h 50 m\nNon stop,"₹ 9,419\nVIEW PRICES"


# Scrape data from Yatra.com

In [3]:
# Open the homepage
# Reuses the existing `driver` session and rebinds `url` to the Yatra address.
url = 'https://www.yatra.com/'
driver.get(url)

# Fixed two-second pause to give the page time to load before it is queried.
time.sleep(2)

In [4]:
# Entering location and date of journey:
# Type the origin city into the element with id "BE_flight_origin_city".
search_boarding_at = driver.find_element_by_xpath('//*[@id="BE_flight_origin_city"]')
search_boarding_at.send_keys("New Delhi")

# Type the destination city into the element with id "BE_flight_arrival_city".
search_destination = driver.find_element_by_xpath('//*[@id="BE_flight_arrival_city"]')
search_destination.send_keys("Bangalore")

In [5]:
# Set Journey date:
# NOTE(review): in the recorded run this lookup raised NoSuchElementException
# (no element matched the XPath), so none of the clicks below were executed.
# Presumably the calendar has to be opened first, or the class name differs on
# the live page — confirm against the current DOM.
set_journey_date = driver.find_element_by_xpath('//td[@class = "depart-daybox"]')
set_journey_date.click()

# Journey class:
# Click the traveller/class box, located by its exact class string.
set_travell_class = driver.find_element_by_xpath('//span[@class = "txt-ellipses flight_passengerBox travellerPaxBox"]')
set_travell_class.click()

# Click the element with id "BE_flight_flsearch_btn".
# NOTE(review): despite the variable name `search_date`, the id suggests this
# is the flight-search button — confirm.
search_date = driver.find_element_by_xpath('//*[@id="BE_flight_flsearch_btn"]')
search_date.click()


NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//td[@class = "depart-daybox"]"}
  (Session info: chrome=103.0.5060.53)
Stacktrace:
Backtrace:
	Ordinal0 [0x011CD953+2414931]
	Ordinal0 [0x0115F5E1+1963489]
	Ordinal0 [0x0104C6B8+837304]
	Ordinal0 [0x01079500+1021184]
	Ordinal0 [0x0107979B+1021851]
	Ordinal0 [0x010A6502+1205506]
	Ordinal0 [0x010944E4+1131748]
	Ordinal0 [0x010A4812+1198098]
	Ordinal0 [0x010942B6+1131190]
	Ordinal0 [0x0106E860+976992]
	Ordinal0 [0x0106F756+980822]
	GetHandleVerifier [0x0143CC62+2510274]
	GetHandleVerifier [0x0142F760+2455744]
	GetHandleVerifier [0x0125EABA+551962]
	GetHandleVerifier [0x0125D916+547446]
	Ordinal0 [0x01165F3B+1990459]
	Ordinal0 [0x0116A898+2009240]
	Ordinal0 [0x0116A985+2009477]
	Ordinal0 [0x01173AD1+2046673]
	BaseThreadInitThunk [0x75C7FA29+25]
	RtlGetAppContainerNamedObjectPath [0x77917A9E+286]
	RtlGetAppContainerNamedObjectPath [0x77917A6E+238]


In [51]:
# Collect one list per output column from the Yatra results page.
# Every column comes from its own XPath query, so the lists line up row by
# row only when each result card contains every field; the lists are built
# independently and nothing here enforces equal lengths.


def _text_or_placeholder(element):
    """Return the element's visible text, or "--" when there is none.

    Args:
        element: An object exposing a ``text`` attribute (here, the elements
            returned by ``driver.find_elements_by_xpath``).

    Returns:
        ``element.text`` when it is non-empty, otherwise the string "--".
    """
    # Test truthiness rather than `is None`: an element without visible text
    # yields an empty string, which an identity check against None never
    # catches, so the placeholder would otherwise never be stored.
    return element.text or "--"


# Scrape the flight name:
names = driver.find_elements_by_xpath('//div[@class = "fs-13 airline-name no-pad col-8"]')
flight_name = [_text_or_placeholder(element) for element in names]

# Fixed pause between queries, kept from the original pacing of this cell.
time.sleep(3)

# Scrape the flight code:
codes = driver.find_elements_by_xpath('//p[@class = "normal fs-11 font-lightestgrey no-wrap fl-no"]')
flight_code = [_text_or_placeholder(element) for element in codes]

time.sleep(3)

# Scrape the departure time:
dep_time = driver.find_elements_by_xpath('//div[@class = "i-b pr"]')
departure_time = [_text_or_placeholder(element) for element in dep_time]

time.sleep(3)

# Scrape the arrival time:
av_time = driver.find_elements_by_xpath('//p[@class = "bold fs-15 mb-2 pr time"]')
arrival_time = [_text_or_placeholder(element) for element in av_time]

time.sleep(3)

# Scrape the duration/total time taken:
total_time = driver.find_elements_by_xpath('//p[@class = "fs-12 bold du mb-2"]')
duration = [_text_or_placeholder(element) for element in total_time]

time.sleep(3)

# Scrape the cost of ticket:
ticket = driver.find_elements_by_xpath('//div[@class = "i-b tipsy fare-summary-tooltip fs-18"]')
ticket_price = [_text_or_placeholder(element) for element in ticket]

time.sleep(3)

In [52]:
# Print the number of scraped values per column, one count per line.
# A count of zero means the corresponding XPath matched nothing on the page
# that was loaded when the scraping cell ran.
for column in (flight_name, flight_code, departure_time,
               arrival_time, duration, ticket_price):
    print(len(column))

0
0
0
0
0
0
