In [None]:
from bs4 import BeautifulSoup as soup
import requests
import pandas as pd
import numpy as np
import re
import math
import time
from time import sleep
import datetime

## Part I: Scraping Hotel Information on Booking.com

In [None]:
# define the function of date list
def dates(start_y, start_m, start_d, end_y, end_m, end_d):
    """Return every calendar day from the start date to the end date, inclusive.

    The six arguments are the year, month and day of the first and last day.
    Each day is formatted as a 'YYYY-MM-DD' string. An end date earlier than
    the start date yields an empty list.
    """
    first_day = datetime.date(start_y, start_m, start_d)
    last_day = datetime.date(end_y, end_m, end_d)
    one_day = datetime.timedelta(days=1)
    # number of days in the closed interval [first_day, last_day]; <= 0 when reversed
    n_days = (last_day - first_day).days + 1
    return [(first_day + k * one_day).strftime('%Y-%m-%d') for k in range(n_days)]

In [None]:
# define the function of date urls
def date_url(date, url):
    """Build one search URL per consecutive (check-in, check-out) pair of dates.

    Parameters:
        date: list of date strings (e.g. 'YYYY-MM-DD'). Each date is paired with
            the next one in the list, so n dates produce n - 1 stays.
        url: search URL containing `checkin=` and `checkout=` query parameters;
            their current values are treated as placeholders and replaced.

    Returns:
        (url_date, checkin_date, checkout_date): three lists aligned by index
        holding the rewritten URL, the check-in date and the check-out date.

    Raises:
        ValueError: if there is at least one stay to build and `url` lacks a
            `checkin=` or `checkout=` parameter.
    """
    url_date = []
    checkin_date = []
    checkout_date = []
    if len(date) < 2:
        # fewer than two dates means there is no stay to build
        return url_date, checkin_date, checkout_date

    # Match the whole parameter (name plus current value) so the rewrite works
    # whatever the placeholder's length and whichever parameter comes first.
    checkin_param = re.compile(r'\bcheckin=[^&#]*')
    checkout_param = re.compile(r'\bcheckout=[^&#]*')
    if not (checkin_param.search(url) and checkout_param.search(url)):
        raise ValueError("url must contain both 'checkin=' and 'checkout=' parameters")

    for start, end in zip(date, date[1:]):
        # function replacements keep the date text literal (no backslash escapes)
        dated = checkin_param.sub(lambda _m, value=start: 'checkin=' + value, url)
        dated = checkout_param.sub(lambda _m, value=end: 'checkout=' + value, dated)
        url_date.append(dated)
        checkin_date.append(start)
        checkout_date.append(end)
    return url_date, checkin_date, checkout_date

In [None]:
# define the function of total page numbers of each url on each date to get the url list: 
def page_url(url):
    """Return the URL of every result page for one search URL.

    Fetches `url`, reads the total property count from the results heading and
    appends an `&offset=` parameter for each page after the first. The request
    uses the notebook-level `headers` dict.

    Parameters:
        url: search URL for one city and one stay.

    Returns:
        list of str: `url` itself for the first page, then `url + '&offset=N'`
        with N increasing in steps of 25.

    Raises:
        ValueError: if the results heading is not found in the page, or if the
            property count in it is not a number.
    """
    # page size used both to count pages and to step the offset
    per_page = 25

    # request city property number
    request = requests.get(url, headers = headers)
    time.sleep(2)
    destination = soup(request.text, 'html.parser')

    heading = destination.find('h1', {'class': 'e1f827110f d3a14d00da'})
    if heading is None:
        # a missing heading would otherwise surface as an opaque AttributeError
        raise ValueError('results heading not found in page: ' + url)
    found = heading.text.strip()

    # NOTE(review): assumes a heading shaped like "<city>: 1,234 properties found" - confirm
    property_num = found.split(': ')[1].split()[0]
    # counts of 1,000+ carry a thousands separator that int() rejects
    property_count = int(property_num.replace(',', ''))
    page_num = math.ceil(property_count / per_page)

    # first page is the url as given; later pages add the offset
    return [url if p == 0 else url + '&offset=' + str(p * per_page) for p in range(page_num)]

In [None]:
# define function for scraping
# (column name, tag, attribute filter) for every field read from a property card.
# NOTE(review): the class values look like generated CSS class names - verify
# them against the live page before relying on them.
_HOTEL_CARD_FIELDS = (
    ('hotel', 'div', {'data-testid': 'title'}),
    ('location', 'span', {'data-testid': 'address'}),
    ('distance_center', 'span', {'data-testid': 'distance'}),
    ('room_type', 'span', {'class': 'df597226dd'}),
    ('bed_type', 'div', {'class': 'cb5b4b68a4'}),
    ('reviews', 'div', {'class': 'd8eab2cf7f c90c0a70d3 db63693c62'}),
    ('rating', 'div', {'class': 'b5cd09854e d10a6220b4'}),
    ('policy', 'div', {'class': 'd506630cf3'}),
    ('availability', 'div', {'class': 'cb1f9edcd4'}),
    ('price', 'span', {'class': 'fcab3ed991 bd73d13072'}),
)


def _card_text(card, tag, attrs):
    """Return the stripped text of the first matching element in `card`, or NaN if absent or empty."""
    node = card.find(tag, attrs)
    if node is None:
        return np.nan
    # join nested pieces of text with a space so one element always yields one value
    text = node.get_text(' ', strip=True)
    return text if text else np.nan


def hotel_scraper(con, city, start, end):
    """Extract one row per property card from a parsed search-results page.

    Each card produces exactly one value per column, so a field that is missing
    or split over several child nodes can no longer shift the values of later
    hotels into the wrong row.

    Parameters:
        con: parsed results page (the BeautifulSoup object built from the response).
        city: city label stored in the `city` column of every row.
        start: check-in date stored in the `checkin` column of every row.
        end: check-out date stored in the `checkout` column of every row.

    Returns:
        pandas.DataFrame with columns checkin, checkout, city, hotel, location,
        distance_center, room_type, bed_type, reviews, rating, policy,
        availability, price. Missing fields are NaN; a page without property
        cards gives an empty frame with the same columns.
    """
    colnames = ['checkin', 'checkout', 'city', 'hotel', 'location', 'distance_center', 'room_type', 'bed_type', 'reviews', 'rating', 'policy', 'availability', 'price']

    records = []
    # loop over every property card of the requested page
    for card in con.find_all('div', {'data-testid': 'property-card'}):
        record = {'checkin': start, 'checkout': end, 'city': city}
        for column, tag, attrs in _HOTEL_CARD_FIELDS:
            record[column] = _card_text(card, tag, attrs)
        records.append(record)

    # save as dataframe
    return pd.DataFrame(records, columns = colnames)

In [None]:
# Cities: Miami, New York City, Chicago, Las Vegas, Seattle, Los Angeles, Honolulu, Salt Lake City, New Orleans
# Initialize the dict of search URLs, one per city

# All search URLs share one query string; only the city name (spaces written as
# '+') and the numeric dest_id differ. The checkin/checkout values are
# placeholders that date_url() replaces for each stay.
# NOTE(review): `label` and `sid` look like tokens captured from one browser
# session - confirm they are still required.
SEARCH_URL_TEMPLATE = (
    'https://www.booking.com/searchresults.html'
    '?ss={city}&ssne={city}&ssne_untouched={city}'
    '&label=gen173nr-1DCAEoggI46AdIM1gEaIkCiAEBmAExuAEHyAEM2AED6AEB-AECiAIBqAIDuALIg8eUBsACAdICJGExNzc5ZTczLTY0NzQtNGY5MS1hZGEwLTFlZjIxM2UzODk1Y9gCBOACAQ'
    '&sid=e25f95f2d2069b4ee59a027fb5b138db&aid=304142&lang=en-us&sb=1'
    '&src_elem=sb&src=searchresults'
    '&dest_id={dest_id}&dest_type=city'
    '&checkin=2022-07-15&checkout=2022-07-16'
    '&group_adults=2&no_rooms=1&group_children=0&sb_travel_purpose=leisure'
)

# city name -> dest_id used in that city's search URL
CITY_DEST_IDS = {
    'Miami': '20023181',
    'New York': '20088325',
    'Chicago': '20033173',
    'Las Vegas': '20079110',
    'Seattle': '20144883',
    'Los Angeles': '20014181',
    'Honolulu': '20030916',
    'Salt Lake City': '20133956',
    'New Orleans': '20050264',
}

# The keys become the values of the `city` column in the scraped data, so they
# must be spelled correctly ('Los Angeles', 'New Orleans').
hotels = {
    city_name: SEARCH_URL_TEMPLATE.format(city=city_name.replace(' ', '+'), dest_id=dest_id)
    for city_name, dest_id in CITY_DEST_IDS.items()
}
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36'}


In [None]:
# request url using the urls list
# first and last calendar day of the scraping window
start_y = 2022
start_m = 6
start_d = 20
end_y = 2022
end_m = 6
end_d = 22

hotel_urls = list(hotels.values())
cities = list(hotels.keys())

# the date range is the same for every city, so build it once
date_list = dates(start_y, start_m, start_d, end_y, end_m, end_d)

# Collect one frame per result page and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each call.
page_frames = []
for city, base_url in zip(cities, hotel_urls):
    # one URL per consecutive (check-in, check-out) pair
    date_urls, checkins, checkouts = date_url(date_list, base_url)
    print(checkins)
    for stay_url, checkin, checkout in zip(date_urls, checkins, checkouts):
        for result_url in page_url(stay_url):
            req = requests.get(result_url, headers = headers)
            # pause between requests to avoid hammering the site
            time.sleep(5)
            content = soup(req.text, 'html.parser')
            # call scraper function to generate the page's rows
            page_frames.append(hotel_scraper(content, city, checkin, checkout))

# row labels restart at 0 for each page, as they did with repeated append()
df_hotel = pd.concat(page_frames) if page_frames else pd.DataFrame()

['2022-06-20', '2022-06-21']
['2022-06-20', '2022-06-21']
['2022-06-20', '2022-06-21']
['2022-06-20', '2022-06-21']
['2022-06-20', '2022-06-21']
['2022-06-20', '2022-06-21']
['2022-06-20', '2022-06-21']
['2022-06-20', '2022-06-21']
['2022-06-20', '2022-06-21']


In [None]:
# # request for the whole url list

# # build an empty df for storing info scrapped
# df_hotel = pd.DataFrame()
# for url in url_final: 
#   req = requests.get(url, headers = headers)
#   time.sleep(5)
#   content = soup(req.text, 'html.parser')

#   # call scraper function to generate df
#   df_page = hotel_scraper(content, city, start, end)
#   df_hotel = df_hotel.append(df_page)

NameError: ignored

In [None]:
# u = 'https://www.booking.com/searchresults.html?ss=Miami&ssne=Miami&ssne_untouched=Miami&label=gen173nr-1DCAEoggI46AdIM1gEaIkCiAEBmAExuAEHyAEM2AED6AEB-AECiAIBqAIDuALIg8eUBsACAdICJGExNzc5ZTczLTY0NzQtNGY5MS1hZGEwLTFlZjIxM2UzODk1Y9gCBOACAQ&sid=e25f95f2d2069b4ee59a027fb5b138db&aid=304142&lang=en-us&sb=1&src_elem=sb&src=searchresults&dest_id=20023181&dest_type=city&checkin=2022-07-15&checkout=2022-07-16&group_adults=2&no_rooms=1&group_children=0&sb_travel_purpose=leisure'
# headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36'}

# req = requests.get(u, headers = headers)
# con = soup(req.text, 'html.parser')

# df = hotel_scraper(con, 'Miami', '2022-07-15', '2022-07-16')
# df

Unnamed: 0,checkin,checkout,city,hotel,location,distance_center,room_type,bed_type,reviews,rating,policy,availability,price
0,2022-07-15,2022-07-16,Miami,"InterContinental Miami, an IHG Hotel","Downtown Miami, Miami",0.4 miles from center,Two Double Bed City View,2 full beds,"1,328 reviews",8.4,FREE cancellation • No prepayment needed,,$279
1,2022-07-15,2022-07-16,Miami,"Kimpton EPIC Hotel, an IHG Hotel","Brickell, Miami",0.3 miles from center,Room Selected at Check In,1 king bed,975 reviews,8.3,FREE cancellation • No prepayment needed,Only 6 rooms left at this price on our site,$293
2,2022-07-15,2022-07-16,Miami,Hilton Garden Inn Miami Dolphin Mall,Miami,11.4 miles from center,King Room,1 king bed,"2,248 reviews",8.6,FREE cancellation • No prepayment needed,,$159
3,2022-07-15,2022-07-16,Miami,Nuvo Suites Hotel - Miami / Doral,Miami,11.1 miles from center,Queen Suite,Private suite • 1 bedroom • 1 bathroom • 33m²,"2,733 reviews",8.3,,Only 1 left at this price on our site,$184
4,2022-07-15,2022-07-16,Miami,Homewood Suites by Hilton Miami Dolphin Mall,Miami,11.5 miles from center,King Suite - Non-Smoking,"2 beds (1 sofa bed, 1 queen)",766 reviews,8.5,FREE cancellation • No prepayment needed,,$152
5,2022-07-15,2022-07-16,Miami,"Holiday Inn Miami International Airport, an IH...","Miami Springs, Miami",5.1 miles from center,King Room - Disability Access/Non-Smoking,Private suite,"2,696 reviews",7.4,Free cancellation,,$148
6,2022-07-15,2022-07-16,Miami,"Hotel Indigo - Miami Brickell, an IHG Hotel","Brickell, Miami",0.7 miles from center,Standard King Room - Non-Smoking,1 king bed,590 reviews,8.0,FREE cancellation • No prepayment needed,,$249
7,2022-07-15,2022-07-16,Miami,Candlewood Suites Miami Intl Airport - 36th St...,Miami,6.8 miles from center,Standard Room,1 king bed,969 reviews,8.9,Free cancellation,,$162
8,2022-07-15,2022-07-16,Miami,Novotel Miami Brickell,"Brickell, Miami",0.9 miles from center,Superior King Room - Hearing Accessible,1 king bed,"1,073 reviews",8.2,FREE cancellation • No prepayment needed,,$299
9,2022-07-15,2022-07-16,Miami,"Holiday Inn Hotel Port of Miami-Downtown, an I...","Downtown Miami, Miami",0.5 miles from center,Double Room with Two Double Beds - Non-Smoking,1 queen bed,"2,352 reviews",7.5,Free cancellation,,$314


## Part II: Scraping for Flights on Booking.com

Some open questions need to be settled:

1. Should flights be round-trip or one-way?

2. If round-trip, which date ranges?

3. Should the departure airport be SFO only, or also SJC, OAK, and SMF?

In [None]:
!pip install selenium
!apt-get update # to update ubuntu to correctly run apt install
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
import sys
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
from selenium import webdriver
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
wd = webdriver.Chrome('chromedriver',chrome_options=chrome_options)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting selenium
  Downloading selenium-4.2.0-py3-none-any.whl (983 kB)
[K     |████████████████████████████████| 983 kB 28.2 MB/s 
[?25hCollecting trio~=0.17
  Downloading trio-0.20.0-py3-none-any.whl (359 kB)
[K     |████████████████████████████████| 359 kB 24.4 MB/s 
[?25hCollecting trio-websocket~=0.9
  Downloading trio_websocket-0.9.2-py3-none-any.whl (16 kB)
Collecting urllib3[secure,socks]~=1.26
  Downloading urllib3-1.26.9-py2.py3-none-any.whl (138 kB)
[K     |████████████████████████████████| 138 kB 44.9 MB/s 
[?25hCollecting async-generator>=1.9
  Downloading async_generator-1.10-py3-none-any.whl (18 kB)
Collecting outcome
  Downloading outcome-1.1.0-py2.py3-none-any.whl (9.7 kB)
Collecting sniffio
  Downloading sniffio-1.2.0-py3-none-any.whl (10 kB)
Collecting wsproto>=0.14
  Downloading wsproto-1.1.0-py3-none-any.whl (24 kB)
Collecting pyOpenSSL>=0.14
  Downloading py

0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu bionic InRelease
0% [Waiting for headers] [Connected to cloud.r-project.org (52.85.151.54)] [Con                                                                               Get:2 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
                                                                               Get:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
0% [2 InRelease 86.6 kB/88.7 kB 98%] [Connected to cloud.r-project.org (52.85.10% [Waiting for headers] [Connected to cloud.r-project.org (52.85.151.54)] [Wai0% [1 InRelease gpgv 242 kB] [Waiting for headers] [Waiting for headers] [Waiti                                                                               Get:4 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
0% [1 InRelease gpgv 242 kB] [Waiting for headers] [4 InRelease 3,626 B/3,626 B0% [1 InRelease gpgv 242 kB] [Waiting

  if sys.path[0] == '':


In [None]:
# Browser session used for the flight pages. `options=` replaces the deprecated
# `chrome_options=` keyword and positional driver path; chromedriver is found on PATH.
driver = webdriver.Chrome(options=chrome_options)

  """Entry point for launching an IPython kernel.


In [None]:
# One-way SFO -> HNL economy search for 1 adult departing 2022-07-15, sorted by BEST.
flight_search_url = 'https://flights.booking.com/flights/SFO-HNL/?type=ONEWAY&adults=1&cabinClass=ECONOMY&children=&from=SFO&to=HNL&fromCountry=US&toCountry=US&fromLocationName=San+Francisco+International+Airport&toLocationName=Honolulu+International+Airport&depart=2022-07-15&sort=BEST&aid=359627&label=gen173rf-1DEgdmbGlnaHRzGOgHKIICOOgHSAlYBGjsAZIBAlhYmAExwgEET1MgWNgBA_IBAlhY-AEJmAIBqAIDuALowtSUBsACAdICJDhjMTgzNjVkLWRmNzQtNDNjYy1iODg4LTIxMzk4ZTdjMDRmY9gCA-ACAg&gclid=Cj0KCQjw1tGUBhDXARIsAIJx01n1C8O3mupyIr9QPpmdmogrMyQuA1s8wDVeZuTQL32gRQUu5zBTj64aAmwWEALw_wcB'
driver.get(flight_search_url)
# fixed pause after navigation, presumably so the results can finish rendering
time.sleep(5)

In [None]:
from selenium.webdriver.common.by import By

# define a scraper function for retrieving flight information
def flight_scraper(driver):
    """Collect the flight details shown on the page currently loaded in `driver`.

    Every column is read with its own page-wide XPath query and the resulting
    lists are zipped into rows, so row i combines the i-th match of each query.

    Parameters:
        driver: Selenium WebDriver with a flight results page loaded.

    Returns:
        pandas.DataFrame with columns departure_time, departure_date,
        departure_airport, arrival_time, arrival_date, arrival_airport,
        duration, stop, airline, price. If the queries return different
        numbers of elements, rows are cut to the shortest list and a
        UserWarning is issued because values may then be misaligned.
    """
    import warnings

    # column name -> XPath of the elements holding that value
    # NOTE(review): the price selector uses `data-test-id` (extra hyphen) unlike
    # the others; the recorded output shows it matched - confirm before changing.
    field_xpaths = {
        'departure_time': '//div[@data-testid="flight_card_segment_departure_time"]',
        'departure_date': '//div[@data-testid="flight_card_segment_departure_date"]',
        'departure_airport': '//div[@data-testid="flight_card_segment_departure_airport"]',
        'arrival_time': '//div[@data-testid="flight_card_segment_destination_time"]',
        'arrival_date': '//div[@data-testid="flight_card_segment_destination_date"]',
        'arrival_airport': '//div[@data-testid="flight_card_segment_destination_airport"]',
        'duration': '//div[@data-testid="flight_card_segment_duration"]',
        'stop': '//div[@data-testid="flight_card_segment_stops"]',
        'airline': '//div[@class="css-1dimx8f"]',
        'price': '//div[@data-test-id="flight_card_price_main_price"]',
    }

    # visible text of every match, one list per column (dict order = column order)
    columns = {
        name: [element.text for element in driver.find_elements(By.XPATH, xpath)]
        for name, xpath in field_xpaths.items()
    }

    # zip() silently stops at the shortest list; make that visible
    counts = {name: len(values) for name, values in columns.items()}
    if len(set(counts.values())) > 1:
        warnings.warn(
            'flight_scraper: selectors matched different numbers of elements, '
            'rows are truncated and may be misaligned: ' + str(counts)
        )

    # save features to dataframe
    rows = list(zip(*columns.values()))
    df = pd.DataFrame(rows, columns = list(columns))

    return df


In [None]:
# scrape the flight results page currently loaded in the Selenium driver
df = flight_scraper(driver)

In [None]:
# display the scraped flights
df

Unnamed: 0,departure_time,departure_date,departure_airport,arrival_time,arrival_date,arrival_airport,duration,stop,airline,price
0,8:00 AM,Jul 15,SJC,10:35 AM,Jul 15,HNL,5h 35m,Direct,Hawaiian Airlines,$264.79
1,7:00 AM,Jul 15,SFO,9:35 AM,Jul 15,HNL,5h 35m,Direct,Alaska Airlines,$308.99
2,7:00 AM,Jul 15,SFO,9:30 AM,Jul 15,HNL,5h 30m,Direct,Hawaiian Airlines,$315.79
3,9:15 AM,Jul 15,SFO,11:45 AM,Jul 15,HNL,5h 30m,Direct,Hawaiian Airlines,$315.79
4,4:45 PM,Jul 15,SFO,6:59 PM,Jul 15,HNL,5h 14m,Direct,United Airlines,$338.47
5,1:30 PM,Jul 15,SFO,3:46 PM,Jul 15,HNL,5h 16m,Direct,United Airlines,$338.47
6,7:20 AM,Jul 15,OAK,9:45 AM,Jul 15,HNL,5h 25m,Direct,Hawaiian Airlines,$335.79
7,9:15 AM,Jul 15,SFO,11:39 AM,Jul 15,HNL,5h 24m,Direct,United Airlines,$338.47
8,11:12 AM,Jul 15,SFO,1:38 PM,Jul 15,HNL,5h 26m,Direct,United Airlines,$338.47
9,9:00 AM,Jul 15,OAK,1:15 PM,Jul 15,HNL,7h 15m,1 stop,Hawaiian Airlines,$260.29
