# Lab | Working with APIs

Following the class example, create a function that returns the price, names of origin and arrival airports and the name of the company. Do it for all the flights between two dates that cost the same.

In [71]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import json
import datetime
import time 

In [72]:
# function to get the user input for skyscanner flight search by date and price
# output: start and end date string, min and max price int
def skyscanner_flight_search_user_input(): 
    # user inputs start and end date of search periode and price range
    date_start = str(input("Enter a future date as start date (YYYY-MM-DD) for the flight search: "))
    date_end = str(input("Enter an end date (YYYY-MM-DD) the flight search: "))
    price_min = int(input("Enter a price minimum for the flight search: "))
    price_max = int(input("Enter a price maximum for the flight search: "))
    
    # convert dates to datetime
    date_start = date_start.split('-')
    date_end = date_end.split('-')
    datetime_start = datetime.datetime(int(date_start[0]), int(date_start[1]), int(date_start[2]))
    datetime_end = datetime.datetime(int(date_end[0]), int(date_end[1]), int(date_end[2]))
    return datetime_start, datetime_end, price_min, price_max

In [73]:
# function to create a list of airport codes
# output: list of codes
def airport_codes_fetch():
    # TODO: web scrap of airport code list with options to select by region, traffic ranking...
    # get list for airports
    aiport_codes = ['LAX', 'SFO', 'FRA', 'DXB', 'IST']
    return aiport_codes

In [74]:
#  func to perform api data fetching
def skyscanner_flight_search_fetch(key, origin, destination, date):
    url = "https://skyscanner-skyscanner-flight-search-v1.p.rapidapi.com/apiservices/browsedates/v1.0/US/USD/en-US/" + origin + "-sky/" + destination + "-sky/" + date
    querystring = {"inboundpartialdate":""}
    headers = {'x-rapidapi-host': "skyscanner-skyscanner-flight-search-v1.p.rapidapi.com",
               'x-rapidapi-key': key
              }
    response = requests.request("GET", url, headers=headers, params=querystring)
    time.sleep(3)
    return response

In [75]:
# func for skyscanner flight search in a time intervall within a price range
# inputs: user key string
# outputs: df with list of flights
def skyscanner_flight_search_by_price(key):
    # calling func to get user inputs start and end date of search periode and price range
    datetime_start, datetime_end, price_min, price_max = skyscanner_flight_search_user_input()
    
    # lists for flights that match criteria
    picks_flightdates, picks_origins, picks_destinations, picks_prices = [], [], [], []
    
    # calling func to get list for airports
    aiport_codes = airport_codes_fetch()
    
    for i in range((datetime_end - datetime_start).days):
        # day itterator for url                    
        date_pos = (datetime_start + datetime.timedelta(days=i)).date()
        date_index = str(date_pos)

        for origin in aiport_codes:
            for destination in aiport_codes:
                if destination != origin:
                    # calling func to perform api data fetching
                    response = skyscanner_flight_search_fetch(key=key, origin=origin, destination=destination, date=date_index)
                    try:
                        # transforming fetch to dfs
                        df_full = pd.json_normalize(response.json())
                        quotes = pd.DataFrame(df_full["Quotes"][0])
                        carriers = pd.DataFrame(df_full["Carriers"][0])
                        places = pd.DataFrame(df_full["Places"][0])
        
                        # storing flight data in lists
                        list_prices = [price for price in quotes['MinPrice']]
                        list_airlines = [airline for airline in carriers['Name']]
                        list_origins = [places['Name'][0] for x in range(len(list_prices))]
                        list_destinations = [places['Name'][1] for y in range(len(list_prices))]
                        list_flightdates = [date_pos for j in range(len(list_prices))]
                       
                        # append lists with data of flights that match the price range  
                        for indx, price in enumerate(list_prices):
                            if (price >= price_min) and (price <= price_max):
                                picks_flightdates.append(list_flightdates[indx])
                                picks_origins.append(list_origins[indx])
                                picks_destinations.append(list_destinations[indx])
                                picks_prices.append(list_prices[indx])
                    except:
                        continue
    
    # df with matching flights   
    flights_data = pd.DataFrame({"flight_date": picks_flightdates, "origin_airport": picks_origins, "destination airport": picks_destinations, "price": picks_prices})
    return flights_data

In [76]:
check1 = skyscanner_flight_search_by_price(key="01b9029c5amsh7424e15954d5eb6p1bf518jsn7c6241f30d35")

Enter a future date as start date (YYYY-MM-DD) for the flight search:  2021-12-06
Enter an end date (YYYY-MM-DD) the flight search:  2021-12-10
Enter a price minimum for the flight search:  400
Enter a price maximum for the flight search:  500


In [78]:
check1

Unnamed: 0,flight_date,origin_airport,destination airport,price
0,2021-12-06,Dubai,Los Angeles International,430
1,2021-12-06,Dubai,Frankfurt am Main,469
2,2021-12-07,Dubai,Los Angeles International,430
3,2021-12-07,Istanbul,Los Angeles International,445
4,2021-12-07,Frankfurt am Main,Los Angeles International,438
5,2021-12-07,Dubai,Frankfurt am Main,479
6,2021-12-07,Dubai,Frankfurt am Main,487
7,2021-12-08,Dubai,Los Angeles International,476
8,2021-12-08,Dubai,Frankfurt am Main,489
9,2021-12-09,Istanbul,San Francisco International,465
