In [1]:
import requests
import google.auth.transport.requests
import google.oauth2.id_token
import os
import json
import concurrent.futures
import pandas as pd

In [2]:
def auth_to_gcp(function_url:str) -> str:
    """
    Build the `Authorization` header value for a GCP Cloud Function.

    An ID token is fetched with `function_url` as its audience and returned
    prefixed with `"Bearer "`.

    Parameters:
    - `function_url`: URL of the Cloud Function the token is requested for
    """
    transport_request = google.auth.transport.requests.Request()
    token = google.oauth2.id_token.fetch_id_token(transport_request, function_url)
    return "Bearer " + token

In [3]:
# Path to the service-account key file (JSON). It is exported through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable, which is where the
# Google auth libraries look for credentials by default.
credential_path = "../GCP_Drivers/cloud_functions.json"
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path

In [4]:
# Collect the Cloud Function endpoints used by the requests in the following
# cells (`getPriceGraph`, `getOffers` and `getUrl`).
# The `with` block closes the file handle as soon as the JSON is parsed.
with open("./endpoints.json", "r") as endpoints_file:
    endpoints = json.load(endpoints_file)

### Customize search parameters
Let's suppose (without supposing too much :) ) that I want to travel to Madeira in the month of September
- for 5 to 8 days
- spending at most 280€
- with a total flight duration (per trip, including layovers) of at most 480 minutes (8 hours)

These would be the parameters

In [5]:
# Search criteria shared by every request in this notebook.
# The dict is posted as-is to the `getPriceGraph` function, so key names must stay unchanged.
flight_parameters = {
    "departureAirport" : "MXP",       # origin airport code
    "destinationAirport" : "FNC",     # destination airport code (Madeira)
    "startDate" : "2024-09-01",       # first day of the search window
    "returnDate" : "2024-09-30",      # last day of the search window
    "minDays" : 5,                    # shortest trip length, in days
    "maxDays" : 8,                    # longest trip length, in days
    "maxPrice": 280,                  # budget in EUR; used to filter the price graph, each one-way offer and the round trip
    "maxDuration": 480,               # longest acceptable one-way journey, in minutes (layovers included)
}

## Get price graph
This function searches in a dynamic **window of dates** the cheapest flight offers and returns them in an array.

The window of dates is created by shifting `startDate` by X days where X is looped from `minDays` to `maxDays`.
Example with the above parameters:

| StartDate   | ReturnDate  | Price |
|-------------|-------------|-------|
| 2024-09-01  | 2024-09-06  | 100   |
| 2024-09-01  | 2024-09-07  | 120   |
| 2024-09-01  | 2024-09-08  | 109   |
| 2024-09-01  | 2024-09-09  | 201   |
| 2024-09-02  | 2024-09-07  | 224   |
| 2024-09-02  | 2024-09-08  | 209   |

and so on until `startDate` reaches `returnDate`.

The function returns every date combination; the filter `Price <= maxPrice` is applied afterwards, in this notebook (see the filtering cell below).

In [6]:
# Ask the `getPriceGraph` Cloud Function for the price of every
# (StartDate, ReturnDate) combination inside the search window.
response = requests.post(
    url=endpoints["getPriceGraph"],
    data=flight_parameters,
    headers={
        "Authorization": auth_to_gcp(endpoints["getPriceGraph"])
    }
)
# Stop here with the HTTP status if the call failed, instead of parsing an
# error body as if it were price data in the next cell.
response.raise_for_status()

In [7]:
# Decode the JSON body and flatten it into a DataFrame; the cells below rely on
# its `StartDate`, `ReturnDate` and `Price` columns.
data = json.loads(response.content)
priceChart_df = pd.json_normalize(data)

In [8]:
# Rename the date columns to the camelCase names used in the rest of the
# notebook, then keep only the `YYYY-MM-DD` part of each timestamp string.
# `rename` ignores labels that are absent and the slicing is a no-op on
# already-trimmed values, so re-running this cell is harmless.
priceChart_df = (
    priceChart_df
    .rename(columns={"StartDate": "startDate", "ReturnDate": "returnDate"})
    .assign(
        startDate=lambda frame: frame["startDate"].str[:10],
        returnDate=lambda frame: frame["returnDate"].str[:10],
    )
)

In [9]:
# Order the combinations chronologically and renumber the rows from 0
priceChart_df = (
    priceChart_df
    .sort_values(["startDate", "returnDate"])
    .reset_index(drop=True)
)

print(priceChart_df.shape)
priceChart_df.head()

(120, 3)


Unnamed: 0,startDate,returnDate,Price
0,2024-09-01,2024-09-06,431
1,2024-09-01,2024-09-07,360
2,2024-09-01,2024-09-08,428
3,2024-09-01,2024-09-09,371
4,2024-09-02,2024-09-07,330


### Filter only price combinations below the selected `maxPrice` parameter

In [10]:
# Keep only the date combinations whose price fits the budget
priceChart_df = priceChart_df.loc[
    lambda frame: frame["Price"] <= flight_parameters["maxPrice"]
]
print(priceChart_df.shape)
priceChart_df.head()

(68, 3)


Unnamed: 0,startDate,returnDate,Price
10,2024-09-03,2024-09-10,230
13,2024-09-04,2024-09-10,222
14,2024-09-04,2024-09-11,273
16,2024-09-05,2024-09-10,261
17,2024-09-05,2024-09-11,279


## Get Departure Offers

One limitation of the current implementation of the [Google Flights API library](https://github.com/krisukox/google-flights-api/tree/main) is that [return flights are not implemented](https://pkg.go.dev/github.com/krisukox/google-flights-api/flights#FullOffer).

```
type FullOffer struct {
	Offer
	Flight               []Flight      // contains all flights in the trip
	ReturnFlight         []Flight      // not implemented yet  <---
    ...
}
```

Thus, we are going to work around it by calling the `getOffers` API twice: once for the departure flight and once for the returning one.
We will then match them together.

In [11]:
def get_offer_df(request_data:dict, getOffersToken:str) -> pd.DataFrame :
    """
    Perform a POST request towards GCP Cloud functions and retrieve a
    `pandas.DataFrame` containing flight offers, one row per flight leg.

    Parameters:
    - `request_data`: the content to pass to the GCP function
    - `getOffersToken`: the bearer token for the GCP function to pass in the `Authorization` header

    Returns:
    A frame with the columns `offerID`, `startDate`, `returnDate`, `price`,
    `departureAirport`, `arrivalAirport`, `departureTime` and `arrivalTime`.
    The columns are present even when no offer is returned, so the result can
    always be concatenated and grouped downstream.

    Raises:
    - `requests.HTTPError`: when the Cloud Function answers with a 4xx/5xx status
    """
    columns = [
        "offerID",
        "startDate",
        "returnDate",
        "price",
        "departureAirport",
        "arrivalAirport",
        "departureTime",
        "arrivalTime",
    ]

    response = requests.post(
        url=endpoints["getOffers"],
        headers={
            "Authorization": getOffersToken
        },
        data=request_data
    )
    # Surface HTTP failures here; otherwise an error payload would only fail
    # further down with an unrelated KeyError/TypeError.
    response.raise_for_status()

    # `or []` covers a JSON `null` body (no offers for the requested dates)
    offers = json.loads(response.content) or []

    flights = []
    for ix, offer in enumerate(offers):
        # Fields shared by every leg of the offer. `offerID` is only unique
        # within one response, not across different calls.
        offer_fields = {
            "offerID": f"offer_{ix}",
            "startDate": offer['StartDate'][:10],    # keep YYYY-MM-DD
            "returnDate": offer['ReturnDate'][:10],  # keep YYYY-MM-DD
            "price": offer['Price'],
        }

        for leg in offer['Flight'] or []:
            flights.append({
                **offer_fields,
                "departureAirport": leg['DepAirportCode'],
                "arrivalAirport": leg['ArrAirportCode'],
                "departureTime": leg['DepTime'][:16],  # keep YYYY-MM-DDTHH:MM
                "arrivalTime": leg['ArrTime'][:16],    # keep YYYY-MM-DDTHH:MM
            })

    return pd.DataFrame(flights, columns=columns)

In [12]:
def getOffers(df, args:dict, max_workers:int=10):
    """
    Takes advantage of `futures` to invoke `get_offer_df()` in parallel,
    sending one request per row of `df`.

    Parameters:
    - `df`: frame holding the date pairs to query
    - `args`: dict with the keys
        - `startDate` / `returnDate`: names of the `df` columns holding the dates
        - `departureAirport` / `destinationAirport` / `tripType`: values sent unchanged with every request
    - `max_workers`: size of the thread pool (defaults to 10)

    Returns:
    The concatenation of the per-request frames, in completion order (the row
    index is not reset). When `df` has no rows, an empty frame with the offer
    columns is returned without fetching a token or sending any request.
    """
    if df.empty:
        # Nothing to query: pd.concat([]) would raise, so return an empty
        # frame shaped like the output of get_offer_df().
        return pd.DataFrame(columns=[
            "offerID",
            "startDate",
            "returnDate",
            "price",
            "departureAirport",
            "arrivalAirport",
            "departureTime",
            "arrivalTime",
        ])

    # A single token is shared by all the requests
    getOffersToken = auth_to_gcp(endpoints["getOffers"])
    start_dates = df[args["startDate"]]
    return_dates = df[args["returnDate"]]

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(
                get_offer_df,
                {
                    "startDate": start_date,
                    "returnDate": return_date,
                    "departureAirport": args["departureAirport"],
                    "destinationAirport": args["destinationAirport"],
                    "tripType": args["tripType"]
                },
                getOffersToken
            )
            for start_date, return_date in zip(start_dates, return_dates)
        ]

        # future.result() re-raises any exception raised inside a worker
        responses = [
            future.result()
            for future in concurrent.futures.as_completed(futures)
        ]

    return pd.concat(responses)

In [13]:
def data_prep(df:pd.DataFrame, max_price:float=None, max_duration:float=None) -> pd.DataFrame :
    """
    Performs some basic data preparation on the result of `get_offer_df()`:
    collapses the per-leg rows into one row per offer and computes the
    door-to-door duration of the journey.

    Also filters the values for the specified price and duration limits.

    Parameters:
    - `df`: per-leg frame as returned by `get_offer_df()` / `getOffers()`; it is not modified
    - `max_price`: highest accepted price; defaults to `flight_parameters["maxPrice"]`
    - `max_duration`: longest accepted duration in minutes; defaults to `flight_parameters["maxDuration"]`

    Returns:
    A new frame with one row per (`offerID`, `startDate`, `returnDate`), the
    airports of the legs gathered in lists and a `flightDuration` column
    (minutes between the first departure and the last arrival).
    """
    if max_price is None:
        max_price = flight_parameters["maxPrice"]
    if max_duration is None:
        max_duration = flight_parameters["maxDuration"]

    # assign() builds a new frame, so the caller's DataFrame keeps its
    # original (string) columns.
    date_columns = ["startDate", "returnDate", "departureTime", "arrivalTime"]
    legs = df.assign(**{col: pd.to_datetime(df[col]) for col in date_columns})

    offers = (
        legs
        .groupby(["offerID", "startDate", "returnDate"])
        .agg({
            "price": "max",
            "departureTime": "min",   # departure of the first leg
            "arrivalTime": "max",     # arrival of the last leg
            "departureAirport": list,
            "arrivalAirport": list
        })
        .reset_index()
    )

    # Flight duration, in whole minutes (layovers included)
    elapsed = offers['arrivalTime'] - offers['departureTime']
    offers['flightDuration'] = (elapsed.dt.total_seconds() / 60).astype(int)

    # Filtering based on defined parameters
    keep = (offers["price"] <= max_price) & (offers["flightDuration"] <= max_duration)

    # .copy() detaches the result from `offers`, so callers can add columns
    # to it without triggering SettingWithCopyWarning.
    return offers[keep].copy()

In [14]:
# Departure flights
offers_outb_df = getOffers(priceChart_df, args={
    "startDate": "startDate",
    "returnDate": "returnDate",
    "departureAirport": flight_parameters["departureAirport"],
    "destinationAirport": flight_parameters["destinationAirport"],
    "tripType": "oneway"
})
print(offers_outb_df.shape)
offers_outb_df.head()

(1600, 8)


Unnamed: 0,offerID,startDate,returnDate,price,departureAirport,arrivalAirport,departureTime,arrivalTime
0,offer_0,2024-09-04,2024-09-10,101,MXP,LIS,2024-09-04T17:20,2024-09-04T19:15
1,offer_0,2024-09-04,2024-09-10,101,LIS,FNC,2024-09-04T23:25,2024-09-05T01:10
2,offer_1,2024-09-04,2024-09-10,108,MXP,LIS,2024-09-04T20:25,2024-09-04T22:15
3,offer_1,2024-09-04,2024-09-10,108,LIS,FNC,2024-09-04T23:25,2024-09-05T01:10
4,offer_2,2024-09-04,2024-09-10,113,MXP,LIS,2024-09-04T17:20,2024-09-04T19:15


In [15]:
# Collapse the legs into one row per offer and drop the offers that exceed
# `maxPrice` or `maxDuration`.
offers_outb_df = data_prep(offers_outb_df)
print(offers_outb_df.shape)
offers_outb_df.head()

(312, 9)


Unnamed: 0,offerID,startDate,returnDate,price,departureTime,arrivalTime,departureAirport,arrivalAirport,flightDuration
1,offer_0,2024-09-04,2024-09-10,101,2024-09-04 17:20:00,2024-09-05 01:10:00,"[MXP, LIS]","[LIS, FNC]",470
2,offer_0,2024-09-04,2024-09-11,101,2024-09-04 17:20:00,2024-09-05 01:10:00,"[MXP, LIS]","[LIS, FNC]",470
7,offer_0,2024-09-11,2024-09-17,118,2024-09-11 06:40:00,2024-09-11 13:35:00,"[MXP, LIS]","[LIS, FNC]",415
8,offer_0,2024-09-11,2024-09-18,118,2024-09-11 06:40:00,2024-09-11 13:35:00,"[MXP, LIS]","[LIS, FNC]",415
9,offer_0,2024-09-12,2024-09-17,128,2024-09-12 11:15:00,2024-09-12 15:55:00,"[MXP, LIS]","[LIS, FNC]",280


## Get Return Offers

Even for one-way flights, the library still takes `ReturnDate` as a mandatory argument. This is a [reported issue](https://github.com/krisukox/google-flights-api/issues/32) that isn't resolved yet, but it won't stop us!

In fact, we can simulate a new one-way trip lasting (for example) one week. As a matter of fact, we are only really interested in the flight itself and its price

In [16]:
# Placeholder return date, one week after the real one: the API wants a
# return date even for one-way searches. `assign` returns a new frame, which
# avoids writing a column into the filtered slice produced by data_prep().
offers_outb_df = offers_outb_df.assign(**{
    "Fake returnDate": offers_outb_df["returnDate"] + pd.to_timedelta(7, unit='d')
})

In [17]:
# Distinct (returnDate, fake returnDate) pairs, rendered as strings so they
# can be sent as request parameters. Built as a single chain so that no
# column is assigned on a slice of `offers_outb_df`.
return_combinations = (
    offers_outb_df[["returnDate", "Fake returnDate"]]
    .drop_duplicates()
    .astype(str)
)

print(return_combinations.shape)
return_combinations.head()

(22, 2)


Unnamed: 0,returnDate,Fake returnDate
1,2024-09-10,2024-09-17
2,2024-09-11,2024-09-18
7,2024-09-17,2024-09-24
8,2024-09-18,2024-09-25
11,2024-09-20,2024-09-27


In [18]:
# Returning flight offers
offers_inb_df = getOffers(return_combinations, args={
    "startDate": "returnDate",
    "returnDate": "Fake returnDate",
    "departureAirport": flight_parameters["destinationAirport"],
    "destinationAirport": flight_parameters["departureAirport"],
    "tripType": "oneway"
})
print(offers_inb_df.shape)
offers_inb_df.head()

(546, 8)


Unnamed: 0,offerID,startDate,returnDate,price,departureAirport,arrivalAirport,departureTime,arrivalTime
0,offer_0,2024-09-20,2024-09-27,142,FNC,LIS,2024-09-20T14:40,2024-09-20T16:25
1,offer_0,2024-09-20,2024-09-27,142,LIS,MXP,2024-09-20T20:20,2024-09-21T00:05
2,offer_1,2024-09-20,2024-09-27,191,FNC,LIS,2024-09-20T16:25,2024-09-20T18:10
3,offer_1,2024-09-20,2024-09-27,191,LIS,MXP,2024-09-20T20:20,2024-09-21T00:05
4,offer_2,2024-09-20,2024-09-27,317,FNC,LIS,2024-09-20T04:15,2024-09-20T05:55


In [19]:
# Same preparation and filtering as for the outbound offers
offers_inb_df = data_prep(offers_inb_df)
print(offers_inb_df.shape)
offers_inb_df.head()

(35, 9)


Unnamed: 0,offerID,startDate,returnDate,price,departureTime,arrivalTime,departureAirport,arrivalAirport,flightDuration
2,offer_0,2024-09-17,2024-09-24,195,2024-09-17 09:30:00,2024-09-17 17:30:00,"[FNC, LIS]","[LIS, MXP]",480
8,offer_0,2024-09-24,2024-10-01,127,2024-09-24 04:10:00,2024-09-24 10:55:00,"[FNC, LIS]","[LIS, MXP]",405
14,offer_0,2024-10-01,2024-10-08,181,2024-10-01 09:30:00,2024-10-01 17:30:00,"[FNC, LIS]","[LIS, MXP]",480
23,offer_1,2024-09-11,2024-09-18,256,2024-09-11 04:20:00,2024-09-11 10:55:00,"[FNC, LIS]","[LIS, MXP]",395
24,offer_1,2024-09-17,2024-09-24,214,2024-09-17 09:30:00,2024-09-17 16:25:00,"[FNC, LIS]","[LIS, MXP]",415


## Merging departure and returning flights
We are going to join `offers_outb_df` and `offers_inb_df` on

`offers_outb_df.returnDate = offers_inb_df.startDate`

since, well, the returning date in the departure flight's dataframe was the starting date of the returning flight all along!

In [20]:
# Pair every outbound offer with every inbound offer that departs on the
# outbound offer's `returnDate`. Columns present in both frames get the
# `_Outbound` / `_Inbound` suffixes.
full_offers = pd.merge(
    offers_outb_df,
    offers_inb_df,
    left_on="returnDate",
    right_on="startDate",
    suffixes=('_Outbound', '_Inbound')
)
print(full_offers.shape)
full_offers.columns

(505, 19)


Index(['offerID_Outbound', 'startDate_Outbound', 'returnDate_Outbound',
       'price_Outbound', 'departureTime_Outbound', 'arrivalTime_Outbound',
       'departureAirport_Outbound', 'arrivalAirport_Outbound',
       'flightDuration_Outbound', 'Fake returnDate', 'offerID_Inbound',
       'startDate_Inbound', 'returnDate_Inbound', 'price_Inbound',
       'departureTime_Inbound', 'arrivalTime_Inbound',
       'departureAirport_Inbound', 'arrivalAirport_Inbound',
       'flightDuration_Inbound'],
      dtype='object')

In [22]:
# Keep only price, times and duration of each direction; offer ids, search
# dates and airport lists are no longer needed.
full_offers = full_offers[[
    "price_Outbound",
    "departureTime_Outbound",
    "arrivalTime_Outbound",
    "flightDuration_Outbound",
    "price_Inbound",
    "departureTime_Inbound",
    "arrivalTime_Inbound",
    "flightDuration_Inbound"
]]
full_offers.head()

Unnamed: 0,price_Outbound,departureTime_Outbound,arrivalTime_Outbound,flightDuration_Outbound,price_Inbound,departureTime_Inbound,arrivalTime_Inbound,flightDuration_Inbound
0,101,2024-09-04 17:20:00,2024-09-05 01:10:00,470,256,2024-09-11 04:20:00,2024-09-11 10:55:00,395
1,101,2024-09-04 17:20:00,2024-09-05 01:10:00,470,232,2024-09-11 10:45:00,2024-09-11 18:40:00,475
2,108,2024-09-04 20:25:00,2024-09-05 01:10:00,285,256,2024-09-11 04:20:00,2024-09-11 10:55:00,395
3,108,2024-09-04 20:25:00,2024-09-05 01:10:00,285,232,2024-09-11 10:45:00,2024-09-11 18:40:00,475
4,132,2024-09-05 17:20:00,2024-09-06 00:05:00,405,256,2024-09-11 04:20:00,2024-09-11 10:55:00,395


### We now filter on the full price of the combination of flights

In [26]:
# Total cost of the round trip. `assign` returns a new frame instead of
# writing a column into the column-subset slice created above, which would
# raise SettingWithCopyWarning.
full_offers = full_offers.assign(
    fullPrice=full_offers["price_Outbound"] + full_offers["price_Inbound"]
)
# Keep only the combinations whose total price fits the budget
full_offers = full_offers[
    full_offers["fullPrice"] <= flight_parameters["maxPrice"]
]
full_offers.shape

(30, 9)

In [27]:
# Renumber the surviving rows from 0: the row label is used below as the key
# that ties each generated URL back to its offer.
full_offers = full_offers.reset_index(drop=True)
full_offers.head()

Unnamed: 0,price_Outbound,departureTime_Outbound,arrivalTime_Outbound,flightDuration_Outbound,price_Inbound,departureTime_Inbound,arrivalTime_Inbound,flightDuration_Inbound,fullPrice
0,121,2024-09-16 20:25:00,2024-09-17 01:10:00,285,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,248
1,121,2024-09-16 20:25:00,2024-09-17 01:10:00,285,145,2024-09-24 09:30:00,2024-09-24 17:30:00,480,266
2,97,2024-09-17 15:05:00,2024-09-17 20:35:00,330,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,224
3,97,2024-09-17 15:05:00,2024-09-17 20:35:00,330,145,2024-09-24 09:30:00,2024-09-24 17:30:00,480,242
4,128,2024-09-18 20:25:00,2024-09-19 01:10:00,285,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,255


## Get URLs
The library also offers the functionality of generating a Google Flights URL based on a combination of `startDate`, `returnDate`, `departureAirport` and `destinationAirport`.

We will generate that for the valid offers we just found

In [28]:
# Build, for every valid round trip, the two payloads (outbound and inbound)
# for the `getUrl` function. Each payload is stored next to the row label of
# the offer so the resulting URL can be joined back to `full_offers`.
url_requests_outb_data = []
url_requests_inb_data = []
date_pattern = "%Y-%m-%d"
for i, row in full_offers.iterrows():
    # Outbound leg: origin -> destination. `returnDate` is filled with the
    # arrival date of the same one-way journey.
    request_outb_data = {
        "startDate": row["departureTime_Outbound"].strftime(date_pattern),
        "returnDate": row["arrivalTime_Outbound"].strftime(date_pattern),
        "departureAirport": flight_parameters["departureAirport"],
        "destinationAirport": flight_parameters["destinationAirport"],
        "tripType": "oneway"
    }

    # Inbound leg: same structure with the airports swapped
    request_inb_data = {
        "startDate": row["departureTime_Inbound"].strftime(date_pattern),
        "returnDate": row["arrivalTime_Inbound"].strftime(date_pattern),
        "departureAirport": flight_parameters["destinationAirport"],
        "destinationAirport": flight_parameters["departureAirport"],
        "tripType": "oneway"
    }

    url_requests_outb_data.append({
        "index": i,
        "data": request_outb_data
    })
    url_requests_inb_data.append({
        "index": i,
        "data": request_inb_data
    })

In [29]:
# Peek at the first outbound payload to sanity-check its structure
url_requests_outb_data[0]

{'index': 0,
 'data': {'startDate': '2024-09-16',
  'returnDate': '2024-09-17',
  'departureAirport': 'MXP',
  'destinationAirport': 'FNC',
  'tripType': 'oneway'}}

In [30]:
# handle a single request for both URLs
def get_flight_urls(outb_req, inb_req, url_token, endpoint):
    outb_response = requests.post(
        endpoint,
        data=outb_req["data"],
        headers={"Authorization": url_token}
    )
    inb_response = requests.post(
        endpoint,
        data=inb_req["data"],
        headers={"Authorization": url_token}
    )

    outb_url = {
        "index": outb_req["index"],
        "url": json.loads(outb_response.content)["url"]
    }
    inb_url = {
        "index": inb_req["index"],
        "url": json.loads(inb_response.content)["url"]
    }

    return outb_url, inb_url

In [31]:
# Fetch the URLs of all the offers in parallel; a single token is shared by
# every request. Results are collected in completion order.
url_token = auth_to_gcp(endpoints["getUrl"])
outb_urls = []
inb_urls = []
with concurrent.futures.ThreadPoolExecutor(max_workers=15) as executor:
    futures = [
        executor.submit(
            get_flight_urls,
            outb_req,
            inb_req,
            url_token,
            endpoints["getUrl"]
        )
        for outb_req, inb_req in zip(url_requests_outb_data, url_requests_inb_data)
    ]

    for future in concurrent.futures.as_completed(futures):
        outb_url, inb_url = future.result()
        outb_urls.append(outb_url)
        inb_urls.append(inb_url)

In [32]:
# One row per offer: `index` is the row label of the offer in `full_offers`,
# `url` its Google Flights link. Rows are in completion order, hence the
# shuffled `index` values.
outb_urls_df = pd.DataFrame(outb_urls)
inb_urls_df = pd.DataFrame(inb_urls)
outb_urls_df.head()

Unnamed: 0,index,url
0,4,https://www.google.com/travel/flights/search?t...
1,11,https://www.google.com/travel/flights/search?t...
2,10,https://www.google.com/travel/flights/search?t...
3,5,https://www.google.com/travel/flights/search?t...
4,8,https://www.google.com/travel/flights/search?t...


In [33]:
# Attach the outbound URL by matching the row label of `full_offers` with the
# `index` column of the URL frame...
full_offers = pd.merge(
    full_offers,
    outb_urls_df,
    left_index=True,
    right_on="index"
)

# ...then the inbound URL on the same `index` column. Both frames carry a
# `url` column, so pandas names them `url_x` (outbound) and `url_y` (inbound).
full_offers = pd.merge(
    full_offers,
    inb_urls_df,
    left_on="index",
    right_on="index"
)
print(full_offers.shape)
full_offers.head()

(30, 12)


Unnamed: 0,price_Outbound,departureTime_Outbound,arrivalTime_Outbound,flightDuration_Outbound,price_Inbound,departureTime_Inbound,arrivalTime_Inbound,flightDuration_Inbound,fullPrice,index,url_x,url_y
0,121,2024-09-16 20:25:00,2024-09-17 01:10:00,285,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,248,0,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...
1,121,2024-09-16 20:25:00,2024-09-17 01:10:00,285,145,2024-09-24 09:30:00,2024-09-24 17:30:00,480,266,1,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...
2,97,2024-09-17 15:05:00,2024-09-17 20:35:00,330,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,224,2,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...
3,97,2024-09-17 15:05:00,2024-09-17 20:35:00,330,145,2024-09-24 09:30:00,2024-09-24 17:30:00,480,242,3,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...
4,128,2024-09-18 20:25:00,2024-09-19 01:10:00,285,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,255,4,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...


In [34]:
# Give the URL columns meaningful names and drop the join key
full_offers = (
    full_offers
    .rename(columns={"url_x": "url_Outbound", "url_y": "url_Inbound"})
    .drop(columns=["index"])
)
full_offers.head()

Unnamed: 0,price_Outbound,departureTime_Outbound,arrivalTime_Outbound,flightDuration_Outbound,price_Inbound,departureTime_Inbound,arrivalTime_Inbound,flightDuration_Inbound,fullPrice,url_Outbound,url_Inbound
0,121,2024-09-16 20:25:00,2024-09-17 01:10:00,285,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,248,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...
1,121,2024-09-16 20:25:00,2024-09-17 01:10:00,285,145,2024-09-24 09:30:00,2024-09-24 17:30:00,480,266,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...
2,97,2024-09-17 15:05:00,2024-09-17 20:35:00,330,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,224,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...
3,97,2024-09-17 15:05:00,2024-09-17 20:35:00,330,145,2024-09-24 09:30:00,2024-09-24 17:30:00,480,242,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...
4,128,2024-09-18 20:25:00,2024-09-19 01:10:00,285,127,2024-09-24 04:10:00,2024-09-24 10:55:00,405,255,https://www.google.com/travel/flights/search?t...,https://www.google.com/travel/flights/search?t...


## Conclusion

And that's it! These are all the offers that match our initial criteria. Now the only thing left to do is... take off!