In [198]:
import pandas as pd
import numpy as np

import requests
import json
import time
import datetime as dt

import httpx
import asyncio

In [199]:
api_key = 'sh428739766321522266746152871799'

header = {
    'x-api-key': api_key
}

url = f'https://partners.api.skyscanner.net/apiservices/v3/flights/indicative/search'


In [200]:
origin = "LON"
option = [
    # "RAK",
    # "IST",
    # "DXB",
    # "BCN",
    # "AYT",
    # "MLA",
    # "PMI",
    # "DOH",
    # "TUN",
    # "FAO",
    # "LIS",
    # "BOJ",
    # "SOF",
    # "BKK",
    # "SGN",
    # "CGK"
]
months = [5,6,7]
adults = 7
filters = {
    "class=4": True,
    "distance=3000": True,
    "review_score=90": True,
    "roomfacility=108": False,
    "roomfacility=81": True
} 
# roomfacility=108 - sea view
# roomfacility=81 - view



In [201]:
def construct_query(origin, destination, month, year):
  query = {
    "query": {
      "currency": "GBP",
      "locale": "en-GB",
      "market": "UK",
      "dateTimeGroupingType": "DATE_TIME_GROUPING_TYPE_BY_DATE",
      "queryLegs": [
        {
          "originPlace": {
              "queryPlace":{
                   "iata":origin
                  }
              },
          "destinationPlace": {
              "queryPlace":{
                  "iata": destination
                  }
              },
          # The end date and start date should always be the same. This would give you quotes for
          # November 2024.
          "date_range": {
            "startDate": {
              "year": year,
              "month": month
            },
            "endDate": {
              "year": year,
              "month": month
            }
          }
        },
        {
          "originPlace": {
              "queryPlace":{
                   "iata":destination
                  }
              },
          "destinationPlace": {
              "queryPlace":{
                  "iata": origin
                  }
              },
          # The end date and start date should always be the same. This would give you quotes for
          # November 2024.
          "date_range": {
            "startDate": {
              "year": year,
              "month": month
            },
            "endDate": {
              "year": year,
              "month": month
            }
          }
        }
      ]
    }
  }

  return query

In [202]:
def req_quotes(origin, destination, month, year):
    response = requests.post(url = url, headers = header, data = json.dumps(construct_query(origin, destination, month, year)))
    response_json = json.loads(response.text)

    return response_json

In [203]:
def convert_response_to_df(response):
    return pd.DataFrame.from_dict(response['content']['results']['quotes'], orient='index').reset_index()

In [204]:
def format_flight_df(quotes):
    outboundLeg = pd.json_normalize(quotes['outboundLeg']).rename(columns=lambda x: x.replace('departureDateTime.', ''))
    outboundLegDateTime = pd.to_datetime(outboundLeg[['year', 'month', 'day', 'hour', 'minute', 'second']])
    outboundLeg = pd.concat([outboundLeg[['originPlaceId', 'destinationPlaceId', 'quoteCreationTimestamp', 'marketingCarrierId']], outboundLegDateTime], axis = 1).rename(columns=lambda x: 'outboundLegDateTime' if x == 0 else 'outbound' + x.capitalize())

    inboundLeg = pd.json_normalize(quotes['inboundLeg']).rename(columns=lambda x: x.replace('departureDateTime.', ''))
    inboundLegDateTime = pd.to_datetime(inboundLeg[['year', 'month', 'day', 'hour', 'minute', 'second']])
    inboundLeg = pd.concat([inboundLeg[['originPlaceId', 'destinationPlaceId', 'quoteCreationTimestamp', 'marketingCarrierId']], inboundLegDateTime], axis = 1).rename(columns=lambda x: 'inboundLegDateTime' if x == 0 else 'inbound' + x.capitalize())

    minPrice = pd.json_normalize(quotes['minPrice'])

    quotes = pd.concat([quotes[['index', 'isDirect']], minPrice, outboundLeg, inboundLeg], axis = 1)

    quotes['tripDuration'] = quotes['inboundLegDateTime'] - quotes['outboundLegDateTime']

    quotes['amount'] = quotes['amount'].astype(int)

    return quotes.sort_values('amount')

In [205]:
def get_quote(origin, destination, month, year):
    quote = req_quotes(origin, destination, month, year)
    quote = convert_response_to_df(quote)
    quote = format_flight_df(quote)

    quote['destination'] = destination

    return quote

In [206]:
def get_all_quotes(origin, destinations, months, year):
    numberOfCountries = len(destinations)
    numberOfMonths = len(months)

    destination = destinations[0]
    month = months[0]

    quotes_outbound_temp = get_quote(origin, destination, month, year)

    if numberOfCountries == 1 & numberOfMonths == 1:
        return quotes_outbound_temp
    
    
    elif numberOfCountries != 1 & numberOfMonths == 1:
        for i in destinations[1:]:
            quote = get_quote(origin, i, month, year)
            quotes_outbound_temp = pd.concat([quotes_outbound_temp, quote], axis = 0)

        return quotes_outbound_temp
    
    elif numberOfCountries == 1 & numberOfMonths != 1:
        for i in months[1:]:
            quote = get_quote(origin, destination, i, year)
            quotes_outbound_temp = pd.concat([quotes_outbound_temp, quote], axis = 0)
        
        return quotes_outbound_temp
    
    else:
        for i in destinations[1:]:
            for c in months[1:]:
                quote = get_quote(origin, i, c, year)
                quotes_outbound_temp = pd.concat([quotes_outbound_temp, quote], axis = 0)
        return quotes_outbound_temp

In [207]:
quotes_outbound_initial = get_all_quotes(origin, option, months, 2024)

In [208]:
def keep_columns(df, cols_to_keep):
    df = df[cols_to_keep]
    return df.reset_index(drop=True).reset_index()

In [209]:
quotes = keep_columns(quotes_outbound_initial, ['isDirect', 'amount', 'outboundLegDateTime', 'inboundLegDateTime', 'tripDuration', 'destination'])

In [210]:
def filter_trip_duration(df, min_days, max_days):
    df = df[(df['tripDuration'] >= dt.timedelta(days = min_days)) & (df['tripDuration'] <= dt.timedelta(days = max_days))]
    return df

In [211]:
quotes = filter_trip_duration(quotes, 4, 6)

In [212]:
quotes.head(10)

Unnamed: 0,index,isDirect,amount,outboundLegDateTime,inboundLegDateTime,tripDuration,destination
0,0,False,231,2024-05-12,2024-05-16,4 days,DXB
3,3,False,234,2024-05-04,2024-05-10,6 days,DXB
6,6,False,234,2024-05-11,2024-05-15,4 days,DXB
10,10,False,234,2024-05-18,2024-05-22,4 days,DXB
11,11,False,234,2024-05-13,2024-05-17,4 days,DXB
14,14,False,236,2024-05-10,2024-05-15,5 days,DXB
15,15,False,236,2024-05-14,2024-05-18,4 days,DXB
21,21,False,236,2024-05-10,2024-05-14,4 days,DXB
24,24,False,237,2024-05-04,2024-05-09,5 days,DXB
31,31,False,237,2024-05-14,2024-05-20,6 days,DXB


In [3]:
with open("cookies.txt", 'r') as file:
    cookies = file.read().rstrip()

In [5]:
with open("headers.txt", 'r') as file:
    headers = file.read().rstrip()

In [1]:
with open("query.txt", 'r') as file:
    query = file.read().rstrip()

In [216]:
def format_filter(filters):
    filter = ""
    for i in filters:
        if filters[i]:
            if len(filter) == 0:
                filter = f"{i}"
            else:
                filter += f";{i}"

    return filter

In [217]:
def get_json_data(start_date, end_date, destination):
    json_data = {
            'operationName': 'FullSearch',
            'variables': {
                'input': {
                    'acidCarouselContext': None,
                    'childrenAges': [],
                    'dates': {
                        'checkin': f'{start_date}',
                        'checkout': f'{end_date}',
                    },
                    'doAvailabilityCheck': False,
                    'encodedAutocompleteMeta': None,
                    'enableCampaigns': True,
                    'filters': {
                        'selectedFilters': f'{format_filter(filters)}',
                    },
                    'forcedBlocks': None,
                    'location': {
                        'searchString': f'{destination}',
                        'destType': 'CITY'
                    },
                    'metaContext': {
                        'metaCampaignId': 0,
                        'externalTotalPrice': None,
                        'feedPrice': None,
                        'hotelCenterAccountId': None,
                        'rateRuleId': None,
                        'dragongateTraceId': None,
                        'pricingProductsTag': None,
                    },
                    'nbRooms': 1,
                    'nbAdults': adults,
                    'nbChildren': 0,
                    'showAparthotelAsHotel': True,
                    'needsRoomsMatch': False,
                    'optionalFeatures': {
                        'forceArpExperiments': True,
                        'testProperties': False,
                    },
                    'pagination': {
                        'rowsPerPage': 10,
                        'offset': 0,
                    },
                    'referrerBlock': None,
                    'sorters': {
                        'selectedSorter': 'price',
                        'referenceGeoId': None,
                        'tripTypeIntentId': None,
                    },
                    'travelPurpose': 2,
                    'seoThemeIds': [],
                    'useSearchParamsFromSession': True,
                    'merchInput': {
                        'testCampaignIds': [],
                    },
                },
                'geniusVipUI': {
                    'enableEnroll': True,
                    'page': 'SEARCH_RESULTS',
                },
                'merchIntExp': False,
                'carouselLowCodeExp': False,
            },
            'extensions': {},
            'query': query,
        }
    return json_data

In [218]:
start_dates = quotes['outboundLegDateTime'].dt.strftime("%Y-%m-%d").to_numpy()
end_dates = quotes['inboundLegDateTime'].dt.strftime("%Y-%m-%d").to_numpy()
destinations = quotes['destination'].to_numpy()

total = len(quotes)

async def get_price(client, headers, cookies, json_data):
    response = await client.post('https://www.booking.com/dml/graphql', headers = headers, cookies=cookies, json=json_data)
    data = response.json()
    result = data['data']['searchQueries']['search']['results']
    return result

async def main(low_range, high_range):
        limits = httpx.Limits(max_keepalive_connections=100, max_connections=None)

        async with httpx.AsyncClient(limits=limits, timeout=None) as client:

                tasks = []
                count = 1

                for i in range(low_range, high_range):
                        start_date = start_dates[i]
                        end_date = end_dates[i]
                        destination = destinations[i]

                        json_data = get_json_data(start_date, end_date, destination)

                        tasks.append(asyncio.ensure_future(get_price(client, headers, cookies, json_data)))
                        
                quotes = await asyncio.gather(*tasks)
                print("gather complete")

        return quotes

In [219]:
epoch = 1000

if total < epoch:
    prices = await main(0, total)
else:
    prices = await main(0,epoch)

    iterations = total // epoch

    time.sleep(15)

    for i in range(1, iterations + 1):
        low_bound = i*epoch
        high_bound = (i+1)*epoch

        if i == iterations:
            prices_temp = await main(low_bound,total)
            prices = prices + prices_temp

        else:
            prices_temp = await main(low_bound, high_bound)
            prices = prices + prices_temp

        time.sleep(15)

gather complete


In [220]:
quotes['data'] = prices
quotes = quotes[quotes['data'].str.len() != 0]
quotes = quotes.explode('data')

In [221]:
quotes['cheapest'] = quotes['data'].apply(lambda x: x['priceDisplayInfoIrene']['displayPrice']['amountPerStay']['amountUnformatted'])
quotes['name'] = quotes['data'].apply(lambda x: x['displayName']['text'])
quotes['address'] = quotes['data'].apply(lambda x: x['basicPropertyData']['location']['address'])
quotes['location'] = quotes['data'].apply(lambda x: x['location']['displayLocation'])
quotes['distance'] = quotes['data'].apply(lambda x: x['location']['mainDistance'])
quotes['unitConfig'] = quotes['data'].apply(lambda x: x['matchingUnitConfigurations']['unitConfigurations'])

In [222]:
def get_all_units(list, field):
    output = []
    for i in list:
        output.append(i[field])
    return output

quotes['roomName'] = quotes['unitConfig'].apply(get_all_units, args = ('name',))
quotes['nbUnits'] = quotes['unitConfig'].apply(get_all_units, args = ('nbUnits',))

In [223]:
pd.set_option('display.max_colwidth', 100)
quotes['totalHolidayCost'] = quotes['amount'] + (quotes['cheapest']/adults)
quotes = quotes.sort_values('totalHolidayCost')
test = quotes[quotes['totalHolidayCost']<500]
test

Unnamed: 0,index,isDirect,amount,outboundLegDateTime,inboundLegDateTime,tripDuration,destination,data,cheapest,name,address,location,distance,unitConfig,roomName,nbUnits,totalHolidayCost
1612,1612,True,74,2024-05-13,2024-05-17,4 days,FAO,"{'__typename': 'SearchResultProperty', 'trackOnView': [], 'licenseDetails': None, 'location': {'...",627.692494,Casa das Figuras,"Urbanizacao Horta das Figuras, Rua Salazar Moscozo Lote 123, 8005-328 no 123",Faro,0.8 miles from centre,"[{'localizedArea': {'localizedArea': '2,691', 'unit': 'feet²', '__typename': 'LocalizedArea'}, '...",[Four-Bedroom House],[1],163.670356
736,736,True,55,2024-05-12,2024-05-16,4 days,BCN,"{'inferredLocationScore': 0, '__typename': 'SearchResultProperty', 'displayName': {'__typename':...",1058.012002,Elegante y Amplio apartamento,208 Carrer d'Entença,"Eixample, Barcelona",1.4 miles from centre,"[{'localizedArea': {'unit': 'feet²', 'localizedArea': '1,162.5', '__typename': 'LocalizedArea'},...",[Apartment],[1],206.144572
1570,1570,True,63,2024-05-12,2024-05-16,4 days,FAO,"{'matchingUnitConfigurations': {'unitConfigurations': [{'nbKitchens': 1, 'nbBedrooms': 4, 'nbUni...",1040.139307,Holiday villa in elite residential area of Faro,"Rua Luís Mascarenhas, 38",Faro,0.8 miles from centre,"[{'nbKitchens': 1, 'nbBedrooms': 4, 'nbUnits': 1, 'apartmentRooms': [{'roomName': {'translation'...",[Villa],[1],211.591330
736,736,True,55,2024-05-12,2024-05-16,4 days,BCN,"{'matchingUnitConfigurations': {'commonConfiguration': {'unitId': 0, 'nbUnits': 1, '__typename':...",1127.956107,Eixample 24,342 Carrer de Sardenya,"Eixample, Barcelona",1.2 miles from centre,"[{'nbKitchens': 1, 'localizedArea': {'localizedArea': '861.1', '__typename': 'LocalizedArea', 'u...",[Apartment],[1],216.136587
1612,1612,True,74,2024-05-13,2024-05-17,4 days,FAO,"{'showGeniusLoginMessage': False, 'displayName': {'text': 'Holiday villa in elite residential ar...",1040.139307,Holiday villa in elite residential area of Faro,"Rua Luís Mascarenhas, 38",Faro,0.8 miles from centre,"[{'unitId': 732468901, 'unitTypeId': 31, 'bedConfigurations': [{'nbAllBeds': 9, '__typename': 'B...",[Villa],[1],222.591330
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,271,False,308,2024-05-25,2024-05-29,4 days,DXB,"{'basicPropertyData': {'location': {'__typename': 'Location', 'city': 'Dubai', 'countryCode': 'a...",1336.469315,White Sage - Amna Tower,Amna Tower,"Business Bay, Dubai",1.4 miles from centre,"[{'nbKitchens': 1, 'unitTypeId': 1, 'bedConfigurations': [{'nbAllBeds': 2, '__typename': 'BedCon...","[Apartment with Balcony, Two-Bedroom Apartment]","[1, 1]",498.924188
888,888,True,73,2024-05-16,2024-05-22,6 days,BCN,"{'isTpiExclusiveProperty': False, 'inferredLocationScore': 0, 'basicPropertyData': {'pageName': ...",2982.506017,Apartament Colon Bcn 130 mts 3 Dormitorios Port Vell,Paseo Colon,"Ciutat Vella, Barcelona",0.7 miles from centre,"[{'nbAllBeds': 6, 'nbKitchens': 1, 'nbBathrooms': 2, 'unitTypeId': 1, 'localizedArea': {'localiz...",[Three-Bedroom Apartment],[1],499.072288
99,99,False,246,2024-05-20,2024-05-25,5 days,DXB,"{'showGeniusLoginMessage': False, 'location': {'displayLocation': 'Downtown Dubai, Dubai', 'main...",1772.876603,Magnificent Brand New Lux 3BR with Burj/ Fnt View,Burj Khalifa Boulevard,"Downtown Dubai, Dubai",0.6 miles from centre,"[{'apartmentRooms': [{'__typename': 'ApartmentRoomDetails', 'config': {'bedCount': 1, 'roomType'...",[Apartment],[1],499.268086
49,49,False,239,2024-05-22,2024-05-28,6 days,DXB,"{'basicPropertyData': {'location': {'address': 'Marquise Square Tower', 'countryCode': 'ae', 'ci...",1824.220268,Ultimate Stay / 4 Beds / Burj Khalifa View / Ultra Modern / Business Bay,Marquise Square Tower,"Business Bay, Dubai",0.7 miles from centre,"[{'name': 'Two-Bedroom Apartment', 'apartmentRooms': [{'config': {'__typename': 'ApartmentRoom',...",[Two-Bedroom Apartment],[1],499.602895


In [224]:
# quotes.to_excel("cheapest holidays2.xlsx")

In [225]:
# with open(r"C:\Users\manik\Desktop\repos\query.txt", 'r') as file:
#     query = file.read().rstrip()

# start_date = quotes.loc[0]["outboundLegDateTime"].strftime("%Y-%m-%d")
# end_date = quotes.loc[0]["inboundLegDateTime"].strftime("%Y-%m-%d")
# destination = quotes.loc[0]["destination"]

# params = get_params(start_date, end_date, destination)
# json_data = get_json_data(start_date, end_date, destination)

# start_time = time.perf_counter()
# response = requests.post('https://www.booking.com/dml/graphql', params=params, headers = headers, cookies=cookies, json=json_data)
# print(response.text)
# print(time.perf_counter() - start_time)
# data = response.json()
# result = data['data']['searchQueries']['search']['results']

# result