Connection to the Google Flights API (via RapidAPI)

In [3]:
import http.client
import json
import os
import urllib.parse

# Host of the Google Flights wrapper on RapidAPI. Defined once because the
# same value is needed for the connection and for the host header.
RAPIDAPI_HOST = "google-flights2.p.rapidapi.com"

# The API key is a secret, so it is read from the environment instead of being
# stored in the notebook. Set RAPIDAPI_KEY before starting the kernel.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")
if not RAPIDAPI_KEY:
    raise RuntimeError(
        "RAPIDAPI_KEY environment variable is not set; "
        "export it before running this notebook."
    )

# A timeout keeps a stalled request from blocking the notebook indefinitely.
conn = http.client.HTTPSConnection(RAPIDAPI_HOST, timeout=30)

headers = {
    'x-rapidapi-key': RAPIDAPI_KEY,
    'x-rapidapi-host': RAPIDAPI_HOST
}

Fetching the airports data:

In [4]:
def parse_airports(response, country):
    """Build the country -> cities -> airports mapping from a searchAirport payload.

    Args:
        response: decoded JSON payload; its "data" entry is expected to be a
            list of city records, each with a "city" name and a "list" of
            places carrying "id", "title" and "type".
        country: country name stored under the "country" key of the result.

    Returns:
        ``{"country": country, "cities": {city_name: [{"id", "name"}, ...]}}``.
        Only places whose type is "airport" are kept, and cities without any
        airport are omitted.
    """
    result = {"country": country, "cities": {}}

    # `or []` also covers an explicit null in the payload.
    for city_data in response.get("data") or []:
        airports = [
            {"id": item["id"], "name": item["title"]}
            for item in city_data.get("list", [])
            if item.get("type") == "airport"
        ]

        if airports:
            result["cities"][city_data["city"]] = airports

    return result


COUNTRY = "Italy"

# Start from an empty result so the final print (and any later cell reading
# country_data) never hits a NameError when the request fails.
country_data = {"country": COUNTRY, "cities": {}}

query = urllib.parse.urlencode({"query": COUNTRY, "language_code": "en-US"})
conn.request("GET", f"/api/v1/searchAirport?{query}", headers=headers)

res = conn.getresponse()
# Read the whole body before doing anything else: the connection is reused by
# later cells and http.client requires the previous response to be consumed.
data = res.read().decode("utf-8")

if res.status != 200:
    print(f"Failed to fetch airports: HTTP {res.status} {res.reason}")
else:
    response = json.loads(data)
    if response.get("status") and response.get("message") == "Success":
        country_data = parse_airports(response, COUNTRY)
    else:
        print(f"API response indicates failure for country: {COUNTRY}")

print(json.dumps(country_data, indent=2))

{
  "country": "Italy",
  "cities": {
    "Rome": [
      {
        "id": "FCO",
        "name": "Leonardo da Vinci International Airport"
      },
      {
        "id": "CIA",
        "name": "Giovan Battista Pastine International Airport"
      }
    ],
    "Florence": [
      {
        "id": "FLR",
        "name": "Amerigo Vespucci Airport"
      },
      {
        "id": "PSA",
        "name": "Pisa International Airport"
      }
    ],
    "Venice": [
      {
        "id": "VCE",
        "name": "Venice Marco Polo Airport"
      },
      {
        "id": "TSF",
        "name": "Treviso Airport"
      }
    ],
    "Milan": [
      {
        "id": "MXP",
        "name": "Milano Malpensa Airport"
      },
      {
        "id": "BGY",
        "name": "Il Caravaggio International Airport"
      },
      {
        "id": "LIN",
        "name": "Milan Linate Airport"
      }
    ],
    "Naples": [
      {
        "id": "NAP",
        "name": "Naples International Airport"
      }
    ]
  }
}


MongoDB Connection

In [8]:
from pymongo import MongoClient

# The connection string embeds the database password, so it is read from the
# environment rather than stored in the notebook, e.g.
#   export MONGODB_URI="mongodb://<user>:<password>@localhost:27017/?authSource=admin"
MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "MONGODB_URI environment variable is not set; "
        "export it before running this notebook."
    )

client = MongoClient(MONGODB_URI)

# Database and collection that receive one document per fetched flight.
db = client['flight_prices_db']
collection = db['flights']

Getting the flight prices from the Google Flights API

In [9]:
from datetime import date, timedelta


def fetch_google_json(connection, path, request_headers):
    """Send a GET request on ``connection`` and decode the JSON response.

    Args:
        connection: ``http.client`` connection to the API host.
        path: request path including the query string.
        request_headers: headers sent with the request.

    Returns:
        The parsed JSON payload, or ``None`` when the HTTP status is not 200
        (the status is printed in that case).

    Raises:
        http.client.HTTPException, OSError: on transport problems.
        ValueError: when the body is not valid JSON.
    """
    connection.request("GET", path, headers=request_headers)
    res = connection.getresponse()
    # Always drain the body, even for error responses: on a persistent
    # connection http.client raises ResponseNotReady for the next response
    # if the previous one has not been read completely.
    body = res.read()
    if res.status != 200:
        print(f"Failed to fetch flight data: HTTP {res.status} {res.reason}")
        return None
    return json.loads(body.decode("utf-8"))


def build_google_flight_document(flight, departure_city, departure_airport,
                                 arrival_city, arrival_airport):
    """Convert one ``topFlights`` entry into the document stored in MongoDB.

    Args:
        flight: one itinerary from the API payload.
        departure_city, arrival_city: city names used for the route.
        departure_airport, arrival_airport: ``{"id", "name"}`` airport dicts.

    Returns:
        The dict to insert into the flights collection.

    Raises:
        KeyError, IndexError, TypeError: when the itinerary lacks a field
        that the document needs.
    """
    flight_info = flight["flights"][0]
    departure_time = flight["departure_time"]
    arrival_time = flight["arrival_time"]

    return {
        "departure": {
            "airport_id": departure_airport["id"],
            "airport_name": departure_airport["name"],
            "city": departure_city,
            # First 10 characters are stored as the day; everything after
            # the one-character separator is stored as the time.
            "day": departure_time[:10],
            "time": departure_time[11:]
        },
        "arrival": {
            "airport_id": arrival_airport["id"],
            "airport_name": arrival_airport["name"],
            "city": arrival_city,
            "day": arrival_time[:10],
            "time": arrival_time[11:]
        },
        "duration_minutes": flight["duration"]["raw"],
        "airline": flight_info["airline"],
        "flight_number": flight_info["flight_number"],
        "delay": flight.get("delay", {}).get("values", False),
        "price": flight["price"],
        "source": "GoogleFlightAPI"
    }


start_date = date(2025, 4, 1)
end_date = date(2025, 4, 30)

cities = country_data["cities"]

# Iterate by offset instead of mutating start_date, so the configured range
# is still intact after the loop has run.
for day_offset in range((end_date - start_date).days + 1):
    outbound_date = (start_date + timedelta(days=day_offset)).strftime("%Y-%m-%d")  # "YYYY-MM-DD"

    print(f"Fetching data for date: {outbound_date}")

    # Iterating over each ordered pair of cities and their airports
    for departure_city, departure_airports in cities.items():
        for arrival_city, arrival_airports in cities.items():

            if departure_city == arrival_city:
                continue  # Skipping routes within the same city

            # Generating all possible airport combinations between the two cities
            for departure_airport in departure_airports:
                for arrival_airport in arrival_airports:
                    departure_id = departure_airport["id"]
                    arrival_id = arrival_airport["id"]

                    path = (
                        f"/api/v1/searchFlights?departure_id={departure_id}&arrival_id={arrival_id}&outbound_date={outbound_date}"
                        "&travel_class=ECONOMY&adults=1&show_hidden=1&currency=EUR&language_code=en-US"
                    )

                    try:
                        response = fetch_google_json(conn, path, headers)
                        if response is None:
                            continue

                        if not (response.get("status") and response.get("message") == "Success"):
                            print(f"API response indicates failure: {response.get('message', 'Unknown error')}")
                            continue

                        itineraries = response.get("data", {}).get("itineraries", {})
                        top_flights = itineraries.get("topFlights", [])

                        if not top_flights:
                            print(f"No top flights found for {departure_city} -> {arrival_city} on {outbound_date}.")
                            continue

                        # Only the first (top-ranked) itinerary is stored for each route and day.
                        flight_data = build_google_flight_document(
                            top_flights[0],
                            departure_city, departure_airport,
                            arrival_city, arrival_airport,
                        )
                        collection.insert_one(flight_data)

                        print(f"Successfully stored flight data for {departure_city} ({departure_id}) -> {arrival_city} ({arrival_id}) on {outbound_date}.")

                    except (http.client.HTTPException, OSError) as e:
                        # The connection state is unknown after a transport
                        # error; closing it makes the next request reconnect.
                        conn.close()
                        print(f"An error occurred for route {departure_city} -> {arrival_city} on {outbound_date}: {str(e)}")
                    except Exception as e:
                        # Best effort: a malformed payload or failed insert
                        # for one route must not abort the whole crawl.
                        print(f"An error occurred for route {departure_city} -> {arrival_city} on {outbound_date}: {str(e)}")

conn.close()

Fetching data for date: 2025-04-01
Successfully stored flight data for Rome (FCO) -> Florence (FLR) on 2025-04-01.
Successfully stored flight data for Rome (FCO) -> Florence (PSA) on 2025-04-01.
No top flights found for Rome -> Florence on 2025-04-01.
No top flights found for Rome -> Florence on 2025-04-01.
Successfully stored flight data for Rome (FCO) -> Venice (VCE) on 2025-04-01.
No top flights found for Rome -> Venice on 2025-04-01.
No top flights found for Rome -> Venice on 2025-04-01.
No top flights found for Rome -> Venice on 2025-04-01.
Successfully stored flight data for Rome (FCO) -> Milan (MXP) on 2025-04-01.
Successfully stored flight data for Rome (FCO) -> Milan (BGY) on 2025-04-01.
Successfully stored flight data for Rome (FCO) -> Milan (LIN) on 2025-04-01.
No top flights found for Rome -> Milan on 2025-04-01.
No top flights found for Rome -> Milan on 2025-04-01.
No top flights found for Rome -> Milan on 2025-04-01.
Successfully stored flight data for Rome (FCO) -> Naple

In [None]:
import http.client

# Host of the Booking.com wrapper on RapidAPI. Defined once because the same
# value is needed for the connection and for the host header.
BOOKING_RAPIDAPI_HOST = "booking-com15.p.rapidapi.com"

# The API key is a secret, so it is read from the environment instead of being
# stored in the notebook. Set RAPIDAPI_KEY before starting the kernel.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")
if not RAPIDAPI_KEY:
    raise RuntimeError(
        "RAPIDAPI_KEY environment variable is not set; "
        "export it before running this notebook."
    )

# Rebinds `conn` and `headers`: from here on they target the Booking.com API.
# A timeout keeps a stalled request from blocking the notebook indefinitely.
conn = http.client.HTTPSConnection(BOOKING_RAPIDAPI_HOST, timeout=30)

headers = {
    'x-rapidapi-key': RAPIDAPI_KEY,
    'x-rapidapi-host': BOOKING_RAPIDAPI_HOST
}

In [None]:
from datetime import date, datetime, timedelta
from pymongo import MongoClient


def booking_get_json(connection, path, request_headers):
    """Send a GET request on ``connection`` and decode the JSON response.

    Args:
        connection: ``http.client`` connection to the API host.
        path: request path including the query string.
        request_headers: headers sent with the request.

    Returns:
        ``(status, payload)``; ``payload`` is ``None`` unless the HTTP status
        is 200.

    Raises:
        http.client.HTTPException, OSError: on transport problems.
        ValueError: when a 200 body is not valid JSON.
    """
    connection.request("GET", path, headers=request_headers)
    res = connection.getresponse()
    # Always drain the body, even for error responses: on a persistent
    # connection http.client raises ResponseNotReady for the next response
    # if the previous one has not been read completely.
    body = res.read()
    if res.status != 200:
        return res.status, None
    return res.status, json.loads(body.decode("utf-8"))


def split_booking_timestamp(timestamp):
    """Split a ``YYYY-MM-DDTHH:MM:SS`` string into ``(day, time)`` strings.

    Returns the day as ``DD-MM-YYYY`` and the time as 12-hour ``HH:MM AM/PM``,
    the formats this cell stores and uses to look up existing documents.
    """
    parsed = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S")
    return parsed.strftime("%d-%m-%Y"), parsed.strftime("%I:%M %p")


# The connection string embeds the database password, so it is read from the
# environment rather than stored in the notebook, e.g.
#   export MONGODB_URI="mongodb://<user>:<password>@localhost:27017/?authSource=admin"
MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "MONGODB_URI environment variable is not set; "
        "export it before running this notebook."
    )

client = MongoClient(MONGODB_URI)
db = client['flight_prices_db']
collection = db['flights']

start_date = date(2025, 4, 1)
end_date = date(2025, 4, 1)

cities = country_data["cities"]

# Iterate by offset instead of mutating start_date, so the configured range
# is still intact after the loop has run.
for day_offset in range((end_date - start_date).days + 1):
    outbound_date = (start_date + timedelta(days=day_offset)).strftime("%Y-%m-%d")
    print(f"\nFetching data for date: {outbound_date}\n")

    for departure_city, departure_airports in cities.items():
        for arrival_city, arrival_airports in cities.items():

            if departure_city == arrival_city:
                continue  # Skipping routes within the same city

            for departure_airport in departure_airports:
                for arrival_airport in arrival_airports:
                    departure_id = departure_airport["id"]
                    departure_name = departure_airport["name"]
                    arrival_id = arrival_airport["id"]
                    arrival_name = arrival_airport["name"]

                    # No `children` parameter: the value copied from the API
                    # sample ("0,17") appears to add two child passengers, so
                    # the total would not be comparable with the single-adult
                    # price stored from the other source.
                    # NOTE(review): confirm against the booking-com15 docs.
                    api_url = (
                        f"/api/v1/flights/searchFlights?fromId={departure_id}.AIRPORT"
                        f"&toId={arrival_id}.AIRPORT&departDate={outbound_date}"
                        "&pageNo=1&adults=1&sort=BEST&cabinClass=ECONOMY&currency_code=EUR"
                    )

                    try:
                        status, response = booking_get_json(conn, api_url, headers)
                        if status != 200:
                            print(f"Failed: {departure_city} → {arrival_city} on {outbound_date}. HTTP {status}")
                            continue

                        if not response.get("status") or response.get("message") != "Success":
                            print(f"API failure: {departure_city} → {arrival_city} on {outbound_date}")
                            continue

                        flight_offers = response.get("data", {}).get("flightOffers", [])

                        if not flight_offers:
                            print(f"No flights found: {departure_city} → {arrival_city} on {outbound_date}")
                            continue

                        # Only the first offer (sorted BEST by the API) is considered.
                        first_flight = flight_offers[0]
                        segments = first_flight.get("segments", [])

                        if not segments:
                            continue

                        first_leg = segments[0]

                        departure_day, departure_time = split_booking_timestamp(first_leg["departureTime"])
                        arrival_day, arrival_time = split_booking_timestamp(first_leg["arrivalTime"])

                        # presumably totalTime is in seconds — TODO confirm against the API docs
                        duration_minutes = first_leg.get("totalTime", 0) // 60

                        # A missing or empty "legs" list falls back to placeholders.
                        legs = first_leg.get("legs") or []
                        airline_name = legs[0]["carriersData"][0]["name"] if legs else "Unknown"
                        flight_number = legs[0]["flightInfo"].get("flightNumber", "N/A") if legs else "N/A"

                        # NOTE(review): only "units" is read; any fractional
                        # part of the total is ignored.
                        total_price = first_flight["priceBreakdown"]["total"]["units"]

                        new_flight_data = {
                            "departure": {
                                "airport_id": departure_id,
                                "airport_name": departure_name,
                                "city": departure_city,
                                "day": departure_day,
                                "time": departure_time
                            },
                            "arrival": {
                                "airport_id": arrival_id,
                                "airport_name": arrival_name,
                                "city": arrival_city,
                                "day": arrival_day,
                                "time": arrival_time
                            },
                            "duration_minutes": duration_minutes,
                            "airline": airline_name,
                            "flight_number": flight_number,
                            "delay": False,
                            "price": total_price,
                            "source": "BookingFlightAPI"
                        }

                        # A stored flight counts as the same flight when both
                        # airports and both day/time pairs match exactly.
                        existing_flight = collection.find_one({
                            "departure.airport_id": departure_id,
                            "departure.day": departure_day,
                            "departure.time": departure_time,
                            "arrival.airport_id": arrival_id,
                            "arrival.day": arrival_day,
                            "arrival.time": arrival_time
                        })

                        if existing_flight:
                            existing_price = existing_flight["price"]

                            # Keep whichever source offers the lower price.
                            if total_price < existing_price:
                                collection.update_one(
                                    {"_id": existing_flight["_id"]},
                                    {"$set": {"price": total_price, "source": "BookingFlightAPI"}}
                                )
                                print(f"Existing flight data was found and updated price for {departure_city} → {arrival_city}. New price: {total_price} EUR (Previous: {existing_price} EUR)")
                            else:
                                print(f"Existing flight data was found but price is higher or unchanged for {departure_city} → {arrival_city}. Keeping the original price: {existing_price} EUR")

                        else:
                            collection.insert_one(new_flight_data)
                            print(f"New flight added: {departure_city} ({departure_id}) → {arrival_city} ({arrival_id})")

                    except (http.client.HTTPException, OSError) as e:
                        # The connection state is unknown after a transport
                        # error; closing it makes the next request reconnect.
                        conn.close()
                        print(f"Error: {departure_city} → {arrival_city} on {outbound_date}: {str(e)}")
                    except Exception as e:
                        # Best effort: a malformed payload or failed write for
                        # one route must not abort the whole crawl.
                        print(f"Error: {departure_city} → {arrival_city} on {outbound_date}: {str(e)}")

conn.close()


Fetching data for date: 2025-04-01

New flight added: Rome (FCO) → Florence (FLR)
New flight added: Rome (FCO) → Florence (PSA)
New flight added: Rome (CIA) → Florence (FLR)
New flight added: Rome (CIA) → Florence (PSA)
New flight added: Rome (FCO) → Venice (VCE)
New flight added: Rome (FCO) → Venice (TSF)
New flight added: Rome (CIA) → Venice (VCE)
New flight added: Rome (CIA) → Venice (TSF)
New flight added: Rome (FCO) → Milan (MXP)
New flight added: Rome (FCO) → Milan (BGY)
Price is higher or unchanged for Rome → Milan. Keeping the original price: 57 EUR
New flight added: Rome (CIA) → Milan (MXP)
No flights found: Rome → Milan on 2025-04-01
New flight added: Rome (CIA) → Milan (LIN)
New flight added: Rome (FCO) → Naples (NAP)
New flight added: Rome (CIA) → Naples (NAP)
Price is higher or unchanged for Florence → Rome. Keeping the original price: 63 EUR
New flight added: Florence (FLR) → Rome (CIA)


KeyboardInterrupt: 