## **Google Flights API**

In [1]:
import json
import os

import pandas as pd
import requests

### Function to run API:

In [2]:
def run_API(dep_id, arr_id, date):
    """Search flights through the google-flights2 RapidAPI endpoint.

    The API key is read from the ``RAPIDAPI_KEY`` environment variable so the
    secret is not stored in the notebook. Exactly one HTTP request is sent per
    call.

    Args:
        dep_id: Departure airport code, sent as ``departure_id`` (e.g. "GIG").
        arr_id: Arrival airport code, sent as ``arrival_id`` (e.g. "CDG").
        date: Outbound date, sent as ``outbound_date`` (e.g. "2025-12-10").

    Returns:
        The decoded JSON response, or ``None`` when the key is missing, the
        request fails, or the body cannot be decoded. The reason is printed.
    """
    api_key = os.environ.get("RAPIDAPI_KEY")
    if not api_key:
        print("An error occurred: the RAPIDAPI_KEY environment variable is not set")
        return None

    url = "https://google-flights2.p.rapidapi.com/api/v1/searchFlights"
    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "google-flights2.p.rapidapi.com",
    }
    querystring = {
        "departure_id": dep_id,
        "arrival_id": arr_id,
        "outbound_date": date,
    }

    try:
        # Single request, inside the try so connection errors are reported
        # like any other failure; the timeout keeps the cell from hanging.
        response = requests.get(url, headers=headers, params=querystring, timeout=30)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"An error occurred: {err}")

    # Return None if there is an error
    return None
    

In [3]:
# Example search parameters: departure airport, arrival airport, outbound date.
dep_id, arr_id, date = "GIG", "CDG", "2025-12-10"

# Fetch the raw API response for that search (None if the request failed).
data = run_API(dep_id, arr_id, date)

### Functions to build a DataFrame from the API response:

In [4]:
def normalize(row):
    """Flatten nested JSON-like records into a DataFrame.

    Thin wrapper around ``pd.json_normalize`` using its default settings.
    """
    return pd.json_normalize(row)

In [5]:
def get_df(API_info):
    """Build a flat DataFrame of top flights from a search response.

    Each row is one itinerary from ``data.itineraries.topFlights``, combined
    with the details of that itinerary's first flight leg. Itinerary-level
    nested fields are flattened with ``_`` (e.g. ``duration_text``) and
    leg-level nested fields with ``.`` (e.g. ``departure_airport.airport_code``).

    Args:
        API_info: Decoded JSON response (dict) as returned by ``run_API``.

    Returns:
        A DataFrame without the bookkeeping columns listed below, or an empty
        DataFrame when the response contains no top flights.

    Raises:
        ValueError: If ``API_info`` is ``None`` (the request failed).
        KeyError: If the response lacks ``data.itineraries.topFlights``.
    """
    if API_info is None:
        raise ValueError("API_info is None; the API request did not return a response.")

    top_flights = API_info["data"]["itineraries"]["topFlights"]
    if not top_flights:
        return pd.DataFrame()

    itineraries = pd.json_normalize(top_flights, sep="_")
    # Only the first leg of each itinerary is kept; an itinerary without legs
    # contributes an empty record so rows stay aligned with `itineraries`.
    first_legs = pd.json_normalize(
        [(itinerary.get("flights") or [{}])[0] for itinerary in top_flights]
    )
    combined = pd.concat([itineraries, first_legs], axis=1)

    unwanted_columns = [
        "flights",
        "airline_logo",
        "duration_raw",
        "booking_token",
        "extensions",
        "departure_airport.time",
        "arrival_airport.time",
        "self_transfer",
        "layovers",
        "delay_values",
        "delay_text",
        "duration.raw",
        "duration.text",
        "carbon_emissions_difference_percent",
        "carbon_emissions_CO2e",
        "carbon_emissions_typical_for_this_route",
        "carbon_emissions_higher",
    ]
    # errors="ignore": a response that lacks some of these fields must not
    # abort the whole conversion.
    return combined.drop(columns=unwanted_columns, errors="ignore")
    


In [6]:
# Reuse the response already fetched for this search; call the API again only
# if that request failed, so a full run does not spend a second request.
if data is None:
    data = run_API(dep_id, arr_id, date)

In [7]:
# Flatten the API response into a DataFrame and preview its first rows.
df = get_df(data)
df.head()

Unnamed: 0,departure_time,arrival_time,price,stops,duration_text,bags_carry_on,bags_checked,airline,flight_number,aircraft,seat,legroom,departure_airport.airport_name,departure_airport.airport_code,arrival_airport.airport_name,arrival_airport.airport_code
0,10-12-2025 03:20 PM,11-12-2025 09:30 AM,490,0,14 hr 10 min,1,,LATAM,LA 3429,Airbus A321,Below average legroom,28 in,RIOgaleão International Airport,GIG,São Paulo/Guarulhos–Governor André Franco Mont...,GRU
1,10-12-2025 03:35 PM,11-12-2025 10:50 AM,662,0,15 hr 15 min,1,,ITA,AZ 673,Airbus A330-900neo,Average legroom,31 in,RIOgaleão International Airport,GIG,Leonardo da Vinci International Airport,FCO
2,10-12-2025 06:10 PM,11-12-2025 01:50 PM,688,0,15 hr 40 min,1,,British Airways,BA 248,Boeing 777,Average legroom,31 in,RIOgaleão International Airport,GIG,Heathrow Airport,LHR
3,10-12-2025 08:45 PM,11-12-2025 11:55 AM,837,0,11 hr 10 min,1,,Air France,AF 485,Boeing 777,Average legroom,31 in,RIOgaleão International Airport,GIG,Paris Charles de Gaulle Airport,CDG
4,10-12-2025 11:10 PM,11-12-2025 02:15 PM,837,0,11 hr 5 min,1,,Air France,AF 409,Boeing 777,Average legroom,31 in,RIOgaleão International Airport,GIG,Paris Charles de Gaulle Airport,CDG


In [8]:
# Location of the semicolon-separated airports file. Set the AIRPORTS_FILE
# environment variable to use another location; the original local path is
# kept as the default.
airports_path = os.environ.get(
    "AIRPORTS_FILE",
    "/Users/laurasuemitsu/Desktop/DATA_FT_May_2025/Final project/airports.txt",
)

# The file has no header row, so column names are assigned explicitly.
df_airports = pd.read_csv(airports_path, sep=";", header=None)
df_airports.columns = ["name", "city", "country", "IATA", "ICAO", "lat", "long", "alt", "timezone", "DST"]
df_airports.head()

Unnamed: 0,name,city,country,IATA,ICAO,lat,long,alt,timezone,DST
0,7 Novembre,Tabarka,Tunisia,TBJ,DTKA,36.978333,8.876389,0,100,E
1,A Coruna,La Coruna,Spain,LCG,LECO,43.302061,-8.377256,326,100,E
2,Aalborg,Aalborg,Denmark,AAL,EKYT,57.092789,9.849164,10,100,E
3,Aarhus,Aarhus,Denmark,AAR,EKAH,56.300017,10.619008,82,100,E
4,Aasiaat,Aasiaat,Greenland,JEG,BGEM,68.7,-52.75,2,-300,E


In [9]:
# Name	Name of airport. May or may not contain the City name.
# City	Main city served by airport. May be spelled differently from Name.
# Country	Country or territory where airport is located.
# IATA	3-letter IATA code (identifier).
# ICAO	4-letter ICAO code.
# Latitude	Decimal degrees, usually to six significant digits. Negative is South, positive is North.
# Longitude	Decimal degrees, usually to six significant digits. Negative is West, positive is East.
# Altitude	In feet.
# Timezone	Hours offset from UTC.
# DST	Daylight savings time. One of E (Europe), A (US/Canada), S (South America), O (Australia), Z (New Zealand), N (None) or U (Unknown).


In [10]:
# Keep only the columns needed to map a city name to an airport code.
df_airports = df_airports.loc[:, ["city", "IATA"]]
df_airports.head()

Unnamed: 0,city,IATA
0,Tabarka,TBJ
1,La Coruna,LCG
2,Aalborg,AAL
3,Aarhus,AAR
4,Aasiaat,JEG


In [11]:
# Save the city/IATA lookup table to the working directory. The DataFrame index
# is written too (to_csv default), as an unnamed first column.
df_airports.to_csv('airports_codes.csv')

In [12]:
# Function to get the airport code for a given city
def get_airport_code(city_name, df):
    """Return the IATA code of the first airport listed for a city.

    An exact match on the ``city`` column is tried first. If there is none,
    the lookup is repeated ignoring letter case and surrounding whitespace,
    so typed input such as ``" paris "`` still resolves.

    Args:
        city_name: City name to look up.
        df: DataFrame with ``city`` and ``IATA`` columns.

    Returns:
        The ``IATA`` value of the first matching row, or ``None`` when no row
        matches.
    """
    exact = df.loc[df["city"] == city_name, "IATA"]
    if not exact.empty:
        return exact.iloc[0]

    if not isinstance(city_name, str):
        return None

    wanted = city_name.strip().casefold()
    # Non-string cells (e.g. missing values) are left as-is and never match.
    normalized = df["city"].map(
        lambda city: city.strip().casefold() if isinstance(city, str) else city
    )
    relaxed = df.loc[normalized == wanted, "IATA"]
    return relaxed.iloc[0] if not relaxed.empty else None

# User inputs
departure_city = input("Enter the departure city: ")
arrival_city = input("Enter the arrival city: ")
date = input("Enter the departure date (YYYY-MM-DD): ")

# Fetch airport codes using the DataFrame
departure_code = get_airport_code(departure_city, df_airports)
arrival_code = get_airport_code(arrival_city, df_airports)

# Start from a known state so a stale `result` from an earlier run is never reused.
result = None

if not (departure_code and arrival_code):
    print("Error: Could not find airport codes on the database. Please restart the app.")
else:
    result = run_API(departure_code, arrival_code, date)
    if result is None:
        # run_API has already printed the underlying failure.
        print("Error: the flight search request failed. Please restart the app.")
    else:
        # Tolerate a response that lacks the expected nesting instead of crashing.
        itineraries = (result.get("data") or {}).get("itineraries") or {}
        top_flights = itineraries.get("topFlights")
        if not top_flights:
            print("Error: there are no flights available for these cities and date. Please restart the app.")
        else:
            print(top_flights)

[{'departure_time': '18-09-2025 11:15 AM', 'arrival_time': '19-09-2025 10:55 AM', 'duration': {'raw': 1840, 'text': '30 hr 40 min'}, 'flights': [{'departure_airport': {'airport_name': 'Narita International Airport', 'airport_code': 'NRT', 'time': '2025-9-18 11:15'}, 'arrival_airport': {'airport_name': 'Colombo Bandaranaike International Airport', 'airport_code': 'CMB', 'time': '2025-9-18 17:05'}, 'duration': {'raw': 560, 'text': '9 hr 20 min'}, 'airline': 'SriLankan', 'airline_logo': 'https://www.gstatic.com/flights/airline_logos/70px/UL.png', 'flight_number': 'UL 455', 'aircraft': 'Airbus A330', 'seat': 'Average legroom', 'legroom': '31 in', 'extensions': ['Average legroom (31 in)', 'In-seat power & USB outlets', 'On-demand video', 'Free Wi-Fi', 'Wi-Fi for a fee', 'Emissions estimate: 421 kg CO2e']}, {'departure_airport': {'airport_name': 'Colombo Bandaranaike International Airport', 'airport_code': 'CMB', 'time': '2025-9-19 03:25'}, 'arrival_airport': {'airport_name': 'Paris Charles 

### Check the API rate limit:

In [13]:
def _first_header(response_headers, *names):
    """Return the value of the first header in ``names`` that is present and non-empty."""
    for name in names:
        value = response_headers.get(name)
        if value:
            return value
    return None


def check_api_rate_limit(url, headers, timeout=30):
    """Send one GET request and print the rate-limit headers of the response.

    Both the generic ``X-RateLimit-*`` names and the ``X-RateLimit-Requests-*``
    names are checked; the latter are presumably what RapidAPI sends — confirm
    against its documentation.

    Args:
        url: Endpoint to call.
        headers: Request headers, including the API credentials.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Results are printed. Network errors raised by ``requests``
        propagate to the caller.
    """
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print(f"Request failed with status code: {response.status_code}")
        return

    print("API call succeeded.")
    # Check headers for rate limiting information
    limit = _first_header(
        response.headers, "X-RateLimit-Limit", "X-RateLimit-Requests-Limit"
    )
    remaining = _first_header(
        response.headers, "X-RateLimit-Remaining", "X-RateLimit-Requests-Remaining"
    )
    reset_time = _first_header(
        response.headers, "X-RateLimit-Reset", "X-RateLimit-Requests-Reset"
    )

    if limit and remaining:
        print(f"Total allowed requests: {limit}")
        print(f"Remaining requests: {remaining}")
        if reset_time:
            print(f"Rate limit resets at: {reset_time}")
    else:
        print("Rate limit headers not present in the response.")

# Example Usage
url = "https://google-flights2.p.rapidapi.com/api/v1/searchFlights"
# The key comes from the RAPIDAPI_KEY environment variable so the secret is
# not stored in the notebook.
headers = {"X-RapidAPI-Key": os.environ.get("RAPIDAPI_KEY", ""),
           "X-RapidAPI-Host": "google-flights2.p.rapidapi.com"
          }

if headers["X-RapidAPI-Key"]:
    check_api_rate_limit(url, headers)
else:
    print("Set the RAPIDAPI_KEY environment variable before checking the rate limit.")

API call succeeded.
Rate limit headers not present in the response.
