In [1]:
from collections import deque
import numpy as np
import pandas as pd
from total_df import df_full
from data_wrangling import df_possible_flights
from pathlib import Path

In [2]:
# TEST: look at all direct flights
direct_df = pd.DataFrame(df_possible_flights[df_possible_flights["Src City"].str.contains("New York") &
                              df_possible_flights["Dest City"].str.contains("San Francisco")])

# Display the result
print(direct_df)

       Airline Airline ID Source Airport Source Airport ID  \
11248       B6       3029            JFK              3797   
16664       DL       2009            JFK              3797   
49632       UA       5209            JFK              3797   
53678       VX       5331            JFK              3797   
61239       AA         24            JFK              3797   
66317       AS        439            JFK              3797   
66318       AS        439            JFK              3797   
101189      US       5265            JFK              3797   

       Destination Airport Destination Airport ID Equipment  \
11248                  SFO                   3469       320   
16664                  SFO                   3469       752   
49632                  SFO                   3469       757   
53678                  SFO                   3469       320   
61239                  SFO                   3469       763   
66317                  SFO                   3469       321   


In [3]:
direct_df = pd.DataFrame(df_full[df_full["Src City"].str.contains("Orlando") &
                              df_full["Dest City"].str.contains("Seattle")])

# Display the result
print(direct_df)

      Airline Airline ID Source Airport Source Airport ID Destination Airport  \
5354       AA         24            MCO              3878                 SEA   
66379      AS        439            MCO              3878                 SEA   
66380      AS        439            MCO              3878                 SEA   

      Destination Airport ID Equipment                       Src Name  \
5354                    3577       737  Orlando International Airport   
66379                   3577       73H  Orlando International Airport   
66380                   3577       73J  Orlando International Airport   

      Src City    Src Country  ... Dest Timezone  Dest DST  \
5354   Orlando  United States  ...            -8         A   
66379  Orlando  United States  ...            -8         A   
66380  Orlando  United States  ...            -8         A   

        Dest Timezone Name            Carrier  Callsign Airline_Name  \
5354   America/Los_Angeles  American Airlines  AMERICAN   Boe

In [4]:
# utilize the Haversine equation to find the distance between two coordinates
def distance_calc(lat1, lon1, lat2, lon2):
    # Earth radius in kilometers
    earth_radius = 6371.0

    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    
    # Compute differences in coordinates
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    
    # Haversine formula for distance calculation
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a))
    
    # Calculate the distance
    distance = earth_radius * c
    
    return np.round(distance, 2)

In [5]:
# create the adjacency matrix that contains pertainate information to answer the two questions
# 1. what is the max flow from any two given cities?
# 2. which carrier has the max load?

def create_matrix(df, airports):
    num_airports = len(airports)

    # Create an adjacency matrix with dictionaries
    adjacency_matrix = [[{
        'distance': 0, # distance between the two cities and will be used to determine total distance travelled
        'route_max_capacity': 0,  # Added max_capacity field
        'aircraft_max_capacity': 0, # looking at an the max capacity of the largest aircraft(s)
        'flights': {}, # contains information about the airline and aircraft that carriers the max_capcity of passengers

        # helpful for later studies if we we want to see if passengers will be gaining or losing time 
        'SrcTimezone': "", 
        'DestTimezone': ""
    } for _ in range(num_airports)] for _ in range(num_airports)]

    # Fill the adjacency matrix based on the flights
    for _, row in df.iterrows():
        # identifying the row and column 
        source_index = airports.index(row["Source Airport"])
        dest_index = airports.index(row["Destination Airport"])

        # calculating the distance between the two airports
        lat1, lon1 = row["Src Latitude"], row["Src Longitude"]
        lat2, lon2 = row["Dest Latitude"], row["Dest Longitude"]
        distance = distance_calc(lat1, lon1, lat2, lon2)

        # flight info is a diction that contians infomration about the carrier's aircraft and its flight 
        # capcity
        flight_info = {
            'Aircraft': row["Airline_Name"],
            'Flight Capacity': row["Passengers"]
        }

        # setting distance to what is outputted from the distance_calc function
        adjacency_matrix[source_index][dest_index]['distance'] = distance

        # setting route_max_capacity to the sum of all aircraft's passenger capacity
        adjacency_matrix[source_index][dest_index]['route_max_capacity'] += row["Passengers"]

        # Update maximum capacity aircraft for the route
        current_capacity = adjacency_matrix[source_index][dest_index]['aircraft_max_capacity']

        # checking to see if there is an aircraft with a higher passenger capacity
        if row["Passengers"] > current_capacity:
            # If the current flight has higher capacity, update max_capacity and clear existing flights
            adjacency_matrix[source_index][dest_index]['flights'] = {row["Carrier"]: flight_info}
            adjacency_matrix[source_index][dest_index]['aircraft_max_capacity'] = row['Passengers']

        # adds a new entry to the list if there are mutiple planes with the same max capacity
        elif row["Passengers"] == current_capacity:
            adjacency_matrix[source_index][dest_index]['flights'][row["Carrier"]] = flight_info
        
        # Set timezone info only if it hasn't been set yet since timezone
        if not adjacency_matrix[source_index][dest_index]['SrcTimezone']:
            adjacency_matrix[source_index][dest_index]['SrcTimezone'] = row["Src Timezone Name"]

        if not adjacency_matrix[source_index][dest_index]['DestTimezone']:
            adjacency_matrix[source_index][dest_index]['DestTimezone'] = row["Dest Timezone Name"]


    # hard coded for this instance but will be based on user input
    # the is identifying what the start node is (source) and end not (sink)
    source = [airports.index('JFK'), airports.index('LGA')]
    sink = [airports.index('SFO')]

    return adjacency_matrix, source, sink


In [6]:
def ford_fulkerson(graph, sources, terminals, airports):
    def residual_capacity(u, v):
        route_capacity = graph[u][v]['route_max_capacity'] - flow[u][v]
        aircraft_capacity = graph[u][v]['aircraft_max_capacity'] - flow[u][v]
        return route_capacity, aircraft_capacity

    def augment(path):
        # this will allow us to print the flights_info
        flights_and_capacity = []

        # total distance is initized as 0 for all flights 
        total_distance = 0

        #zip(path, path[1:]) helps when it looks something like [A, B, C] because it breakes it apart into
        # [A,B] and [B,C] in which we can do the calculations associateed with each leg of the flight separately
        #https://realpython.com/python-zip-function/
        route_min_capacity, aircraft_min_capacity = zip(*[residual_capacity(u, v) for u, v in zip(path, path[1:])])
            
        # go through and adjust the flow based on the leg of the flight
        # route_min_capcaity: contains the minimum route capacities for each segment of the path
        # flight_min_capacity: contains the minimum flight capacities for each segment of the path
        for u, v, route_min_cap, aircraft_min_cap in zip(path, path[1:], route_min_capacity, aircraft_min_capacity):
            edge_info = graph[u][v] # getting information about the edge between the two nodes
            total_distance += edge_info['distance'] # adding to distnace 

            # we want to change the flow for the max passenger capcity for the route and the aircraft repectively
            flow[u][v] += route_min_cap # heading towards sink (update the augmenting path)
            flow[v][u] -= route_min_cap # heading back towards source (update flow on residual edges)
            flow[u][v] += aircraft_min_cap # heading towards sink (update the augmenting path)
            flow[v][u] -= aircraft_min_cap # heading back towards source (update flow on residual edges)

        # we add the information form the flights info into the array
        for u, v in zip(path, path[1:]):
            flights_and_capacity.extend(graph[u][v]['flights'].items())

        return min(route_min_capacity), min(aircraft_min_capacity), total_distance, flights_and_capacity

    # the BFS utilizes queues to decrease the runtime 
    def bfs():
        # intializing an array that flags if a node has been visited
        # similar to the color system, but using True/False instead
        visited = [False] * len(graph) # getting the first source in the list
        queue = deque(sources)

        for source in sources:
            visited[source] = True

        # setting the sources as parents initially
        parent = {source: None for source in sources}

        # while there is a source in the queue, find flights
        # that connect from source to terminal
        while queue:
            # getting the first element in the queue
            u = queue.popleft()
            # go through the airports that are connected to the parent
            for v in range(len(graph)):
                route, aircraft = residual_capacity(u, v)
                # if it hasn't been visited and it can be augmented
                if not visited[v] and (route > 0 and aircraft > 0):
                    # add it to the queue
                    queue.append(v)
                    visited[v] = True
                    # set the u the parent of v
                    parent[v] = u

                    # if v is a terminal then we will create a path for the flight
                    if v in terminals:
                        path = []
                        # adding cities into the path
                        while v is not None:
                            path.insert(0, v)
                            v = parent[v]
                        return path

        return None
    # Initialize flow to zero
    flow = np.zeros_like(graph, dtype=int)

    route_total_flow = 0
    flight_total_flow = 0

    # Ford-Fulkerson algorithm part of the code 
    while True:

        augmenting_path = bfs()
        if augmenting_path is None:
            break
        route_min_capacity, aircraft_min_capcity, total_distance, flights_and_capacity = augment(augmenting_path)
        route_total_flow += route_min_capacity# updating the total route flow
        flight_total_flow += aircraft_min_capcity # updating the total flight flow
        # printing out reach route
        print(f"Flight Route: {[airports[node] for node in augmenting_path]}, "
              f"Max Passenger Capacity: {route_min_capacity}, "
              f"Max Flight Capacity: {aircraft_min_capcity}, "
              f"Total Distance: {round(total_distance, 2)}km, "
              f"Flights: {flights_and_capacity}")

    # Print the visited airports in the final path
    final_path = bfs()
    if final_path is not None:
        print("Final Path:", " --> ".join(airports[node] for node in final_path))

    print("Max Route Flow:", route_total_flow, "  Max Flight Flow:", flight_total_flow)

In [7]:
df = df_possible_flights
airports = sorted(set(df["Source Airport"]).union(set(df["Destination Airport"])))
adjacency_matrix, sources, sinks = create_matrix(df, airports)
adjacency_df = pd.DataFrame(adjacency_matrix, columns=airports, index=airports)

ford_fulkerson(adjacency_matrix, sources, sinks, airports)

# csv_file_path = "/Users/yuhanburgess/Documents/GitHub/AGP2/csv_files/Flow_matrix.csv"
# adjacency_df.to_csv(csv_file_path, index=True, header=True) 

Flight Route: ['JFK', 'SFO'], Max Passenger Capacity: 2018.0, Max Flight Capacity: 351.0, Total Distance: 4151.79km, Flights: [('American Airlines', {'Aircraft': 'Boeing 767-300', 'Flight Capacity': 351.0}), ('Alaska Airlines', {'Aircraft': 'Boeing 767-300', 'Flight Capacity': 351.0}), ('US Airways', {'Aircraft': 'Boeing 767-300', 'Flight Capacity': 351.0})]
Flight Route: ['JFK', 'AMS', 'SFO'], Max Passenger Capacity: 934.0, Max Flight Capacity: 467.0, Total Distance: 14633.04km, Flights: [('KLM Royal Dutch Airlines', {'Aircraft': 'Boeing 747', 'Flight Capacity': 467.0}), ('Delta Air Lines', {'Aircraft': 'Boeing 747', 'Flight Capacity': 467.0}), ('KLM Royal Dutch Airlines', {'Aircraft': 'Boeing 747', 'Flight Capacity': 467.0})]
Flight Route: ['JFK', 'ATL', 'SFO'], Max Passenger Capacity: 4198.0, Max Flight Capacity: 295.0, Total Distance: 4656.81km, Flights: [('AeroMéxico', {'Aircraft': 'Boeing 757', 'Flight Capacity': 295.0}), ('Delta Air Lines', {'Aircraft': 'Boeing 757', 'Flight Cap

In [29]:
# create the adjacency matrix that contains pertainate information to answer the two questions
# 1. what is the max flow from any two given cities?
# 2. which carrier has the max load?

def create_matrix(df, airports):
    num_airports = len(airports)

    # Create an adjacency matrix with dictionaries
    adjacency_matrix = [[{
        'distance': 0, # distance between the two cities and will be used to determine total distance travelled
        'route_max_capacity': 0,  # Added max_capacity field
        'aircraft_max_capacity': 0, # looking at an the max capacity of the largest aircraft(s)
        'flights': {}, # contains information about the airline and aircraft that carriers the max_capcity of passengers

        # helpful for later studies if we we want to see if passengers will be gaining or losing time 
        'SrcTimezone': "", 
        'DestTimezone': ""
    } for _ in range(num_airports)] for _ in range(num_airports)]

    # Fill the adjacency matrix based on the flights
    for _, row in df.iterrows():
        # identifying the row and column 
        source_index = airports.index(row["Source Airport"])
        dest_index = airports.index(row["Destination Airport"])

        # calculating the distance between the two airports
        lat1, lon1 = row["Src Latitude"], row["Src Longitude"]
        lat2, lon2 = row["Dest Latitude"], row["Dest Longitude"]
        distance = distance_calc(lat1, lon1, lat2, lon2)

        # flight info is a diction that contians infomration about the carrier's aircraft and its flight 
        # capcity
        flight_info = {
            'Aircraft': row["Airline_Name"],
            'Flight Capacity': row["Passengers"]
        }

        # setting distance to what is outputted from the distance_calc function
        adjacency_matrix[source_index][dest_index]['distance'] = distance

        # setting route_max_capacity to the sum of all aircraft's passenger capacity
        adjacency_matrix[source_index][dest_index]['route_max_capacity'] += row["Passengers"]

        # Update maximum capacity aircraft for the route
        current_capacity = adjacency_matrix[source_index][dest_index]['aircraft_max_capacity']

        # checking to see if there is an aircraft with a higher passenger capacity
        if row["Passengers"] > current_capacity:
            # If the current flight has higher capacity, update max_capacity and clear existing flights
            adjacency_matrix[source_index][dest_index]['flights'] = {row["Carrier"]: flight_info}
            adjacency_matrix[source_index][dest_index]['aircraft_max_capacity'] = row['Passengers']

        # adds a new entry to the list if there are mutiple planes with the same max capacity
        elif row["Passengers"] == current_capacity:
            adjacency_matrix[source_index][dest_index]['flights'][row["Carrier"]] = flight_info
        
        # Set timezone info only if it hasn't been set yet since timezone
        if not adjacency_matrix[source_index][dest_index]['SrcTimezone']:
            adjacency_matrix[source_index][dest_index]['SrcTimezone'] = row["Src Timezone Name"]

        if not adjacency_matrix[source_index][dest_index]['DestTimezone']:
            adjacency_matrix[source_index][dest_index]['DestTimezone'] = row["Dest Timezone Name"]


    # hard coded for this instance but will be based on user input
    # the is identifying what the start node is (source) and end not (sink)
    source = [airports.index('MCO')]
    sink = [airports.index('SEA')]

    return adjacency_matrix, source, sink


In [30]:
df = pd.DataFrame(df_full[df_full["Src City"].str.contains("Orlando") |
                              df_full["Dest City"].str.contains("Seattle")])

airports = sorted(set(df["Source Airport"]).union(set(df["Destination Airport"])))
adjacency_matrix, sources, sinks = create_matrix(df, airports)
adjacency_df = pd.DataFrame(adjacency_matrix, columns=airports, index=airports)

ford_fulkerson(adjacency_matrix, sources, sinks, airports)

# csv_file_path = "/Users/yuhanburgess/Documents/GitHub/AGP2/csv_files/Flow_matrix.csv"
# adjacency_df.to_csv(csv_file_path, index=True, header=True) 

Flight Route: ['MCO', 'DTW', 'SEA'], Max Passenger Capacity: 702.0, Max Flight Capacity: 295.0, Total Distance: 4636.66km, Flights: [('Delta Air Lines', {'Aircraft': 'Boeing 757', 'Flight Capacity': 295.0}), ('Delta Air Lines', {'Aircraft': 'Boeing 757', 'Flight Capacity': 295.0})]
Flight Route: ['MCO', 'FRA', 'SEA'], Max Passenger Capacity: 1980.0, Max Flight Capacity: 293.0, Total Distance: 15817.29km, Flights: [('Lufthansa', {'Aircraft': 'Boeing 747-400', 'Flight Capacity': 660.0}), ('Lufthansa Cargo', {'Aircraft': 'Boeing 747-400', 'Flight Capacity': 660.0}), ('United Airlines', {'Aircraft': 'Boeing 747-400', 'Flight Capacity': 660.0}), ('Lufthansa', {'Aircraft': 'Airbus A330-300', 'Flight Capacity': 293.0}), ('Lufthansa Cargo', {'Aircraft': 'Airbus A330-300', 'Flight Capacity': 293.0}), ('United Airlines', {'Aircraft': 'Airbus A330-300', 'Flight Capacity': 293.0})]
Flight Route: ['MCO', 'IAD', 'SEA'], Max Passenger Capacity: 480.0, Max Flight Capacity: 162.0, Total Distance: 4923.

In [8]:
# create the adjacency matrix that contains pertainate information to answer the two questions
# 1. what is the max flow from any two given cities?
# 2. which carrier has the max load?

def create_matrix(df, airports):
    num_airports = len(airports)

    # Create an adjacency matrix with dictionaries
    adjacency_matrix = [[{
        'distance': 0, # distance between the two cities and will be used to determine total distance travelled
        'route_max_capacity': 0,  # Added max_capacity field
        'aircraft_max_capacity': 0, # looking at an the max capacity of the largest aircraft(s)
        'flights': {}, # contains information about the airline and aircraft that carriers the max_capcity of passengers

        # helpful for later studies if we we want to see if passengers will be gaining or losing time 
        'SrcTimezone': "", 
        'DestTimezone': ""
    } for _ in range(num_airports)] for _ in range(num_airports)]

    # Fill the adjacency matrix based on the flights
    for _, row in df.iterrows():
        # identifying the row and column 
        source_index = airports.index(row["Source Airport"])
        dest_index = airports.index(row["Destination Airport"])

        # calculating the distance between the two airports
        lat1, lon1 = row["Src Latitude"], row["Src Longitude"]
        lat2, lon2 = row["Dest Latitude"], row["Dest Longitude"]
        distance = distance_calc(lat1, lon1, lat2, lon2)

        # flight info is a diction that contians infomration about the carrier's aircraft and its flight 
        # capcity
        flight_info = {
            'Aircraft': row["Airline_Name"],
            'Flight Capacity': row["Passengers"]
        }

        # setting distance to what is outputted from the distance_calc function
        adjacency_matrix[source_index][dest_index]['distance'] = distance

        # setting route_max_capacity to the sum of all aircraft's passenger capacity
        adjacency_matrix[source_index][dest_index]['route_max_capacity'] += row["Passengers"]

        # Update maximum capacity aircraft for the route
        current_capacity = adjacency_matrix[source_index][dest_index]['aircraft_max_capacity']

        # checking to see if there is an aircraft with a higher passenger capacity
        if row["Passengers"] > current_capacity:
            # If the current flight has higher capacity, update max_capacity and clear existing flights
            adjacency_matrix[source_index][dest_index]['flights'] = {row["Carrier"]: flight_info}
            adjacency_matrix[source_index][dest_index]['aircraft_max_capacity'] = row['Passengers']

        # adds a new entry to the list if there are mutiple planes with the same max capacity
        elif row["Passengers"] == current_capacity:
            adjacency_matrix[source_index][dest_index]['flights'][row["Carrier"]] = flight_info
        
        # Set timezone info only if it hasn't been set yet since timezone
        if not adjacency_matrix[source_index][dest_index]['SrcTimezone']:
            adjacency_matrix[source_index][dest_index]['SrcTimezone'] = row["Src Timezone Name"]

        if not adjacency_matrix[source_index][dest_index]['DestTimezone']:
            adjacency_matrix[source_index][dest_index]['DestTimezone'] = row["Dest Timezone Name"]


    # hard coded for this instance but will be based on user input
    # the is identifying what the start node is (source) and end not (sink)
    source = [airports.index('JFK'),airports.index('LGA')]
    sink = [airports.index('SEA')]

    return adjacency_matrix, source, sink


In [9]:
df = pd.DataFrame(df_full[df_full["Src City"].str.contains("Orlando") |
                              df_full["Dest City"].str.contains("Seattle")])

airports = sorted(set(df["Source Airport"]).union(set(df["Destination Airport"])))
adjacency_matrix, sources, sinks = create_matrix(df, airports)
adjacency_df = pd.DataFrame(adjacency_matrix, columns=airports, index=airports)

ford_fulkerson(adjacency_matrix, sources, sinks, airports)

# csv_file_path = "/Users/yuhanburgess/Documents/GitHub/AGP2/csv_files/Flow_matrix.csv"
# adjacency_df.to_csv(csv_file_path, index=True, header=True) 

Flight Route: ['JFK', 'SEA'], Max Passenger Capacity: 1095.0, Max Flight Capacity: 295.0, Total Distance: 3886.64km, Flights: [('Delta Air Lines', {'Aircraft': 'Boeing 757', 'Flight Capacity': 295.0})]
Max Route Flow: 1095.0   Max Flight Flow: 295.0
