In [2]:
from collections import deque
import numpy as np
import pandas as pd
from data_wrangling import df_possible_flights
from pathlib import Path

In [3]:
def distance_calc(lat1, lon1, lat2, lon2, earth_radius=6371.0):
    """Great-circle distance between two coordinates via the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in degrees.
    earth_radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6371.0 (Earth radius in kilometers).

    Returns
    -------
    numpy scalar or array-like
        The distance rounded to 2 decimal places.
    """
    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # Compute differences in coordinates
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula for distance calculation
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2

    # Floating-point rounding can leave `a` a hair above 1 for (near-)antipodal
    # points, which would make arcsin return NaN. Cap it at 1; np.minimum still
    # propagates a NaN that comes from genuinely invalid input.
    a = np.minimum(a, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    # Calculate the distance
    return np.round(earth_radius * c, 2)

In [15]:
def create_matrix(df, airports, source_codes=("JFK", "LGA"), sink_codes=("SFO",)):
    """Build a dense airport-by-airport adjacency matrix from a flights table.

    Cell ``matrix[i][j]`` describes the route ``airports[i] -> airports[j]`` as a
    dict with the keys:

    * ``'distance'``     - value returned by ``distance_calc`` for the route's
      coordinates (0 if no row of ``df`` mentions the route).
    * ``'flights'``      - dict keyed by ``row["Airline_Name"]`` whose values are
      ``{'Carrier': row["Carrier"], 'Capacity': row["Passengers"]}``; a later
      row with the same airline name on the same route replaces the earlier one.
    * ``'SrcTimezone'`` / ``'DestTimezone'`` - taken from the first row of the
      route whose value is truthy ("" if none).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns "Source Airport", "Destination Airport",
        "Src Latitude", "Src Longitude", "Dest Latitude", "Dest Longitude",
        "Carrier", "Passengers", "Airline_Name", "Src Timezone Name" and
        "Dest Timezone Name".
    airports : sequence
        Airport codes; a code's position is its row/column index in the matrix.
    source_codes, sink_codes : iterable of codes, optional
        Airports whose indices are returned as the source and sink lists.
        Default to the previously hard-coded JFK/LGA and SFO.

    Returns
    -------
    (adjacency_matrix, source, sink)
        The nested-list matrix and two lists of integer indices into ``airports``.

    Raises
    ------
    ValueError
        If a code from ``df``, ``source_codes`` or ``sink_codes`` is not in
        ``airports``.
    """
    # NOTE(review): this notebook contains a second `def create_matrix` further
    # down with a different cell schema; on a top-to-bottom run that later
    # definition replaces this one. Confirm which version downstream cells expect.
    num_airports = len(airports)

    # Map each code to its first position once, instead of scanning the list
    # with airports.index() twice for every row.
    position = {}
    for idx, code in enumerate(airports):
        position.setdefault(code, idx)

    def _index_of(code):
        try:
            return position[code]
        except (KeyError, TypeError):
            # Fall back to list.index so an unknown code raises the same
            # ValueError callers saw before.
            return airports.index(code)

    # Create an adjacency matrix with one independent dict per cell
    adjacency_matrix = [[{
        'distance': 0,
        'flights': {},
        'SrcTimezone': "",
        'DestTimezone': ""
    } for _ in range(num_airports)] for _ in range(num_airports)]

    # Fill the adjacency matrix based on the flights
    for _, row in df.iterrows():
        source_index = _index_of(row["Source Airport"])
        dest_index = _index_of(row["Destination Airport"])
        cell = adjacency_matrix[source_index][dest_index]

        cell['distance'] = distance_calc(
            row["Src Latitude"], row["Src Longitude"],
            row["Dest Latitude"], row["Dest Longitude"],
        )
        cell['flights'][row["Airline_Name"]] = {
            'Carrier': row["Carrier"],
            'Capacity': row["Passengers"]
        }

        # Set timezone info only if it hasn't been set yet
        if not cell['SrcTimezone']:
            cell['SrcTimezone'] = row["Src Timezone Name"]
        if not cell['DestTimezone']:
            cell['DestTimezone'] = row["Dest Timezone Name"]

    source = [_index_of(code) for code in source_codes]
    sink = [_index_of(code) for code in sink_codes]

    return adjacency_matrix, source, sink


In [9]:
def create_matrix(df, airports, source_codes=("JFK", "LGA"), sink_codes=("SFO",)):
    """Build a dense airport-by-airport adjacency matrix with list-valued cells.

    Cell ``matrix[i][j]`` describes the route ``airports[i] -> airports[j]`` as a
    dict with the keys:

    * ``'distance'`` - value returned by ``distance_calc`` for the most recent
      row of the route (0 if no row of ``df`` mentions the route).
    * ``'Capacity'``, ``'Carrier'``, ``'Airline'``, ``'SrcTimezone'``,
      ``'DestTimezone'`` - lists that receive one entry per row of the route,
      taken from the columns "Passengers", "Carrier", "Airline_Name",
      "Src Timezone Name" and "Dest Timezone Name" respectively.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns named above plus "Source Airport",
        "Destination Airport", "Src Latitude", "Src Longitude",
        "Dest Latitude" and "Dest Longitude".
    airports : sequence
        Airport codes; a code's position is its row/column index in the matrix.
    source_codes, sink_codes : iterable of codes, optional
        Airports whose indices are returned as the source and sink lists.
        Default to the previously hard-coded JFK/LGA and SFO.

    Returns
    -------
    (adjacency_matrix, source, sink)
        The nested-list matrix and two lists of integer indices into ``airports``.

    Raises
    ------
    ValueError
        If a code from ``df``, ``source_codes`` or ``sink_codes`` is not in
        ``airports``.
    """
    # NOTE(review): an earlier cell of this notebook also defines
    # `create_matrix` (cells hold a 'flights' dict instead of parallel lists).
    # On a top-to-bottom run this definition shadows that one, whereas the
    # recorded execution counts suggest the other version ran last. Confirm
    # which schema the downstream CSV export expects and drop the other.
    num_airports = len(airports)

    # Map each code to its first position once, instead of scanning the list
    # with airports.index() twice for every row.
    position = {}
    for idx, code in enumerate(airports):
        position.setdefault(code, idx)

    def _index_of(code):
        try:
            return position[code]
        except (KeyError, TypeError):
            # Fall back to list.index so an unknown code raises the same
            # ValueError callers saw before.
            return airports.index(code)

    # Create an adjacency matrix with one independent dict per cell
    adjacency_matrix = [[{'distance': 0,
                          'Capacity': [],
                          'Carrier': [],
                          'Airline': [],
                          'SrcTimezone': [],
                          'DestTimezone': []} for _ in range(num_airports)] for _ in range(num_airports)]

    # Fill the adjacency matrix based on the flights
    for _, row in df.iterrows():
        source_index = _index_of(row["Source Airport"])
        dest_index = _index_of(row["Destination Airport"])
        cell = adjacency_matrix[source_index][dest_index]

        cell['distance'] = distance_calc(
            row["Src Latitude"], row["Src Longitude"],
            row["Dest Latitude"], row["Dest Longitude"],
        )
        cell['Capacity'].append(row["Passengers"])
        cell['Carrier'].append(row["Carrier"])
        cell['Airline'].append(row["Airline_Name"])
        cell['SrcTimezone'].append(row["Src Timezone Name"])
        cell['DestTimezone'].append(row["Dest Timezone Name"])

    source = [_index_of(code) for code in source_codes]
    sink = [_index_of(code) for code in sink_codes]

    return adjacency_matrix, source, sink

In [16]:
# Airport universe: every code that appears as a source or as a destination,
# sorted so that matrix indices are deterministic.
df = df_possible_flights
airports = sorted(set(df["Source Airport"]) | set(df["Destination Airport"]))

# Route matrix plus the index lists of the source and sink airports.
adjacency_matrix, sources, sinks = create_matrix(df, airports)

# Label both axes with the airport codes.
adjacency_df = pd.DataFrame(adjacency_matrix, index=airports, columns=airports)

# NOTE(review): absolute, machine-specific output path - consider deriving it
# from a configurable base directory so the notebook runs on other machines.
csv_file_path = "/Users/yuhanburgess/Documents/GitHub/AGP2/csv_files/merged_matrix.csv"
adjacency_df.to_csv(csv_file_path, header=True, index=True)