## Read CSV

In [17]:
import numpy as np
import pandas as pd
import heapq

In [29]:
# Load the route table; the path is relative to the notebook's working directory.
df = pd.read_csv('us_routes_dist.csv')

In [30]:
# Preview the first rows to check which columns the CSV provides.
df.head()

Unnamed: 0.1,Unnamed: 0,airline,airline_id,source,source_id,dest,dest_id,codeshare,stops,equipment,distance
0,172,2O,146,ADQ,3531,KLN,7162,,0,BNI,2.128521
1,177,2O,146,KLN,7162,KYK,7161,,0,BNI,0.603091
2,260,3E,10739,BRL,5726,ORD,3830,,0,CNC,1.153044
3,261,3E,10739,BRL,5726,STL,3678,,0,CNC,1.609324
4,262,3E,10739,DEC,4042,ORD,3830,,0,CNC,11.138693


## Create a graph

In [31]:
# Keep only the columns needed for the route graph: source, dest and distance.
# NOTE(review): this only selects columns; duplicate source-dest pairs are NOT removed here.
graph = df[['source', 'dest', 'distance']]

In [32]:
# Drop routes with missing data. dropna() with no arguments removes a row when ANY of
# its columns (source, dest or distance) is NaN, not just distance.
graph = graph.dropna()
graph

Unnamed: 0,source,dest,distance
0,ADQ,KLN,2.128521
1,KLN,KYK,0.603091
2,BRL,ORD,1.153044
3,BRL,STL,1.609324
4,DEC,ORD,11.138693
...,...,...,...
2189,TUS,ATL,4.136367
2190,TYS,ATL,34.461664
2191,VLD,ATL,10.263855
2192,VPS,ATL,6.441787


In [61]:
# Spot-check: every route in `graph` that departs from ADQ.
graph.loc[graph['source'] == 'ADQ']

Unnamed: 0,source,dest,distance
0,ADQ,KLN,2.128521
99,ADQ,ANC,1.587634
280,ADQ,AKK,16.320888


In [71]:
# NOTE(review): the result of this ORD -> ATL lookup is discarded; a notebook cell only
# displays the value of its last expression.
graph.loc[(graph['source'] == 'ORD') & (graph['dest'] == 'ATL')]
# All routes departing from ORD, printed as plain text.
print(graph.loc[(graph['source'] == 'ORD')])

# All routes departing from BRL, printed as plain text.
print(graph.loc[graph['source'] == 'BRL'])

# Direct ORD -> STL route(s); as the last expression this is the cell's displayed output.
graph.loc[(graph['source'] == 'ORD')&((graph['dest'] == 'STL'))]

     source dest   distance
7       ORD  BRL   0.546768
8       ORD  DEC   0.305825
1390    ORD  ABQ   9.037408
1391    ORD  ALO   9.037408
1392    ORD  ART   0.842480
...     ...  ...        ...
1485    ORD  TVC  11.140467
1486    ORD  TYS  32.384502
1487    ORD  XNA  36.401936
1908    ORD  MSY   4.778303
2152    ORD  ATL  40.090622

[102 rows x 3 columns]
  source dest  distance
2    BRL  ORD  1.153044
3    BRL  STL  1.609324


Unnamed: 0,source,dest,distance
1478,ORD,STL,17.124137


In [62]:
# Adjacency list: source airport -> [(destination airport, distance), ...]
graph_dict = {}

for src, dst, dist in zip(df['source'], df['dest'], df['distance']):
    # A route without a distance cannot be weighted, so it is left out of the graph.
    if not pd.isna(dist):
        graph_dict.setdefault(src, []).append((dst, dist))

KLN 2.128521013470194
UE2
ANC 1.5876342418321463
AKK 16.32088811385432
ANC nan
UE


In [63]:
# Spot-check: the adjacency list built for ADQ, as (destination, distance) tuples.
graph_dict['ADQ']

[('KLN', 2.128521013470194),
 ('ANC', 1.5876342418321463),
 ('AKK', 16.32088811385432)]

## Best First Search

In [75]:
def best_first_search(graph, start, goal, verbose=False):
    """Return the smallest total distance from ``start`` to ``goal``.

    Nodes are expanded in order of their accumulated distance from ``start``
    (the heap priority is the running path cost, i.e. the sum of the edge
    distances followed so far). With non-negative edge distances, the first
    time ``goal`` is popped its cost is therefore the cheapest one.

    Parameters
    ----------
    graph : dict
        Adjacency list mapping a node to a list of ``(neighbor, distance)``
        tuples. Nodes without outgoing edges may be absent from the mapping.
    start : hashable
        Node to start from.
    goal : hashable
        Node to reach.
    verbose : bool, default False
        When True, print a trace of every node popped and every edge examined.

    Returns
    -------
    number or None
        Total distance of the cheapest path found (``0`` when ``start`` equals
        ``goal``), or ``None`` when ``goal`` cannot be reached from ``start``.
    """
    open_list = []  # Min-heap of (distance from start, node)
    heapq.heappush(open_list, (0, start))
    closed_set = set()  # Nodes whose cheapest distance is already settled

    while open_list:
        current_distance, current_node = heapq.heappop(open_list)
        if verbose:
            print("current_distance, current_node")
            print(current_distance, current_node)

        if current_node in closed_set:
            # Stale heap entry: this node was already settled via a cheaper path.
            if verbose:
                print("current_node in closed_set")
            continue

        if current_node == goal:
            if verbose:
                print("current_node == goal")
            return current_distance

        closed_set.add(current_node)

        if current_node in graph:
            # Read the edges from the `graph` argument, not from a notebook-level
            # global, so the function works on whatever graph the caller passes.
            for neighbor, edge_distance in graph[current_node]:
                if verbose:
                    print("neighbor, edge_distance")
                    print(neighbor, edge_distance)

                if neighbor not in closed_set:
                    if verbose:
                        print("neighbor not in closed_set")
                    heapq.heappush(open_list, (current_distance + edge_distance, neighbor))

    return None  # No path found

In [None]:
# Example: look up the distance from ORD to STL over the route graph
start_airport = "ORD"
end_airport = "STL"

distance = best_first_search(graph_dict, start_airport, end_airport)

if distance is None:
    print(f"No path found between {start_airport} and {end_airport}")
else:
    print(f"Distance between {start_airport} and {end_airport}: {distance} units")