<a href="https://colab.research.google.com/github/SSenitha/CCS3052_Advance_DSA/blob/Sandaru-s-Branch/aStar_path.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import Dataset

In [58]:
# Load data from the repository
!wget https://raw.githubusercontent.com/SSenitha/CCS3052_Advance_DSA/refs/heads/main/Cities_of_SriLanka.csv


'wget' is not recognized as an internal or external command,
operable program or batch file.


In [59]:
#Import necessary libraries

import math, time, heapq
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import stats
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

from collections import defaultdict
from sklearn.neighbors import BallTree

In [60]:
# Read the dataset and output the total count
#df = pd.read_csv('/content/Cities_of_SriLanka.csv')

#----------------For VS Code----------------
df = pd.read_csv('./Cities_of_SriLanka.csv')
#-------------------------------------------

print(f"Total rows count: {df['city id'].count()}")
df.rename(columns={"city id": "city_id"}, inplace=True)
df.head()

Total rows count: 2155


Unnamed: 0,city_id,district_id,name_en,name_si,name_ta,sub_name_en,sub_name_si,sub_name_ta,postcode,latitude,longitude
0,1,1,Akkaraipattu,අක්කරපත්තුව,அக்கரைப்பற்று,,,,32400.0,7.218428,81.854116
1,2,1,Ambagahawatta,අඹගහවත්ත,அம்பகஹவத்த,,,,90326.0,7.301756,81.674729
2,3,1,Ampara,අම්පාර,அம்பாறை,,,,32000.0,7.301756,81.674729
3,4,1,Bakmitiyawa,බක්මිටියාව,பக்மிடியாவ,,,,32024.0,7.029632,81.680205
4,5,1,Deegawapiya,දීඝවාපිය,தீகவாபி,,,,32006.0,7.301756,81.674729


In [61]:
#Remove the Cities with little to no geographical difference

df = df.drop_duplicates(subset=['latitude', 'longitude'], keep='first').reset_index(drop=True)
print(f"Total rows count after dropping duplicates: {df['city_id'].count()}")

Total rows count after dropping duplicates: 1919


In [62]:
#Dropping the columns that won't be used in the making of graph/Matrix
df = df.drop(columns=["district_id","name_si","name_ta","sub_name_en","sub_name_si","sub_name_ta","postcode"])

In [63]:
def resetIndex():
  df.drop(columns=["city_id"], inplace=True)
  df.insert(0, "city_id", df.index)
  return df

In [64]:
resetIndex()
df.head()

Unnamed: 0,city_id,name_en,latitude,longitude
0,0,Akkaraipattu,7.218428,81.854116
1,1,Ambagahawatta,7.301756,81.674729
2,2,Bakmitiyawa,7.029632,81.680205
3,3,Digamadulla Weeragoda,7.390125,81.696588
4,4,Dorakumbura,7.35887,81.301428


#Adjacency/Sparse Matrix Approach

In [65]:
def dist(lat1, lon1, lat2, lon2):
    #Haversine formula for spherical distances
    R = 6371.0088

    phi1 = math.radians(lat1); phi2 = math.radians(lat2)
    dphi = math.radians(lat2 - lat1); dlambda = math.radians(lon2 - lon1)

    a = math.sin(dphi/2.0)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda/2.0)**2
    a = min(1.0, max(0.0, a))
    return 2 * R * math.asin(math.sqrt(a))

In [66]:
def dist2km(distance):
    R = 6371.0088

    # Calculate spherical distance
    a = math.asin(distance/(2*R))
    c = R * 2 * math.asin(math.sqrt(a))

    return c

In [67]:
num_cities = len(df)
k = 6

# Fit the model on the geographical coordinates
X = df[['latitude', 'longitude']]
nn = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(X)

# Find the 5 nearest neighbors
distances, indices = nn.kneighbors(X)

rows, cols, data = [], [], []
seen_edges = set()

for i in range(num_cities):
    for j in range(1, k):
        n_index = indices[i, j]

        #check if updated before
        edge = frozenset((i, n_index))
        if edge in seen_edges:
            continue
        seen_edges.add(edge)

        #If not calc distance
        n_distance = distances[i, j]

        #Add data in both ways to make it undirected
        rows.append(i); cols.append(n_index); data.append(n_distance)
        rows.append(n_index); cols.append(i); data.append(n_distance)

sparse_matrix = csr_matrix((data, (rows, cols)), shape=(num_cities, num_cities))

print("Shape of the sparse adjacency matrix:", sparse_matrix.shape)

Shape of the sparse adjacency matrix: (1919, 1919)


In [68]:
print("\nFirst 10 rows and their non-zero entries (neighbors and distances):")

for i in range(min(10, num_cities)):
    row_data = sparse_matrix.getrow(i)
    print(f"Row {i}: {list(zip(row_data.indices, row_data.data))}")


First 10 rows and their non-zero entries (neighbors and distances):
Row 0: [(np.int32(19), np.float64(0.10280801203432574)), (np.int32(20), np.float64(0.07630243464241533)), (np.int32(32), np.float64(0.09008373774760883)), (np.int32(33), np.float64(0.10312854912632105)), (np.int32(237), np.float64(0.025839241107277235)), (np.int32(256), np.float64(0.019672846708092478))]
Row 1: [(np.int32(3), np.float64(0.0910322710384622)), (np.int32(15), np.float64(0.09149139670707118)), (np.int32(24), np.float64(0.07909985753767391)), (np.int32(34), np.float64(0.07288142004886898)), (np.int32(857), np.float64(0.07483869999999992))]
Row 2: [(np.int32(6), np.float64(0.10433288545808693)), (np.int32(13), np.float64(0.18960060676363566)), (np.int32(14), np.float64(0.16315676726547387)), (np.int32(35), np.float64(0.14410087188900797)), (np.int32(736), np.float64(0.15465547323990886)), (np.int32(1287), np.float64(0.14722019989906115))]
Row 3: [(np.int32(1), np.float64(0.0910322710384622)), (np.int32(5), 

#A* Algorithm for shortest path

In [69]:
def dist_index(index1, index2):
    lat1 = df.loc[index1, 'latitude']
    lon1 = df.loc[index1, 'longitude']
    lat2 = df.loc[index2, 'latitude']
    lon2 = df.loc[index2, 'longitude']

    return dist(lat1, lon1, lat2, lon2)

In [70]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0088  # Earth's radius in km
    phi1 = math.radians(lat1); phi2 = math.radians(lat2)
    dphi = math.radians(lat2 - lat1); dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi/2.0)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda/2.0)**2
    a = min(1.0, max(0.0, a))  # safety
    return 2 * R * math.asin(math.sqrt(a))

In [71]:
def get_neighbors(city_index):
    row_data = sparse_matrix.getrow(city_index)
    return list(row_data.indices)

#print(get_neighbors(0))

In [72]:
def astar(start_index, end_index, sparse_matrix):

    open_set = []
    heapq.heappush(open_set, (0, 0, start_index, [start_index]))

    g_costs = {start_index: 0}
    came_from = {}

    while open_set:
        #pop the top heap element and assign tuple to variables
        f_cost, current_distance, current_index, path = heapq.heappop(open_set)

        #Check if reached
        if current_index == end_index:
            return path, current_distance

        neighbors = get_neighbors(current_index)

        for neighbor_index in neighbors:
            #goal cost = current node cost + current node to neighbor cost
            #goal cost = current node codt + sparse matrix[row,column]
            tentative_g_cost = current_distance + sparse_matrix[current_index, neighbor_index]

            if neighbor_index not in g_costs or tentative_g_cost < g_costs[neighbor_index]:
                #add g_cost to the node position at g_cost tuple - to check if explored
                g_costs[neighbor_index] = tentative_g_cost

                #h-cost (heuristic cost)
                heuristic = dist_index(neighbor_index, end_index)
                #f-cost = steps  + travelled distance
                #f-cost = h_cost + g_cost
                f_cost = tentative_g_cost + heuristic
                #add origin to the neighbour index
                came_from[neighbor_index] = current_index
                heapq.heappush(open_set, (f_cost, tentative_g_cost, neighbor_index, path + [neighbor_index]))

    return None, None


In [73]:
def a_star(sparse_matrix, start, goal, df):
    def heuristic(n1, n2):
        return haversine(df.loc[n1,'latitude'], df.loc[n1,'longitude'],
                        df.loc[n2,'latitude'], df.loc[n2,'longitude'])

    open_set = [(heuristic(start, goal), 0, start, [start])]
    g_costs = {start: 0}

    while open_set:
        f_cost, current_dist, current_node, path = heapq.heappop(open_set)
        if current_node == goal:
            return path, current_dist
        
        row = sparse_matrix.getrow(current_node)
        for neighbor, weight in zip(row.indices, row.data):
            tentative_g = current_dist + weight
            if neighbor not in g_costs or tentative_g < g_costs[neighbor]:
                g_costs[neighbor] = tentative_g
                new_f = tentative_g + heuristic(neighbor, goal)
                heapq.heappush(open_set, (new_f, tentative_g, neighbor, path+[neighbor]))
    
    return None, None

In [78]:
start_city_index = 200
end_city_index = 0

shortest_path, total_distance = a_star(sparse_matrix,start_city_index, end_city_index,df)

In [79]:
if shortest_path:
    print("Shortest path found:")
    for city_index in shortest_path:
        city_name = df.loc[city_index, 'name_en']
        print(f"{city_name}",end=" -> ")

    print(f"\nTotal distance: {dist2km(total_distance):.2f}km")
else:
    print(f"No path found between city index {start_city_index} and {end_city_index}.")

Shortest path found:
Madulsima -> Pallekiruwa -> Medagana -> Pangura -> Dambagalla -> Uva Gangodagama -> Kandaudapanguwa -> Bakmitiyawa -> Madawalalanda -> Irakkamam -> Navatkadu -> Akkaraipattu -> 
Total distance: 111.91km
