<a href="https://colab.research.google.com/github/SSenitha/CCS3052_Advance_DSA/blob/Aashi/adjacencyMatrix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import Dataset

In [1]:
# Load data from the repository
!wget https://raw.githubusercontent.com/SSenitha/CCS3052_Advance_DSA/refs/heads/main/Cities_of_SriLanka.csv

--2025-09-12 11:31:37--  https://raw.githubusercontent.com/SSenitha/CCS3052_Advance_DSA/refs/heads/main/Cities_of_SriLanka.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 264501 (258K) [text/plain]
Saving to: ‘Cities_of_SriLanka.csv’


2025-09-12 11:31:37 (6.57 MB/s) - ‘Cities_of_SriLanka.csv’ saved [264501/264501]



In [2]:
#Import necessary libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import stats
from sklearn.neighbors import NearestNeighbors


In [3]:
# Read the dataset and output the total count
df = pd.read_csv('/content/Cities_of_SriLanka.csv')
print(f"Total rows count: {df['city id'].count()}")
df.rename(columns={"city id": "city_id"}, inplace=True)
df.head()

Total rows count: 2155


Unnamed: 0,city_id,district_id,name_en,name_si,name_ta,sub_name_en,sub_name_si,sub_name_ta,postcode,latitude,longitude
0,1,1,Akkaraipattu,අක්කරපත්තුව,அக்கரைப்பற்று,,,,32400.0,7.218428,81.854116
1,2,1,Ambagahawatta,අඹගහවත්ත,அம்பகஹவத்த,,,,90326.0,7.301756,81.674729
2,3,1,Ampara,අම්පාර,அம்பாறை,,,,32000.0,7.301756,81.674729
3,4,1,Bakmitiyawa,බක්මිටියාව,பக்மிடியாவ,,,,32024.0,7.029632,81.680205
4,5,1,Deegawapiya,දීඝවාපිය,தீகவாபி,,,,32006.0,7.301756,81.674729


In [4]:
#Remove the Cities with little to no geographical difference

df = df.drop_duplicates(subset=['latitude', 'longitude'], keep='first').reset_index(drop=True)
print(f"Total rows count after dropping duplicates: {df['city_id'].count()}")

Total rows count after dropping duplicates: 1919


In [5]:
#Dropping the columns that won't be used in the making of graph/Matrix
df = df.drop(columns=["district_id","name_si","name_ta","sub_name_en","sub_name_si","sub_name_ta","postcode"])

In [15]:
def resetIndex():
  df.drop(columns=["city_id"], inplace=True)
  df.insert(0, "city_id", df.index)
  return df

In [7]:
resetIndex()
df.head()

Unnamed: 0,city_id,name_en,latitude,longitude
0,0,Akkaraipattu,7.218428,81.854116
1,1,Ambagahawatta,7.301756,81.674729
2,2,Bakmitiyawa,7.029632,81.680205
3,3,Digamadulla Weeragoda,7.390125,81.696588
4,4,Dorakumbura,7.35887,81.301428


#Adjacency/Sparse Matrix Approach

In [8]:
import math

def dist(lat1, lon1, lat2, lon2):
    R = 6371   # Radius of Earth

    # Convert degrees into radians
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    #Calculate the linear distance between two points
    distance = dlat ** 2 + dlon ** 2
    distance = math.sqrt(distance)

    # Calculate spherical distance
    a = math.asin(distance/(2*R))
    c = R * 2 * math.asin(math.sqrt(a))

    return c


In [9]:
from scipy.sparse import csr_matrix

num_cities = len(df)
k = 6

# Fit the model on the geographical coordinates
X = df[['latitude', 'longitude']]
nn = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(X)

# Find the 5 nearest neighbors
distances, indices = nn.kneighbors(X)

rows, cols, data = [], [], []
for i in range(num_cities):
    for j in range(1, k):
        neighbor_index = indices[i, j]
        neighbor_distance = distances[i, j]
        rows.append(i)
        cols.append(neighbor_index)
        data.append(neighbor_distance)

sparse_matrix = csr_matrix((data, (rows, cols)), shape=(num_cities, num_cities))

print("Shape of the sparse adjacency matrix:", sparse_matrix.shape)

Shape of the sparse adjacency matrix: (1919, 1919)


In [10]:
print("\nFirst 10 rows and their non-zero entries (neighbors and distances):")

for i in range(min(10, num_cities)):
    row_data = sparse_matrix.getrow(i)
    print(f"Row {i}: {list(zip(row_data.indices, row_data.data))}")


First 10 rows and their non-zero entries (neighbors and distances):
Row 0: [(np.int32(19), np.float64(0.10280801203432574)), (np.int32(20), np.float64(0.07630243464241533)), (np.int32(32), np.float64(0.09008373774760883)), (np.int32(237), np.float64(0.025839241107277235)), (np.int32(256), np.float64(0.019672846708092478))]
Row 1: [(np.int32(3), np.float64(0.0910322710384622)), (np.int32(15), np.float64(0.09149139670707118)), (np.int32(24), np.float64(0.07909985753767391)), (np.int32(34), np.float64(0.07288142004886898)), (np.int32(857), np.float64(0.07483869999999992))]
Row 2: [(np.int32(6), np.float64(0.10433288545808693)), (np.int32(14), np.float64(0.16315676726547387)), (np.int32(35), np.float64(0.14410087188900797)), (np.int32(736), np.float64(0.15465547323990886)), (np.int32(1287), np.float64(0.14722019989906115))]
Row 3: [(np.int32(1), np.float64(0.0910322710384622)), (np.int32(18), np.float64(0.08574358274022831)), (np.int32(29), np.float64(0.10378444672748054)), (np.int32(34),

Bellman - Ford Implementation

In [18]:
# Minimal, non-verbose graph builder + Bellman-Ford runner for Colab
import math, time
import numpy as np
from collections import defaultdict
from sklearn.neighbors import BallTree
from scipy.sparse import csr_matrix

# ---- Utilities ----
def haversine_km(lat1, lon1, lat2, lon2):
    R = 6371.0088
    phi1 = math.radians(lat1); phi2 = math.radians(lat2)
    dphi = math.radians(lat2 - lat1); dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi/2.0)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda/2.0)**2
    a = min(1.0, max(0.0, a))
    return 2 * R * math.asin(math.sqrt(a))

# ---- Build adjacency list (k-NN using BallTree, returns dict adj_list) ----
def build_adj_list(df_local, k_neighbors=6):
    """
    Build an undirected adjacency list from df_local with columns ['latitude','longitude'].
    Returns: adj_list (dict: node -> [(neighbor, weight_km), ...])
    """
    coords_deg = df_local[['latitude','longitude']].to_numpy()
    coords_rad = np.radians(coords_deg)
    tree = BallTree(coords_rad, metric='haversine')
    # query k_neighbors + 1 to skip self in results
    dist_rad, idx = tree.query(coords_rad, k=k_neighbors+1)
    EARTH_R = 6371.0088
    dist_km = dist_rad * EARTH_R

    n = len(df_local)
    rows, cols, data = [], [], []
    for i in range(n):
        for pos in range(1, k_neighbors+1):
            j = int(idx[i, pos])
            d = float(dist_km[i, pos])
            rows.append(i); cols.append(j); data.append(d)
            rows.append(j); cols.append(i); data.append(d)  # undirected

    sparse_mat_km = csr_matrix((data, (rows, cols)), shape=(n, n))
    # build adjacency list
    adj = defaultdict(list)
    coo = sparse_mat_km.tocoo()
    for u, v, w in zip(coo.row, coo.col, coo.data):
        adj[int(u)].append((int(v), float(w)))
    return dict(adj)  # convert to normal dict for easier iteration

# ---- Bellman-Ford (reusable) ----
def bellman_ford(adj, n_nodes, source, target=None):
    import math, time
    dist = [math.inf] * n_nodes
    pred = [None] * n_nodes
    dist[source] = 0.0
    relaxations = 0
    start = time.perf_counter()
    for iteration in range(1, n_nodes):
        any_relaxed = False
        for u, neighbors in adj.items():
            du = dist[u]
            if du == math.inf:
                continue
            for v, w in neighbors:
                if du + w < dist[v]:
                    dist[v] = du + w
                    pred[v] = u
                    any_relaxed = True
                    relaxations += 1
        if not any_relaxed:
            break
    negative_cycle = False
    for u, neighbors in adj.items():
        du = dist[u]
        if du == math.inf:
            continue
        for v, w in neighbors:
            if du + w < dist[v]:
                negative_cycle = True
                break
        if negative_cycle:
            break
    elapsed = time.perf_counter() - start
    if target is not None and dist[target] != math.inf:
        path = []
        cur = target
        while cur is not None:
            path.append(cur)
            cur = pred[cur]
        path.reverse()
    else:
        path = None
    info = {
        'time_s': elapsed,
        'relaxations': relaxations,
        'iterations': iteration,
        'negative_cycle': negative_cycle
    }
    return path, (dist[target] if target is not None else dist), info

# ---- Helper: accept index or city name (case-insensitive, exact match preferred) ----
def name_or_index_to_idx(x, df_local):
    """
    Convert x (int index or string) -> integer index in df_local.
    For strings: try case-insensitive exact match on name_en; if not found, try case-insensitive substring and return first match.
    Raises ValueError if not found.
    """
    if isinstance(x, (int, np.integer)):
        idx = int(x)
        if 0 <= idx < len(df_local):
            return idx
        raise ValueError(f"Index {idx} out of range.")
    if isinstance(x, str):
        # exact (case-insensitive)
        lower = df_local['name_en'].str.lower()
        matches_exact = df_local.index[lower == x.lower()].tolist()
        if matches_exact:
            return int(matches_exact[0])
        # substring match, return first
        mask = lower.str.contains(x.lower(), na=False)
        matches = df_local.index[mask].tolist()
        if matches:
            return int(matches[0])
        raise ValueError(f"No city matches '{x}' (case-insensitive exact or substring).")
    raise ValueError("source/target must be int index or city name string")

# ---- Wrapper: build (if needed) and run Bellman-Ford quietly ----
def run_bellman_by_name_or_index(source, target, df_local, adj_list=None, k_neighbors=6, verbose=True):
    """
    source, target: int index or city name (string)
    df_local: DataFrame
    adj_list: optional; if None, built using k_neighbors
    Returns: (path_indices_or_None, total_distance_or_inf, info)
    If verbose True, prints a concise summary. Otherwise returns values silently.
    """
    if adj_list is None:
        adj_list = build_adj_list(df_local, k_neighbors=k_neighbors)
    # convert source/target to indices
    s_idx = name_or_index_to_idx(source, df_local)
    t_idx = name_or_index_to_idx(target, df_local)
    path, total_distance, info = bellman_ford(adj=adj_list, n_nodes=len(df_local), source=s_idx, target=t_idx)
    if verbose:
        if info['negative_cycle']:
            print("⚠️ Negative-weight cycle detected.")
        if path is None:
            print(f"No path found from [{s_idx}] {df_local['name_en'].iloc[s_idx]} -> [{t_idx}] {df_local['name_en'].iloc[t_idx]}")
        else:
            names = [df_local['name_en'].iloc[i] for i in path]
            print("Path: " + " -> ".join(names))
            print(f"Distance (km): {total_distance:.3f}")
        print(f"Time: {info['time_s']:.6f}s; Relaxations: {info['relaxations']}; Iterations: {info['iterations']}")
    return path, total_distance, info


In [20]:
# Interactive helper: choose source & target, run Bellman-Ford
import sys

def get_city_index_from_input(prompt: str, df_local=None):
    """
    Ask user for an index or name. Returns integer index into df.
    Accepts:
      - an integer index (e.g. 12)
      - a city name (partial or full, case-insensitive)
    If multiple matches for a name, asks user to pick from numbered choices.
    """
    if df_local is None:
        raise ValueError("Please pass the DataFrame as df_local (e.g. df).")
    while True:
        raw = input(prompt).strip()
        if raw == "":
            print("Empty input — please enter a valid index or city name.")
            continue

        # 1) Try integer index
        if raw.isdigit():
            idx = int(raw)
            if 0 <= idx < len(df_local):
                return idx
            else:
                print(f"Index out of range (0..{len(df_local)-1}). Try again.")
                continue

        # 2) Case-insensitive exact name match
        matches_exact = df_local.index[df_local['name_en'].str.lower() == raw.lower()].tolist()
        if len(matches_exact) == 1:
            return int(matches_exact[0])

        # 3) Substring match (case-insensitive)
        mask = df_local['name_en'].str.lower().str.contains(raw.lower(), na=False)
        matches = df_local.index[mask].tolist()
        if len(matches) == 0:
            print(f"No city matched '{raw}'. Try again (you can enter an index or part of a name).")
            # show first 20 names for convenience
            print("Sample names (first 20):")
            print(df_local['name_en'].iloc[:20].to_list())
            continue
        elif len(matches) == 1:
            return int(matches[0])
        else:
            # Ambiguous — present choices
            print(f"Multiple matches for '{raw}':")
            for i, m in enumerate(matches[:20]):  # show up to 20
                print(f"{i:2d}) [{m}] {df_local['name_en'].iloc[m]}")
            # Ask user to pick by shown number or full index
            choice = input("Enter the number (left) of your choice, or full index in brackets (e.g. 42): ").strip()
            if choice.isdigit():
                choice_i = int(choice)
                if 0 <= choice_i < len(matches):
                    return int(matches[choice_i])
                else:
                    print("Choice out of range. Starting over.")
                    continue
            # maybe user entered actual index
            if choice.isdigit() and int(choice) in matches:
                return int(choice)
            # else loop again
            print("Didn't understand selection. Let's try again.")

def choose_and_run_bellman():
    """
    Interactive flow: ask for source and target, run bellman_ford, print results.
    """
    # sanity checks
    if 'df' not in globals():
        print("DataFrame 'df' not found. Load the CSV first.")
        return
    if 'adj_list' not in globals():
        print("adj_list not found. Rebuild adjacency as in previous cells.")
        return
    if 'bellman_ford' not in globals():
        print("bellman_ford function not found. Paste the Bellman-Ford implementation cell and run it first.")
        return

    print("\n(You may enter either an integer index or part/full city name.)")
    print("Sample first 20 cities (index : name):")
    for i, name in enumerate(df['name_en'].iloc[:20]):
        print(f"{i:3d} : {name}")

    src_idx = get_city_index_from_input("Enter START city (index or name): ", df_local=df)
    dst_idx = get_city_index_from_input("Enter DESTINATION city (index or name): ", df_local=df)

    print(f"\nRunning Bellman-Ford from [{src_idx}] {df['name_en'].iloc[src_idx]} -> [{dst_idx}] {df['name_en'].iloc[dst_idx]} ...")
    path_indices, total_distance, info = bellman_ford(adj=adj_list, n_nodes=len(df), source=src_idx, target=dst_idx)

    print("\nResult:")
    if info.get('negative_cycle'):
        print("⚠️ Negative-weight cycle detected.")
    if path_indices is None:
        print(f"No path found from {df['name_en'].iloc[src_idx]} to {df['name_en'].iloc[dst_idx]}. Graph may be disconnected.")
    else:
        names = [df['name_en'].iloc[i] for i in path_indices]
        print("Path indices:", path_indices)
        print("Path names  : " + " -> ".join(names))
        print(f"Total distance (km): {total_distance:.3f}")
    print(f"Time taken: {info['time_s']:.6f} s; Relaxations: {info['relaxations']}; Iterations: {info['iterations']}")

# Example: call the interactive runner
choose_and_run_bellman()



(You may enter either an integer index or part/full city name.)
Sample first 20 cities (index : name):
  0 : Akkaraipattu
  1 : Ambagahawatta
  2 : Bakmitiyawa
  3 : Digamadulla Weeragoda
  4 : Dorakumbura
  5 : Gonagolla
  6 : Hulannuge
  7 : Kalmunai
  8 : Kannakipuram
  9 : Karativu
 10 : Kekirihena
 11 : Koknahara
 12 : Kolamanthalawa
 13 : Komari
 14 : Lahugala
 15 : Irakkamam
 16 : Mahaoya
 17 : Namaloya
 18 : Navithanveli
 19 : Nintavur
Enter START city (index or name): 25
Enter DESTINATION city (index or name): 40

Running Bellman-Ford from [25] Periyaneelavanai -> [40] Dematawewa ...

Result:
Path indices: [25, 8, 263, 253, 258, 239, 242, 255, 261, 397, 1401, 1403, 1465, 1391, 1393, 1190, 58, 103, 110, 93, 40]
Path names  : Periyaneelavanai -> Kannakipuram -> Thettativu -> Kurukkalmadam -> Palamunai -> Araipattai -> Chenkaladi -> Miravodai -> Punanai -> Welikanda -> Onegama -> Palugasdamana -> Parakramasamudraya -> Attanakadawala -> Diyabeduma -> Sigiriya -> Habarana -> Namal