# Creation of a Connectivity Matrix

### Ex: 7 days

Some variables are taken from the other notebooks.

In [None]:
import xarray as xr
import numpy as np
from scipy.spatial import KDTree
from scipy.sparse import lil_matrix
from scipy.spatial.distance import cdist

### Necessary variables from the other Notebooks

In [None]:
# Results of the simulation released from 1133x10 coral spots, April 1997:
# 99 days, hourly release every day from 18:00:00 to 06:00:00, output every hour, dt=30min.
# The Zarr store is produced in the Notebook "Simulations".
ds_own_time_CELLS_6 = xr.open_zarr("Ok_THESIS_simulation_own_time_CELLS_6.zarr")

# Locations of the 36 grouped coral spots.
# new_corals_lon[k] and new_corals_lat[k] together describe grouped spot k,
# so both arrays must keep the same length and ordering.
new_corals_lon = np.array([39.73223114, 39.6405884, 39.500, 39.92, 39.47441864, 39.50928879,
        39.9375    , 39.56, 39.34, 39.26, 39.03000259, 38.9177742 ,
        39.13373184, 39.30393982, 39.60278183, 39.4683342 , 39.18, 39.42,
        38.88454819, 39.01739502, 39.10639954, 39.20000076, 39.26356888,
        39.6333313 , 39.86, 39.88, 39.7731514 , 39.64492798, 39.36883545,
        39.53882599, 39.67703629, 39.89500046, 40.05157852, 40.23666763,
        40.67300415, 40.98210526]) 
new_corals_lat = np.array([-8.01682186, -7.69152546, -7.99, -7.745, -7.48969936, -7.23591518,
        -6.89461374, -6.982, -6.809, -6.594,  -6.45777082, -6.18345165,
        -6.16138983, -6.36267281, -6.37120247, -6.0890007 , -5.877, -5.776,
        -5.79438543, -5.54667282, -5.28294134, -5.02995682, -4.80961895,
        -5.42941427, -5.304, -5.048 , -4.86327362, -5.17221212, -4.68640804,
        -4.43031645, -4.12676477, -3.63475347, -3.35852289, -2.85103106,
        -2.55003238, -2.24830103])

# Locations of the 1133 individual coral spots, which come from the Notebook "Corals":
#     filtered_lon_list
#     filtered_lat_list
# NOTE(review): these two names are used by later cells but are NOT defined anywhere
# in this notebook, so they must already exist in the kernel (a fresh
# "Restart & Run All" will fail until they are loaded or re-created here).

### Time and location of particles at the starting time and after 7 days of advection

In [None]:
# Snapshot of every trajectory at its first recorded observation (obs=0)
snapshot_start = ds_own_time_CELLS_6.isel(obs=0)
time_reduced_0 = snapshot_start.time.values
lon_reduced_0 = snapshot_start.lon.values
lat_reduced_0 = snapshot_start.lat.values

# The simulation writes one output per hour, so 7 days later is obs = 7*24 = 168
snapshot_day_7 = ds_own_time_CELLS_6.isel(obs=168)
time_reduced_7 = snapshot_day_7.time.values
lon_reduced_7 = snapshot_day_7.lon.values
lat_reduced_7 = snapshot_day_7.lat.values

### Substitution of the starting location of the trajectories with the closest point among the 36 coral grouped spots

For the function KDTree see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.KDTree.html

In [None]:
# (lon, lat) pairs of the 36 grouped coral spots, one row per spot
reef_points_36 = np.stack([new_corals_lon, new_corals_lat], axis=1)

# Spatial index over the grouped spots for fast nearest-neighbour lookup
reef_tree_36 = KDTree(reef_points_36)

# For every particle, distance to and index of the nearest grouped spot
# at its starting position
start_points_36 = np.stack([lon_reduced_0, lat_reduced_0], axis=1)
distances_36, indices_36 = reef_tree_36.query(start_points_36, k=1)

# Coordinates of that nearest grouped spot, one entry per particle
closest_reef_lon_36 = np.take(new_corals_lon, indices_36)
closest_reef_lat_36 = np.take(new_corals_lat, indices_36)

### Key variables for the creation of the Matrix

In [None]:
# Maximum |lat| and |lon| offset for a particle to count as "at" a reef
# (approx 2.2 km; same units as the lon/lat coordinates)
proximity_threshold = 0.02

# Number of grouped spots (starting/ending nodes of the matrix) and of
# individual coral spots (filtered_lon_list comes from the Notebook "Corals")
n_grouped_spots = new_corals_lon.shape[0]
n_small_spots = len(filtered_lon_list)

Workflow of the 36x36 Matrix creation:
- Get a matrix with 36 starting spots and with 1133 possible ending spots (36x1133 matrix)
- Link each of the 1133 possible ending spots to the closest of the 36 grouped spots
- Update the matrix with 36 starting spots and 36 ending spots (36x36 matrix)
- Get the matrix in an array form
- Convert the matrix to float so that 0 values (no connection between spots) can be replaced with NaN

In [None]:
# Sparse integer counter: rows are grouped starting spots, columns are grouped
# ending spots. LIL format is used because entries are incremented one by one.
matrix_shape_36x36 = (n_grouped_spots, n_grouped_spots)
connectivity_matrix_7_36x36 = lil_matrix(matrix_shape_36x36, dtype=int)

# Function to find indices of nearest points within a threshold
def find_nearest_reefs(lat, lon, reef_lats, reef_lons, threshold):
    """Return the indices of all reefs lying within `threshold` of a point.

    A reef is selected when BOTH its latitude and its longitude differ from
    the query point by at most `threshold`, i.e. the search region is a
    square box of half-width `threshold` centred on (lat, lon), not a circle.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the query point.
    reef_lats, reef_lons : array-like
        Reef coordinates; element k of each describes reef k. Plain Python
        lists are accepted as well as NumPy arrays.
    threshold : float
        Maximum allowed absolute difference per coordinate (same units as
        the coordinates).

    Returns
    -------
    numpy.ndarray
        Integer indices of the matching reefs (empty if none match).
    """
    # Coerce to arrays so that list inputs combined with plain Python floats
    # do not raise TypeError on the subtraction below.
    reef_lats = np.asarray(reef_lats, dtype=float)
    reef_lons = np.asarray(reef_lons, dtype=float)

    lat_diff = np.abs(reef_lats - lat)
    lon_diff = np.abs(reef_lons - lon)
    mask = (lat_diff <= threshold) & (lon_diff <= threshold)
    return np.where(mask)[0]

# Function to find the index of the nearest point among grouped coral spots
def find_nearest_grouped_index(lat, lon, grouped_lats, grouped_lons):
    """Return the index of the grouped spot closest to the point (lat, lon).

    Distance is the plain Euclidean distance in (lat, lon) coordinate space,
    as computed by `scipy.spatial.distance.cdist`.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the query point.
    grouped_lats, grouped_lons : array-like
        Coordinates of the candidate grouped spots; element k of each
        describes spot k.

    Returns
    -------
    numpy.intp
        Index of the nearest grouped spot (first one in case of a tie).
    """
    # Use the arrays passed by the caller; the previous version silently
    # ignored them and always read the globals new_corals_lat/new_corals_lon.
    grouped_points = np.column_stack((grouped_lats, grouped_lons))
    distances = cdist([[lat, lon]], grouped_points)
    return np.argmin(distances)

# Process each of the 36 grouped starting positions

# The grouped spot nearest to each small reef does not depend on the particle
# being processed, so it is computed once for all small reefs instead of being
# recomputed with cdist for every (particle, reef) pair inside the loops.
# Coordinates are ordered (lat, lon) on both sides of the distance computation.
small_reef_coords = np.column_stack((filtered_lat_list, filtered_lon_list))
grouped_coords = np.column_stack((new_corals_lat, new_corals_lon))
small_to_grouped_index = np.argmin(cdist(small_reef_coords, grouped_coords), axis=1)

for i in range(n_grouped_spots):
    # Particles whose starting position was snapped to the i-th grouped spot.
    # indices_36 holds, per particle, the KDTree index of its nearest grouped
    # spot, so it is compared directly rather than matching float coordinates.
    particles_at_start = np.where(indices_36 == i)[0]

    # Positions after 7 days for these particles
    end_lats = lat_reduced_7[particles_at_start]
    end_lons = lon_reduced_7[particles_at_start]

    # Remove NaN and Inf values
    finite_mask = np.isfinite(end_lats) & np.isfinite(end_lons)
    end_lats = end_lats[finite_mask]
    end_lons = end_lons[finite_mask]

    # Find the small reef spots close to each end position
    for end_lat, end_lon in zip(end_lats, end_lons):
        nearest_reef_indices = find_nearest_reefs(end_lat, end_lon, filtered_lat_list, filtered_lon_list, proximity_threshold)

        # One count per small reef within the threshold: a particle close to
        # several small reefs contributes several counts, possibly to the
        # same grouped spot.
        for closest_grouped_index in small_to_grouped_index[nearest_reef_indices]:
            # Update the connectivity matrix
            connectivity_matrix_7_36x36[i, closest_grouped_index] += 1

# Dense (n_grouped_spots x n_grouped_spots) array of counts
connectivity_matrix_7_36x36_array = connectivity_matrix_7_36x36.toarray()

In [None]:
# Float copy of the count matrix in which "no connection" (0) is marked as NaN
connectivity_matrix_7_36x36_array_nan = np.where(
    connectivity_matrix_7_36x36_array == 0,
    np.nan,
    connectivity_matrix_7_36x36_array.astype(float),
)

### Visualization of the values

Express each value as a percentage of the total of its row in the matrix.

In [None]:
# Total count per starting spot (NaN entries ignored), kept as a column vector
# so that it broadcasts across each row
row_sums = np.nansum(connectivity_matrix_7_36x36_array_nan, axis=1)[:, np.newaxis]

# Share of each destination within its row, in percent
row_fractions = np.divide(connectivity_matrix_7_36x36_array_nan, row_sums)
connectivity_matrix_7_36x36_array_nan_perc = np.multiply(row_fractions, 100)

### Analysis of the max values and indexes

In [None]:
# Boolean mask of the matrix entries that hold an actual connection (non-NaN)
valid_indices = ~np.isnan(connectivity_matrix_7_36x36_array_nan)
# Valid values only, in row-major order of the original matrix
flattened_valid_matrix = connectivity_matrix_7_36x36_array_nan[valid_indices]

# Keep at most the 10 largest values; clamp to the number of valid entries so
# that np.argpartition does not raise when fewer than 10 connections exist.
n_largest = min(10, flattened_valid_matrix.size)
if n_largest > 0:
    largest_values_indices_flat = np.argpartition(flattened_valid_matrix, -n_largest)[-n_largest:]
else:
    largest_values_indices_flat = np.array([], dtype=int)

# Order the selected entries from largest to smallest value
sorted_largest_indices_flat = largest_values_indices_flat[np.argsort(flattened_valid_matrix[largest_values_indices_flat])][::-1]
# Translate positions within the valid-only vector back to positions in the
# flattened full matrix, then to (row, column) index arrays
largest_values_indices_original = np.where(valid_indices.flatten())[0][sorted_largest_indices_flat]
max_indices_7_36x36_April97 = np.unravel_index(largest_values_indices_original, connectivity_matrix_7_36x36_array_nan.shape)
# Values of the strongest connections, in the same (descending) order
largest_values_7_36x36_April97 = connectivity_matrix_7_36x36_array_nan[max_indices_7_36x36_April97]