# <u>Buffer count</u>

In [None]:
# Importing Python packages and modules
import numpy as np
import pandas as pd
import geopandas as gpd
import os
import rasterio
import matplotlib.pyplot as plt
import folium
import rasterio.mask
import warnings
from shapely.geometry import Point
from IPython.display import FileLink
from folium.plugins import HeatMap
from tqdm import tqdm
from time import time
from shapely import wkt
from pathlib import Path
from shapely.geometry import mapping


#### Importing the data

In [None]:
# Snapped clean strandings (points) (see EDA notebook).
# Read with pandas, like random_strandings.csv below: gpd.read_file goes through
# the GDAL CSV driver, which by default returns every field as text, leaving
# latitude/longitude as strings and making .describe() meaningless.
# The geometry is built in the next cell.
clean_strandings = pd.read_csv("clean_strandings.csv")

# Roads GeoDataFrame (lines)
roads = gpd.read_file("merged_roads.gpkg")

# Buildings GeoDataFrame (polygons)
buildings = gpd.read_file("UK_Ireland_Buildings.gpkg")

# Bathymetric raster (TIFF); the dataset handle is kept open because later cells read from it
bath = rasterio.open("uk_ireland_bath.tiff")

# Random strandings (points) (see EDA notebook)
random_points = pd.read_csv("random_strandings.csv")

#### Making clean strandings and random points GeoDataFrames

In [None]:
# Making clean_strandings and random_points GeoDataFrames with a CRS of EPSG:4326

def _parse_wkt(value):
    """Return a shapely geometry for a WKT string; pass any other value through unchanged.

    The pass-through keeps this cell re-runnable once the column already holds geometries.
    """
    return wkt.loads(value) if isinstance(value, str) else value


# Stranding points are built from the longitude/latitude columns. The previous
# WKT parse of the 'geometry' column was dropped because its result was
# overwritten on the very next line.
# NOTE(review): this means any WKT stored in clean_strandings.csv is ignored —
# confirm longitude/latitude hold the snapped positions.
clean_strandings['geometry'] = gpd.points_from_xy(
    pd.to_numeric(clean_strandings['longitude']),   # to_numeric: columns may arrive as text
    pd.to_numeric(clean_strandings['latitude']),
    crs="EPSG:4326")
clean_strandings = gpd.GeoDataFrame(clean_strandings, geometry='geometry', crs="EPSG:4326")


# Random points keep the geometry stored as WKT in their CSV
random_points['geometry'] = random_points['geometry'].apply(_parse_wkt)
random_points = gpd.GeoDataFrame(random_points, geometry='geometry', crs='EPSG:4326')

--------------------------------------------------------------------------------------------------------------------------------------------------------

# Cleaning and checking the data
## Strandings

In [None]:
# Stranding points: report the CRS, then the column summary and a preview of the first rows
print(f"CRS: {clean_strandings.crs}")
clean_strandings.info()
clean_strandings.head()

In [None]:
# Summary statistics for the stranding columns
clean_strandings.describe()

## Roads data

In [None]:
# Roads layer: report the CRS, then the column summary and a preview of the first rows
print(f"CRS: {roads.crs}")
roads.info()
roads.head()

In [None]:
# Summary statistics for the roads columns
roads.describe()

In [None]:
# Dropping road attributes that are not used in the buffer analysis.
# errors='ignore' lets the cell be re-run after the columns are already gone
# (same approach as the bathymetry merge cell further down).
unused_road_columns = ['surface', 'smoothness', 'width', 'lanes', 'oneway',
                       'bridge', 'layer', 'source', 'name:en']
roads = roads.drop(columns=unused_road_columns, errors='ignore')
roads.head()

## Buildings data

In [None]:
# Buildings layer: report the CRS, then the column summary and a preview of the first rows
print(f"CRS: {buildings.crs}")
buildings.info()
buildings.head()

In [None]:
# Dropping building attributes that are not used in the buffer analysis.
# errors='ignore' lets the cell be re-run after the columns are already gone.
unused_building_columns = ['code', 'fclass', 'name']
buildings = buildings.drop(columns=unused_building_columns, errors='ignore')
buildings.head()

## Bathymetric data

In [None]:
# Bathymetric raster: print the CRS, extent, pixel dimensions and band count
raster_summary = [
    ("CRS:", (bath.crs,)),
    ("Bounds:", (bath.bounds,)),
    ("Width, Height:", (bath.width, bath.height)),
    ("Number of bands:", (bath.count,)),
]
for label, values in raster_summary:
    print(label, *values)

## Random points 

In [None]:
# Checking CRS and basic info for the random points.
# The CRS is printed explicitly: a bare expression that is not the last line of
# a cell is evaluated and discarded, so it was never displayed before.
print("CRS:", random_points.crs)
random_points.info()
random_points.head()

--------------------------------------------------------------------------------------------------------------------------------------------------------

# Heatmaps

In [None]:
# Helper turning a GeoDataFrame into the [lat, lon] pairs that folium's HeatMap expects
def to_latlon_list(gdf):
    """Return ``[[lat, lon], ...]`` for the point geometries of ``gdf``.

    The frame is reprojected to EPSG:4326 when it declares a different CRS
    (a frame without a CRS is used as-is). Missing geometries and anything
    that is not a ``Point`` are left out.
    """
    needs_reprojection = gdf.crs is not None and gdf.crs.to_epsg() != 4326
    frame = gdf.to_crs(4326) if needs_reprojection else gdf
    geoms = frame.geometry.dropna()
    points_only = geoms[geoms.geom_type == "Point"]
    # folium wants latitude first, i.e. (y, x)
    return [[pt.y, pt.x] for pt in points_only]

def add_heat_layer(target_map, latlon_points, layer_name, gradient):
    """Add a toggleable heat layer to ``target_map`` and return its FeatureGroup.

    latlon_points: list of [lat, lon] pairs.
    layer_name:    label shown in the layer control.
    gradient:      mapping of intensity stop -> colour passed to HeatMap.
    """
    group = folium.FeatureGroup(name=layer_name)
    HeatMap(
        latlon_points,
        radius=14, blur=16, max_zoom=10, min_opacity=0.2,
        gradient=gradient
    ).add_to(group)
    group.add_to(target_map)
    return group


# [lat, lon] lists for both point sets. to_latlon_list never modifies its
# input, so the frames are passed directly instead of being copied first.
clean_heat  = to_latlon_list(clean_strandings)
random_heat = to_latlon_list(random_points)

# Base map
m = folium.Map(location=[55.61, -2.85], zoom_start=5, tiles='cartodbpositron')

# Strandings layer in reds
fg_clean = add_heat_layer(
    m, clean_heat, "Strandings heat",
    {0.2:'#fee5d9',0.4:'#fcae91',0.6:'#fb6a4a',0.8:'#de2d26',1:'#a50f15'})

# Random points layer in blues
fg_random = add_heat_layer(
    m, random_heat, "Random points heat",
    {0.2:'#deebf7',0.4:'#9ecae1',0.6:'#6baed6',0.8:'#3182bd',1:'#08519c'})

# Layer control so each heat layer can be switched on and off
folium.LayerControl(collapsed=False).add_to(m)

# Fixed-position title banner shown at the top centre of the map
title_html = """
<div style="
    position: fixed; top: 10px; left: 50%; transform: translateX(-50%);
    z-index: 9999; background-color: white; padding: 10px; 
    border: 1px solid #777; border-radius: 6px; 
    font-size: 18px; font-weight: bold; 
    box-shadow: 0px 2px 6px rgba(0,0,0,0.3);
">
Stranding vs Random Point Heatmaps
</div>
"""

# Fixed-position legend in the bottom-left corner (red = strandings, blue = random points)
legend_html = """
<div style="
    position: fixed; bottom: 50px; left: 50px; z-index:9999;
    background-color: white; padding: 10px; 
    border: 1px solid #777; border-radius: 6px;
    box-shadow: 0px 2px 6px rgba(0,0,0,0.3); 
    font-size: 14px;
">
<b>Legend</b><br>
<span style="display:inline-block;width:12px;height:12px;
    background:red;border:1px solid #555;margin-right:6px;"></span>
Strandings heat<br>
<span style="display:inline-block;width:12px;height:12px;
    background:blue;border:1px solid #555;margin-right:6px;"></span>
Random points heat
</div>
"""

# Attach both overlays to the page HTML, title first and legend second
for overlay_html in (title_html, legend_html):
    m.get_root().html.add_child(folium.Element(overlay_html))

# Display map
m


In [None]:
# Saving the map. The output folder is created first so the save does not fail
# on a fresh checkout where Method_results_images/ does not exist yet.
heatmap_path = Path("Method_results_images/Buffer_Count_Heatmap.html")
heatmap_path.parent.mkdir(parents=True, exist_ok=True)
m.save(str(heatmap_path))

#### **Random hotspot clusters:** in western Scotland, western Ireland, and the northwest coast, this is likely influenced by the indented and irregular coastline in these regions, where numerous headlands, bays, and inlets create more coastal area for points to be placed.

#### **Stranding hotspot clusters:** are concentrated in the southwest of England and we do see a high number of strandings in this area.

#### Saved as a link with Netlify [UK and Ireland Heatmap link](https://heatmap-strandings-random.netlify.app/)

--------------------------------------------------------------------------------------------------------------------------------------------------------

# Creating buffers around each point (strandings and random)

In [None]:
# Buffer radii in metres, applied to both the stranding and the random points
buffer_sizes = [500, 1000, 1500, 3000, 5000]

# The two point sets keyed by their label
# NOTE(review): no later cell in this notebook appears to read `datasets` — confirm it is still needed
datasets = dict(Strandings=clean_strandings, Random=random_points)


#### Setting CRS

In [None]:
# Setting raw lat/long and target metric CRS for buffers
# NOTE(review): EPSG:27700 is the British National Grid; the data also covers
# Ireland — confirm the distortion there is acceptable for metre-based buffers.
SRC_CRS = "EPSG:4326"    # CRS assigned to layers that arrive without one
DST_CRS = "EPSG:27700" # projected CRS every layer is converted to before buffering

def set_crs_if_missing(gdf: gpd.GeoDataFrame, crs: str) -> gpd.GeoDataFrame:
    """Return ``gdf`` unchanged if it already has a CRS, otherwise ``gdf.set_crs(crs)``.

    No reprojection happens here: a missing CRS is only declared, never converted.
    """
    has_crs = gdf.crs is not None
    return gdf if has_crs else gdf.set_crs(crs)
def to_metric(gdf: gpd.GeoDataFrame, dst_crs: str = DST_CRS) -> gpd.GeoDataFrame:
    """Reproject ``gdf`` to ``dst_crs`` (defaults to the notebook's metric CRS) and return the result."""
    reprojected = gdf.to_crs(dst_crs)
    return reprojected


# Declare the source CRS on any layer that lacks one (layers with a CRS are left untouched)
clean_strandings = set_crs_if_missing(clean_strandings, SRC_CRS)
buildings = set_crs_if_missing(buildings, SRC_CRS)
roads = set_crs_if_missing(roads, SRC_CRS)
random_points = set_crs_if_missing(random_points, SRC_CRS)

# Project every layer to the metric CRS so distances and buffers are in metres
strandings_proj = to_metric(clean_strandings, dst_crs=DST_CRS)
buildings_proj = to_metric(buildings, dst_crs=DST_CRS)
roads_proj = to_metric(roads, dst_crs=DST_CRS)
random_proj = to_metric(random_points, dst_crs=DST_CRS)

# Sanity check: all four projected layers should report the same CRS
print("CRS (proj):")
for label, layer in (
    ("  strandings_proj:", strandings_proj),
    ("  buildings_proj :", buildings_proj),
    ("  roads_proj     :", roads_proj),
    ("  random_proj    :", random_proj),
):
    print(label, layer.crs)

# Optional extra check: strandings_proj.geometry.head() should show coordinates in metres


#### Visualising the data

In [None]:
# # Checking all my layers align
# fig, ax = plt.subplots(figsize=(8, 8))
# strandings_proj.plot(ax=ax, color='blue', label='Strandings', markersize=2)
# random_proj.plot(ax=ax, color='red', label='Random', markersize=2)
# buildings_proj.plot(ax=ax, color='black', label='Buildings', markersize=1, alpha=0.4)
# roads_proj.plot(ax=ax, color='green', label='Roads', linewidth=0.5, alpha=0.7)
# plt.legend()
# plt.savefig("Method_results_images/Buffer_Count_Checking_crs_layers.png", dpi=150, bbox_inches="tight")
# plt.show()


#### Generating buffer geometries

In [None]:
# One record per (dataset, buffer size, point)
results = []

for data_label, df in zip(['Strandings', 'Random'], [strandings_proj, random_proj]):
    for buffer_size in buffer_sizes:
        # Buffer the geometry column only; copying the whole frame for every
        # radius (as before) cost memory and time without being used.
        buffered = df.geometry.buffer(buffer_size)
        # .items() pairs each polygon with its index label, which becomes the Point ID
        for point_id, buf in buffered.items():
            results.append({
                'Data': f"{data_label} points",
                'Buffer size': f"{buffer_size}m",
                'Buffer geometry': buf,
                'Point ID': point_id})

# Plain DataFrame; the polygons stay as shapely objects in 'Buffer geometry'
buffer_df = pd.DataFrame(results)

In [None]:
# Checking the data (fixed seed so the same rows are shown on every run)
buffer_df.sample(15, random_state=42)

#### Visualising the buffer and overlays

In [None]:
# Hiding the legend warning raised for polygon layers (cosmetic only)
warnings.filterwarnings(
    "ignore",
    message="Legend does not support handles for PatchCollection",
    category=UserWarning)

# Choosing the index to visualise
i = 1

# Isolating the point (double brackets keep it a GeoDataFrame) and buffering it
point = strandings_proj.iloc[[i]]
buffer_dist = 1500  # meters, adjust as needed
point_buffer = point.geometry.buffer(buffer_dist)
buffer_polygon = point_buffer.iloc[0]

# Selecting roads and buildings that touch the buffer.
# NOTE: this uses intersects(), whereas the counting cell counts buildings
# with within(), so buildings straddling the edge are drawn here but not counted there.
roads_within_buffer = roads_proj[roads_proj.intersects(buffer_polygon)]
buildings_within_buffer = buildings_proj[buildings_proj.intersects(buffer_polygon)]

# Plotting everything on one explicit Axes
fig, ax = plt.subplots(figsize=(8, 8))
point_buffer.plot(ax=ax, color='lightblue', alpha=0.3, edgecolor='blue', label='Buffer')
point.plot(ax=ax, color='red', markersize=50, label='Stranding')
roads_within_buffer.plot(ax=ax, color='green', linewidth=1, label='Roads')
buildings_within_buffer.plot(ax=ax, color='black', alpha=0.7, label='Buildings')

ax.legend()
ax.set_title('Visualisation of Stranding Buffer and Counted Features')
ax.axis('equal')  # equal scaling so the buffer is drawn as a circle

# Create the output folder first so saving works on a fresh checkout
figure_path = Path("Method_results_images/Buffer_Count_Roads_buildings_buffer_visulisation.png")
figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_path, dpi=150, bbox_inches="tight")
plt.show()

--------------------------------------------------------------------------------------------------------------------------------------------------------

# Counting inside the buffers

#### Counting the features inside each buffer

In [None]:
# Work on a copy so metric columns can be added without altering buffer_df
buffer_metrics = buffer_df.copy()

# Placeholder columns for the results (the bathymetry column is filled by a later cell)
for metric_column in ['Other points', 'Road length', 'Building count', 'Bathymetry mean']:
    buffer_metrics[metric_column] = np.nan

# Spatial indexes give a cheap bounding-box pre-filter before the exact geometry tests
roads_sindex = roads_proj.sindex
buildings_sindex = buildings_proj.sindex
strandings_sindex = strandings_proj.sindex
random_sindex = random_proj.sindex


def _bbox_candidates(layer, layer_sindex, polygon):
    """Rows of ``layer`` whose bounding box intersects the bounding box of ``polygon``."""
    return layer.iloc[list(layer_sindex.intersection(polygon.bounds))]


def _line_length_inside(line, polygon):
    """Length of the part of ``line`` inside ``polygon``; 0.0 when the overlap is empty or not linear."""
    piece = line.intersection(polygon)
    if piece.is_empty:
        return 0.0
    if piece.geom_type == 'LineString':
        return piece.length
    if piece.geom_type == 'MultiLineString':
        return sum(seg.length for seg in piece.geoms)
    return 0.0


# Each buffer is compared against points from its own dataset only
point_layers = {
    True: (strandings_proj, strandings_sindex),
    False: (random_proj, random_sindex),
}

# Slow loop (one pass per buffer), hence the progress bar
for idx, row in tqdm(buffer_metrics.iterrows(), total=len(buffer_metrics), desc="Processing buffers"):
    buf = row['Buffer geometry']
    point_id = row['Point ID']
    is_stranding = row['Data'] == 'Strandings points'
    points_gdf, points_index = point_layers[is_stranding]

    # Other points inside the buffer, leaving out the buffer's own centre point
    possible_matches = _bbox_candidates(points_gdf, points_index, buf)
    other_points = possible_matches.drop(index=point_id, errors='ignore')
    count_in_buffer = other_points.geometry.within(buf).sum()
    buffer_metrics.at[idx, 'Other points'] = count_in_buffer

    # Total road length inside the buffer: each candidate road is clipped to the polygon
    roads_in = _bbox_candidates(roads_proj, roads_sindex, buf)
    total_length = 0.0
    for road in roads_in.geometry:
        total_length += _line_length_inside(road, buf)
    buffer_metrics.at[idx, 'Road length'] = total_length

    # Buildings lying entirely within the buffer
    buildings_in = _bbox_candidates(buildings_proj, buildings_sindex, buf)
    building_count = buildings_in.geometry.within(buf).sum()
    buffer_metrics.at[idx, 'Building count'] = building_count


#### Mean bathymetric value inside each buffer

In [None]:
# Making sure my buffers match the raster's CRS: turn the buffer table into a
# GeoDataFrame, declare the CRS the buffers were built in, then reproject.
buffer_df_raster = buffer_df.copy()
buffer_df_raster = buffer_df_raster.set_geometry('Buffer geometry')
buffer_df_raster = buffer_df_raster.set_crs("EPSG:27700", allow_override=True)
buffer_df_raster = buffer_df_raster.to_crs(bath.crs)

# One output record per buffer, plus a log of buffers whose masking failed
bathymetry_results = []
failed_buffers = []

# Adding a timer, not as slow as the previous cell but still wanted to keep an eye on the progress
start = time()

# Looping over each buffer and calculating the mean of the first raster band
for idx, row in tqdm(buffer_df_raster.iterrows(), total=len(buffer_df_raster)):
    geom = [mapping(row['Buffer geometry'])]

    try:
        out_image, _ = rasterio.mask.mask(bath, geom, crop=True)
        band = out_image[0]

        # Drop nodata cells, then keep strictly positive values only
        # NOTE(review): assumes the raster stores depth as positive numbers — confirm
        if bath.nodata is not None:
            band = band[band != bath.nodata]
        band = band[band > 0]

        mean_value = band.mean() if band.size > 0 else np.nan
    except Exception as exc:
        # Best effort: a buffer that cannot be masked still gets a NaN row, but
        # the reason is recorded instead of being silently discarded.
        mean_value = np.nan
        failed_buffers.append({
            'Data': row['Data'],
            'Buffer size': row['Buffer size'],
            'Point ID': row['Point ID'],
            'Error': f"{type(exc).__name__}: {exc}"})

    # Storing the row of outputs for each buffer
    bathymetry_results.append({
        'Data': row['Data'],
        'Buffer size': row['Buffer size'],
        'Point ID': row['Point ID'],
        'Bathymetry mean': mean_value})

# End the timer when finished and printing out how long it took
end = time()
print(f"Finished in {(end - start)/60:.2f} minutes")

# Report masking failures so they can be told apart from buffers with no valid cells
if failed_buffers:
    print(f"{len(failed_buffers)} buffers could not be masked; error types:")
    print(pd.DataFrame(failed_buffers)['Error'].value_counts())

# Convert to DataFrame
bath_df = pd.DataFrame(bathymetry_results)


#### Visualising the bathymetric buffer

In [None]:

# Choosing index and buffer distance
i = 10
buffer_dist = 500

# Getting the point, buffering it in metres, then moving the buffer into the raster's CRS
point = strandings_proj.iloc[[i]]
point_buffer = point.geometry.buffer(buffer_dist)
point_buffer = point_buffer.to_crs(bath.crs)

# Clipping the raster to the buffer; a failure is reported and plotted as "no data"
try:
    clipped, _ = rasterio.mask.mask(bath, [mapping(point_buffer.iloc[0])], crop=True)
    bath_clip = clipped[0]
except Exception as e:
    print(f"Raster clip error: {e}")
    bath_clip = None

# Plotting and saving the buffer image to use in the write up
fig, ax = plt.subplots(figsize=(8, 6))
if bath_clip is not None:
    ax.imshow(bath_clip, cmap='viridis')
    ax.set_title("Clipped Bathymetry for Buffer")
else:
    ax.set_title("No bathymetry available for this buffer")
ax.axis('off')

# Create the output folder first so saving works on a fresh checkout
figure_path = Path("Method_results_images/Buffer_Count_Bathy_Buffer_Visulisation.png")
figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_path, dpi=150, bbox_inches="tight")
plt.show()

#### Merging bathymetric results with the roads and buildings results

In [None]:
# Dropping the placeholder column first; errors='ignore' keeps the cell re-runnable
buffer_metrics = buffer_metrics.drop(columns='Bathymetry mean', errors='ignore')

# Joining the bathymetry means onto the buffer metrics.
# validate='one_to_one' makes pandas raise if (Data, Buffer size, Point ID) is
# not unique on either side, instead of silently multiplying rows.
buffer_metrics = buffer_metrics.merge(
    bath_df,
    on=['Data', 'Buffer size', 'Point ID'],
    how='left',
    validate='one_to_one')


---------------------------

## Checking the results

In [None]:
# Spot check of the merged metrics (fixed seed so the same rows are shown on every run)
buffer_metrics.sample(5, random_state=42)

In [None]:
# Column dtypes and non-null counts (shows how many bathymetry means are missing)
buffer_metrics.info()

## Investigating null values

#### There are 25,299 stranding points and 25,299 random points, i.e. 50,598 points × 5 buffer sizes = 252,990 rows. 480 of these rows have no bathymetric mean, so I wanted to see how many distinct points they come from, or whether the same points are missing across all 5 buffers. This is investigated more deeply in the Under_reported_areas notebook. The buffers with missing bathymetry appear to fall on land / intertidal / masked shoreline cells, and as there are only 480 of them it shouldn't be a problem.

In [None]:
# Rows where no bathymetry mean could be calculated
bathymetry_missing_mask = buffer_metrics['Bathymetry mean'].isna()
buffer_metrics[bathymetry_missing_mask]

In [None]:
# Checking the missing bathymetry data
# Identifying the missing rows
missing = buffer_metrics[buffer_metrics['Bathymetry mean'].isna()].copy()

# Numeric buffer size for sorting/grouping. Labels such as "500m" have the
# trailing "m" stripped; the check is on "numeric or not" rather than
# `dtype == object`, so it also works when the column has a string dtype.
if pd.api.types.is_numeric_dtype(missing['Buffer size']):
    missing['buffer_m'] = missing['Buffer size']
else:
    missing['buffer_m'] = (
        missing['Buffer size']
        .astype(str).str.replace('m', '', regex=False)
        .astype('Int64'))

# A point is identified by (Data, Point ID): the Strandings and Random datasets
# share the same Point ID values, so grouping by Point ID alone would merge two
# different points into one.
point_keys = ['Data', 'Point ID']

# showing a quick summary
total_missing = len(missing)
counts_per_point = missing.groupby(point_keys).size().rename('missing_rows')
unique_points_missing = len(counts_per_point)
n_per_point_distribution = counts_per_point.value_counts().sort_index()

# Checking 5 buffers per point:
EXPECTED_PER_POINT = 5
all_are_five = (counts_per_point == EXPECTED_PER_POINT).all()

print("=== Missing Bathymetry Summary ===")
print(f"Total missing rows: {total_missing}")
print(f"Unique Point IDs with missing bathy: {unique_points_missing}")
print("\nCounts of missing rows per Point ID:")
print(n_per_point_distribution)
print(f"\nAll affected points have exactly {EXPECTED_PER_POINT} missing rows? {all_are_five}")

# How many missing rows per buffer size
counts_by_buffer = (
    missing
    .dropna(subset=['buffer_m'])
    .groupby('buffer_m')
    .size()
    .sort_index())

print("\n=== Missing rows by buffer size ===")
for buf_m, n in counts_by_buffer.items():
    print(f"Buffer {int(buf_m)}m has {n} missing rows")


# Creating a table of affected points (one row per point), listing which buffer
# sizes are missing in ascending numeric order
affected_points = (
    missing
    .sort_values(['Data', 'Point ID', 'buffer_m'])
    .groupby(point_keys)
    .agg(
        missing_rows=('Buffer size', 'size'),
        missing_buffers=('Buffer size', lambda s: ', '.join(map(str, sorted(s, key=lambda x: int(str(x).replace("m",""))))))
    )
    .reset_index())

print("\n=== Affected Point IDs (one row per point) ===")
display(affected_points.head(5))

# Detailed table of every missing row (handy for auditing)
missing_details = (
    missing[['Data', 'Point ID', 'Buffer size', 'buffer_m']]
    .sort_values(['Data', 'Point ID', 'buffer_m'])
    .reset_index(drop=True))

print("\n=== Detailed Missing Rows ===")
# Cap the sample size so the cell still runs when fewer than 5 rows are missing
display(missing_details.sample(min(5, len(missing_details)), random_state=42))

In [None]:
# Summary statistics for the metric columns
buffer_metrics.describe()

#### Saving the DataFrame

In [None]:
# #creating a new df
# buffer_metrics = buffer_metrics.copy()

# # Saving to CSV
# buffer_metrics.to_csv('buffer_metrics.csv', index=False)

# # Display download link
# FileLink('buffer_metrics.csv')

-------------------------

# Adding in latitude and longitude

In [None]:
# Reloading the buffer metrics so I don't have to run buffer count every time.
# buffer_metrics.csv is the file written by the (commented-out) save cell above.
# Read with pandas so the counts, road lengths and bathymetry means come back
# as numbers; gpd.read_file returns CSV fields as text by default. The WKT in
# 'Buffer geometry' is parsed in a later cell.
buffer_metrics = pd.read_csv("buffer_metrics.csv")

In [None]:
# Preview of the reloaded metrics
buffer_metrics.head()

#### I had some trouble merging the lat long back from the clean_strandings df so I have recalculated them from the buffer instead.

In [None]:
# Parse WKT → Shapely. Values that are already geometries are passed through,
# so re-running this cell does not fail on an already-parsed column.
buffer_metrics['Buffer geometry'] = buffer_metrics['Buffer geometry'].apply(
    lambda value: wkt.loads(value) if isinstance(value, str) else value)

# Making a gdf with the CRS the buffers were created in
gdf = gpd.GeoDataFrame(buffer_metrics, geometry='Buffer geometry', crs="EPSG:27700")

# Getting the buffer centroids (they carry the frame's CRS) and projecting them to WGS84
centroids = gdf.geometry.centroid
centroids_ll = centroids.to_crs(4326)

# Writing the longitude/latitude columns
gdf['longitude'] = centroids_ll.x
gdf['latitude']  = centroids_ll.y

# Result frame with the lat/long columns, used by the cells below
buffer_metrics_lat_long = gdf

In [None]:
# Spot check of the frame that actually carries the new latitude/longitude
# columns (they were added to buffer_metrics_lat_long, not to buffer_metrics)
buffer_metrics_lat_long.sample(10, random_state=42)

In [None]:
# Checking the data: for one point, lat/long should be identical across its
# buffers and the counts should grow with buffer size.
# The third listed row of each dataset supplies the Point ID to inspect.
is_strandings_row = buffer_metrics_lat_long['Data'] == 'Strandings points'
is_random_row = buffer_metrics_lat_long['Data'] == 'Random points'

strand_point_id = buffer_metrics_lat_long.loc[is_strandings_row, 'Point ID'].iloc[2]
rand_point_id = buffer_metrics_lat_long.loc[is_random_row, 'Point ID'].iloc[2]

def get_buffers_for_point(point_id, category):
    """Return the buffer rows of one point, ordered by buffer size.

    Rows are taken from ``buffer_metrics_lat_long`` where 'Point ID' equals
    ``point_id`` and 'Data' equals ``category``; they are sorted by the numeric
    part of 'Buffer size' and reduced to the columns used in the check table.
    """
    table_columns = ['Data', 'latitude', 'longitude', 'Buffer size',
                     'Other points', 'Road length', 'Building count', 'Bathymetry mean']
    belongs_to_point = (
        (buffer_metrics_lat_long['Point ID'] == point_id) &
        (buffer_metrics_lat_long['Data'] == category))
    point_rows = buffer_metrics_lat_long[belongs_to_point]
    # Sort on the number in labels such as "500m" so 1000m does not come before 500m
    ordered = point_rows.sort_values(
        'Buffer size', key=lambda s: s.str.replace('m', '').astype(int))
    return ordered[table_columns]

# Creating two separate tables: one for a stranding point and one for a random point
strand_table_lat_long = get_buffers_for_point(strand_point_id, 'Strandings points')
rand_table_lat_long = get_buffers_for_point(rand_point_id, 'Random points')

# Displaying each table under its heading
for heading, table in (
    (" Strandings point:", strand_table_lat_long),
    ("\n Random point:", rand_table_lat_long),
):
    print(heading)
    display(table)

In [None]:
# Save each table as HTML for the write up.
# The output folder is created first so the export works on a fresh checkout.
tables_dir = Path("Method_results_images")
tables_dir.mkdir(parents=True, exist_ok=True)

strand_table_lat_long.to_html(
    "Method_results_images/Buffer_Count_Strandings_Point_Buffer_Check_lat_long_Table.html",
    index=False)

rand_table_lat_long.to_html(
    "Method_results_images/Buffer_Count_Random_Point_Buffer_Check_lat_long_Table.html",
    index=False)

In [None]:
# #creating a new df
# buffer_metrics_lat_long = buffer_metrics_lat_long.copy()

# # Saving to CSV
# buffer_metrics_lat_long.to_csv('buffer_metrics_lat_long.csv', index=False)

# # Display download link
# FileLink('buffer_metrics_lat_long.csv')

# Continues in 2.T-test notebook