# <u>Finding potentially under reported strandings areas</u>

In [None]:
import geopandas as gpd
import pandas as pd
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import folium
import rasterio.mask
import seaborn as sns
import os
import fiona
import warnings
from shapely.geometry import Point
from shapely import wkt
from shapely.geometry import mapping
from IPython.display import FileLink
from IPython.display import display
from shapely.geometry import LineString, MultiLineString
from tqdm import tqdm
from time import time
from pygam import LogisticGAM, s
from branca.colormap import LinearColormap
from shapely.geometry import box
from folium.plugins import MarkerCluster


#### Creating evenly spread points around the UK to identify areas with low population, roads and the correct bathymetry for strandings.

In [None]:
# Silence library warnings so the notebook output stays readable
warnings.filterwarnings("ignore")

# Load the merged coastline outline (see UK strandings EDA notebook)
gadm = gpd.read_file("merged_outline.gpkg").to_crs('EPSG:4326')
all_boundaries = gadm.geometry.boundary.unary_union

# Reproject to British National Grid (EPSG:27700) so lengths are ground metres.
# Web Mercator (EPSG:3857) stretches distances by roughly 1 / cos(latitude),
# so steps of "1000 m" measured in it are considerably shorter on the ground
# at UK latitudes. EPSG:27700 is also the metric CRS this notebook uses for
# its buffer metrics.
gadm_proj = gadm.to_crs(epsg=27700)
coastline = gadm_proj.geometry.boundary.unary_union

# Spacing between consecutive points along the coast, in metres
spacing_m = 1000
points = []

# Normalise the merged boundary into a list of individual lines
if isinstance(coastline, MultiLineString):
    coast_lines = list(coastline.geoms)
elif isinstance(coastline, LineString):
    coast_lines = [coastline]
else:
    coast_lines = []

# Walk each line and drop a point every `spacing_m` metres, starting at its origin
for line in coast_lines:
    n_points = int(line.length // spacing_m)
    points.extend(line.interpolate(i * spacing_m) for i in range(n_points))

# Back to a GeoDataFrame in lat/lon (EPSG:4326) for folium
points_gdf = gpd.GeoDataFrame(geometry=points, crs="EPSG:27700").to_crs(epsg=4326)

In [None]:
# Quick visual sanity check of the generated coastline points

# Base map centred on the UK
m = folium.Map(location=[55, -3], zoom_start=5, tiles='cartodbpositron', control_scale=True)

# Shared styling for every point marker
marker_style = dict(radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=0.7)

# One small circle marker per point; the location is given as [y, x] (lat, lon)
for pt in points_gdf.geometry:
    folium.CircleMarker(location=[pt.y, pt.x], **marker_style).add_to(m)

# Fixed-position title banner injected into the map's HTML
title_html = """
<div style="position: fixed; top: 10px; left: 50%; transform: translateX(-50%);
 z-index: 9999; background: white; padding: 6px 10px; border: 1px solid #777;
 border-radius: 6px; font-weight: 600; box-shadow: 0 1px 3px rgba(0,0,0,.2);">
 Points Spaced by 1000m
</div>
"""
m.get_root().html.add_child(folium.Element(title_html))

# Show map
m


In [None]:
# Save the current folium map `m` as a standalone HTML file
# NOTE(review): the file name says "1000km", but the points were spaced with spacing_m = 1000 (metres) — consider renaming.
m.save("Method_results_images/Under_Reported_Strandings_1000km_points.html")

#### Saved as a link with Netlify [1000m spaced points](https://points-spaced-1000m.netlify.app/)

#### Adding 500 m buffers, as that is the buffer size that gave decent statistical results

In [None]:
# Reproject to British National Grid (EPSG:27700) so the buffer radius is in
# ground metres. In Web Mercator (EPSG:3857) a nominal 500 m radius covers a
# much smaller ground distance at UK latitudes.
points_gdf_proj = points_gdf.to_crs(epsg=27700)

# Replace each point with its 500 m circular buffer
points_gdf_proj['geometry'] = points_gdf_proj.buffer(500)

# Buffers stay in EPSG:27700 for the metric analysis and mapping
# (to_crs to the same CRS simply returns a separate copy)
buffers_gdf = points_gdf_proj.to_crs(epsg=27700)


------------------------------------

## Buffer count

#### I repeated the process of my original buffer count with the new points spaced 1 km (1000 m) apart

In [None]:
# Load the four input datasets used for the buffer metrics.

# Snapped clean strandings (points) (see EDA notebook)
# NOTE(review): this is a CSV read through geopandas; column dtypes depend on the
# reader, so numeric columns may arrive as strings — confirm before using them.
clean_strandings = gpd.read_file("clean_strandings.csv")

# Roads GeoDataFrame (lines)
roads = gpd.read_file("merged_roads.gpkg")

# Buildings GeoDataFrame (polygons)
buildings = gpd.read_file("UK_Ireland_Buildings.gpkg")

# Bathymetric (TIFF)
# NOTE(review): the dataset handle is opened here and never closed in the visible cells.
bath = rasterio.open("uk_ireland_bath.tiff")

#### Setting crs

In [None]:
# Build the strandings point geometry from the longitude/latitude columns, in EPSG:4326.
# The previous WKT parse of the 'geometry' column was overwritten straight away by
# these points, so it is dropped; it also failed on null or already-parsed geometries.
# Coordinates are coerced to numbers first because CSV readers may deliver them as
# strings (invalid values still raise, as before).
lon = pd.to_numeric(clean_strandings['longitude'])
lat = pd.to_numeric(clean_strandings['latitude'])
clean_strandings['geometry'] = gpd.points_from_xy(lon, lat)
clean_strandings = gpd.GeoDataFrame(clean_strandings, geometry='geometry', crs="EPSG:4326")

In [None]:
# CRS constants: SRC_CRS is assigned to layers that arrive without a CRS,
# DST_CRS is the metric CRS that every layer is reprojected to for buffer analysis
SRC_CRS = "EPSG:4326"   
DST_CRS = "EPSG:27700"

def set_crs_if_missing(gdf: gpd.GeoDataFrame, crs: str) -> gpd.GeoDataFrame:
    """Return *gdf* with *crs* assigned when it has none; otherwise return it unchanged.

    No reprojection takes place: the CRS is only declared for frames whose
    ``crs`` attribute is ``None``.
    """
    return gdf.set_crs(crs) if gdf.crs is None else gdf
def to_metric(gdf: gpd.GeoDataFrame, dst_crs: str = DST_CRS) -> gpd.GeoDataFrame:
    """Reproject *gdf* to *dst_crs* (defaults to the notebook's metric CRS)."""
    projected = gdf.to_crs(dst_crs)
    return projected

# Declare the source CRS on any layer that was loaded without one
clean_strandings = set_crs_if_missing(clean_strandings, SRC_CRS)
buildings        = set_crs_if_missing(buildings, SRC_CRS)
roads            = set_crs_if_missing(roads, SRC_CRS)

# Reproject every layer to metres for analysis
strandings_proj = to_metric(clean_strandings, DST_CRS)
buildings_proj  = to_metric(buildings,        DST_CRS)
roads_proj      = to_metric(roads,            DST_CRS)

# The buffers are built in EPSG:27700, so that (not lat/lon) is the correct
# fallback if their CRS were ever missing; declaring 4326 on metre coordinates
# would corrupt the reprojection.
buffers_gdf = set_crs_if_missing(buffers_gdf, DST_CRS).to_crs(DST_CRS)

# Report the resulting CRS of each projected layer
print("CRS (proj):")
print("  strandings_proj:", strandings_proj.crs)
print("  buildings_proj :", buildings_proj.crs)
print("  roads_proj     :", roads_proj.crs)
print("  buffers_gdf    :", buffers_gdf.crs)


### Buildings and roads count

In [None]:
# Work on a copy so the metrics can be added without touching buffers_gdf
buffer_metrics_predict = buffers_gdf.copy()
# buffer_id mirrors the index so the bathymetry means can be merged back later
buffer_metrics_predict['buffer_id'] = buffer_metrics_predict.index

# Metric columns start as NaN and are filled in buffer by buffer below
buffer_metrics_predict['Road length'] = np.nan
buffer_metrics_predict['Building count'] = np.nan

# Strandings in the same CRS as the buffers
strandings_proj = clean_strandings.to_crs(buffers_gdf.crs)

# Spatial indexes give a cheap bounding-box pre-filter before the exact geometry tests
roads_sindex = roads_proj.sindex
buildings_sindex = buildings_proj.sindex
strandings_sindex = strandings_proj.sindex

# Iterate the buffers with a progress bar
for idx, row in tqdm(buffer_metrics_predict.iterrows(), total=len(buffer_metrics_predict), desc="Processing buffers"):
    buf = row['geometry']
    bbox = buf.bounds

    # --- Road length: clip every candidate road to the buffer and add up the pieces
    road_matches_index = list(roads_sindex.intersection(bbox))
    roads_in = roads_proj.iloc[road_matches_index]
    total_length = 0.0
    for road in roads_in.geometry:
        clipped = road.intersection(buf)
        if clipped.is_empty:
            continue
        kind = clipped.geom_type
        if kind == 'LineString':
            total_length += clipped.length
        elif kind == 'MultiLineString':
            total_length += sum(seg.length for seg in clipped.geoms)
        # any other geometry type returned by the clip is not counted
    buffer_metrics_predict.at[idx, 'Road length'] = total_length

    # --- Building count: candidates from the index, then those lying fully within the buffer
    building_matches_index = list(buildings_sindex.intersection(bbox))
    buildings_in = buildings_proj.iloc[building_matches_index]
    building_count = buildings_in.geometry.within(buf).sum()
    buffer_metrics_predict.at[idx, 'Building count'] = building_count



In [None]:
# Preview the first rows to sanity-check the road-length and building-count columns
buffer_metrics_predict.head()

In [None]:
# Summary statistics of the buffer metrics computed so far
buffer_metrics_predict.describe()

### Calculating bathymetry mean

In [None]:
# Copy the buffers and keep their index as buffer_id so results can be merged back
buffer_df_raster = buffers_gdf.copy()
buffer_df_raster['buffer_id'] = buffer_df_raster.index

# Put the buffers in the raster's CRS before sampling
buffer_df_raster = buffer_df_raster.set_geometry('geometry')
buffer_df_raster = buffer_df_raster.set_crs("EPSG:27700", allow_override=True)
buffer_df_raster = buffer_df_raster.to_crs(bath.crs)

# One result dict per buffer
bathymetry_results = []
# Count buffers whose raster read failed, so failures are reported rather than hidden
failed_buffers = 0

# Start timing
start = time()

# Mean of the retained raster cells inside each buffer
for _, row in tqdm(buffer_df_raster.iterrows(), total=len(buffer_df_raster)):
    geom = [mapping(row['geometry'])]

    try:
        # Only the raster read is guarded; it fails e.g. when a buffer does not overlap the raster
        out_image, _ = rasterio.mask.mask(bath, geom, crop=True)
    except Exception:
        # Best effort: record the buffer as missing and keep going
        failed_buffers += 1
        mean_value = np.nan
    else:
        band = out_image[0]

        # Drop NoData cells when the raster declares a NoData value
        if bath.nodata is not None:
            band = band[band != bath.nodata]
        # Keep strictly positive cells only
        # NOTE(review): presumably positive values are water depth in this raster — confirm the sign convention
        band = band[band > 0]

        mean_value = band.mean() if band.size > 0 else np.nan

    bathymetry_results.append({
        'buffer_id': row['buffer_id'],
        'Bathymetry mean': mean_value})

# End timing
end = time()
print(f"Finished in {(end - start)/60:.2f} minutes")
print(f"Buffers that could not be sampled (set to NaN): {failed_buffers:,}")

# Converting to a DataFrame
bath_df = pd.DataFrame(bathymetry_results)

# Merge on buffer_id; the index is reset first so buffer_id is the positional index
buffer_metrics_predict = buffer_metrics_predict.reset_index(drop=True)  # just in case
buffer_metrics_predict['buffer_id'] = buffer_metrics_predict.index
# Drop any earlier result so re-running the cell does not create duplicate columns
buffer_metrics_predict = buffer_metrics_predict.drop(columns='Bathymetry mean', errors='ignore')
buffer_metrics_predict = buffer_metrics_predict.merge(bath_df, on='buffer_id', how='left')
buffer_metrics_predict = buffer_metrics_predict.drop(columns='buffer_id')


In [None]:
# Spot-check five random rows now that 'Bathymetry mean' has been merged in
buffer_metrics_predict.sample(5)

## Investigating bathymetric data

In [None]:
# Column dtypes and non-null counts; some bathymetry values are missing,
# the same as in the original buffer count.
buffer_metrics_predict.info()

### Missing rows

#### Mapping the missing bathymetry rows - try it, delete it if it doesn't work, add it to the write-up if it does

In [None]:
# Subset the buffers that have no bathymetry value
missing = buffer_metrics_predict[buffer_metrics_predict['Bathymetry mean'].isna()].copy()
print(f"Missing: {len(missing):,} of {len(buffer_metrics_predict):,} "
      f"({len(missing)/len(buffer_metrics_predict):.1%})")

if missing.empty:
    # Nothing to map; the title/display steps below are skipped so that a
    # previously built map `m` is not modified or re-displayed by mistake.
    print("No missing bathymetry rows.")
else:
    # Take centroids in the buffers' own CRS first, then reproject the points to
    # WGS84 for folium (a centroid computed in a geographic CRS is unreliable).
    ctr = missing.geometry.centroid.to_crs(epsg=4326)

    # Centre the map on the median centroid of the missing features
    m = folium.Map(location=[ctr.y.median(), ctr.x.median()],
                   zoom_start=5, tiles="cartodbpositron", control_scale=True)

    # Fast: plot centroids (use GeoJson for polygons if you prefer)
    for p in ctr:
        folium.CircleMarker([p.y, p.x], radius=3, fill=True, fill_opacity=0.8,
                            color="red").add_to(m)

    # Title
    m.get_root().html.add_child(folium.Element("""
<div style="position: fixed; top: 10px; left: 50%; transform: translateX(-50%);
 z-index: 9999; background: white; padding: 6px 10px; border: 1px solid #777;
 border-radius: 6px; font-weight: 600; box-shadow: 0 1px 3px rgba(0,0,0,.2);">
 Missing Bathymetric Data
</div>
"""))

    folium.LayerControl().add_to(m)
    display(m)
  


In [None]:
# Save the current folium map `m` as a standalone HTML file
# NOTE(review): `m` is whichever map was assigned last; confirm it is the missing-bathymetry map before saving.
m.save("Method_results_images/Under_Reported_Strandings_missing_bathymetry_centroid.html")

#### Saved as a link with Netlify [missing bathymetry link](https://under-reported-missing-bathy.netlify.app/)

In [None]:
# Buffers whose bathymetry mean is missing
no_bathy = buffer_metrics_predict['Bathymetry mean'].isna()
missing = buffer_metrics_predict[no_bathy].copy()

# Read the raster's CRS, extent (as a rectangle) and NoData value, then close it
with rasterio.open("uk_ireland_bath.tiff") as src:
    r_crs, r_bounds, r_nodata = src.crs, box(*src.bounds), src.nodata

# Buffers expressed in the raster's CRS
miss_r = missing.to_crs(r_crs)

# Classify each buffer by whether its centroid lies within the raster extent
centroid_inside = miss_r.centroid.within(r_bounds)
miss_inside  = miss_r[centroid_inside]
miss_outside = miss_r[~centroid_inside]

# Report the split together with the raster metadata
print(f"Missing total: {len(missing):,}")
print(f"• Outside raster bounds: {len(miss_outside):,}")
print(f"• Inside bounds (likely land/NoData mask or tiny overlap): {len(miss_inside):,}")
print("Raster NoData value:", r_nodata, " | Raster CRS:", r_crs)


#### Missing bathymetric buffers seem to fall on land / intertidal / masked shoreline cells.

In [None]:
# Inspect 15 random buffers that have no bathymetry value
missing.sample(15)

In [None]:
# Boolean mask: True where the bathymetry mean is missing
bathy_na   = buffer_metrics_predict['Bathymetry mean'].isna()

# Treat very small values as zero in case of rounding; NaN counts as zero here
eps = 1e-9
roads_zero = buffer_metrics_predict['Road length'].fillna(0).abs() <= eps
blds_zero  = buffer_metrics_predict['Building count'].fillna(0).abs() <= eps

# Buffers with no bathymetry AND no roads AND no buildings
mask = bathy_na & roads_zero & blds_zero

# Counts: all three conditions, missing bathymetry only, and every row
n_all   = int(mask.sum())
n_bathy = int(bathy_na.sum())
n_total = len(buffer_metrics_predict)

# Guard the ratios: with no missing-bathymetry rows (or an empty table) the
# division would raise ZeroDivisionError, so report NaN instead.
pct_of_missing = n_all / n_bathy if n_bathy else float("nan")
pct_of_all     = n_all / n_total if n_total else float("nan")

# Displaying the results and percentages
print(f"Rows with NaN bathy AND roads=0 AND buildings=0: {n_all:,}")
print(f"…as % of missing-bathy rows: {pct_of_missing:.1%}")
print(f"…as % of all rows: {pct_of_all:.1%}")


#### Buffers with no roads/buildings often seem to coincide with missing bathymetry, so I investigated this a little further

In [None]:
# How many buffers have zero roads and zero buildings but DO have bathymetry data
eps = 1e-9
bathy_ok   = buffer_metrics_predict['Bathymetry mean'].notna()

# Zeros that are real values (NaN is not treated as zero here)
roads_zero = buffer_metrics_predict['Road length'].notna() & (
    buffer_metrics_predict['Road length'].abs() <= eps)
blds_zero  = buffer_metrics_predict['Building count'].notna() & (
    buffer_metrics_predict['Building count'].abs() <= eps)

# Bathymetry present AND no roads AND no buildings
mask = bathy_ok & roads_zero & blds_zero

# Rows matching the mask, rows with bathymetry present, total rows
n          = int(mask.sum())
n_bathy_ok = int(bathy_ok.sum())
n_total    = len(buffer_metrics_predict)

# Guard the ratios so an empty denominator reports NaN instead of raising ZeroDivisionError
pct_of_bathy = n / n_bathy_ok if n_bathy_ok else float("nan")
pct_of_all   = n / n_total if n_total else float("nan")

# Displaying the results and percentages
print(f"Buffers with bathy (not NaN) AND roads=0 AND buildings=0: {n:,}")
print(f"…as % of buffers with bathy: {pct_of_bathy:.1%}")
print(f"…as % of all buffers: {pct_of_all:.1%}")


#### Buffers with zero roads/buildings account for 31% of all buffers, so they are common, and I'm not finding a link with missing bathymetric data.

In [None]:
# Summary statistics of all three metrics before saving
buffer_metrics_predict.describe()

In [None]:
# Saving the data

# Rebind the name to a copy of the table (the original object is left untouched)
buffer_metrics_predict = buffer_metrics_predict.copy()

# Write the metrics to CSV without the index column
buffer_metrics_predict.to_csv('buffer_metrics_predict.csv', index=False)

# Display a download link to the CSV in the notebook
FileLink('buffer_metrics_predict.csv')

In [None]:
# Spot-check five random rows of the saved table
buffer_metrics_predict.sample(5)

### Most likely stranding depth

In [None]:
# Loading original buffer/strandings data
# NOTE(review): this is a CSV read through geopandas, so columns may arrive as strings;
# the cells below coerce 'Buffer size' and the bathymetry column to numeric before use.
gdf = gpd.read_file("buffer_metrics_lat_long.csv")

In [None]:
# Inspect ten random rows of the original buffer/strandings table
gdf.sample(10)

In [None]:
# Removing warnings for aesthetics
warnings.filterwarnings("ignore")

# Make sure the binary outcome column exists (1 = stranding buffer, 0 = random buffer)
if 'is_stranding' not in gdf.columns:
    gdf['is_stranding'] = (gdf['Data'] == 'Strandings points').astype(int)

# Make 'Buffer size' numeric ("1500m" -> 1500)
buf = gdf['Buffer size'].astype(str).str.strip().str.replace('m', '', regex=False)
gdf['Buffer size'] = pd.to_numeric(buf, errors='coerce')

# Adjustable subset, settled on 1500
gdf_1500 = gdf[gdf['Buffer size'] == 1500].copy()

# Column to plot (use transformed if available)
col = 'Bathymetry_trans' if 'Bathymetry_trans' in gdf_1500.columns else 'Bathymetry mean'

# Force numeric and drop bad rows. 'is_stranding' is coerced as well because a
# column loaded from CSV may hold the strings "0"/"1", which would not match
# the numeric labels used below.
gdf_1500[col] = pd.to_numeric(gdf_1500[col], errors='coerce')
gdf_1500['is_stranding'] = pd.to_numeric(gdf_1500['is_stranding'], errors='coerce')
gdf_1500 = gdf_1500.replace([np.inf, -np.inf], np.nan)
gdf_plot = gdf_1500[['is_stranding', col]].dropna()

print(gdf_plot[col].dtype, gdf_plot.shape)  # should be float64 and nonzero rows

# Give each group an explicit name so seaborn builds the legend itself.
# Passing labels=[...] to plt.legend() relabels artists by position, which can
# swap "Random" and "Stranding" because seaborn does not guarantee that order.
gdf_plot['Point type'] = gdf_plot['is_stranding'].map({0: 'Random', 1: 'Stranding'})

# Plotting and displaying
plt.figure(figsize=(10, 6))
ax = sns.kdeplot(
    data=gdf_plot,
    x=col,
    hue='Point type',
    hue_order=['Random', 'Stranding'],
    common_norm=False,      # each group is normalised on its own
    fill=True)
plt.title('Bathymetry Distribution for Strandings vs Random Points (1500 m)')
plt.xlabel(col)
plt.ylabel('Density')
legend = ax.get_legend()
if legend is not None:
    legend.set_title('Is Stranding')
plt.tight_layout()
plt.savefig("Method_results_images/Under_reported_KDE_bath.png", dpi=150, bbox_inches="tight")
plt.show()


#### Strandings appear to be more common in shallow waters, but have a broader distribution, suggesting that slope, coastal morphology and animal behaviour likely all influence stranding risk

In [None]:
# Outcome column: 'is_stranding' is expected to exist on gdf already.
# Guarded creation kept for reference - bring back if this cell crashes:
# if 'is_stranding' not in gdf.columns:
#     gdf['is_stranding'] = (gdf['Data'] == 'Strandings points').astype(int)

# Normalise buffer sizes such as "1500m" to the number 1500
size_text = gdf['Buffer size'].astype(str).str.strip().str.replace('m','', regex=False)
gdf['Buffer size'] = pd.to_numeric(size_text, errors='coerce')

# Subset for the chosen buffer size
gdf_1500 = gdf.loc[gdf['Buffer size'] == 1500].copy()

# Bathymetry column: the raw mean is preferred here, the transformed one is the fallback
if 'Bathymetry mean' in gdf_1500.columns:
    bathy_col = 'Bathymetry mean'
else:
    bathy_col = 'Bathymetry_trans'

# Numeric predictor; infinities become NaN and incomplete rows are dropped
gdf_1500[bathy_col] = pd.to_numeric(gdf_1500[bathy_col], errors='coerce')
gdf_1500 = gdf_1500.replace([np.inf, -np.inf], np.nan)
gdf_1500 = gdf_1500.dropna(subset=[bathy_col, 'is_stranding'])

# Design matrix (single column) and integer outcome vector
X = gdf_1500[[bathy_col]].values
y = gdf_1500['is_stranding'].values.astype(int)

# Logistic GAM with one spline term on column 0, tuned by grid search
gam = LogisticGAM(s(0)).gridsearch(X, y)

# Evaluate the fit on 200 evenly spaced values across the observed range
xx = np.linspace(X.min(), X.max(), 200).reshape(-1, 1)   # shape (200, 1)
pred = gam.predict_proba(xx)

# 95% intervals around the fitted mean: column 0 is the lower bound, column 1 the upper
ci = gam.confidence_intervals(xx, width=0.95)            # shape (200, 2)
conf_lo = ci[:, 0]
conf_hi = ci[:, 1]

# Plot, save and display
plt.figure(figsize=(10,6))
plt.plot(xx, pred, label='Fitted probability')
plt.fill_between(xx.ravel(), conf_lo, conf_hi, alpha=0.2, label='95% CI')
plt.xlabel(bathy_col)
plt.ylabel('Predicted probability of stranding')
plt.title('GAM: stranding probability vs. bathymetry (1500 m buffers)')
plt.grid(True, alpha=0.3)
plt.legend()
plt.tight_layout()
plt.savefig("Method_results_images/Under_reported_GAM_bath.png", dpi=150, bbox_inches="tight")
plt.show()

#### Strandings are strongly associated with shallow bathymetry, especially 0-50m, but see a sharp decline beyond ~ 100m. >250m shows very wide confidence intervals.

## Possible strandings areas

#### Adjusting the thresholds for roads, buildings and bathymetry.

In [None]:
# removing warnings for aesthetics
warnings.filterwarnings("ignore")

# Thresholds: the 10th percentile of each human-presence metric
road_thresh = buffer_metrics_predict['Road length'].quantile(0.1)
building_thresh = buffer_metrics_predict['Building count'].quantile(0.1)

# Easily adjustable for analysis of best thresholds:
# low road length AND low building count AND bathymetry mean within 0-50
underreported = buffer_metrics_predict[
    (buffer_metrics_predict['Road length'] <= road_thresh) &
    (buffer_metrics_predict['Building count'] <= building_thresh) &
    (buffer_metrics_predict['Bathymetry mean'].between(0, 50))]

print(f"Underreported areas found: {len(underreported)}")
# A bare expression in the middle of a cell shows nothing, so display the preview explicitly
display(underreported.head())

# Plotting
fig, ax = plt.subplots(figsize=(10, 10))

# Plot all buffers in grey
buffer_metrics_predict.plot(
    ax=ax,
    color='lightgrey',
    markersize=10,
    label='All Buffers')

# Highlight under-reported high-risk zones
underreported.plot(
    ax=ax,
    color='red',
    edgecolor='red',
    alpha=0.7,
    markersize=20,
    label='Under-Reported High-Risk')

# The filter keeps buffers with LOW road length and building count, so the
# title says "Low-Human" (it previously read "High-Human").
ax.set_title(
    "Possible Under-Reported Stranding Zones\n(Low-Human, 0-50 m Bathymetry)",
    fontsize=14)
ax.legend()
ax.axis('off')
plt.show()



#### The map highlights 15,628 remote shallow water points, highlighting northwestern Scotland and western Ireland as potential areas of interest.

In [None]:
# Save the under-reported zones figure drawn in the previous cell.
# `m` still refers to an earlier folium map at this point, so m.save() wrote the
# wrong map under this name; the matplotlib figure `fig` is saved instead.
fig.savefig("Method_results_images/Under_Reported_Strandings_Possible_UnderReported.png",
            dpi=150, bbox_inches="tight")

### Other points analysis

In [None]:
# Work on a copy with the two columns used below coerced to numbers
df = gdf.copy()
for numeric_col in ("Buffer size", "Other points"):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors="coerce")

# Strandings buffers at the chosen buffer size (adjustable, settled on 5000)
is_stranding_row = df["Data"] == "Strandings points"
is_5k_buffer = df["Buffer size"] == 5000
strand_5k = df[is_stranding_row & is_5k_buffer]

# The 'Other points' values for those buffers
other_5k = strand_5k["Other points"]

# Histogram of 'Other points' across the 5 km strandings buffers
other_5k.plot.hist(bins=30)
plt.xlabel("Other points (within 5 km)")
plt.ylabel("Count of strandings sites")
plt.title("Distribution of 'Other points' for 5000 m strandings buffers")
plt.savefig("Method_results_images/Under_reported_other_points_5.png", dpi=150, bbox_inches="tight")
plt.show()


# Single-column DataFrame view of the same values
other_5k_df = strand_5k[["Other points"]]



In [None]:
# Under-reported strandings final map
# EPSG code assumed for buffer geometries that carry no CRS (used by centroids_from_buffer_geometry)
BUFFER_GEOM_CRS = 27700 

def to_num(s):
    """Convert a Series to numbers, tolerating unit suffixes such as "1500m".

    Values that already parse as numbers are kept as they are, so scientific
    notation such as "1e-05" survives. Only values that fail to parse have
    every character other than digits, '.' and '-' stripped before a second
    attempt. Anything still unparseable becomes NaN.
    """
    # Numeric (or otherwise non-text) input goes straight through pandas
    if not (s.dtype == object or pd.api.types.is_string_dtype(s.dtype)):
        return pd.to_numeric(s, errors="coerce")

    # First pass: plain parse, which preserves exponents and signs
    direct = pd.to_numeric(s, errors="coerce")
    # Second pass: strip units/symbols; used only where the plain parse failed
    stripped = s.astype(str).str.replace(r"[^0-9.\-]", "", regex=True)
    cleaned = pd.to_numeric(stripped, errors="coerce")
    return direct.fillna(cleaned)

def to_points_from_latlon(df, lat="latitude", lon="longitude"):
    """Return a copy of *df* as an EPSG:4326 GeoDataFrame of points built from its lat/lon columns.

    Coordinates are coerced with ``to_num`` first and rows are dropped afterwards,
    so values that cannot be parsed are removed instead of becoming NaN points.
    """
    d = df.copy()
    d[lat] = to_num(d[lat])
    d[lon] = to_num(d[lon])
    # Drop after coercion so that un-parseable coordinates are excluded too
    d = d.dropna(subset=[lat, lon])
    return gpd.GeoDataFrame(d, geometry=gpd.points_from_xy(d[lon], d[lat]), crs=4326)

def centroids_from_buffer_geometry(df, geom_col="Buffer geometry"):
    """Return a copy of *df* whose active geometry is the buffer centroids in EPSG:4326.

    The geometries in *geom_col* are assumed to be in ``BUFFER_GEOM_CRS`` when
    they carry no CRS. Centroids are taken in EPSG:27700 and then reprojected.
    """
    g = gpd.GeoDataFrame(df.copy(), geometry=df[geom_col])
    if g.crs is None:
        g = g.set_crs(BUFFER_GEOM_CRS)
    # Centroid in a projected CRS, then convert the points to lat/lon
    cent = g.to_crs(27700).geometry.centroid.to_crs(4326)
    g = g.copy()
    g["geometry"] = cent
    # Make the centroid column the active geometry explicitly. If the active
    # geometry were still geom_col, set_crs below would only relabel the
    # un-reprojected buffers as EPSG:4326.
    return g.set_geometry("geometry").set_crs(4326, allow_override=True)


# Under-reported layer: numeric copy of the candidate-buffer metrics
pred = buffer_metrics_predict.copy()
numeric_cols = ["Road length", "Building count", "Bathymetry mean", "latitude", "longitude"]
for c in numeric_cols:
    if c not in pred.columns:
        continue
    pred[c] = to_num(pred[c])

# Points for plotting: lat/lon columns when present, otherwise buffer centroids
if {"latitude", "longitude"} <= set(pred.columns):
    pred_gdf = to_points_from_latlon(pred)
else:
    geom_name = "geometry" if "geometry" in pred.columns else "Buffer geometry"
    pred_gdf = centroids_from_buffer_geometry(pred, geom_col=geom_name)

# Thresholds to play with (10th percentile of each human-presence metric)
road_thresh     = pred_gdf["Road length"].quantile(0.10)
building_thresh = pred_gdf["Building count"].quantile(0.10)

# Candidates: few roads, few buildings and bathymetry mean within 0-50
few_roads     = pred_gdf["Road length"] <= road_thresh
few_buildings = pred_gdf["Building count"] <= building_thresh
shallow       = pred_gdf["Bathymetry mean"].between(0, 50)
underreported = pred_gdf[few_roads & few_buildings & shallow].copy()

# 5 km strandings with zero other points
g = gdf.copy()
g["Buffer size"]  = to_num(g["Buffer size"])
g["Other points"] = to_num(g["Other points"])

strand_rows = g["Data"] == "Strandings points"
five_km     = g["Buffer size"] == 5000
strand_5k = g[strand_rows & five_km].copy()

if {"latitude", "longitude"} <= set(strand_5k.columns):
    all5 = to_points_from_latlon(strand_5k)
else:
    all5 = centroids_from_buffer_geometry(strand_5k, geom_col="Buffer geometry")

zero5 = all5[all5["Other points"] == 0].copy()

# Single overlaid map
# Shared extent: the union of both layers' bounding boxes
p_minx, p_miny, p_maxx, p_maxy = pred_gdf.total_bounds
s_minx, s_miny, s_maxx, s_maxy = all5.total_bounds
minx = min(p_minx, s_minx)
miny = min(p_miny, s_miny)
maxx = max(p_maxx, s_maxx)
maxy = max(p_maxy, s_maxy)

fig, ax = plt.subplots(figsize=(10, 10))

# Background layer (grey): every candidate buffer
pred_gdf.plot(ax=ax, color="lightgrey", markersize=8, label="All candidate buffers", zorder=1)

# Under-reported candidates (red)
underreported.plot(ax=ax, color="red", edgecolor="red", alpha=0.75,
                   markersize=8, label="Under-reported 500 m (low human + 0–50 m)", zorder=2)

# Strandings with zero other points within 5 km (black stars)
zero5.plot(ax=ax, marker="*", color="black", edgecolor="black", alpha=0.9,
           markersize=35, label="Zero other strandings (5km)", zorder=3)

ax.set_title("Under-reported candidates overlaid with 5 km strandings (zero other)", fontsize=13)
ax.set_xlim(minx, maxx)
ax.set_ylim(miny, maxy)
ax.axis("off")
# Legend anchored just inside the top-left corner (axes-fraction coordinates)
ax.legend(loc="upper left", bbox_to_anchor=(0.01, 0.99), frameon=True)

plt.tight_layout()
# Save before showing
plt.savefig("Method_results_images/Under_Reported_Strandings_Overlay_Underreported_And_Zero5k.png", dpi=300, bbox_inches="tight")
plt.show()



In [None]:
# Saving the map
# NOTE(review): the previous cell draws a matplotlib figure (already written with plt.savefig)
# and does not assign `m`, so this saves whichever folium map `m` held earlier under this
# file name — confirm that this is intended.
m.save("Method_results_images/Under_Reported_Strandings_No_Other_Strandings_Reported.html")

In [None]:
# Interactive map

# EPSG code assumed for buffer geometries that carry no CRS (used by centroids_from_buffer_geometry)
BUFFER_GEOM_CRS = 27700 
# NOTE(review): OUTPUT_HTML is not referenced in the visible cells; the later save uses a different path.
OUTPUT_HTML = "Method_results_images/UnderReported_vs_Zero1_5km_interactive.html"

def to_num(s):
    """Convert a Series to numbers, tolerating unit suffixes such as "1500m".

    Values that already parse as numbers are kept as they are, so scientific
    notation such as "1e-05" survives. Only values that fail to parse have
    every character other than digits, '.' and '-' stripped before a second
    attempt. Anything still unparseable becomes NaN.
    """
    # Numeric (or otherwise non-text) input goes straight through pandas
    if not (s.dtype == object or pd.api.types.is_string_dtype(s.dtype)):
        return pd.to_numeric(s, errors="coerce")

    # First pass: plain parse, which preserves exponents and signs
    direct = pd.to_numeric(s, errors="coerce")
    # Second pass: strip units/symbols; used only where the plain parse failed
    stripped = s.astype(str).str.replace(r"[^0-9.\-]", "", regex=True)
    cleaned = pd.to_numeric(stripped, errors="coerce")
    return direct.fillna(cleaned)

def to_points_from_latlon(df, lat="latitude", lon="longitude"):
    """Return a copy of *df* as an EPSG:4326 GeoDataFrame of points built from its lat/lon columns.

    Coordinates are coerced with ``to_num`` first and rows are dropped afterwards,
    so values that cannot be parsed are removed instead of becoming NaN points.
    """
    d = df.copy()
    d[lat] = to_num(d[lat])
    d[lon] = to_num(d[lon])
    # Drop after coercion so that un-parseable coordinates are excluded too
    d = d.dropna(subset=[lat, lon])
    return gpd.GeoDataFrame(d, geometry=gpd.points_from_xy(d[lon], d[lat]), crs=4326)

def centroids_from_buffer_geometry(df, geom_col="Buffer geometry"):
    """Return a copy of *df* whose active geometry is the buffer centroids in EPSG:4326.

    The geometries in *geom_col* are assumed to be in ``BUFFER_GEOM_CRS`` when
    they carry no CRS. Centroids are taken in EPSG:27700 and then reprojected.
    """
    g = gpd.GeoDataFrame(df.copy(), geometry=df[geom_col])
    if g.crs is None:
        g = g.set_crs(BUFFER_GEOM_CRS)
    # Centroid in a projected CRS, then convert the points to lat/lon
    cent = g.to_crs(27700).geometry.centroid.to_crs(4326)
    g = g.copy()
    g["geometry"] = cent
    # Make the centroid column the active geometry explicitly. If the active
    # geometry were still geom_col, set_crs below would only relabel the
    # un-reprojected buffers as EPSG:4326.
    return g.set_geometry("geometry").set_crs(4326, allow_override=True)

# A) Under-reported layer: numeric copy of the candidate-buffer metrics
pred = buffer_metrics_predict.copy()
numeric_cols = ["Road length", "Building count", "Bathymetry mean", "latitude", "longitude"]
for c in numeric_cols:
    if c not in pred.columns:
        continue
    pred[c] = to_num(pred[c])

# Points for plotting: lat/lon columns when present, otherwise buffer centroids
if {"latitude", "longitude"} <= set(pred.columns):
    pred_gdf = to_points_from_latlon(pred)
else:
    geom_name = "geometry" if "geometry" in pred.columns else "Buffer geometry"
    pred_gdf = centroids_from_buffer_geometry(pred, geom_col=geom_name)

# 10th-percentile thresholds for the two human-presence metrics
road_thresh     = pred_gdf["Road length"].quantile(0.10)
building_thresh = pred_gdf["Building count"].quantile(0.10)

# Candidates: few roads, few buildings and bathymetry mean within 0-50
few_roads     = pred_gdf["Road length"] <= road_thresh
few_buildings = pred_gdf["Building count"] <= building_thresh
shallow       = pred_gdf["Bathymetry mean"].between(0, 50)
underreported = pred_gdf[few_roads & few_buildings & shallow].copy()


# B) 5 km strandings with zero other points + merge to add strandings attributes for pop up
g = gdf.copy()
g["Buffer size"]  = to_num(g["Buffer size"])
g["Other points"] = to_num(g["Other points"])

strand_rows = g["Data"] == "Strandings points"
five_km     = g["Buffer size"] == 5000
strand_5k = g[strand_rows & five_km].copy()

# Points for the 5 km buffers
if {"latitude", "longitude"} <= set(strand_5k.columns):
    all5 = to_points_from_latlon(strand_5k)
else:
    all5 = centroids_from_buffer_geometry(strand_5k, geom_col="Buffer geometry")

# Strandings table for merging, with numeric coordinates
cs = clean_strandings.copy()
for c in ["latitude", "longitude"]:
    if c in cs.columns:
        cs[c] = to_num(cs[c])

# Make sure all5 carries explicit lat/lon columns (taken from the point geometry if absent)
if not {"latitude", "longitude"} <= set(all5.columns):
    all5["latitude"]  = all5.geometry.y
    all5["longitude"] = all5.geometry.x

# Join key: coordinates rounded to 5 decimal places on both tables
for frame in (all5, cs):
    frame["lat_round"] = frame["latitude"].round(5)
    frame["lon_round"] = frame["longitude"].round(5)

# Left-merge keeps all 5 km buffer points and adds strandings attributes
popup_cols = ["lat_round", "lon_round", "Date", "Local Authority", "M/s", "Species", "ID"]
all5_merged = all5.merge(cs[popup_cols], on=["lat_round", "lon_round"], how="left")

# Keep the buffers with no other strandings nearby
zero5 = all5_merged[all5_merged["Other points"] == 0].copy()

# C) Build the map
# Base map centred on the mean coordinate of both layers
lat0 = pd.concat([underreported.geometry.y, zero5.geometry.y]).mean()
lon0 = pd.concat([underreported.geometry.x, zero5.geometry.x]).mean()
m = folium.Map(location=[float(lat0), float(lon0)], zoom_start=5, tiles="cartodbpositron", control_scale=True)

# Under-reported layer (red)
fg_under = folium.FeatureGroup(name="Potential under reported strandings zones", show=True)
for _, r in underreported.iterrows():
    popup = "<br>".join([
        "<b>Under-reported candidate</b>",
        f"Buildings: {r.get('Building count','NA')}",
        f"Roads: {r.get('Road length','NA')}",
        f"Bathy mean: {r.get('Bathymetry mean','NA')}",
    ])
    folium.CircleMarker(
        location=[r.geometry.y, r.geometry.x],
        radius=2, color="red", weight=1,
        fill=True, fill_color="red", fill_opacity=0.8,
        popup=popup,
    ).add_to(fg_under)
fg_under.add_to(m)

# Confirmed strandings layer (black), with detailed pop-up information
fg_zero = folium.FeatureGroup(name="Known single strandings", show=True)
for _, r in zero5.iterrows():
    popup = "<br>".join([
        "<b>Zero other strandings within 5 km</b>",
        f"ID: {r.get('ID','NA')}",
        f"Date: {r.get('Date','NA')}",
        f"Local Authority: {r.get('Local Authority','NA')}",
        f"M/s: {r.get('M/s','NA')}",
        f"Species: {r.get('Species','NA')}",
        f"Other points: {r.get('Other points','NA')}",
    ])
    folium.CircleMarker(
        location=[r.geometry.y, r.geometry.x],
        radius=3, color="black", weight=1,
        fill=True, fill_color="black", fill_opacity=0.9,
        popup=popup,
    ).add_to(fg_zero)
fg_zero.add_to(m)

# Title banner injected into the map's HTML
title_html = """
<div style="position: fixed; top: 10px; left: 50%; transform: translateX(-50%);
 z-index: 9999; background: white; padding: 6px 10px; border: 1px solid #777;
 border-radius: 6px; font-weight: 600; box-shadow: 0 1px 3px rgba(0,0,0,.2);">
 Potentially Under-Reported Areas
</div>
"""
m.get_root().html.add_child(folium.Element(title_html))

# Legend + controls
legend = """
<div style="position:fixed;bottom:50px;left:50px;z-index:9999;background:#fff;
            padding:8px 10px;border:1px solid #ccc;font-size:14px;">
  <b>Layers</b><br>
  <span style='color:red'>●</span> Under-reported (low buildings & roads; 0–50 m bathymetry)<br>
  <span style='color:black'>●</span> Zero other strandings within 5 km
</div>
"""
m.get_root().html.add_child(folium.Element(legend))
folium.LayerControl(collapsed=False).add_to(m)

# Displaying
m


In [None]:
# Save the interactive folium map built in the previous cell as a standalone HTML file
m.save("Method_results_images/Under_Reported_Strandings_UnderReported_vs_Zero5km_interactive_Map.html")

#### Saved as a link with Netlify [possible Under Reported Strandings Areas link](https://under-reported-stranding-zones.netlify.app/)

#### The map suggests that agencies could benefit from strengthening reporting effort in northwest/northern Scotland, the western Irish coast and the southwest of the UK. We see a lot of strandings reports in Cornwall, where the Cornwall Wildlife Trust has a very strong reporter presence, so increasing reporting effort in Devon, Somerset, Dorset and south Wales could be particularly beneficial.

------------------------------------