# Bathymetry

Sample data: <br>
Single beam echo sounder data collected from the eastern forereef of One Tree Island on 23/03/2025 by Lachlan Perris

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from scipy.signal import medfilt

# Load raw data from the instrument

In [None]:

# Load the survey CSV into a DataFrame.
# Alternative input file kept from the original notebook:
#df = pd.read_csv('250323_Bathy_survey_forereef.csv')
# The path has no placeholders, so it is a plain string rather than an f-string.
df = pd.read_csv('data/250323_forereef_survey_clip.csv')

# Bare expression: the notebook renders the table as the cell output.
df


In [None]:
# Quick look at the raw depth column, drawn against the DataFrame index.
raw_depth = df['Depth_applied_elevation']
fig, ax = plt.subplots()
ax.plot(raw_depth)

### Still quite messy!

In [None]:
# pip install geopandas folium

import geopandas as gpd
import folium

# --- Inputs ---
# df: pandas DataFrame whose 'East' and 'North' columns hold projected
# coordinates (the depth column is not used for colouring here).
SOURCE_EPSG = 28356  # change to your GDA94/MGA zone (e.g., 28355 or 28357)

# Build point geometries in the projected CRS first ...
survey_points = gpd.points_from_xy(df['East'], df['North'])
gdf = gpd.GeoDataFrame(df.copy(), geometry=survey_points, crs=f"EPSG:{SOURCE_EPSG}")
# ... then reproject to WGS84 (EPSG:4326), which is what web maps expect.
gdf = gdf.to_crs(epsg=4326)

# Centre the map on the mean latitude / longitude of the points.
centre = [gdf.geometry.y.mean(), gdf.geometry.x.mean()]
m = folium.Map(location=centre, zoom_start=12, tiles='CartoDB Positron')

# One small filled, outline-free circle per sounding (tweak radius if needed).
marker_style = dict(radius=3, fill=True, fill_opacity=0.9, weight=0)
for pt in gdf.geometry:
    folium.CircleMarker(location=[pt.y, pt.x], **marker_style).add_to(m)

# In Jupyter, just display `m`; or save to file:
# m.save('points_map.html')
m



# Bathymetry despiking & smoothing (0.5 m sampling)

**Goal:** Clean a 0.5 m–spaced bathymetry profile by removing impulsive spikes and lightly smoothing noise, while preserving genuine seabed features.

---

## Method
1. **Median filter (despike)**  
   - `window_size = 3` → spans **1.5 m**.  
   - Replaces each value with the **median** of its neighbourhood. Robust to outliers (bad pings, bubbles), preserves sharp breaks.

2. **Rolling mean (smooth)**  
   - `rolling_window = 3` → spans **1.5 m**.  
   - Reduces residual high-frequency jitter; `center=True` avoids lateral shift.

3. **Plot**  
   - Visualise `depth_smoothed` to confirm noise reduction and feature retention.

---

## Parameters used
- Sampling: **0.5 m** along-track  
- Median window: **3 samples (1.5 m)**  
- Mean window: **3 samples (1.5 m)**

> Depths are negative (more negative = deeper). Filters operate the same.

---

## Quick QC
- Plot **raw vs despiked vs smoothed** together.  
- Check **residual = raw − smoothed** to ensure mainly noise is removed.  
- If over-smoothed → reduce window sizes; if spikes remain → increase the **median** window first.


In [None]:
# Filter lengths, in samples.
window_size = 3     # median-filter kernel; must be odd (e.g., 3, 5, 7)
rolling_window = 3  # rolling-mean window

# Step 1: median filter to knock out isolated spikes.
# scipy's medfilt zero-pads the ends of the array, so treat the first and
# last samples with care.
raw_depth = df['Depth_applied_elevation']
df['depth_despiked'] = medfilt(raw_depth, kernel_size=window_size)

# Step 2: centred rolling mean over the despiked series to smooth it further.
# With the default min_periods, rows without a full window come out as NaN.
despiked = df['depth_despiked']
df['depth_smoothed'] = despiked.rolling(window=rolling_window, center=True).mean()

In [None]:
# Overlay the raw, despiked and smoothed depth series for visual QC.
fig, ax = plt.subplots()
stages = [
    ('Depth_applied_elevation', 'raw data'),
    ('depth_despiked', 'depth despiked'),
    ('depth_smoothed', 'depth smoothed'),
]
for column, label in stages:
    ax.plot(df[column], label=label)
ax.legend()
plt.show()

In [None]:
# Along-track chainage: straight-line step between consecutive fixes,
# accumulated from the first row.
# (Units follow the 'East'/'North' columns — presumably metres; confirm.)
dx, dy = df['East'].diff(), df['North'].diff()
step_length = np.sqrt(dx**2 + dy**2)

# The first row has no predecessor, so its NaN step becomes 0.
df['segment_dist'] = step_length.fillna(0)

# Running total of the steps = distance from the start of the track.
df['Distance'] = df['segment_dist'].cumsum()

In [None]:
# Smoothed depth profile against along-track distance.
chainage = df['Distance']
smoothed_depth = df['depth_smoothed']
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(chainage, smoothed_depth)

In [None]:

# Two profiles to compare, each reduced to (distance, elevation):
#   df1 — taken from dflidar as-is
#   df2 — taken from dfsurv, with 1.5 subtracted from the distance and 0.65
#         added to the despiked depth (hand-picked shifts; their origin is
#         not shown here — TODO confirm)
df1 = pd.DataFrame({
    'distance': dflidar['Distance'],
    'elevation': dflidar['Elevation'],
})
df2 = pd.DataFrame({
    'distance': dfsurv['Distance'] - 1.5,
    'elevation': dfsurv['depth_despiked'] + 0.65,
})

# Step 1: the distance range covered by BOTH profiles
min_dist = max(df1['distance'].min(), df2['distance'].min())
max_dist = min(df1['distance'].max(), df2['distance'].max())

# Whole-number chainages, 1 unit apart, lying inside the overlap
grid_start = np.ceil(min_dist)
grid_stop = np.floor(max_dist) + 1
common_distances = np.arange(grid_start, grid_stop, 1)

# Step 2: linearly interpolate each profile onto the shared grid
interp1, interp2 = (
    np.interp(common_distances, profile['distance'], profile['elevation'])
    for profile in (df1, df2)
)

# Step 3: dataset 2 minus dataset 1 at every grid point
elevation_diff = interp2 - interp1

# Step 4: gather everything into one table
df_diff = pd.DataFrame({
    'distance': common_distances,
    'elevation_dataset1': interp1,
    'elevation_dataset2': interp2,
    'elevation_difference': elevation_diff
})

In [None]:
# Row window of df_diff to draw (positional slice).
start = 0
stop = 200

window = df_diff.iloc[start:stop]

# (column, colour, legend label) for each curve, in drawing order
curves = [
    ('elevation_difference', 'r', 'Difference'),
    ('elevation_dataset1', 'g', 'LiDAR bathymetry'),
    ('elevation_dataset2', 'b', 'Measured bathymetry (23/3/25)'),
]

fig, ax = plt.subplots()
for column, colour, label in curves:
    ax.plot(window['distance'], window[column], c=colour, label=label)
ax.grid()
ax.set_xlabel('Chainage (m)')
ax.legend()
ax.set_title('Groove infilling')

In [None]:
# Display the resampled comparison table as the cell output.
df_diff

In [None]:
# Row window of df_diff to export (positional slice).
start, stop = 0, 200

# NOTE: this rebinds `df`; whatever DataFrame `df` referred to before this
# cell is no longer reachable under that name.
window = df_diff.iloc[start:stop]
df = pd.DataFrame({
    'distance': window['distance'],
    'rastervalue': window['elevation_dataset1'],
    'surveyvalue': window['elevation_dataset2'],
})

In [None]:
# Write the current `df` to CSV. `index` is left at its default, so the
# row index is written as an extra leading column. The target folder is
# not created by this call.
df.to_csv(r'2025_bathy_survey_example_data/forereef_survey_example.csv')

In [None]:
# Mean of (dataset 2 − dataset 1) over the whole common grid, shown as the cell output.
df_diff['elevation_difference'].mean()

In [None]:
# Rebind `df` to the table read from this CSV. Later cells read
# 'depth_despiked' and 'RASTERVALU' from it — presumably the survey points
# with sampled raster values attached; confirm against the file.
df = pd.read_csv('250323_forereef_bathy_survey/forereef_survey_with_raster_vals.csv')


In [None]:
# Along-track chainage for `df`: straight-line step between consecutive
# fixes, accumulated from the first row.
# NOTE(review): the coordinates come from `dfsurv`, not from `df`; the
# assignment below aligns the two on their index labels, so this is only
# right if both tables share the same row order/index — confirm.
dx, dy = dfsurv['East'].diff(), dfsurv['North'].diff()
step_length = np.sqrt(dx**2 + dy**2)
df['segment_dist'] = step_length

# The first row has no predecessor, so its NaN step becomes 0.
df['segment_dist'] = df['segment_dist'].fillna(0)

# Running total of the steps = distance from the start of the track.
df['Distance'] = df['segment_dist'].cumsum()

In [None]:
# Display the current `df` as the cell output.
df

In [None]:
from scipy.optimize import minimize_scalar
import numpy as np


def objective(offset):
    """Return the absolute median elevation change for a trial vertical offset.

    The elevation change is ``depth_despiked - (RASTERVALU - offset)``,
    evaluated row by row on the notebook-level ``df``.

    Parameters
    ----------
    offset : float
        Constant subtracted from ``df['RASTERVALU']`` before differencing.

    Returns
    -------
    float
        ``abs(median(elevation change))``; 0 means the two profiles agree in
        the median.
    """
    elev_change = df['depth_despiked'] - (df['RASTERVALU'] - offset)
    # Series.median() skips NaN rows. np.median would return NaN as soon as a
    # single value is missing, making every trial offset score NaN and the
    # optimiser's answer meaningless.
    return abs(elev_change.median())


# Search for the offset that drives the median elevation change to zero.
# Sanity check: the objective is |median(depth_despiked - RASTERVALU) + offset|,
# so the optimum should equal -median(depth_despiked - RASTERVALU) whenever
# that value lies inside the bounds.
result = minimize_scalar(objective, bounds=(-10, 10), method='bounded')  # Adjust bounds as needed

print(result)
print("")
# Best offset
best_offset = result.x
print(f"Optimal offset: {best_offset:.4f}")


In [None]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from sklearn.metrics import mean_squared_error
from sklearn.metrics import root_mean_squared_error
# Inputs read from df:
# df['Distance'] = along-track distance
# df['depth_despiked'] = reference profile (e.g., survey)
# df['RASTERVALU'] = comparison profile (e.g., DEM)

# 1. Interpolate both profiles to a common grid of 500 evenly spaced distances
common_dist = np.linspace(df['Distance'].min(), df['Distance'].max(), 500)

# Reference: survey
interp_ref = interp1d(df['Distance'], df['depth_despiked'], kind='linear', fill_value='extrapolate')
ref_elev = interp_ref(common_dist)

# Comparison: DEM
interp_dem = interp1d(df['Distance'], df['RASTERVALU'], kind='linear', fill_value='extrapolate')

# 2. Try a range of x-offsets (shifts)
offsets = np.arange(-10, 10.1, 0.1)  # shift distances by ±10m in 0.1m steps
rmse_values = []

for offset in offsets:
    # NOTE: the shifted grid reaches up to 10 m past either end of the data,
    # where the DEM values are linear extrapolations rather than samples.
    shifted_dist = common_dist + offset
    dem_elev = interp_dem(shifted_dist)
    rmse = root_mean_squared_error(ref_elev, dem_elev)
    rmse_values.append(rmse)

# 3. Find best offset
# Stored under its own name: this is a horizontal (along-track) shift and must
# not overwrite `best_offset`, which other cells subtract from elevations as a
# vertical offset.
best_x_offset = offsets[np.argmin(rmse_values)]
print(f"Best x-offset: {best_x_offset:.2f} m")

In [None]:
# Row window to draw (positional slice).
start = 500
stop = 600

# Elevation change = survey depth minus the offset-corrected raster value.
# NOTE(review): `best_offset` is expected to be a vertical offset produced by
# an earlier optimisation cell — confirm which cell last assigned it before
# trusting this plot.
df['elev_change'] = df['depth_despiked'] - (df['RASTERVALU'] - best_offset)

fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(df['Distance'][start:stop], df['depth_despiked'][start:stop], label='Echo sounder bathy (23/03/2025)')
ax.plot(df['Distance'][start:stop], df['RASTERVALU'][start:stop], label='LiDAR bathy')
ax.plot(df['Distance'][start:stop], df['elev_change'][start:stop], c='k', label='elevation change')

# Single y-range and y-label for all three curves. The original set each of
# these twice; only the final values took effect, so only those are kept.
ax.set_ylim(-7, 2)
ax.set_ylabel('elevation')

# Mean elevation change over the WHOLE table, not just the plotted window.
print(df['elev_change'].mean())
ax.legend()
ax.set_title('Groove infilling')


In [None]:
from scipy.optimize import minimize_scalar


def objective(offset):
    """Return |mean(depth_despiked - (RASTERVALU - offset))| for a trial offset.

    Reads the notebook-level ``df``. A value of 0 means the survey and the
    offset-corrected raster agree on average.
    """
    shifted_raster = df['RASTERVALU'] - offset
    residual = df['depth_despiked'] - shifted_raster
    return abs(residual.mean())


# Bounded scalar search for the offset that brings the mean change to zero
# (adjust the bounds as needed).
result = minimize_scalar(objective, method='bounded', bounds=(-10, 10))

# Keep the minimiser and report it.
best_offset = result.x
print(f"Optimal offset: {best_offset:.4f}")