In [2]:
# Cell 1: Import packages
import glob
import numpy as np
import xarray as xr
from scipy.spatial.distance import cdist
from scipy.interpolate import Rbf
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

%matplotlib inline

In [3]:
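# Cell 2: (Optional, currently disabled) Set up a Dask client with an increased memory limit
# To enable, first `import dask.distributed`, then uncomment the line below.
# client = dask.distributed.Client(memory_limit='8GB')  # Adjust the memory limit based on your system's available memory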

In [6]:
# Cell 3: Define the path to the NetCDF files and load the first trajectory file
# Processing mode selects the input sub-directory under the working directory.
processing_mode = 'delayed'  # or realtime
nc_path = './' + processing_mode + '/nc/'
file_pattern = nc_path + '*{}*.nc'.format('trajectory')

# Collect the matching files in sorted order so the selection is deterministic.
matching_files = sorted(glob.glob(file_pattern))
if not matching_files:
    # Fail with an explicit message instead of an opaque IndexError on [0].
    raise FileNotFoundError(
        'No NetCDF files match the pattern {!r}'.format(file_pattern)
    )
filename = matching_files[0]  # Replace with your preferred method for selecting the file

# Load the NetCDF file using xarray; decode_times=False leaves the time
# values as the raw numbers stored in the file.
data = xr.open_dataset(filename, engine='netcdf4', decode_times=False)

# Sort along 'time', then flatten to a DataFrame with the index as columns.
data_df = data.sortby('time').to_dataframe().reset_index()

In [None]:


# Use a smaller subset of the data to determine appropriate parameters

# Columns used as coordinates (predictors) and as the observed value.
coord_cols = ['lon', 'lat', 'depth']
value_col = 'temperature'

# Take a reproducible 10% sample, then drop rows with a missing coordinate or
# value: NaNs would otherwise propagate through the distance and covariance
# matrices into the solved weights.
subset = (
    data_df
    .sample(frac=0.1, random_state=123)
    .dropna(subset=coord_cols + [value_col])
)
X = np.column_stack((subset['lon'], subset['lat'], subset['depth']))

# Standardize coordinates and values with SEPARATE scalers. Re-fitting a single
# scaler on the temperatures would overwrite the coordinate statistics, so new
# locations could no longer be transformed consistently with X_scaled.
x_scaler = StandardScaler()
X_scaled = x_scaler.fit_transform(X)
y_scaler = StandardScaler()
Y_scaled = y_scaler.fit_transform(subset[value_col].values.reshape(-1, 1))
# Backward compatibility: `scaler` previously ended this cell fitted on the
# temperature column, so keep that name bound to the value scaler.
scaler = y_scaler

# Define the correlation scales for horizontal and vertical dimensions
# NOTE(review): distances below are computed on standardized (unitless)
# coordinates, so this length is not in km as originally commented, and it is
# applied to all three dimensions alike - confirm the intended value.
horizontal_scale = 0.1
# NOTE(review): despite its name this is only used as vertical_scale**2 added
# to the covariance diagonal (it is not a length in m here) - confirm intent.
vertical_scale = 5.0

# Calculate the covariance matrix between data points:
# Gaussian of the pairwise Euclidean distance in standardized coordinates.
d = cdist(X_scaled, X_scaled)
C = np.exp(-d**2 / (2 * horizontal_scale**2))
# Add a constant to the diagonal only (off-diagonal entries are untouched).
C[np.diag_indices_from(C)] += vertical_scale**2

# Calculate the weights for the optimal interpolation by solving C @ weights = Y_scaled
weights = np.linalg.solve(C, Y_scaled)