In [None]:
#!/usr/bin/env python3
import math
import os, sys
from collections import Counter
from datetime import datetime, date

import matplotlib.pyplot as plt
import netCDF4 as nc
import numpy as np
import numpy.ma as ma
import pandas as pd
import wget
import xarray as xr

#from scipy import interpolate
# import seawater as sw
# from scipy.interpolate import interp1d

In [None]:
def str2num_noneg(str_in):
    """Convert a value to a non-negative float, or NaN when that is not possible.

    Parameters
    ----------
    str_in : str or number
        Text (or number) to convert with ``float()``.

    Returns
    -------
    float
        The parsed number when it is >= 0. NaN when the number is negative,
        when it is NaN itself, or when the input cannot be converted
        (non-numeric text, ``None``, or another unsupported type).
    """
    nan = float('nan')
    try:
        num = float(str_in)
    except (TypeError, ValueError):
        # ValueError: non-numeric text; TypeError: None or other non-convertible objects.
        return nan

    # A NaN input fails this comparison and therefore also yields NaN.
    return num if num >= 0 else nan

In [None]:
def date_to_days(date_str, reference_date_str="01.01.19"):
    """Return the number of whole days from a reference date to ``date_str``.

    Both arguments are text in ``DD.MM.YY`` form. The result is negative when
    ``date_str`` lies before the reference. ``None`` is returned when either
    string cannot be parsed with that format.
    """
    fmt = "%d.%m.%y"

    try:
        # Parse the target first, then the reference.
        target, origin = (
            datetime.strptime(text, fmt) for text in (date_str, reference_date_str)
        )
    except ValueError:
        # At least one of the strings does not match DD.MM.YY.
        return None

    return (target - origin).days

In [None]:
# Quick check of the helper: days elapsed from the default reference (01.01.19) to 26.09.20.
date_to_days("26.09.20")

In [None]:
def mean_of_unique_dates(dates_list, data_array):
    """Average the data values that share the same date.

    Parameters
    ----------
    dates_list : sequence
        One date label per entry of ``data_array`` (same length, same order).
    data_array : array-like
        Values to average; its first axis runs parallel to ``dates_list``.

    Returns
    -------
    unique_dates : numpy.ndarray
        The sorted unique dates, as returned by ``np.unique``.
    mean_values : numpy.ndarray
        For each unique date, the mean over every data entry recorded on that date.

    Raises
    ------
    ValueError
        If ``dates_list`` and ``data_array`` differ in length.
    """
    dates = np.asarray(dates_list)
    values = np.asarray(data_array)
    if len(dates) != len(values):
        raise ValueError("dates_list and data_array must have the same length")

    # inverse[k] is the position of dates[k] inside unique_dates, so
    # `inverse == group` selects every sample that belongs to one date.
    unique_dates, inverse = np.unique(dates, return_inverse=True)
    inverse = inverse.ravel()

    mean_values = [
        np.mean(values[inverse == group]) for group in range(len(unique_dates))
    ]
    return unique_dates, np.array(mean_values)

In [None]:
# Open the REcoM1D mesh file; later cells read its Z and zbar variables.
REcoM1D_mesh = xr.open_dataset("../data/MESH/REcoM1D_mesh.nc")

In [None]:
# Extract the two vertical axes of the mesh as plain NumPy arrays:
# zbar is used further down as the bin edges, Z as the depth coordinate of the output.
mod_depths_bound = REcoM1D_mesh["zbar"].values
mod_depths = REcoM1D_mesh["Z"].values
mod_depths_bound

In [None]:
# Path of the Excel workbook holding the chlorophyll-a samples (relative to the notebook).
chl_file = '../data/CHLA/MOSAiC_Chla_forLaurent_20220905.xlsx'

In [None]:
# Read the 'CTDs' sheet of the workbook into a DataFrame.
# pandas is imported as `pd` in the import cell at the top of the notebook.
chl_df = pd.read_excel(chl_file, sheet_name='CTDs')

# Leave the preview as the last expression so the notebook renders it as a table.
chl_df.head()

In [None]:
# Pick the three columns used below: sampling date, nominal depth and Chl-a concentration.
chl_date, chl_depth, chla_data = (
    chl_df[column] for column in ('Date', 'desired_depth', 'Chl[µg L-1]')
)

In [None]:
def date_to_step(date, reference_date=np.datetime64('2019-04-15'), steps_per_day=48, step_offset=24):
    """Convert a calendar date into a time-step index counted from a reference date.

    Parameters
    ----------
    date : anything accepted by ``np.datetime64``
        Date to convert.
    reference_date : anything accepted by ``np.datetime64``, optional
        Date that corresponds to day 0. Defaults to 2019-04-15.
    steps_per_day : int, optional
        Number of time steps in one day. Defaults to 48.
    step_offset : int, optional
        Steps added on top of the whole-day count. Defaults to 24, which is
        half of the default ``steps_per_day``.

    Returns
    -------
    int
        ``whole_days * steps_per_day + step_offset``; negative day counts are
        possible for dates before the reference.
    """
    elapsed = np.datetime64(date) - np.datetime64(reference_date)

    # Reduce to whole days; any time-of-day part of the inputs is dropped here.
    days_difference = elapsed.astype('timedelta64[D]').item().days

    return days_difference * steps_per_day + step_offset

In [None]:
# Sorted set of distinct sampling dates; the shape shows how many there are.
unique_date_ch = np.unique(chl_date)
unique_date_ch.shape

In [None]:
# List every distinct sampling date, one per line.
for sampling_date in unique_date_ch:
    print(sampling_date)

In [None]:
# One time-step index per sample, stored in an array shaped like the Chl-a column.
chl_steps = np.zeros_like(chla_data)
for position, sample_date in enumerate(chl_date):
    chl_steps[position] = date_to_step(sample_date)

In [None]:
# Depth/time scatter of the raw Chl-a samples, coloured on a linear 0-1 scale.
plt.rcParams["figure.figsize"] = (10,5)
fig, ax = plt.subplots()
# Depth is negated so that deeper samples are drawn lower in the figure.
points = ax.scatter(chl_steps, -chl_depth, s=80, c=chla_data, marker='o', edgecolor='none')

ax.set_ylim((-500, 10))
points.set_clim(0, 1)
cbar = fig.colorbar(points, ax=ax, orientation='horizontal', pad=0.1)
cbar.ax.set_xlabel(r'$µg L^{-1}$')
ax.set_title('MOSAiC_Chla_forLaurent_20220905')
# The x axis holds the step indices produced by date_to_step.
ax.set_xlabel('Time step')
ax.set_ylabel('Depth (m)')
plt.show()

In [None]:
# Depth/time scatter of Chl-a in the upper 100 m, coloured by log10 of the concentration.
plt.rcParams["figure.figsize"] = (10,5)
fig, ax = plt.subplots()
# Depth is negated so that deeper samples are drawn lower in the figure.
points = ax.scatter(chl_steps, -chl_depth, s=80, c=np.log10(chla_data), marker='o', edgecolor='none')

ax.set_ylim((-100, 0))
points.set_clim(-2.5, 0.5)
cbar = fig.colorbar(points, ax=ax, orientation='horizontal', pad=0.1)
# The colours are log10 values, so the colourbar is labelled as such
# rather than as a plain concentration.
cbar.ax.set_xlabel(r'$\log_{10}$(Chl-a [µg L$^{-1}$])')
ax.set_title('MOSAiC_Chla_forLaurent_20220905')
# The x axis holds the step indices produced by date_to_step.
ax.set_xlabel('Time step')
ax.set_ylabel('Depth (m)')
plt.show()

In [None]:
# Uncertainty attached to each sample: 30 % of the measured value.
chla_rmsd = chla_data* 0.3
# (uncomment to inspect) chla_rmsd

In [None]:
# Log-space counterpart of every sample (value, error = 0.3 * value):
#   transformed value = ln(value) - 0.5 * ln(1 + error^2 / value^2)
#   transformed rmsd  = sqrt(ln(1 + error^2 / value^2))
chla_data_logtrans = np.zeros_like(chla_data)
chla_rmsd_logtrans = np.zeros_like(chla_data)
for i in range(len(chla_data)):
    # The logarithm is undefined at zero, so exact zeros are replaced by 1.0e-10.
    chla_val = 1.0e-10 if chla_data[i] == 0 else chla_data[i]
    chla_err = chla_val * 0.3

    log_spread = math.log(1 + (chla_err**2 / chla_val**2))
    chla_data_logtrans[i] = math.log(chla_val) - 0.5 * log_spread
    chla_rmsd_logtrans[i] = math.sqrt(log_spread)

In [None]:
# Depth/time scatter coloured by the log-transformed Chl-a values.
plt.rcParams["figure.figsize"] = (10,5)
fig, ax = plt.subplots()
# Depth is negated so that deeper samples are drawn lower in the figure.
points = ax.scatter(chl_steps, -chl_depth, s=80, c=chla_data_logtrans, marker='o', edgecolor='none')

ax.set_ylim((-500, 10))
points.set_clim(-5, 0)
cbar = fig.colorbar(points, ax=ax, orientation='horizontal', pad=0.1)
# The colours are natural-log values, so the colourbar is labelled as such
# rather than as a plain concentration.
cbar.ax.set_xlabel(r'ln(Chl-a [µg L$^{-1}$])')
ax.set_title('MOSAiC_Chla_forLaurent_20220905: Logtransformed value')
# The x axis holds the step indices produced by date_to_step.
ax.set_xlabel('Time step')
ax.set_ylabel('Depth (m)')
plt.show()

In [None]:
# Depth/time scatter coloured by the log-space RMSD of each sample.
plt.rcParams["figure.figsize"] = (10,5)
fig, ax = plt.subplots()
# Depth is negated so that deeper samples are drawn lower in the figure.
points = ax.scatter(chl_steps, -chl_depth, s=80, c=chla_rmsd_logtrans, marker='o', edgecolor='none')

ax.set_ylim((-500, 10))
points.set_clim(0.25, 0.3)
cbar = fig.colorbar(points, ax=ax, orientation='horizontal', pad=0.1)
# The plotted quantity is the spread in natural-log space, which carries no concentration unit.
cbar.ax.set_xlabel('RMSD of ln(Chl-a) (dimensionless)')
# This figure shows the RMSD, not the transformed value, so the title says so.
ax.set_title('MOSAiC_Chla_forLaurent_20220905: Logtransformed RMSD')
# The x axis holds the step indices produced by date_to_step.
ax.set_xlabel('Time step')
ax.set_ylabel('Depth (m)')
plt.show()

In [None]:
# Group the sample positions by time step.
# Dict keys keep first-seen order, so they double as the list of distinct steps.
unique_indices_dict = {}
for index, item in enumerate(chl_steps):
    unique_indices_dict.setdefault(item, []).append(index)

unique_chl_steps = np.array(list(unique_indices_dict))

print(unique_chl_steps)

In [None]:
# Spot check: positions of all samples that fall on the third distinct time step.
unique_indices_dict[unique_chl_steps[2]]

In [None]:
# Output grid: one row per distinct time step, 20 depth columns,
# pre-filled with -99.99 so that cells never assigned a mean keep that value.
chl_data_2d = np.full((len(unique_chl_steps), 20), -99.99)
# Flip the sign of the mesh bounds before they are compared with sample depths.
depth_bin = -mod_depths_bound

In [None]:
# Fill the grid: for each time step, average the samples falling inside each depth bin.
for row, step in enumerate(unique_chl_steps):
    sample_positions = unique_indices_dict[step]
    depths = np.array(chl_depth[sample_positions])
    data = np.array(chla_data[sample_positions])

    for col in range(20):
        # Bin is closed at its first bound and open at its second: min <= depth < max.
        in_bin = (depth_bin[col] <= depths) & (depths < depth_bin[col + 1])

        # Bins without any sample keep the value already stored in the grid.
        if in_bin.any():
            chl_data_2d[row, col] = data[in_bin].mean()

In [None]:
# Shift every distinct step by 24 and store the result as integers.
# NOTE(review): date_to_step already adds 24 to each step, so the total shift is 48;
# confirm that this is intended.
obs_steps = (unique_chl_steps + 24).astype(int)
obs_steps

In [None]:
# Permutation that puts the observation steps in ascending order.
sorted_indices = np.argsort(obs_steps)
sorted_indices

In [None]:
# Observation steps rearranged into ascending order.
sorted_obs_steps = obs_steps[sorted_indices]

In [None]:
# Gap, in steps, between consecutive observation times.
print(np.diff(sorted_obs_steps))

In [None]:
# Reorder the grid rows with the same permutation so they stay aligned with sorted_obs_steps.
sorted_chl_data_2d = chl_data_2d[sorted_indices]
print(sorted_chl_data_2d.shape)

In [None]:
# Cast the unsorted grid to float64.
# NOTE(review): sorted_chl_data_2d was derived before this cast and is what gets exported,
# so this statement does not affect the written file.
chl_data_2d = chl_data_2d.astype(np.float64)
# (uncomment to inspect) chl_data_2d

In [None]:
# Wrap the sorted grid in a labelled array: rows are observation steps,
# columns are the first 20 mesh depths.
chl_da = xr.DataArray(
    data=sorted_chl_data_2d,
    coords={"step": sorted_obs_steps, "depth": mod_depths[ :20].astype(np.float64)},
    dims=("step", "depth"),
    attrs={"units": "µg L^-1"},
)

# Single-variable dataset; displayed as the cell output.
dataset = xr.Dataset(data_vars={"Chl_a": chl_da})
dataset

In [None]:
# Global metadata stored alongside the data.
dataset.attrs.update(
    {
        "description": "MOSAiC_Chla_forLaurent_20220905 with dimensions step and depth",
        "history": "MOSAiC_Chla_forLaurent_20220905",
    }
)
dataset

In [None]:
# Write the dataset to disk as a NetCDF-4 file and report the path.
output_file = "../data/MOSAiC_Chla_forLaurent_20220905.nc"
dataset.to_netcdf(output_file, format="NETCDF4")
print(f"Chl-a data has been written to {output_file}.")

In [None]:
# Re-open the file just written (same path as output_file) to check its contents.
MOSAiC_Chla = xr.open_dataset("../data/MOSAiC_Chla_forLaurent_20220905.nc")
MOSAiC_Chla

In [None]:
# Step coordinate as read back from the file.
MOSAiC_Chla.step.values

In [None]:
# Release the file handle opened for the read-back check.
MOSAiC_Chla.close()