In [None]:
import calendar
import datetime
import importlib 
import numpy as np
import os
import sys
from datetime import timedelta
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
def proportions(data):
    """Tabulate how many valid entries fall into each status category.

    Parameters
    ----------
    data : object exposing ``status``, ``lat`` and ``lon``, each with a
        ``.values`` array (e.g. an xarray Dataset).
        # assumes the arrays are 2-D (trajectory, obs) — TODO confirm

    Returns
    -------
    (df_proportions, df_counts) : tuple of pandas.DataFrame
        Both frames have one row per index of the arrays' second axis and
        the same columns. ``df_counts`` holds raw counts, ``df_proportions``
        holds ``count / total * 100`` (NaN where the total is zero). The
        ``total_len`` column carries the number of valid entries in both.
    """
    status = data.status.values
    lat = data.lat.values
    lon = data.lon.values

    # An entry only counts when status AND position are all finite.
    usable = np.isfinite(status) & np.isfinite(lat) & np.isfinite(lon)

    # Denominator: valid entries summed along axis 0.
    totals = usable.sum(axis=0).astype(float)

    # (output column, boolean selector) pairs, listed in output column order.
    selectors = (
        ('initial',            status < 0),
        ('colloidal_water',    status == 2),
        ('marine_water',       status == 3),
        ('sewage_water',       status == 1),
        ('colloidal_sediment', status == 12),
        ('marine_sediment',    status == 13),
        ('sewage_sediment',    status == 11),
        ('out_jdf',            status == 7),
        ('out_js',             status == 8),
    )

    raw_counts = {
        name: (selected & usable).sum(axis=0) for name, selected in selectors
    }
    raw_counts['total_len'] = totals

    # Percentages: divide only where there is something to divide by, so
    # empty columns come out as NaN instead of raising a warning or inf.
    has_valid = totals > 0
    percentages = {}
    for name, tally in raw_counts.items():
        if name != 'total_len':
            share = np.full_like(totals, np.nan)
            np.divide(tally, totals, out=share, where=has_valid)
            percentages[name] = share * 100
        else:
            percentages[name] = tally  # the total is passed through untouched

    column_order = [name for name, _ in selectors] + ['total_len']

    df_proportions = pd.DataFrame(percentages, columns=column_order)
    df_counts = pd.DataFrame(raw_counts, columns=column_order)
    return df_proportions, df_counts

Temporary conversion to .nc (for Susan)

In [None]:
from joblib import Parallel, delayed

# Define a function that handles a single conversion
def convert_to_nc(month):
    """Convert one monthly Zarr simulation store to a NetCDF file.

    Parameters
    ----------
    month : str
        Month tag interpolated into the store and output names (e.g. 'JUL').

    The output file ``run_{month}_2010.nc`` is a relative path, so it is
    written to the current working directory and overwrites any existing
    file of that name (mode='w').
    """
    # Construct paths dynamically
    zarr_path = f'/home/vicentev/scratch/vicentev/Simulations_Runs/RESTART_Runs/R_2010/Simulation_2010_{month}.zarr'
    nc_path = f'run_{month}_2010.nc'

    # chunks={} opens the store with Dask-backed arrays instead of loading it
    # eagerly. The context manager closes the dataset once the write is done
    # (or if it fails), so worker processes do not keep the store open.
    with xr.open_zarr(zarr_path, chunks={}) as ds:
        print(f"Starting {month}...")
        ds.to_netcdf(nc_path, mode='w')
    print(f"Finished {month}")

# Months to convert
months = ['JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']

# Build one delayed call per month, then hand the batch to joblib;
# n_jobs=-1 spreads the work over every available CPU core.
conversion_tasks = [delayed(convert_to_nc)(m) for m in months]
Parallel(n_jobs=-1)(conversion_tasks)

# Load data by monthly runs

In [None]:
# Root folder holding every simulation output.
SIM_ROOT = '/home/vicentev/scratch/vicentev/Simulations_Runs'
# Years stored as restart runs under RESTART_Runs/R_<year>/.
RESTART_YEARS = (2008, 2009, 2010, 2011)


def monthly_files(month):
    """Return the NetCDF paths for one release month, ordered 2007 to 2011.

    Parameters
    ----------
    month : str
        Three-letter upper-case month tag used in the file names (e.g. 'JAN').

    Returns
    -------
    list of str
        The 2007 file followed by one restart file per year in RESTART_YEARS.
    """
    first_year = f'{SIM_ROOT}/{month}_2007_netCDF.nc'
    restart_years = [
        f'{SIM_ROOT}/RESTART_Runs/R_{year}/Simulation_{year}_{month}_S1.nc'
        for year in RESTART_YEARS
    ]
    return [first_year] + restart_years


# NOTE(review): the hand-written lists pointed files_APR (2009-2011) at the
# FEB files and files_JUL (2010-2011) at the JUN files, which looks like a
# copy-paste slip: those months were loaded twice and the APR/JUL runs were
# dropped. Generating every list from one pattern removes that risk —
# confirm the APR and JUL restart files exist on disk.
files_JAN = monthly_files('JAN')
files_FEB = monthly_files('FEB')
files_MAR = monthly_files('MAR')
files_APR = monthly_files('APR')
files_MAY = monthly_files('MAY')
files_JUN = monthly_files('JUN')
files_JUL = monthly_files('JUL')
files_AUG = monthly_files('AUG')
files_SEP = monthly_files('SEP')
files_OCT = monthly_files('OCT')
files_NOV = monthly_files('NOV')
files_DEC = monthly_files('DEC')

# Handle concatenation

In [None]:
# Month tag -> list of yearly files, in calendar order
all_files = dict(zip(
    ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
     'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'),
    (files_JAN, files_FEB, files_MAR, files_APR, files_MAY, files_JUN,
     files_JUL, files_AUG, files_SEP, files_OCT, files_NOV, files_DEC),
))

# Month tag -> dataset built from that month's files
monthly_datasets = {}

for month, file_list in all_files.items():
    print(f"Processing {month}")
    # combine='nested' joins the files in list order along the 'obs' dimension
    ds = xr.open_mfdataset(file_list, concat_dim='obs', combine='nested')
    monthly_datasets[month] = ds

In [None]:
month_keys = 'JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC'.split()

# The monthly simulations differ in length, so everything is cut back to
# the shortest one before stacking.
dim_name = 'obs'
obs_lengths = [monthly_datasets[m].sizes[dim_name] for m in month_keys]
min_obs = min(obs_lengths)
print(f"Trimming all datasets to {min_obs} time steps...")

trimmed_datasets = []
for m in month_keys:
    # Keep only the first min_obs entries along 'obs'
    ds_trimmed = monthly_datasets[m].isel({dim_name: slice(0, min_obs)})

    # Drop any 'obs' coordinate so the dimension is a plain positional axis
    # of length min_obs in every dataset being concatenated
    if dim_name in ds_trimmed.coords:
        ds_trimmed = ds_trimmed.drop_vars(dim_name)

    trimmed_datasets.append(ds_trimmed)

# Stack the months along 'trajectory', then give 'obs' a fresh 0..min_obs-1 index
full_year_ds = (
    xr.concat(trimmed_datasets, dim='trajectory')
    .assign_coords({dim_name: range(min_obs)})
)

print('New FULL dataset dimensions: ', full_year_ds.dims)

In [None]:
# Save the concatenated dataset; mode='w' overwrites an existing file
path_full = '/home/vicentev/scratch/vicentev/long_term_simulation.nc'
full_year_ds.to_netcdf(path=path_full, mode='w')

In [None]:
# Reload the saved full simulation from disk and tabulate its status categories
path_proportions = '/home/vicentev/scratch/vicentev/proportions_full_simulation.csv'
run_full = xr.open_dataset('/home/vicentev/scratch/vicentev/long_term_simulation.nc')
pp_percentage, pp_counts = proportions(run_full)

# NOTE(review): the CSV is named "proportions" but receives the raw-counts
# table (pp_counts), not pp_percentage — confirm which one is intended.
pp_counts.to_csv(path_or_buf=path_proportions)

In [None]:
# NOTE(review): this repeats the CSV export already done in the previous cell
# (same path, same `pp_counts`); re-running it only rewrites the same file.
# Consider deleting this cell.
path_proportions = '/home/vicentev/scratch/vicentev/proportions_full_simulation.csv'
pp_counts.to_csv(path_proportions)

In [None]:
# Centred rolling mean over 24 rows; min_periods=1 keeps the edges defined
window_size = 24
pp_smooth = pp_percentage.rolling(window_size, center=True, min_periods=1).mean()

fig, ax = plt.subplots(figsize=(15, 6))

# Time stamps of the first row of `time`
# (assumes every trajectory shares the same time axis — TODO confirm)
time_axis = run_full.time[0, :].values

# (column, legend label, line styling) in drawing order:
# water-column categories thick, sediment categories dashed, exits in greys
curves = [
    ('colloidal_water',    'Colloidal W.', {'linewidth': 2}),
    ('marine_water',       'Marine W.',    {'linewidth': 2}),
    ('sewage_water',       'Sewage W.',    {'linewidth': 2}),
    ('colloidal_sediment', 'Colloidal S.', {'linestyle': '--'}),
    ('marine_sediment',    'Marine S.',    {'linestyle': '--'}),
    ('sewage_sediment',    'Sewage S.',    {'linestyle': '--'}),
    ('out_jdf',            'Out JdF',      {'color': 'black'}),
    ('out_js',             'Out Js',       {'color': 'gray'}),
]
for column, label, styling in curves:
    ax.plot(time_axis, pp_smooth[column], label=label, **styling)

ax.set_ylabel('Percentage of Particles (%)')
ax.grid(True, alpha=0.3)
ax.legend(loc='upper right', ncol=2)

plt.show()

Load OP runs and concatenate them for long-term visualization

In [None]:
# NOTE(review): the proportion_* / p_out_* series used from here on were not
# defined in any visible cell, so this cell raised NameError on a fresh kernel.
# They are rebuilt here from `pp_percentage` (returned by `proportions(run_full)`),
# whose columns carry the same category names — confirm this is the run these
# totals are meant to describe.
proportion_initial = pp_percentage['initial']
proportion_sewage_water = pp_percentage['sewage_water']
proportion_colloidal_water = pp_percentage['colloidal_water']
proportion_marine_water = pp_percentage['marine_water']
proportion_sewage_sediment = pp_percentage['sewage_sediment']
proportion_colloidal_sediment = pp_percentage['colloidal_sediment']
proportion_marine_sediment = pp_percentage['marine_sediment']
p_out_jdf = pp_percentage['out_jdf']
p_out_js = pp_percentage['out_js']

# Totals by state (water column vs. sediment)
proportion_water = proportion_sewage_water + proportion_colloidal_water + proportion_marine_water
proportion_sediment = proportion_sewage_sediment + proportion_colloidal_sediment + proportion_marine_sediment
# Totals by particle type (water + sediment)
proportion_sewage = proportion_sewage_water + proportion_sewage_sediment
proportion_colloidal = proportion_colloidal_water + proportion_colloidal_sediment
proportion_marine = proportion_marine_water + proportion_marine_sediment

In [None]:
plt.rcParams.update({'font.size': 20})

# `data` was never defined in the visible notebook (NameError on a fresh
# kernel); the time axis is taken from `run_full`, the same way the
# smoothed-percentage plot does.
# NOTE(review): confirm run_full is the dataset these series belong to.
time_axis = run_full.time[0, :].values

fig, ax = plt.subplots(3, 1, figsize=(15, 12))

# One entry per panel: (axes, y-label, legend columns, [(series, colour, label), ...])
panels = [
    # Every individual status category
    (ax[0], 'Status Proportion (%)', 4, [
        (proportion_initial,            'tab:grey',   'Initial'),
        (proportion_sewage_water,       'k',          'Sewage-Water'),
        (proportion_sewage_sediment,    'tab:brown',  'Sewage-Sediment'),
        (proportion_colloidal_water,    'r',          'Colloidal-Water'),
        (proportion_colloidal_sediment, 'tab:orange', 'Colloidal-Sediment'),
        (proportion_marine_water,       'g',          'Marine-Water'),
        (proportion_marine_sediment,    'c',          'Marine-Sediment'),
        (p_out_jdf,                     'm',          'Out JdF'),
        (p_out_js,                      'tab:purple', 'Out Js'),
    ]),
    # Aggregated by state: water column vs. sediment
    (ax[1], 'State Proportion (%)', 2, [
        (proportion_water,    'b', 'Water'),
        (proportion_sediment, 'g', 'Sediment'),
    ]),
    # Aggregated by particle type
    (ax[2], 'Main Status Proportion (%)', 3, [
        (proportion_sewage,    'k', 'Sewage'),
        (proportion_colloidal, 'r', 'Colloidal'),
        (proportion_marine,    'g', 'Marine'),
    ]),
]

for panel, ylabel, legend_cols, series in panels:
    for values, colour, label in series:
        panel.plot(time_axis, values, '-', c=colour, label=label)
    panel.legend(ncol=legend_cols, fontsize=12)
    panel.grid(linestyle='--')
    panel.set_ylabel(ylabel)
    # Small margin around the 0-100 % range so lines at the limits stay visible
    panel.set_ylim([-5, 105])

ax[2].set_xlabel('Time')
plt.tight_layout()

In [None]:
# NOTE(review): `data3` is not defined in any visible cell — likely left over
# from an earlier session; this fails on a fresh kernel. Confirm or remove.
# Displays the last entry along the second axis of `time` for the first row.
data3.time[0,-1].values

In [None]:
# Read the run parameters from the YAML configuration file.
# NOTE(review): `load_config` is not imported in any visible cell — confirm
# where it comes from before relying on Restart & Run All.
config_yaml = ['/home/vicentev/projects/def-allen/vicentev/analysis-vicente/OP_nibi/config_files/config_file_test.yaml']
param = load_config(config_yaml)

# --- Timing definitions ---
release_cfg = param['release_params']
start_simulation = datetime.datetime(  # start date of the simulation
    release_cfg['start_sim_year'],
    release_cfg['start_sim_month'],
    release_cfg['start_sim_day'],
)
start_release = datetime.datetime(  # start date of the particle release
    release_cfg['year_start_release'],
    release_cfg['month_start_release'],
    release_cfg['day_start_release'],
)
day_release = release_cfg['days_of_release']  # how many days to release particles
release_freq = release_cfg['release_particles_freq']  # release frequency in seconds
length = release_cfg['simulation_length']  # simulation length in days
delta_t = release_cfg['delta_t']  # process resolution in seconds
n_outputs = release_cfg['number_outputs']  # number of output observations

# --- Iona constants ---
constants_cfg = param['constants']
lon_iona = constants_cfg['lon_iona']  # longitude coordinate
lat_iona = constants_cfg['lat_iona']  # latitude coordinate
depth_iona = constants_cfg['depth_iona']  # depth of release
fraction_colloidal = constants_cfg['fraction_colloidal']  # fraction of particles released in colloidal phase

# --- Particle features ---
particle_cfg = param['particles_features']
vel_sewage = particle_cfg['sinking_vel_sewage']  # sinking velocity of sewage particles
vel_marine = particle_cfg['sinking_vel_marine']  # sinking velocity of marine particles
absorption = particle_cfg['absorption']  # absorption of colloidal to marine particles
ratio_MC = particle_cfg['ratio_marine_colloidal']  # ratio between colloidal and marine particles in the water column
fraction_sediment = particle_cfg['fraction_sediment']  # fraction of colloidal to marine particles in the sediment

# --- Grid parameters ---
grid_cfg = param['grid_params']
deg2met = grid_cfg['deg2met']  # conversion from degrees to meters
latT = grid_cfg['latT']
dx_lat = grid_cfg['dx_lat']
dx_lon = grid_cfg['dx_lon']
dy_lat = grid_cfg['dy_lat']
dy_lon = grid_cfg['dy_lon']

# --- Resuspension parameters ---
resuspension_cfg = param['resuspension_params']
kappa = resuspension_cfg['kappa']
zo = resuspension_cfg['zo']
rho = resuspension_cfg['rho']
cdmin = resuspension_cfg['cdmin']
cdmax = resuspension_cfg['cdmax']
tau_crit = resuspension_cfg['tau_critical']  # critical resuspension tau value

# --- Name extension of the simulation ---
extension = param['name_extension']


In [None]:
 # 30  # number of days to release particles
# Derived run settings
data_length = max(length, 1)
duration = timedelta(days=length)
# NOTE(review): this replaces the delta_t read from the config file earlier
# in the notebook — confirm the override is intentional.
delta_t = 5  # seconds

# One particle per release interval over the release window; the window
# cannot be longer than the simulation itself.
release_days = min(length, day_release)
number_particles = int(release_days * 86400 / release_freq)
print("number_particles", number_particles)

# Spacing between outputs when n_outputs are spread over the whole simulation
output_interval = timedelta(seconds=length * 86400 / n_outputs)

In [None]:
# Offset, in seconds, between the simulation start and the start of the particle release
seconds_initial = (start_release - start_simulation).total_seconds()

In [None]:
# Quick check: day-of-month of the simulation start date
start_simulation.day

In [None]:
# Release interval times the number of particles
# (presumably the total release span in seconds — confirm units of release_freq)
release_freq * number_particles

In [None]:
# Evenly spaced values starting at the release offset, one step of release_freq
# per particle (presumably the release time of each particle, in seconds since
# the simulation start — confirm)
np.arange(seconds_initial, seconds_initial + (release_freq * number_particles), release_freq)

In [None]:
# Same spacing, but starting at 0 and covering twice the release span.
# NOTE(review): looks like a scratch comparison — confirm it is still needed.
np.arange(0,2* release_freq * number_particles, release_freq)

In [None]:
# Zarr stores of the two test simulations (named "1_day" and "2_day")
zarr_1_day = '/home/vicentev/scratch/vicentev/Simulations_Runs/PBDEs_0112007_run_1_days_testing_sim_1_day.zarr'
zarr_2_day = '/home/vicentev/scratch/vicentev/Simulations_Runs/PBDEs_0112007_run_2_days_testing_sim_2_day.zarr'

data0 = xr.open_dataset(zarr_1_day, engine='zarr')
data1 = xr.open_dataset(zarr_2_day, engine='zarr')

In [None]:
# Snapshot of particle positions at one index along the second axis
# (presumably the output step — TODO confirm)
t = 23
particle_lon = data1.lon[:, t]
particle_lat = data1.lat[:, t]

plt.scatter(particle_lon, particle_lat, c='k')  # particles in black
plt.scatter(lon_iona, lat_iona, c='r')  # Iona location in red

In [None]:
# Open the restart test run (Zarr store under RESTART_Runs) for comparison with `data1`
data_restart = xr.open_dataset('/home/vicentev/scratch/vicentev/Simulations_Runs/RESTART_Runs/PBDEs_0112007_run_2_days_testing_sim_2_day_restart_2_days_testing_restart.zarr', engine = 'zarr')

In [None]:
# Indices along the second axis to compare: t0 for the original run, t1 for the restart
t0 = t1 = 23

original_xy = (data1.lon[:, t0], data1.lat[:, t0])
restart_xy = (data_restart.lon[:, t1], data_restart.lat[:, t1])

plt.scatter(*original_xy, c='k')  # original run in black
plt.scatter(*restart_xy, c='b')  # restart run in blue

plt.scatter(lon_iona, lat_iona, c='r')  # Iona location in red

In [None]:
# Open another 1-day test run (Zarr store) for a quick visual check
data_test = xr.open_dataset('/home/vicentev/scratch/vicentev/Simulations_Runs/PBDEs_0112007_run_1_days_testing_sim_1.zarr', engine = 'zarr')

In [None]:
# Snapshot of particle positions at one index along the second axis
# (presumably the output step — TODO confirm)
t = 23
particle_lon = data_test.lon[:, t]
particle_lat = data_test.lat[:, t]

plt.scatter(particle_lon, particle_lat, c='k')  # particles in black
plt.scatter(lon_iona, lat_iona, c='r')  # Iona location in red