In [1]:
from pathlib import Path

import dask
import numpy as np
import pandas as pd
import xarray as xr
from dask.distributed import Client
from seapopym.configuration.no_transport.parameter import ForcingParameters, ForcingUnit, KernelParameters

from seapopym_optimization import wrapper

User parameters


In [None]:
nb_samples_by_batch = 1

quantity_of_interest = ["mean", "variance"]

time_start = "2005-01-01"
time_start_analysis = "2006-01-01"
time_end = "2007-01-01"

In [3]:
client = Client()
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 12,Total memory: 48.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:50630,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 12
Started: Just now,Total memory: 48.00 GiB

0,1
Comm: tcp://127.0.0.1:50641,Total threads: 3
Dashboard: http://127.0.0.1:50646/status,Memory: 12.00 GiB
Nanny: tcp://127.0.0.1:50633,
Local directory: /var/folders/z_/8j3qx1mn0299kkpjgz9g53780000gq/T/dask-scratch-space/worker-e8mojgyn,Local directory: /var/folders/z_/8j3qx1mn0299kkpjgz9g53780000gq/T/dask-scratch-space/worker-e8mojgyn

0,1
Comm: tcp://127.0.0.1:50642,Total threads: 3
Dashboard: http://127.0.0.1:50645/status,Memory: 12.00 GiB
Nanny: tcp://127.0.0.1:50635,
Local directory: /var/folders/z_/8j3qx1mn0299kkpjgz9g53780000gq/T/dask-scratch-space/worker-rl_sa0kv,Local directory: /var/folders/z_/8j3qx1mn0299kkpjgz9g53780000gq/T/dask-scratch-space/worker-rl_sa0kv

0,1
Comm: tcp://127.0.0.1:50643,Total threads: 3
Dashboard: http://127.0.0.1:50648/status,Memory: 12.00 GiB
Nanny: tcp://127.0.0.1:50637,
Local directory: /var/folders/z_/8j3qx1mn0299kkpjgz9g53780000gq/T/dask-scratch-space/worker-du8uemoi,Local directory: /var/folders/z_/8j3qx1mn0299kkpjgz9g53780000gq/T/dask-scratch-space/worker-du8uemoi

0,1
Comm: tcp://127.0.0.1:50644,Total threads: 3
Dashboard: http://127.0.0.1:50647/status,Memory: 12.00 GiB
Nanny: tcp://127.0.0.1:50639,
Local directory: /var/folders/z_/8j3qx1mn0299kkpjgz9g53780000gq/T/dask-scratch-space/worker-d12aua37,Local directory: /var/folders/z_/8j3qx1mn0299kkpjgz9g53780000gq/T/dask-scratch-space/worker-d12aua37


Samples (sobol sequence)


In [4]:
input_parameters = pd.read_parquet("./input_samples.parquet")
input_parameters

Unnamed: 0,energy_transfert,tr_0,gamma_tr,lambda_0,gamma_lambda
0,0.322816,9.083201,-0.319703,140.064326,-0.059983
1,0.003590,9.083201,-0.319703,140.064326,-0.059983
2,0.322816,41.212428,-0.319703,140.064326,-0.059983
3,0.322816,9.083201,-0.140274,140.064326,-0.059983
4,0.322816,9.083201,-0.319703,323.008823,-0.059983
...,...,...,...,...,...
1190695,0.251086,18.763531,-0.078224,88.508398,-0.361641
1190696,0.251086,34.402784,-0.371647,88.508398,-0.361641
1190697,0.251086,34.402784,-0.078224,473.318956,-0.361641
1190698,0.251086,34.402784,-0.078224,88.508398,-0.123918


Setting the output file. This file will be filled batch after batch with QoI values.


In [5]:
output_sobol_index_filepath = Path("./output_sobol_index.parquet")
if output_sobol_index_filepath.exists():
    output_sobol_index = pd.read_parquet(output_sobol_index_filepath)
else:
    output_sobol_index = pd.DataFrame(columns=quantity_of_interest)
    output_sobol_index.to_parquet(output_sobol_index_filepath)
output_sobol_index

Unnamed: 0,mean,variance


---

# Cost function definition

Prepare forcing and parameters definition


In [None]:
input_forcing = xr.open_dataset("../1_data_processing/1_3_Sensibility/all_stations.zarr", engine="zarr")
input_forcing = input_forcing.sel(time=slice(time_start, time_end))
input_forcing

In [41]:
stations_locations = pd.read_json("../1_data_processing/1_3_Sensibility/stations_locations.json")
stations_locations = stations_locations.set_index("name")
stations_locations

Unnamed: 0_level_0,latitude,longitude
name,Unnamed: 1_level_1,Unnamed: 2_level_1
BARENTS,74.62,26.969
HOT,22.752,-158.004
BATS,31.604,-64.2
PAPA,50.006,-149.996
GUAM,13.001,149.995


In [8]:
FORCING_PARAMETERS = ForcingParameters(
    temperature=ForcingUnit.from_dataset(forcing=input_forcing, name="T", resolution=0.08333, timestep=1),
    primary_production=ForcingUnit.from_dataset(input_forcing, name="npp", resolution=0.08333, timestep=1),
)

|	npp unit is milligram / day / meter ** 2, it will be converted to kilogram / day / meter ** 2.
[0m


In [9]:
def wrapper_model_generator_no_transport(fg_parameters):
    fg_parameters = wrapper.FunctionalGroupGeneratorNoTransport(np.array([fg_parameters]))
    return wrapper.model_generator_no_transport(
        fg_parameters=fg_parameters,
        forcing_parameters=FORCING_PARAMETERS,
    )

Official scoring function


In [62]:
for station in stations_locations.index:
    print(f"Station: {station}")
    print(
        f"Station location: {stations_locations.loc[station, 'latitude']}, {stations_locations.loc[station, 'longitude']}"
    )

Station: BARENTS
Station location: 74.62, 26.969
Station: HOT
Station location: 22.752, -158.004
Station: BATS
Station location: 31.604, -64.2
Station: PAPA
Station location: 50.006, -149.996
Station: GUAM
Station location: 13.001, 149.995


In [None]:
@dask.delayed
def cost_function(x: np.ndarray):
    energy_transfert, tr_0, gamma_tr, inv_lambda_0, gamma_inv_lambda = x.T
    fg_parameters = [0, 0, energy_transfert, tr_0, gamma_tr, inv_lambda_0, gamma_inv_lambda]

    model = wrapper_model_generator_no_transport(fg_parameters)

    model.run()
    biomass_forcing = model.export_biomass().sel(time=slice(time_start_analysis, time_end))

    results = {}
    for station in stations_locations.index:
        biomass_forcing_station = biomass_forcing.sel(
            latitude=stations_locations.loc[station, "latitude"], longitude=stations_locations.loc[station, "longitude"]
        )
        results.update(compute_quantity_of_interest(biomass_forcing_station, station))

    return results

In [35]:
def batch_cost_function_execution(input_parameters: pd.DataFrame) -> np.ndarray:
    resultats = [cost_function(param) for param in input_parameters.to_numpy()]
    return np.array(dask.compute(*resultats))

Test function


In [36]:
# TEST FUNCTION
# def batch_cost_function_execution(input_parameters: pd.DataFrame) -> np.ndarray:
#     return np.full((input_parameters.shape[0], len(quantity_of_interest)), 1)

---


Run as much batch you can


In [37]:
for batch_number in range(0, (len(input_parameters) // nb_samples_by_batch) + 1):
    min_batch = batch_number * nb_samples_by_batch
    max_batch = min(batch_number * nb_samples_by_batch + nb_samples_by_batch, len(input_parameters))
    print(f"Batch {batch_number} = {min_batch} : {max_batch}")

    if not (max_batch) in output_sobol_index.index:
        batch_samples = input_parameters.iloc[
            batch_number * nb_samples_by_batch : batch_number * nb_samples_by_batch + nb_samples_by_batch
        ]

        results = batch_cost_function_execution(batch_samples)

        output_sobol_index = pd.concat(
            [output_sobol_index, pd.DataFrame(results, columns=quantity_of_interest, index=batch_samples.index)]
        )
        output_sobol_index.to_parquet(output_sobol_index_filepath)
    break

Batch 0 = 0 : 1


  output_sobol_index = pd.concat(


Show output


In [38]:
output_sobol_index

Unnamed: 0,mean,variance
0,0.000744,3e-06
